Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2012 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  */
  23.  
  24. /** @file brw_vec4_vp.cpp
  25.  *
  26.  * A translator from Mesa IR to the i965 driver's Vec4 IR, used to implement
  27.  * ARB_vertex_program and fixed-function vertex processing.
  28.  */
  29.  
  30. #include "brw_context.h"
  31. #include "brw_vec4.h"
  32. extern "C" {
  33. #include "program/prog_parameter.h"
  34. #include "program/prog_print.h"
  35. }
  36. using namespace brw;
  37.  
  38. void
  39. vec4_visitor::emit_vp_sop(uint32_t conditional_mod,
  40.                           dst_reg dst, src_reg src0, src_reg src1,
  41.                           src_reg one)
  42. {
  43.    vec4_instruction *inst;
  44.  
  45.    inst = emit(BRW_OPCODE_CMP, dst_null_d(), src0, src1);
  46.    inst->conditional_mod = conditional_mod;
  47.  
  48.    inst = emit(BRW_OPCODE_SEL, dst, one, src_reg(0.0f));
  49.    inst->predicate = BRW_PREDICATE_NORMAL;
  50. }
  51.  
  52. /**
  53.  * Reswizzle a given source register.
  54.  * \sa brw_swizzle().
  55.  */
  56. static inline src_reg
  57. reswizzle(src_reg orig, unsigned x, unsigned y, unsigned z, unsigned w)
  58. {
  59.    src_reg t = orig;
  60.    t.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(orig.swizzle, x),
  61.                             BRW_GET_SWZ(orig.swizzle, y),
  62.                             BRW_GET_SWZ(orig.swizzle, z),
  63.                             BRW_GET_SWZ(orig.swizzle, w));
  64.    return t;
  65. }
  66.  
  /**
   * Translate every instruction of the Mesa IR vertex program in `prog` into
   * vec4 IR.
   *
   * For each instruction the result is computed into a freshly allocated
   * temporary, using the three sources returned by get_vp_src_reg(), and then
   * (for everything except OPCODE_END) copied with a MOV into the register
   * returned by get_vp_dst_reg().  After the last instruction, if
   * need_all_constants_in_pull_buffer was set while fetching sources, every
   * component of the program's parameter list is registered in
   * prog_data->pull_param.
   */
  67. void
  68. vec4_vs_visitor::emit_program_code()
  69. {
  70.    this->need_all_constants_in_pull_buffer = false;
  71.  
  72.    setup_vp_regs();
  73.  
  74.    /* Keep a reg with 1.0 around, for reuse by emit_vp_sop so that it can just
  75.     * be:
  76.     *
  77.     * sel.f0 dst 1.0 0.0
  78.     *
  79.     * instead of
  80.     *
  81.     * mov    dst 0.0
  82.     * mov.f0 dst 1.0
  83.     */
  84.    src_reg one = src_reg(this, glsl_type::float_type);
  85.    emit(MOV(dst_reg(one), src_reg(1.0f)));
  86.  
  87.    for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) {
  88.       const struct prog_instruction *vpi = &prog->Instructions[insn];
  89.       base_ir = vpi;
  90.  
  91.       dst_reg dst;
  92.       src_reg src[3];
  93.  
  94.       /* We always emit into a temporary destination register to avoid
  95.        * aliasing issues.
  96.        */
  97.       dst = dst_reg(this, glsl_type::vec4_type);
  98.  
         /* All three source slots are fetched regardless of how many the
          * opcode actually reads.
          */
  99.       for (int i = 0; i < 3; i++)
  100.          src[i] = get_vp_src_reg(vpi->SrcReg[i]);
  101.  
  102.       switch (vpi->Opcode) {
  103.       case OPCODE_ABS:
  104.          src[0].abs = true;
  105.          src[0].negate = false;
  106.          emit(MOV(dst, src[0]));
  107.          break;
  108.  
  109.       case OPCODE_ADD:
  110.          emit(ADD(dst, src[0], src[1]));
  111.          break;
  112.  
  113.       case OPCODE_ARL:
  114.          if (brw->gen >= 6) {
  115.             dst.writemask = WRITEMASK_X;
  116.             dst_reg dst_f = dst;
  117.             dst_f.type = BRW_REGISTER_TYPE_F;
  118.  
  119.             emit(RNDD(dst_f, src[0]));
  120.             emit(MOV(dst, src_reg(dst_f)));
  121.          } else {
  122.             emit(RNDD(dst, src[0]));
  123.          }
  124.          break;
  125.  
  126.       case OPCODE_DP3:
  127.          emit(DP3(dst, src[0], src[1]));
  128.          break;
  129.       case OPCODE_DP4:
  130.          emit(DP4(dst, src[0], src[1]));
  131.          break;
  132.       case OPCODE_DPH:
  133.          emit(DPH(dst, src[0], src[1]));
  134.          break;
  135.  
  136.       case OPCODE_DST: {
            /* Each requested channel is written separately:
             * x = 1.0, y = src0 * src1, z = src0, w = src1.
             */
  137.          dst_reg t = dst;
  138.          if (vpi->DstReg.WriteMask & WRITEMASK_X) {
  139.             t.writemask = WRITEMASK_X;
  140.             emit(MOV(t, src_reg(1.0f)));
  141.          }
  142.          if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
  143.             t.writemask = WRITEMASK_Y;
  144.             emit(MUL(t, src[0], src[1]));
  145.          }
  146.          if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
  147.             t.writemask = WRITEMASK_Z;
  148.             emit(MOV(t, src[0]));
  149.          }
  150.          if (vpi->DstReg.WriteMask & WRITEMASK_W) {
  151.             t.writemask = WRITEMASK_W;
  152.             emit(MOV(t, src[1]));
  153.          }
  154.          break;
  155.       }
  156.  
  157.       case OPCODE_EXP: {
  158.          dst_reg result = dst;
  159.          if (vpi->DstReg.WriteMask & WRITEMASK_X) {
  160.             /* tmp_d = floor(src[0].x) */
  161.             src_reg tmp_d = src_reg(this, glsl_type::ivec4_type);
  162.             assert(tmp_d.type == BRW_REGISTER_TYPE_D);
  163.             emit(RNDD(dst_reg(tmp_d), reswizzle(src[0], 0, 0, 0, 0)));
  164.  
  165.             /* result[0] = 2.0 ^ tmp */
  166.             /* Adjust exponent for floating point: exp += 127 */
  167.             dst_reg tmp_d_x(GRF, tmp_d.reg, glsl_type::int_type, WRITEMASK_X);
  168.             emit(ADD(tmp_d_x, tmp_d, src_reg(127)));
  169.  
  170.             /* Install exponent and sign.  Excess drops off the edge: */
  171.             dst_reg res_d_x(GRF, result.reg, glsl_type::int_type, WRITEMASK_X);
  172.             emit(BRW_OPCODE_SHL, res_d_x, tmp_d, src_reg(23));
  173.          }
  174.          if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
  175.             result.writemask = WRITEMASK_Y;
  176.             emit(FRC(result, src[0]));
  177.          }
  178.          if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
  179.             result.writemask = WRITEMASK_Z;
  180.             emit_math(SHADER_OPCODE_EXP2, result, src[0]);
  181.          }
  182.          if (vpi->DstReg.WriteMask & WRITEMASK_W) {
  183.             result.writemask = WRITEMASK_W;
  184.             emit(MOV(result, src_reg(1.0f)));
  185.          }
  186.          break;
  187.       }
  188.  
  189.       case OPCODE_EX2:
  190.          emit_math(SHADER_OPCODE_EXP2, dst, src[0]);
  191.          break;
  192.  
  193.       case OPCODE_FLR:
  194.          emit(RNDD(dst, src[0]));
  195.          break;
  196.  
  197.       case OPCODE_FRC:
  198.          emit(FRC(dst, src[0]));
  199.          break;
  200.  
  201.       case OPCODE_LG2:
  202.          emit_math(SHADER_OPCODE_LOG2, dst, src[0]);
  203.          break;
  204.  
  205.       case OPCODE_LIT: {
  206.          dst_reg result = dst;
  207.          /* From the ARB_vertex_program spec:
  208.           *
  209.           *      tmp = VectorLoad(op0);
  210.           *      if (tmp.x < 0) tmp.x = 0;
  211.           *      if (tmp.y < 0) tmp.y = 0;
  212.           *      if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
  213.           *      else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
  214.           *      result.x = 1.0;
  215.           *      result.y = tmp.x;
  216.           *      result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
  217.           *      result.w = 1.0;
  218.           *
  219.           * Note that we don't do the clamping to +/- 128.  We didn't in
  220.           * brw_vs_emit.c either.
  221.           */
  222.          if (vpi->DstReg.WriteMask & WRITEMASK_XW) {
  223.             result.writemask = WRITEMASK_XW;
  224.             emit(MOV(result, src_reg(1.0f)));
  225.          }
  226.          if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
               /* y and z default to 0.0 and are only overwritten inside the
                * IF block below, i.e. when tmp.x > 0.
                */
  227.             result.writemask = WRITEMASK_YZ;
  228.             emit(MOV(result, src_reg(0.0f)));
  229.  
  230.             src_reg tmp_x = reswizzle(src[0], 0, 0, 0, 0);
  231.  
  232.             emit(CMP(dst_null_d(), tmp_x, src_reg(0.0f), BRW_CONDITIONAL_G));
  233.             emit(IF(BRW_PREDICATE_NORMAL));
  234.  
  235.             if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
  236.                result.writemask = WRITEMASK_Y;
  237.                emit(MOV(result, tmp_x));
  238.             }
  239.  
  240.             if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
  241.                /* if (tmp.y < 0) tmp.y = 0; */
  242.                src_reg tmp_y = reswizzle(src[0], 1, 1, 1, 1);
  243.                result.writemask = WRITEMASK_Z;
  244.                emit_minmax(BRW_CONDITIONAL_G, result, tmp_y, src_reg(0.0f));
  245.  
  246.                src_reg clamped_y(result);
  247.                clamped_y.swizzle = BRW_SWIZZLE_ZZZZ;
  248.  
  249.                src_reg tmp_w = reswizzle(src[0], 3, 3, 3, 3);
  250.  
  251.                emit_math(SHADER_OPCODE_POW, result, clamped_y, tmp_w);
  252.             }
  253.             emit(BRW_OPCODE_ENDIF);
  254.          }
  255.          break;
  256.       }
  257.  
  258.       case OPCODE_LOG: {
  259.          dst_reg result = dst;
  260.          result.type = BRW_REGISTER_TYPE_UD;
  261.          src_reg result_src = src_reg(result);
  262.  
  263.          src_reg arg0_ud = reswizzle(src[0], 0, 0, 0, 0);
  264.          arg0_ud.type = BRW_REGISTER_TYPE_UD;
  265.  
  266.          /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mant
  267.           * according to spec:
  268.           *
  269.           * These almost look like they could be joined up, but not really
  270.           * practical:
  271.           *
  272.           * result[0].f = ((x.i & ((1<<31)-1)) >> 23) - 127
  273.           * result[1].i = (x.i & ((1<<23)-1)) | (127<<23)
  274.           */
            /* x and y are also computed when only z is requested, because the
             * z computation below reads both of them.
             */
  275.          if (vpi->DstReg.WriteMask & WRITEMASK_XZ) {
  276.             result.writemask = WRITEMASK_X;
  277.             emit(AND(result, arg0_ud, src_reg((1u << 31) - 1)));
  278.             emit(BRW_OPCODE_SHR, result, result_src, src_reg(23u));
  279.             src_reg result_d(result_src);
  280.             result_d.type = BRW_REGISTER_TYPE_D; /* does it matter? */
  281.             result.type = BRW_REGISTER_TYPE_F;
  282.             emit(ADD(result, result_d, src_reg(-127)));
  283.          }
  284.  
  285.          if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
  286.             result.writemask = WRITEMASK_Y;
  287.             result.type = BRW_REGISTER_TYPE_UD;
  288.             emit(AND(result, arg0_ud, src_reg((1u << 23) - 1)));
  289.             emit(OR(result, result_src, src_reg(127u << 23)));
  290.          }
  291.  
  292.          if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
  293.             /* result[2] = result[0] + LOG2(result[1]); */
  294.  
  295.             /* Why bother?  The above is just a hint how to do this with a
  296.              * taylor series.  Maybe we *should* use a taylor series as by
  297.              * the time all the above has been done it's almost certainly
  298.              * quicker than calling the mathbox, even with low precision.
  299.              *
  300.              * Options are:
  301.              *    - result[0] + mathbox.LOG2(result[1])
  302.              *    - mathbox.LOG2(arg0.x)
  303.              *    - result[0] + inline_taylor_approx(result[1])
  304.              */
  305.             result.type = BRW_REGISTER_TYPE_F;
  306.             result.writemask = WRITEMASK_Z;
  307.             src_reg result_x(result), result_y(result), result_z(result);
  308.             result_x.swizzle = BRW_SWIZZLE_XXXX;
  309.             result_y.swizzle = BRW_SWIZZLE_YYYY;
  310.             result_z.swizzle = BRW_SWIZZLE_ZZZZ;
  311.             emit_math(SHADER_OPCODE_LOG2, result, result_y);
  312.             emit(ADD(result, result_z, result_x));
  313.          }
  314.  
  315.          if (vpi->DstReg.WriteMask & WRITEMASK_W) {
  316.             result.type = BRW_REGISTER_TYPE_F;
  317.             result.writemask = WRITEMASK_W;
  318.             emit(MOV(result, src_reg(1.0f)));
  319.          }
  320.          break;
  321.       }
  322.  
  323.       case OPCODE_MAD: {
            /* Emitted as a MUL into a scratch register followed by an ADD. */
  324.          src_reg temp = src_reg(this, glsl_type::vec4_type);
  325.          emit(MUL(dst_reg(temp), src[0], src[1]));
  326.          emit(ADD(dst, temp, src[2]));
  327.          break;
  328.       }
  329.  
  330.       case OPCODE_MAX:
  331.          emit_minmax(BRW_CONDITIONAL_G, dst, src[0], src[1]);
  332.          break;
  333.  
  334.       case OPCODE_MIN:
  335.          emit_minmax(BRW_CONDITIONAL_L, dst, src[0], src[1]);
  336.          break;
  337.  
  338.       case OPCODE_MOV:
  339.          emit(MOV(dst, src[0]));
  340.          break;
  341.  
  342.       case OPCODE_MUL:
  343.          emit(MUL(dst, src[0], src[1]));
  344.          break;
  345.  
  346.       case OPCODE_POW:
  347.          emit_math(SHADER_OPCODE_POW, dst, src[0], src[1]);
  348.          break;
  349.  
  350.       case OPCODE_RCP:
  351.          emit_math(SHADER_OPCODE_RCP, dst, src[0]);
  352.          break;
  353.  
  354.       case OPCODE_RSQ:
  355.          emit_math(SHADER_OPCODE_RSQ, dst, src[0]);
  356.          break;
  357.  
  358.       case OPCODE_SGE:
  359.          emit_vp_sop(BRW_CONDITIONAL_GE, dst, src[0], src[1], one);
  360.          break;
  361.  
  362.       case OPCODE_SLT:
  363.          emit_vp_sop(BRW_CONDITIONAL_L, dst, src[0], src[1], one);
  364.          break;
  365.  
  366.       case OPCODE_SUB: {
            /* Subtraction is an ADD with src[1]'s negate flag toggled. */
  367.          src_reg neg_src1 = src[1];
  368.          neg_src1.negate = !src[1].negate;
  369.          emit(ADD(dst, src[0], neg_src1));
  370.          break;
  371.       }
  372.  
  373.       case OPCODE_SWZ:
  374.          /* Note that SWZ's extended swizzles are handled in the general
  375.           * get_vp_src_reg() code.
  376.           */
  377.          emit(MOV(dst, src[0]));
  378.          break;
  379.  
  380.       case OPCODE_XPD: {
            /* Cross product: t1 = src0.yzx * src1.zxy, t2 = src0.zxy * src1.yzx,
             * dst = t1 - t2 (the subtraction is an ADD of the negated t2).
             */
  381.          src_reg t1 = src_reg(this, glsl_type::vec4_type);
  382.          src_reg t2 = src_reg(this, glsl_type::vec4_type);
  383.  
  384.          emit(MUL(dst_reg(t1),
  385.                   reswizzle(src[0], 1, 2, 0, 3),
  386.                   reswizzle(src[1], 2, 0, 1, 3)));
  387.          emit(MUL(dst_reg(t2),
  388.                   reswizzle(src[0], 2, 0, 1, 3),
  389.                   reswizzle(src[1], 1, 2, 0, 3)));
  390.          t2.negate = true;
  391.          emit(ADD(dst, t1, t2));
  392.          break;
  393.       }
  394.  
  395.       case OPCODE_END:
  396.          break;
  397.  
  398.       default:
            /* Only reports the problem; control still reaches the copy-back
             * below for the unsupported instruction.
             */
  399.          _mesa_problem(ctx, "Unsupported opcode %s in vertex program\n",
  400.                        _mesa_opcode_string(vpi->Opcode));
  401.       }
  402.  
  403.       /* Copy the temporary back into the actual destination register. */
  404.       if (vpi->Opcode != OPCODE_END) {
  405.          emit(MOV(get_vp_dst_reg(vpi->DstReg), src_reg(dst)));
  406.       }
  407.    }
  408.  
  409.    /* If we used relative addressing, we need to upload all constants as
  410.     * pull constants.  Do that now.
  411.     */
  412.    if (this->need_all_constants_in_pull_buffer) {
  413.       const struct gl_program_parameter_list *params =
  414.          vs_compile->vp->program.Base.Parameters;
  415.       unsigned i;
         /* One pull_param entry per float component, four per parameter. */
  416.       for (i = 0; i < params->NumParameters * 4; i++) {
  417.          prog_data->pull_param[i] =
  418.             &params->ParameterValues[i / 4][i % 4].f;
  419.       }
  420.       prog_data->nr_pull_params = i;
  421.    }
  422. }
  423.  
  424. void
  425. vec4_vs_visitor::setup_vp_regs()
  426. {
  427.    /* PROGRAM_TEMPORARY */
  428.    int num_temp = prog->NumTemporaries;
  429.    vp_temp_regs = rzalloc_array(mem_ctx, src_reg, num_temp);
  430.    for (int i = 0; i < num_temp; i++)
  431.       vp_temp_regs[i] = src_reg(this, glsl_type::vec4_type);
  432.  
  433.    /* PROGRAM_STATE_VAR etc. */
  434.    struct gl_program_parameter_list *plist =
  435.       vs_compile->vp->program.Base.Parameters;
  436.    for (unsigned p = 0; p < plist->NumParameters; p++) {
  437.       unsigned components = plist->Parameters[p].Size;
  438.  
  439.       /* Parameters should be either vec4 uniforms or single component
  440.        * constants; matrices and other larger types should have been broken
  441.        * down earlier.
  442.        */
  443.       assert(components <= 4);
  444.  
  445.       this->uniform_size[this->uniforms] = 1; /* 1 vec4 */
  446.       this->uniform_vector_size[this->uniforms] = components;
  447.       for (unsigned i = 0; i < 4; i++) {
  448.          prog_data->param[this->uniforms * 4 + i] = i >= components
  449.             ? 0 : &plist->ParameterValues[p][i].f;
  450.       }
  451.       this->uniforms++; /* counted in vec4 units */
  452.    }
  453.  
  454.    /* PROGRAM_OUTPUT */
  455.    for (int slot = 0; slot < prog_data->vue_map.num_slots; slot++) {
  456.       int varying = prog_data->vue_map.slot_to_varying[slot];
  457.       if (varying == VARYING_SLOT_PSIZ)
  458.          output_reg[varying] = dst_reg(this, glsl_type::float_type);
  459.       else
  460.          output_reg[varying] = dst_reg(this, glsl_type::vec4_type);
  461.       assert(output_reg[varying].type == BRW_REGISTER_TYPE_F);
  462.    }
  463.  
  464.    /* PROGRAM_ADDRESS */
  465.    this->vp_addr_reg = src_reg(this, glsl_type::int_type);
  466.    assert(this->vp_addr_reg.type == BRW_REGISTER_TYPE_D);
  467. }
  468.  
  469. dst_reg
  470. vec4_vs_visitor::get_vp_dst_reg(const prog_dst_register &dst)
  471. {
  472.    dst_reg result;
  473.  
  474.    assert(!dst.RelAddr);
  475.  
  476.    switch (dst.File) {
  477.    case PROGRAM_TEMPORARY:
  478.       result = dst_reg(vp_temp_regs[dst.Index]);
  479.       break;
  480.  
  481.    case PROGRAM_OUTPUT:
  482.       result = output_reg[dst.Index];
  483.       break;
  484.  
  485.    case PROGRAM_ADDRESS: {
  486.       assert(dst.Index == 0);
  487.       result = dst_reg(this->vp_addr_reg);
  488.       break;
  489.    }
  490.  
  491.    case PROGRAM_UNDEFINED:
  492.       return dst_null_f();
  493.  
  494.    default:
  495.       assert("vec4_vp: bad destination register file");
  496.       return dst_reg(this, glsl_type::vec4_type);
  497.    }
  498.  
  499.    result.writemask = dst.WriteMask;
  500.    return result;
  501. }
  502.  
  503. src_reg
  504. vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src)
  505. {
  506.    struct gl_program_parameter_list *plist =
  507.       vs_compile->vp->program.Base.Parameters;
  508.  
  509.    src_reg result;
  510.  
  511.    assert(!src.Abs);
  512.  
  513.    switch (src.File) {
  514.    case PROGRAM_UNDEFINED:
  515.       return src_reg(brw_null_reg());
  516.  
  517.    case PROGRAM_TEMPORARY:
  518.       result = vp_temp_regs[src.Index];
  519.       break;
  520.  
  521.    case PROGRAM_INPUT:
  522.       result = src_reg(ATTR, src.Index, glsl_type::vec4_type);
  523.       result.type = BRW_REGISTER_TYPE_F;
  524.       break;
  525.  
  526.    case PROGRAM_ADDRESS: {
  527.       assert(src.Index == 0);
  528.       result = this->vp_addr_reg;
  529.       break;
  530.    }
  531.  
  532.    case PROGRAM_STATE_VAR:
  533.    case PROGRAM_CONSTANT:
  534.       /* From the ARB_vertex_program specification:
  535.        * "Relative addressing can only be used for accessing program
  536.        *  parameter arrays."
  537.        */
  538.       if (src.RelAddr) {
  539.          /* Since we have no idea what the base of the array is, we need to
  540.           * upload ALL constants as push constants.
  541.           */
  542.          this->need_all_constants_in_pull_buffer = true;
  543.  
  544.          /* Add the small constant index to the address register */
  545.          src_reg reladdr = src_reg(this, glsl_type::int_type);
  546.          dst_reg dst_reladdr = dst_reg(reladdr);
  547.          dst_reladdr.writemask = WRITEMASK_X;
  548.          emit(ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index)));
  549.  
  550.          if (brw->gen < 6)
  551.             emit(MUL(dst_reladdr, reladdr, src_reg(16)));
  552.  
  553.       #if 0
  554.          assert(src.Index < this->uniforms);
  555.          result = src_reg(dst_reg(UNIFORM, 0));
  556.          result.type = BRW_REGISTER_TYPE_F;
  557.          result.reladdr = new(mem_ctx) src_reg();
  558.          memcpy(result.reladdr, &reladdr, sizeof(src_reg));
  559.       #endif
  560.  
  561.          result = src_reg(this, glsl_type::vec4_type);
  562.          src_reg surf_index = src_reg(unsigned(SURF_INDEX_VERT_CONST_BUFFER));
  563.          vec4_instruction *load =
  564.             new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
  565.                                           dst_reg(result), surf_index, reladdr);
  566.          load->base_mrf = 14;
  567.          load->mlen = 1;
  568.          emit(load);
  569.          break;
  570.       }
  571.  
  572.       /* We actually want to look at the type in the Parameters list for this,
  573.        * because this lets us upload constant builtin uniforms as actual
  574.        * constants.
  575.        */
  576.       switch (plist->Parameters[src.Index].Type) {
  577.       case PROGRAM_CONSTANT:
  578.          result = src_reg(this, glsl_type::vec4_type);
  579.          for (int i = 0; i < 4; i++) {
  580.             dst_reg t = dst_reg(result);
  581.             t.writemask = 1 << i;
  582.             emit(MOV(t, src_reg(plist->ParameterValues[src.Index][i].f)));
  583.          }
  584.          break;
  585.  
  586.       case PROGRAM_STATE_VAR:
  587.          assert(src.Index < this->uniforms);
  588.          result = src_reg(dst_reg(UNIFORM, src.Index));
  589.          result.type = BRW_REGISTER_TYPE_F;
  590.          break;
  591.  
  592.       default:
  593.          _mesa_problem(ctx, "bad uniform src register file: %s\n",
  594.                        _mesa_register_file_name((gl_register_file)src.File));
  595.          return src_reg(this, glsl_type::vec4_type);
  596.       }
  597.       break;
  598.  
  599.    default:
  600.       _mesa_problem(ctx, "bad src register file: %s\n",
  601.                     _mesa_register_file_name((gl_register_file)src.File));
  602.       return src_reg(this, glsl_type::vec4_type);
  603.    }
  604.  
  605.    if (src.Swizzle != SWIZZLE_NOOP || src.Negate) {
  606.       unsigned short zeros_mask = 0;
  607.       unsigned short ones_mask = 0;
  608.       unsigned short src_mask = 0;
  609.       unsigned short src_swiz[4];
  610.  
  611.       for (int i = 0; i < 4; i++) {
  612.          src_swiz[i] = 0; /* initialize for safety */
  613.  
  614.          /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
  615.           * but it's simplest to handle it here.
  616.           */
  617.          int s = GET_SWZ(src.Swizzle, i);
  618.          switch (s) {
  619.          case SWIZZLE_X:
  620.          case SWIZZLE_Y:
  621.          case SWIZZLE_Z:
  622.          case SWIZZLE_W:
  623.             src_mask |= 1 << i;
  624.             src_swiz[i] = s;
  625.             break;
  626.          case SWIZZLE_ZERO:
  627.             zeros_mask |= 1 << i;
  628.             break;
  629.          case SWIZZLE_ONE:
  630.             ones_mask |= 1 << i;
  631.             break;
  632.          }
  633.       }
  634.  
  635.       result.swizzle =
  636.          BRW_SWIZZLE4(src_swiz[0], src_swiz[1], src_swiz[2], src_swiz[3]);
  637.  
  638.       /* The hardware doesn't natively handle the SWZ instruction's zero/one
  639.        * swizzles or per-component negation, so we need to use a temporary.
  640.        */
  641.       if (zeros_mask || ones_mask || src.Negate) {
  642.          src_reg temp_src(this, glsl_type::vec4_type);
  643.          dst_reg temp(temp_src);
  644.  
  645.          if (src_mask) {
  646.             temp.writemask = src_mask;
  647.             emit(MOV(temp, result));
  648.          }
  649.  
  650.          if (zeros_mask) {
  651.             temp.writemask = zeros_mask;
  652.             emit(MOV(temp, src_reg(0.0f)));
  653.          }
  654.  
  655.          if (ones_mask) {
  656.             temp.writemask = ones_mask;
  657.             emit(MOV(temp, src_reg(1.0f)));
  658.          }
  659.  
  660.          if (src.Negate) {
  661.             temp.writemask = src.Negate;
  662.             src_reg neg(temp_src);
  663.             neg.negate = true;
  664.             emit(MOV(temp, neg));
  665.          }
  666.          result = temp_src;
  667.       }
  668.    }
  669.  
  670.    return result;
  671. }
  672.