Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2012 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  */
  23.  
  24. /** @file brw_vec4_vp.cpp
  25.  *
  26.  * A translator from Mesa IR to the i965 driver's Vec4 IR, used to implement
  27.  * ARB_vertex_program and fixed-function vertex processing.
  28.  */
  29.  
  30. #include "brw_context.h"
  31. #include "brw_vec4.h"
  32. #include "brw_vs.h"
  33. extern "C" {
  34. #include "program/prog_parameter.h"
  35. #include "program/prog_print.h"
  36. }
  37. using namespace brw;
  38.  
  39. void
  40. vec4_visitor::emit_vp_sop(enum brw_conditional_mod conditional_mod,
  41.                           dst_reg dst, src_reg src0, src_reg src1, src_reg one)
  42. {
  43.    /* "Set on <cond>": dst = (src0 <cond> src1) ? one : 0.0f. */
  44.  
  45.    /* The compare targets dst_null_f(); its instruction handle is not needed. */
  46.    emit(CMP(dst_null_f(), src0, src1, conditional_mod));
  47.  
  48.    /* Predicated SEL chooses between the caller-supplied `one` and 0.0f. */
  49.    vec4_instruction *const sel = emit(BRW_OPCODE_SEL, dst, one, src_reg(0.0f));
  50.    sel->predicate = BRW_PREDICATE_NORMAL;
  51. }
  51.  
  52. void
  53. vec4_vs_visitor::emit_program_code()
  54. {
         /* Translate each Mesa IR instruction in prog->Instructions into vec4
          * IR.  Every opcode is emitted into a fresh temporary that is then
          * copied to the instruction's real destination.  If translating any
          * source operand set need_all_constants_in_pull_buffer, all program
          * parameters are registered as pull constants at the end.
          */
  55.    this->need_all_constants_in_pull_buffer = false;
  56.  
  57.    setup_vp_regs();
  58.  
  59.    /* Keep a reg with 1.0 around, for reuse by emit_vp_sop so that it can just
  60.     * be:
  61.     *
  62.     * sel.f0 dst 1.0 0.0
  63.     *
  64.     * instead of
  65.     *
  66.     * mov    dst 0.0
  67.     * mov.f0 dst 1.0
  68.     */
  69.    src_reg one = src_reg(this, glsl_type::float_type);
  70.    emit(MOV(dst_reg(one), src_reg(1.0f)));
  71.  
  72.    for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) {
  73.       const struct prog_instruction *vpi = &prog->Instructions[insn];
  74.       base_ir = vpi;
  75.  
  76.       dst_reg dst;
  77.       src_reg src[3];
  78.  
  79.       /* We always emit into a temporary destination register to avoid
  80.        * aliasing issues.
  81.        */
  82.       dst = dst_reg(this, glsl_type::vec4_type);
  83.  
            /* All three source slots are translated up front, regardless of how
             * many the opcode actually reads.
             */
  84.       for (int i = 0; i < 3; i++)
  85.          src[i] = get_vp_src_reg(vpi->SrcReg[i]);
  86.  
  87.       switch (vpi->Opcode) {
  88.       case OPCODE_ABS:
               /* Force the abs modifier and drop any negate, then copy. */
  89.          src[0].abs = true;
  90.          src[0].negate = false;
  91.          emit(MOV(dst, src[0]));
  92.          break;
  93.  
  94.       case OPCODE_ADD:
  95.          emit(ADD(dst, src[0], src[1]));
  96.          break;
  97.  
  98.       case OPCODE_ARL:
               /* Address register load: round down.  On gen6+ only X is written,
                * via RNDD into a float-typed view of the temporary followed by a
                * MOV into the temporary itself.
                */
  99.          if (devinfo->gen >= 6) {
  100.             dst.writemask = WRITEMASK_X;
  101.             dst_reg dst_f = dst;
  102.             dst_f.type = BRW_REGISTER_TYPE_F;
  103.  
  104.             emit(RNDD(dst_f, src[0]));
  105.             emit(MOV(dst, src_reg(dst_f)));
  106.          } else {
  107.             emit(RNDD(dst, src[0]));
  108.          }
  109.          break;
  110.  
  111.       case OPCODE_DP3:
  112.          emit(DP3(dst, src[0], src[1]));
  113.          break;
  114.       case OPCODE_DP4:
  115.          emit(DP4(dst, src[0], src[1]));
  116.          break;
  117.       case OPCODE_DPH:
  118.          emit(DPH(dst, src[0], src[1]));
  119.          break;
  120.  
  121.       case OPCODE_DST: {
                /* Built one component at a time, honouring the instruction's
                 * writemask: x = 1.0, y = src0 * src1, z = src0, w = src1.
                 */
  122.          dst_reg t = dst;
  123.          if (vpi->DstReg.WriteMask & WRITEMASK_X) {
  124.             t.writemask = WRITEMASK_X;
  125.             emit(MOV(t, src_reg(1.0f)));
  126.          }
  127.          if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
  128.             t.writemask = WRITEMASK_Y;
  129.             emit(MUL(t, src[0], src[1]));
  130.          }
  131.          if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
  132.             t.writemask = WRITEMASK_Z;
  133.             emit(MOV(t, src[0]));
  134.          }
  135.          if (vpi->DstReg.WriteMask & WRITEMASK_W) {
  136.             t.writemask = WRITEMASK_W;
  137.             emit(MOV(t, src[1]));
  138.          }
  139.          break;
  140.       }
  141.  
  142.       case OPCODE_EXP: {
  143.          dst_reg result = dst;
  144.          if (vpi->DstReg.WriteMask & WRITEMASK_X) {
  145.             /* tmp_d = floor(src[0].x) */
  146.             src_reg tmp_d = src_reg(this, glsl_type::ivec4_type);
  147.             assert(tmp_d.type == BRW_REGISTER_TYPE_D);
  148.             emit(RNDD(dst_reg(tmp_d), swizzle(src[0], BRW_SWIZZLE_XXXX)));
  149.  
  150.             /* result[0] = 2.0 ^ tmp */
  151.             /* Adjust exponent for floating point: exp += 127 */
  152.             dst_reg tmp_d_x(GRF, tmp_d.reg, glsl_type::int_type, WRITEMASK_X);
  153.             emit(ADD(tmp_d_x, tmp_d, src_reg(127)));
  154.  
  155.             /* Install exponent and sign.  Excess drops off the edge: */
                   /* res_d_x is an int-typed, X-only view of the result register,
                    * so the shifted bits land directly in result.x.
                    */
  156.             dst_reg res_d_x(GRF, result.reg, glsl_type::int_type, WRITEMASK_X);
  157.             emit(BRW_OPCODE_SHL, res_d_x, tmp_d, src_reg(23));
  158.          }
  159.          if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
  160.             result.writemask = WRITEMASK_Y;
  161.             emit(FRC(result, src[0]));
  162.          }
  163.          if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
  164.             result.writemask = WRITEMASK_Z;
  165.             emit_math(SHADER_OPCODE_EXP2, result, src[0]);
  166.          }
  167.          if (vpi->DstReg.WriteMask & WRITEMASK_W) {
  168.             result.writemask = WRITEMASK_W;
  169.             emit(MOV(result, src_reg(1.0f)));
  170.          }
  171.          break;
  172.       }
  173.  
  174.       case OPCODE_EX2:
  175.          emit_math(SHADER_OPCODE_EXP2, dst, src[0]);
  176.          break;
  177.  
  178.       case OPCODE_FLR:
  179.          emit(RNDD(dst, src[0]));
  180.          break;
  181.  
  182.       case OPCODE_FRC:
  183.          emit(FRC(dst, src[0]));
  184.          break;
  185.  
  186.       case OPCODE_LG2:
  187.          emit_math(SHADER_OPCODE_LOG2, dst, src[0]);
  188.          break;
  189.  
  190.       case OPCODE_LIT: {
  191.          dst_reg result = dst;
  192.          /* From the ARB_vertex_program spec:
  193.           *
  194.           *      tmp = VectorLoad(op0);
  195.           *      if (tmp.x < 0) tmp.x = 0;
  196.           *      if (tmp.y < 0) tmp.y = 0;
  197.           *      if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
  198.           *      else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
  199.           *      result.x = 1.0;
  200.           *      result.y = tmp.x;
  201.           *      result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
  202.           *      result.w = 1.0;
  203.           *
  204.           * Note that we don't do the clamping to +/- 128.  We didn't in
  205.           * brw_vs_emit.c either.
  206.           */
  207.          if (vpi->DstReg.WriteMask & WRITEMASK_XW) {
  208.             result.writemask = WRITEMASK_XW;
  209.             emit(MOV(result, src_reg(1.0f)));
  210.          }
  211.          if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
                   /* Y and Z default to 0.0 and are only overwritten inside the
                    * IF below, i.e. when tmp.x > 0.
                    */
  212.             result.writemask = WRITEMASK_YZ;
  213.             emit(MOV(result, src_reg(0.0f)));
  214.  
  215.             src_reg tmp_x = swizzle(src[0], BRW_SWIZZLE_XXXX);
  216.  
  217.             emit(CMP(dst_null_d(), tmp_x, src_reg(0.0f), BRW_CONDITIONAL_G));
  218.             emit(IF(BRW_PREDICATE_NORMAL));
  219.  
  220.             if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
  221.                result.writemask = WRITEMASK_Y;
  222.                emit(MOV(result, tmp_x));
  223.             }
  224.  
  225.             if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
  226.                /* if (tmp.y < 0) tmp.y = 0; */
  227.                src_reg tmp_y = swizzle(src[0], BRW_SWIZZLE_YYYY);
  228.                result.writemask = WRITEMASK_Z;
  229.                emit_minmax(BRW_CONDITIONAL_GE, result, tmp_y, src_reg(0.0f));
  230.  
                      /* The clamped y is parked in result.z and read back from
                       * there as the base of the POW.
                       */
  231.                src_reg clamped_y(result);
  232.                clamped_y.swizzle = BRW_SWIZZLE_ZZZZ;
  233.  
  234.                src_reg tmp_w = swizzle(src[0], BRW_SWIZZLE_WWWW);
  235.  
  236.                emit_math(SHADER_OPCODE_POW, result, clamped_y, tmp_w);
  237.             }
  238.             emit(BRW_OPCODE_ENDIF);
  239.          }
  240.          break;
  241.       }
  242.  
  243.       case OPCODE_LOG: {
  244.          dst_reg result = dst;
  245.          result.type = BRW_REGISTER_TYPE_UD;
  246.          src_reg result_src = src_reg(result);
  247.  
  248.          src_reg arg0_ud = swizzle(src[0], BRW_SWIZZLE_XXXX);
  249.          arg0_ud.type = BRW_REGISTER_TYPE_UD;
  250.  
  251.          /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mant
  252.           * according to spec:
  253.           *
  254.           * These almost look likely they could be joined up, but not really
  255.           * practical:
  256.           *
  257.           * result[0].f = ((x.i & ((1<<31)-1)) >> 23) - 127
  258.           * result[1].i =  (x.i & ((1<<23)-1))        | (127<<23)
  259.           */
                /* X and Y are also computed when only Z is requested, because
                 * the Z path below reads result.x and result.y as inputs.
                 */
  260.          if (vpi->DstReg.WriteMask & WRITEMASK_XZ) {
  261.             result.writemask = WRITEMASK_X;
  262.             emit(AND(result, arg0_ud, src_reg((1u << 31) - 1)));
  263.             emit(BRW_OPCODE_SHR, result, result_src, src_reg(23u));
  264.             src_reg result_d(result_src);
  265.             result_d.type = BRW_REGISTER_TYPE_D; /* does it matter? */
  266.             result.type = BRW_REGISTER_TYPE_F;
  267.             emit(ADD(result, result_d, src_reg(-127)));
  268.          }
  269.  
  270.          if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
  271.             result.writemask = WRITEMASK_Y;
  272.             result.type = BRW_REGISTER_TYPE_UD;
  273.             emit(AND(result, arg0_ud, src_reg((1u << 23) - 1)));
  274.             emit(OR(result, result_src, src_reg(127u << 23)));
  275.          }
  276.  
  277.          if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
  278.             /* result[2] = result[0] + LOG2(result[1]); */
  279.  
  280.             /* Why bother?  The above is just a hint how to do this with a
  281.              * taylor series.  Maybe we *should* use a taylor series as by
  282.              * the time all the above has been done it's almost certainly
  283.              * quicker than calling the mathbox, even with low precision.
  284.              *
  285.              * Options are:
  286.              *    - result[0] + mathbox.LOG2(result[1])
  287.              *    - mathbox.LOG2(arg0.x)
  288.              *    - result[0] + inline_taylor_approx(result[1])
  289.              */
                   /* The code below implements the first option: LOG2 of
                    * result.y into result.z, then result.z += result.x.
                    */
  290.             result.type = BRW_REGISTER_TYPE_F;
  291.             result.writemask = WRITEMASK_Z;
  292.             src_reg result_x(result), result_y(result), result_z(result);
  293.             result_x.swizzle = BRW_SWIZZLE_XXXX;
  294.             result_y.swizzle = BRW_SWIZZLE_YYYY;
  295.             result_z.swizzle = BRW_SWIZZLE_ZZZZ;
  296.             emit_math(SHADER_OPCODE_LOG2, result, result_y);
  297.             emit(ADD(result, result_z, result_x));
  298.          }
  299.  
  300.          if (vpi->DstReg.WriteMask & WRITEMASK_W) {
  301.             result.type = BRW_REGISTER_TYPE_F;
  302.             result.writemask = WRITEMASK_W;
  303.             emit(MOV(result, src_reg(1.0f)));
  304.          }
  305.          break;
  306.       }
  307.  
  308.       case OPCODE_MAD: {
                /* Emitted as a separate MUL and ADD through a temporary. */
  309.          src_reg temp = src_reg(this, glsl_type::vec4_type);
  310.          emit(MUL(dst_reg(temp), src[0], src[1]));
  311.          emit(ADD(dst, temp, src[2]));
  312.          break;
  313.       }
  314.  
  315.       case OPCODE_MAX:
  316.          emit_minmax(BRW_CONDITIONAL_GE, dst, src[0], src[1]);
  317.          break;
  318.  
  319.       case OPCODE_MIN:
  320.          emit_minmax(BRW_CONDITIONAL_L, dst, src[0], src[1]);
  321.          break;
  322.  
  323.       case OPCODE_MOV:
  324.          emit(MOV(dst, src[0]));
  325.          break;
  326.  
  327.       case OPCODE_MUL:
  328.          emit(MUL(dst, src[0], src[1]));
  329.          break;
  330.  
  331.       case OPCODE_POW:
  332.          emit_math(SHADER_OPCODE_POW, dst, src[0], src[1]);
  333.          break;
  334.  
  335.       case OPCODE_RCP:
  336.          emit_math(SHADER_OPCODE_RCP, dst, src[0]);
  337.          break;
  338.  
  339.       case OPCODE_RSQ:
  340.          emit_math(SHADER_OPCODE_RSQ, dst, src[0]);
  341.          break;
  342.  
  343.       case OPCODE_SGE:
  344.          emit_vp_sop(BRW_CONDITIONAL_GE, dst, src[0], src[1], one);
  345.          break;
  346.  
  347.       case OPCODE_SLT:
  348.          emit_vp_sop(BRW_CONDITIONAL_L, dst, src[0], src[1], one);
  349.          break;
  350.  
  351.       case OPCODE_SUB: {
                /* src0 - src1 is an ADD with src1's negate flag flipped. */
  352.          src_reg neg_src1 = src[1];
  353.          neg_src1.negate = !src[1].negate;
  354.          emit(ADD(dst, src[0], neg_src1));
  355.          break;
  356.       }
  357.  
  358.       case OPCODE_SWZ:
  359.          /* Note that SWZ's extended swizzles are handled in the general
  360.           * get_vp_src_reg() code.
  361.           */
  362.          emit(MOV(dst, src[0]));
  363.          break;
  364.  
  365.       case OPCODE_XPD: {
                /* Cross product: t1 = src0.yzx * src1.zxy,
                 *                t2 = src0.zxy * src1.yzx,
                 *                dst = t1 - t2.
                 */
  366.          src_reg t1 = src_reg(this, glsl_type::vec4_type);
  367.          src_reg t2 = src_reg(this, glsl_type::vec4_type);
  368.  
  369.          emit(MUL(dst_reg(t1),
  370.                   swizzle(src[0], BRW_SWIZZLE_YZXW),
  371.                   swizzle(src[1], BRW_SWIZZLE_ZXYW)));
  372.          emit(MUL(dst_reg(t2),
  373.                   swizzle(src[0], BRW_SWIZZLE_ZXYW),
  374.                   swizzle(src[1], BRW_SWIZZLE_YZXW)));
  375.          t2.negate = true;
  376.          emit(ADD(dst, t1, t2));
  377.          break;
  378.       }
  379.  
  380.       case OPCODE_END:
  381.          break;
  382.  
  383.       default:
                /* Report the opcode and carry on; control then reaches the
                 * copy-back below like any other case.
                 */
  384.          _mesa_problem(ctx, "Unsupported opcode %s in vertex program\n",
  385.                        _mesa_opcode_string(vpi->Opcode));
  386.       }
  387.  
  388.       /* Copy the temporary back into the actual destination register. */
  389.       if (_mesa_num_inst_dst_regs(vpi->Opcode) != 0) {
  390.          emit(MOV(get_vp_dst_reg(vpi->DstReg), src_reg(dst)));
  391.       }
  392.    }
  393.  
  394.    /* If we used relative addressing, we need to upload all constants as
  395.     * pull constants.  Do that now.
  396.     */
  397.    if (this->need_all_constants_in_pull_buffer) {
  398.       const struct gl_program_parameter_list *params =
  399.          vs_compile->vp->program.Base.Parameters;
  400.       unsigned i;
             /* i counts individual components: four per parameter. */
  401.       for (i = 0; i < params->NumParameters * 4; i++) {
  402.          stage_prog_data->pull_param[i] =
  403.             &params->ParameterValues[i / 4][i % 4];
  404.       }
  405.       stage_prog_data->nr_pull_params = i;
  406.    }
  407. }
  408.  
  409. void
  410. vec4_vs_visitor::setup_vp_regs()
  411. {
  412.    /* PROGRAM_TEMPORARY */
  413.    int num_temp = prog->NumTemporaries;
  414.    vp_temp_regs = rzalloc_array(mem_ctx, src_reg, num_temp);
  415.    for (int i = 0; i < num_temp; i++)
  416.       vp_temp_regs[i] = src_reg(this, glsl_type::vec4_type);
  417.  
  418.    /* PROGRAM_STATE_VAR etc. */
  419.    struct gl_program_parameter_list *plist =
  420.       vs_compile->vp->program.Base.Parameters;
  421.    for (unsigned p = 0; p < plist->NumParameters; p++) {
  422.       unsigned components = plist->Parameters[p].Size;
  423.  
  424.       /* Parameters should be either vec4 uniforms or single component
  425.        * constants; matrices and other larger types should have been broken
  426.        * down earlier.
  427.        */
  428.       assert(components <= 4);
  429.  
  430.       this->uniform_size[this->uniforms] = 1; /* 1 vec4 */
  431.       this->uniform_vector_size[this->uniforms] = components;
  432.       for (unsigned i = 0; i < 4; i++) {
  433.          stage_prog_data->param[this->uniforms * 4 + i] = i >= components
  434.             ? 0 : &plist->ParameterValues[p][i];
  435.       }
  436.       this->uniforms++; /* counted in vec4 units */
  437.    }
  438.  
  439.    /* PROGRAM_OUTPUT */
  440.    for (int slot = 0; slot < prog_data->vue_map.num_slots; slot++) {
  441.       int varying = prog_data->vue_map.slot_to_varying[slot];
  442.       if (varying == VARYING_SLOT_PSIZ)
  443.          output_reg[varying] = dst_reg(this, glsl_type::float_type);
  444.       else
  445.          output_reg[varying] = dst_reg(this, glsl_type::vec4_type);
  446.       assert(output_reg[varying].type == BRW_REGISTER_TYPE_F);
  447.    }
  448.  
  449.    /* PROGRAM_ADDRESS */
  450.    this->vp_addr_reg = src_reg(this, glsl_type::int_type);
  451.    assert(this->vp_addr_reg.type == BRW_REGISTER_TYPE_D);
  452. }
  453.  
  454. dst_reg
  455. vec4_vs_visitor::get_vp_dst_reg(const prog_dst_register &dst)
  456. {
  457.    /* Maps a Mesa IR destination operand to the vec4 register backing it
  458.     * and applies the operand's writemask.  Relatively-addressed
  459.     * destinations are rejected by assertion.
  460.     */
  461.    assert(!dst.RelAddr);
  462.  
  463.    /* No destination: hand back dst_null_f() as-is (writemask not applied). */
  464.    if (dst.File == PROGRAM_UNDEFINED)
  465.       return dst_null_f();
  466.  
  467.    dst_reg reg;
  468.    switch (dst.File) {
  469.    case PROGRAM_TEMPORARY:
  470.       reg = dst_reg(vp_temp_regs[dst.Index]);
  471.       break;
  472.    case PROGRAM_OUTPUT:
  473.       reg = output_reg[dst.Index];
  474.       break;
  475.    case PROGRAM_ADDRESS:
  476.       assert(dst.Index == 0);
  477.       reg = dst_reg(this->vp_addr_reg);
  478.       break;
  479.    default:
  480.       unreachable("vec4_vp: bad destination register file");
  481.    }
  482.  
  483.    reg.writemask = dst.WriteMask;
  484.    return reg;
  485. }
  486.  
  487. src_reg
  488. vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src)
  489. {
          /* Translate a Mesa IR source operand into a vec4 src_reg.  This can
           * emit instructions of its own: constants are MOVed into a temporary,
           * relatively-addressed parameters are fetched with a pull-constant
           * load, and SWZ-style zero/one swizzles or negation are resolved
           * through a temporary.
           */
  490.    struct gl_program_parameter_list *plist =
  491.       vs_compile->vp->program.Base.Parameters;
  492.  
  493.    src_reg result;
  494.  
  495.    assert(!src.Abs);
  496.  
  497.    switch (src.File) {
  498.    case PROGRAM_UNDEFINED:
  499.       return src_reg(brw_null_reg());
  500.  
  501.    case PROGRAM_TEMPORARY:
  502.       result = vp_temp_regs[src.Index];
  503.       break;
  504.  
  505.    case PROGRAM_INPUT:
  506.       result = src_reg(ATTR, src.Index, glsl_type::vec4_type);
  507.       result.type = BRW_REGISTER_TYPE_F;
  508.       break;
  509.  
  510.    case PROGRAM_ADDRESS: {
  511.       assert(src.Index == 0);
  512.       result = this->vp_addr_reg;
  513.       break;
  514.    }
  515.  
  516.    case PROGRAM_STATE_VAR:
  517.    case PROGRAM_CONSTANT:
  518.       /* From the ARB_vertex_program specification:
  519.        * "Relative addressing can only be used for accessing program
  520.        *  parameter arrays."
  521.        */
  522.       if (src.RelAddr) {
  523.          /* Since we have no idea what the base of the array is, we need to
  524.           * upload ALL constants as pull constants.
  525.           */
  526.          this->need_all_constants_in_pull_buffer = true;
  527.  
  528.          /* Add the small constant index to the address register */
  529.          src_reg reladdr = src_reg(this, glsl_type::int_type);
  530.  
  531.          dst_reg dst_reladdr = dst_reg(reladdr);
  532.          dst_reladdr.writemask = WRITEMASK_X;
  533.          emit(ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index)));
  534.  
                /* Before gen6 the index is scaled by 16 -- presumably to turn a
                 * vec4 index into a byte offset; confirm against
                 * emit_pull_constant_load_reg().
                 */
  535.          if (devinfo->gen < 6)
  536.             emit(MUL(dst_reladdr, reladdr, src_reg(16)));
  537.  
                /* Disabled alternative that would address the UNIFORM file through
                 * result.reladdr instead of using a pull-constant load.
                 */
  538.       #if 0
  539.          assert(src.Index < this->uniforms);
  540.          result = src_reg(dst_reg(UNIFORM, 0));
  541.          result.type = BRW_REGISTER_TYPE_F;
  542.          result.reladdr = new(mem_ctx) src_reg();
  543.          memcpy(result.reladdr, &reladdr, sizeof(src_reg));
  544.       #endif
  545.  
  546.          result = src_reg(this, glsl_type::vec4_type);
  547.          src_reg surf_index = src_reg(unsigned(prog_data->base.binding_table.pull_constants_start));
  548.  
  549.          emit_pull_constant_load_reg(dst_reg(result),
  550.                                      surf_index,
  551.                                      reladdr,
  552.                                      NULL, NULL /* before_block/inst */);
  553.          break;
  554.       }
  555.  
  556.       /* We actually want to look at the type in the Parameters list for this,
  557.        * because this lets us upload constant builtin uniforms as actual
  558.        * constants.
  559.        */
  560.       switch (plist->Parameters[src.Index].Type) {
  561.       case PROGRAM_CONSTANT:
                /* Materialise the constant with one immediate MOV per component. */
  562.          result = src_reg(this, glsl_type::vec4_type);
  563.          for (int i = 0; i < 4; i++) {
  564.             dst_reg t = dst_reg(result);
  565.             t.writemask = 1 << i;
  566.             emit(MOV(t, src_reg(plist->ParameterValues[src.Index][i].f)));
  567.          }
  568.          break;
  569.  
  570.       case PROGRAM_STATE_VAR:
  571.          assert(src.Index < this->uniforms);
  572.          result = src_reg(dst_reg(UNIFORM, src.Index));
  573.          result.type = BRW_REGISTER_TYPE_F;
  574.          break;
  575.  
  576.       default:
                /* NOTE(review): this switch is on the parameter's Type, but the
                 * message prints src.File -- confirm which one is intended.
                 */
  577.          _mesa_problem(ctx, "bad uniform src register file: %s\n",
  578.                        _mesa_register_file_name((gl_register_file)src.File));
  579.          return src_reg(this, glsl_type::vec4_type);
  580.       }
  581.       break;
  582.  
  583.    default:
  584.       _mesa_problem(ctx, "bad src register file: %s\n",
  585.                     _mesa_register_file_name((gl_register_file)src.File));
  586.       return src_reg(this, glsl_type::vec4_type);
  587.    }
  588.  
  589.    if (src.Swizzle != SWIZZLE_NOOP || src.Negate) {
             /* Per-component bitmasks: which destination components take a real
              * source channel, a literal 0.0, or a literal 1.0.
              */
  590.       unsigned short zeros_mask = 0;
  591.       unsigned short ones_mask = 0;
  592.       unsigned short src_mask = 0;
  593.       unsigned short src_swiz[4];
  594.  
  595.       for (int i = 0; i < 4; i++) {
  596.          src_swiz[i] = 0; /* initialize for safety */
  597.  
  598.          /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
  599.           * but it's simplest to handle it here.
  600.           */
  601.          int s = GET_SWZ(src.Swizzle, i);
  602.          switch (s) {
  603.          case SWIZZLE_X:
  604.          case SWIZZLE_Y:
  605.          case SWIZZLE_Z:
  606.          case SWIZZLE_W:
  607.             src_mask |= 1 << i;
  608.             src_swiz[i] = s;
  609.             break;
  610.          case SWIZZLE_ZERO:
  611.             zeros_mask |= 1 << i;
  612.             break;
  613.          case SWIZZLE_ONE:
  614.             ones_mask |= 1 << i;
  615.             break;
  616.          }
  617.       }
  618.  
  619.       result.swizzle =
  620.          BRW_SWIZZLE4(src_swiz[0], src_swiz[1], src_swiz[2], src_swiz[3]);
  621.  
  622.       /* The hardware doesn't natively handle the SWZ instruction's zero/one
  623.        * swizzles or per-component negation, so we need to use a temporary.
  624.        */
  625.       if (zeros_mask || ones_mask || src.Negate) {
  626.          src_reg temp_src(this, glsl_type::vec4_type);
  627.          dst_reg temp(temp_src);
  628.  
  629.          if (src_mask) {
  630.             temp.writemask = src_mask;
  631.             emit(MOV(temp, result));
  632.          }
  633.  
  634.          if (zeros_mask) {
  635.             temp.writemask = zeros_mask;
  636.             emit(MOV(temp, src_reg(0.0f)));
  637.          }
  638.  
  639.          if (ones_mask) {
  640.             temp.writemask = ones_mask;
  641.             emit(MOV(temp, src_reg(1.0f)));
  642.          }
  643.  
                /* src.Negate doubles as the writemask here, so only the flagged
                 * components of the temporary are negated in place.
                 */
  644.          if (src.Negate) {
  645.             temp.writemask = src.Negate;
  646.             src_reg neg(temp_src);
  647.             neg.negate = true;
  648.             emit(MOV(temp, neg));
  649.          }
  650.          result = temp_src;
  651.       }
  652.    }
  653.  
  654.    return result;
  655. }
  656.