/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** @file brw_fs_visitor.cpp
 *
 * This file supports generating the FS LIR from the GLSL IR.  The LIR
 * makes it easier to do backend-specific optimizations than doing so
 * in the GLSL IR or in the native code.
 */
extern "C" {

#include <sys/types.h>

#include "main/macros.h"
#include "main/shaderobj.h"
#include "main/uniforms.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_optimize.h"
#include "program/register_allocate.h"
#include "program/sampler.h"
#include "program/hash_table.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
}
#include "brw_fs.h"
#include "glsl/glsl_types.h"
#include "glsl/ir_optimization.h"

void
fs_visitor::visit(ir_variable *ir)
{
   fs_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   if (ir->mode == ir_var_shader_in) {
      if (!strcmp(ir->name, "gl_FragCoord")) {
         reg = emit_fragcoord_interpolation(ir);
      } else if (!strcmp(ir->name, "gl_FrontFacing")) {
         reg = emit_frontfacing_interpolation(ir);
      } else {
         reg = emit_general_interpolation(ir);
      }
      assert(reg);
      hash_table_insert(this->variable_ht, reg, ir);
      return;
   } else if (ir->mode == ir_var_shader_out) {
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

      if (ir->index > 0) {
         assert(ir->location == FRAG_RESULT_DATA0);
         assert(ir->index == 1);
         this->dual_src_output = *reg;
      } else if (ir->location == FRAG_RESULT_COLOR) {
         /* Writing gl_FragColor outputs to all color regions. */
         for (unsigned int i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) {
            this->outputs[i] = *reg;
            this->output_components[i] = 4;
         }
      } else if (ir->location == FRAG_RESULT_DEPTH) {
         this->frag_depth = *reg;
      } else {
         /* gl_FragData or a user-defined FS output */
         assert(ir->location >= FRAG_RESULT_DATA0 &&
                ir->location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);

         int vector_elements =
            ir->type->is_array() ? ir->type->fields.array->vector_elements
                                 : ir->type->vector_elements;

         /* General color output. */
         for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) {
            int output = ir->location - FRAG_RESULT_DATA0 + i;
            this->outputs[output] = *reg;
            this->outputs[output].reg_offset += vector_elements * i;
            this->output_components[output] = vector_elements;
         }
      }
   } else if (ir->mode == ir_var_uniform) {
      int param_index = c->prog_data.nr_params;

      /* Thanks to the lower_ubo_reference pass, we will see only
       * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
       * variables, so no need for them to be in variable_ht.
       */
      if (ir->is_in_uniform_block())
         return;

      if (dispatch_width == 16) {
         if (!variable_storage(ir)) {
            fail("Failed to find uniform '%s' in 16-wide\n", ir->name);
         }
         return;
      }

      param_size[param_index] = type_size(ir->type);
      if (!strncmp(ir->name, "gl_", 3)) {
         setup_builtin_uniform_values(ir);
      } else {
         setup_uniform_values(ir);
      }

      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
      reg->type = brw_type_for_base_type(ir->type);
   }

   if (!reg)
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

   hash_table_insert(this->variable_ht, reg, ir);
}

void
fs_visitor::visit(ir_dereference_variable *ir)
{
   fs_reg *reg = variable_storage(ir->var);
   this->result = *reg;
}

void
fs_visitor::visit(ir_dereference_record *ir)
{
   const glsl_type *struct_type = ir->record->type;

   ir->record->accept(this);

   unsigned int offset = 0;
   for (unsigned int i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }
   this->result.reg_offset += offset;
   this->result.type = brw_type_for_base_type(ir->type);
}
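
/* Worked example (illustrative, not from the original source): for
 *   struct { float a; vec4 b; float c; } s;
 * a dereference of s.c walks the fields above, accumulating
 * type_size(float) + type_size(vec4) = 1 + 4 = 5, so this->result
 * ends up pointing reg_offset + 5 past the storage for s.
 */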

void
fs_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *constant_index;
   fs_reg src;
   int element_size = type_size(ir->type);

   constant_index = ir->array_index->as_constant();

   ir->array->accept(this);
   src = this->result;
   src.type = brw_type_for_base_type(ir->type);

   if (constant_index) {
      assert(src.file == UNIFORM || src.file == GRF);
      src.reg_offset += constant_index->value.i[0] * element_size;
   } else {
      /* Variable index array dereference.  We attach the variable index
       * component to the reg as a pointer to a register containing the
       * offset.  Currently only uniform arrays are supported in this patch,
       * and that reladdr pointer is resolved by
       * move_uniform_array_access_to_pull_constants().  All other array types
       * are lowered by lower_variable_index_to_cond_assign().
       */
      ir->array_index->accept(this);

      fs_reg index_reg;
      index_reg = fs_reg(this, glsl_type::int_type);
      emit(BRW_OPCODE_MUL, index_reg, this->result, fs_reg(element_size));

      if (src.reladdr) {
         emit(BRW_OPCODE_ADD, index_reg, *src.reladdr, index_reg);
      }

      src.reladdr = ralloc(mem_ctx, fs_reg);
      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
   }
   this->result = src;
}

void
fs_visitor::emit_lrp(fs_reg dst, fs_reg x, fs_reg y, fs_reg a)
{
   if (brw->gen < 6 ||
       !x.is_valid_3src() ||
       !y.is_valid_3src() ||
       !a.is_valid_3src()) {
      /* We can't use the LRP instruction.  Emit x*(1-a) + y*a. */
      fs_reg y_times_a           = fs_reg(this, glsl_type::float_type);
      fs_reg one_minus_a         = fs_reg(this, glsl_type::float_type);
      fs_reg x_times_one_minus_a = fs_reg(this, glsl_type::float_type);

      emit(MUL(y_times_a, y, a));

      a.negate = !a.negate;
      emit(ADD(one_minus_a, a, fs_reg(1.0f)));
      emit(MUL(x_times_one_minus_a, x, one_minus_a));

      emit(ADD(dst, x_times_one_minus_a, y_times_a));
   } else {
      /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
       * we need to reorder the operands.
       */
      emit(LRP(dst, a, y, x));
   }
}
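
/* Illustration (not from the original source): with a = 0.25, the
 * fallback path computes one_minus_a = (-0.25) + 1.0 = 0.75 and then
 * dst = x*0.75 + y*0.25, matching GLSL mix(x, y, 0.25) and the
 * reordered LRP(dst, a, y, x) on the hardware path.  Note that (1 - a)
 * is formed by toggling the source negate flag on a and adding 1.0,
 * which avoids a separate negation instruction.
 */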

void
fs_visitor::emit_minmax(uint32_t conditionalmod, fs_reg dst,
                        fs_reg src0, fs_reg src1)
{
   fs_inst *inst;

   if (brw->gen >= 6) {
      inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
      inst->conditional_mod = conditionalmod;
   } else {
      emit(CMP(reg_null_d, src0, src1, conditionalmod));

      inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
      inst->predicate = BRW_PREDICATE_NORMAL;
   }
}
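
/* Sketch of the two paths above (illustrative): on gen6+ a single
 *   SEL.L dst, src0, src1
 * yields min(src0, src1), since the conditional modifier makes SEL keep
 * src0 only when src0 < src1.  Pre-gen6 SEL can't carry a conditional
 * modifier, so a CMP first sets the flag register and the SEL is then
 * predicated on it.
 */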

/* Instruction selection: Produce a MOV.sat instead of
 * MIN(MAX(val, 0), 1) when possible.
 */
bool
fs_visitor::try_emit_saturate(ir_expression *ir)
{
   ir_rvalue *sat_val = ir->as_rvalue_to_saturate();

   if (!sat_val)
      return false;

   fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail();

   sat_val->accept(this);
   fs_reg src = this->result;

   fs_inst *last_inst = (fs_inst *) this->instructions.get_tail();

   /* If the last instruction from our accept() didn't generate our
    * src, generate a saturated MOV.
    */
   fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
   if (!modify || modify->regs_written != 1) {
      this->result = fs_reg(this, ir->type);
      fs_inst *inst = emit(MOV(this->result, src));
      inst->saturate = true;
   } else {
      modify->saturate = true;
      this->result = src;
   }

   return true;
}

bool
fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg)
{
   /* 3-src instructions were introduced in gen6. */
   if (brw->gen < 6)
      return false;

   /* MAD can only handle floating-point data. */
   if (ir->type != glsl_type::float_type)
      return false;

   ir_rvalue *nonmul = ir->operands[1 - mul_arg];
   ir_expression *mul = ir->operands[mul_arg]->as_expression();

   if (!mul || mul->operation != ir_binop_mul)
      return false;

   if (nonmul->as_constant() ||
       mul->operands[0]->as_constant() ||
       mul->operands[1]->as_constant())
      return false;

   nonmul->accept(this);
   fs_reg src0 = this->result;

   mul->operands[0]->accept(this);
   fs_reg src1 = this->result;

   mul->operands[1]->accept(this);
   fs_reg src2 = this->result;

   this->result = fs_reg(this, ir->type);
   emit(BRW_OPCODE_MAD, this->result, src0, src1, src2);

   return true;
}
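
/* Example (illustrative, not from the original source): for GLSL
 * "d = a + b * c", the ir_binop_add visitor calls this with mul_arg
 * == 1, operands[1] matches as the ir_binop_mul, and the emitted MAD
 * computes src1 * src2 + src0, i.e. b * c + a, in one instruction.
 * Constant operands are rejected above, presumably because 3-src
 * instructions can't take immediate sources.
 */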

void
fs_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   fs_reg op[3], temp;
   fs_inst *inst;

   assert(ir->get_num_operands() <= 3);

   if (try_emit_saturate(ir))
      return;
   if (ir->operation == ir_binop_add) {
      if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1))
         return;
   }

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
         fail("Failed to get tree for expression operand:\n");
         ir->operands[operand]->print();
         printf("\n");
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
      /* And then those vector operands should have been broken down to scalar.
       */
      assert(!ir->operands[operand]->type->is_vector());
   }

   /* Storage for our result.  If our result goes into an assignment, it will
    * just get copy-propagated out, so no worries.
    */
   this->result = fs_reg(this, ir->type);

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
       * the one's complement of the whole register, not just bit 0.
       */
      emit(XOR(this->result, op[0], fs_reg(1)));
      break;
   case ir_unop_neg:
      op[0].negate = !op[0].negate;
      emit(MOV(this->result, op[0]));
      break;
   case ir_unop_abs:
      op[0].abs = true;
      op[0].negate = false;
      emit(MOV(this->result, op[0]));
      break;
   case ir_unop_sign:
      temp = fs_reg(this, ir->type);

      emit(MOV(this->result, fs_reg(0.0f)));

      emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_G));
      inst = emit(MOV(this->result, fs_reg(1.0f)));
      inst->predicate = BRW_PREDICATE_NORMAL;

      emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_L));
      inst = emit(MOV(this->result, fs_reg(-1.0f)));
      inst->predicate = BRW_PREDICATE_NORMAL;

      break;
   case ir_unop_rcp:
      emit_math(SHADER_OPCODE_RCP, this->result, op[0]);
      break;

   case ir_unop_exp2:
      emit_math(SHADER_OPCODE_EXP2, this->result, op[0]);
      break;
   case ir_unop_log2:
      emit_math(SHADER_OPCODE_LOG2, this->result, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_sin:
   case ir_unop_sin_reduced:
      emit_math(SHADER_OPCODE_SIN, this->result, op[0]);
      break;
   case ir_unop_cos:
   case ir_unop_cos_reduced:
      emit_math(SHADER_OPCODE_COS, this->result, op[0]);
      break;

   case ir_unop_dFdx:
      emit(FS_OPCODE_DDX, this->result, op[0]);
      break;
   case ir_unop_dFdy:
      emit(FS_OPCODE_DDY, this->result, op[0]);
      break;

   case ir_binop_add:
      emit(ADD(this->result, op[0], op[1]));
      break;
   case ir_binop_sub:
      assert(!"not reached: should be handled by ir_sub_to_add_neg");
      break;

   case ir_binop_mul:
      if (ir->type->is_integer()) {
         /* For integer multiplication, the MUL uses the low 16 bits of
          * one of the operands (src0 on gen6, src1 on gen7).  The MACH
          * accumulates the contribution of the upper 16 bits of that
          * operand.
          *
          * FINISHME: Emit just the MUL if we know an operand is small
          * enough.
          */
         if (brw->gen >= 7 && dispatch_width == 16)
            fail("16-wide explicit accumulator operands unsupported\n");

         struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);

         emit(MUL(acc, op[0], op[1]));
         emit(MACH(reg_null_d, op[0], op[1]));
         emit(MOV(this->result, fs_reg(acc)));
      } else {
         emit(MUL(this->result, op[0], op[1]));
      }
      break;
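
   /* Illustration of the MUL/MACH sequence above (not from the original
    * source): with an operand like 0x12345 that needs more than 16 bits,
    * MUL by itself would multiply by only the low 16 bits of one source;
    * MACH folds in the upper 16 bits' contribution, leaving the full low
    * 32 bits of the 32x32-bit product in the accumulator for the MOV.
    */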
   case ir_binop_div:
      /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
      assert(ir->type->is_integer());
      emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]);
      break;
   case ir_binop_mod:
      /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
      assert(ir->type->is_integer());
      emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]);
      break;

   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_all_equal:
   case ir_binop_nequal:
   case ir_binop_any_nequal:
      resolve_bool_comparison(ir->operands[0], &op[0]);
      resolve_bool_comparison(ir->operands[1], &op[1]);

      emit(CMP(this->result, op[0], op[1],
               brw_conditional_for_comparison(ir->operation)));
      break;

   case ir_binop_logic_xor:
      emit(XOR(this->result, op[0], op[1]));
      break;

   case ir_binop_logic_or:
      emit(OR(this->result, op[0], op[1]));
      break;

   case ir_binop_logic_and:
      emit(AND(this->result, op[0], op[1]));
      break;

   case ir_binop_dot:
   case ir_unop_any:
      assert(!"not reached: should be handled by brw_fs_channel_expressions");
      break;

   case ir_unop_noise:
      assert(!"not reached: should be handled by lower_noise");
      break;

   case ir_quadop_vector:
      assert(!"not reached: should be handled by lower_quadop_vector");
      break;

   case ir_binop_vector_extract:
      assert(!"not reached: should be handled by lower_vec_index_to_cond_assign()");
      break;

   case ir_triop_vector_insert:
      assert(!"not reached: should be handled by lower_vector_insert()");
      break;

   case ir_unop_sqrt:
      emit_math(SHADER_OPCODE_SQRT, this->result, op[0]);
      break;

   case ir_unop_rsq:
      emit_math(SHADER_OPCODE_RSQ, this->result, op[0]);
      break;

   case ir_unop_bitcast_i2f:
   case ir_unop_bitcast_u2f:
      op[0].type = BRW_REGISTER_TYPE_F;
      this->result = op[0];
      break;
   case ir_unop_i2u:
   case ir_unop_bitcast_f2u:
      op[0].type = BRW_REGISTER_TYPE_UD;
      this->result = op[0];
      break;
   case ir_unop_u2i:
   case ir_unop_bitcast_f2i:
      op[0].type = BRW_REGISTER_TYPE_D;
      this->result = op[0];
      break;
   case ir_unop_i2f:
   case ir_unop_u2f:
   case ir_unop_f2i:
   case ir_unop_f2u:
      emit(MOV(this->result, op[0]));
      break;

   case ir_unop_b2i:
      emit(AND(this->result, op[0], fs_reg(1)));
      break;
   case ir_unop_b2f:
      temp = fs_reg(this, glsl_type::int_type);
      emit(AND(temp, op[0], fs_reg(1)));
      emit(MOV(this->result, temp));
      break;

   case ir_unop_f2b:
      emit(CMP(this->result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
      break;
   case ir_unop_i2b:
      emit(CMP(this->result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
      break;

   case ir_unop_trunc:
      emit(RNDZ(this->result, op[0]));
      break;
   case ir_unop_ceil:
      op[0].negate = !op[0].negate;
      emit(RNDD(this->result, op[0]));
      this->result.negate = true;
      break;
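
   /* The ceil case above relies on the identity ceil(x) == -floor(-x):
    * the source negate flag is toggled, RNDD rounds down, and the result
    * register is marked negated.  Illustrative check (not from the
    * original source): x = 1.3 gives -floor(-1.3) = -(-2) = 2.
    */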
   case ir_unop_floor:
      emit(RNDD(this->result, op[0]));
      break;
   case ir_unop_fract:
      emit(FRC(this->result, op[0]));
      break;
   case ir_unop_round_even:
      emit(RNDE(this->result, op[0]));
      break;

   case ir_binop_min:
   case ir_binop_max:
      resolve_ud_negate(&op[0]);
      resolve_ud_negate(&op[1]);
      emit_minmax(ir->operation == ir_binop_min ?
                  BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE,
                  this->result, op[0], op[1]);
      break;
   case ir_unop_pack_snorm_2x16:
   case ir_unop_pack_snorm_4x8:
   case ir_unop_pack_unorm_2x16:
   case ir_unop_pack_unorm_4x8:
   case ir_unop_unpack_snorm_2x16:
   case ir_unop_unpack_snorm_4x8:
   case ir_unop_unpack_unorm_2x16:
   case ir_unop_unpack_unorm_4x8:
   case ir_unop_unpack_half_2x16:
   case ir_unop_pack_half_2x16:
      assert(!"not reached: should be handled by lower_packing_builtins");
      break;
   case ir_unop_unpack_half_2x16_split_x:
      emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, this->result, op[0]);
      break;
   case ir_unop_unpack_half_2x16_split_y:
      emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, this->result, op[0]);
      break;
   case ir_binop_pow:
      emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
      break;

   case ir_unop_bitfield_reverse:
      emit(BFREV(this->result, op[0]));
      break;
   case ir_unop_bit_count:
      emit(CBIT(this->result, op[0]));
      break;
   case ir_unop_find_msb:
      temp = fs_reg(this, glsl_type::uint_type);
      emit(FBH(temp, op[0]));

      /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
       * from the LSB side.  If FBH didn't return an error (0xFFFFFFFF), then
       * subtract the result from 31 to convert the MSB count into an LSB count.
       */

      /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
      emit(MOV(this->result, temp));
      emit(CMP(reg_null_d, this->result, fs_reg(-1), BRW_CONDITIONAL_NZ));

      temp.negate = true;
      inst = emit(ADD(this->result, temp, fs_reg(31)));
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;
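
   /* Worked example for the find_msb case above (illustrative, not from
    * the original source): for op[0] = 0x40000000 (bit 30 set), FBH
    * returns 1, the CMP against -1 enables the predicated ADD, and
    * 31 - 1 = 30 matches GLSL findMSB().  For op[0] = 0, FBH returns
    * 0xFFFFFFFF, the predicate is false, and the earlier MOV leaves -1
    * as the result.
    */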
   case ir_unop_find_lsb:
      emit(FBL(this->result, op[0]));
      break;
   case ir_triop_bitfield_extract:
      /* Note that the instruction's argument order is reversed from GLSL
       * and the IR.
       */
      emit(BFE(this->result, op[2], op[1], op[0]));
      break;
   case ir_binop_bfm:
      emit(BFI1(this->result, op[0], op[1]));
      break;
   case ir_triop_bfi:
      emit(BFI2(this->result, op[0], op[1], op[2]));
      break;
   case ir_quadop_bitfield_insert:
      assert(!"not reached: should be handled by "
              "lower_instructions::bitfield_insert_to_bfm_bfi");
      break;

   case ir_unop_bit_not:
      emit(NOT(this->result, op[0]));
      break;
   case ir_binop_bit_and:
      emit(AND(this->result, op[0], op[1]));
      break;
   case ir_binop_bit_xor:
      emit(XOR(this->result, op[0], op[1]));
      break;
   case ir_binop_bit_or:
      emit(OR(this->result, op[0], op[1]));
      break;

   case ir_binop_lshift:
      emit(SHL(this->result, op[0], op[1]));
      break;

   case ir_binop_rshift:
      if (ir->type->base_type == GLSL_TYPE_INT)
         emit(ASR(this->result, op[0], op[1]));
      else
         emit(SHR(this->result, op[0], op[1]));
      break;
   case ir_binop_pack_half_2x16_split:
      emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, this->result, op[0], op[1]);
      break;
   case ir_binop_ubo_load: {
      /* This IR node takes a constant uniform block and a constant or
       * variable byte offset within the block and loads a vector from that.
       */
      ir_constant *uniform_block = ir->operands[0]->as_constant();
      ir_constant *const_offset = ir->operands[1]->as_constant();
      fs_reg surf_index = fs_reg((unsigned)SURF_INDEX_WM_UBO(uniform_block->value.u[0]));
      if (const_offset) {
         fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
         packed_consts.type = result.type;

         fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] & ~15);
         emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
                      packed_consts, surf_index, const_offset_reg));

         packed_consts.smear = const_offset->value.u[0] % 16 / 4;
         for (int i = 0; i < ir->type->vector_elements; i++) {
            /* UBO bools are any nonzero value.  We consider bools to be
             * values with the low bit set to 1.  Convert them using CMP.
             */
            if (ir->type->base_type == GLSL_TYPE_BOOL) {
               emit(CMP(result, packed_consts, fs_reg(0u), BRW_CONDITIONAL_NZ));
            } else {
               emit(MOV(result, packed_consts));
            }

            packed_consts.smear++;
            result.reg_offset++;

            /* The std140 packing rules don't allow vectors to cross 16-byte
             * boundaries, and a reg is 32 bytes.
             */
            assert(packed_consts.smear < 8);
         }
      } else {
         /* Turn the byte offset into a dword offset. */
         fs_reg base_offset = fs_reg(this, glsl_type::int_type);
         emit(SHR(base_offset, op[1], fs_reg(2)));

         for (int i = 0; i < ir->type->vector_elements; i++) {
            emit(VARYING_PULL_CONSTANT_LOAD(result, surf_index,
                                            base_offset, i));

            if (ir->type->base_type == GLSL_TYPE_BOOL)
               emit(CMP(result, result, fs_reg(0), BRW_CONDITIONAL_NZ));

            result.reg_offset++;
         }
      }

      result.reg_offset = 0;
      break;
   }
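
   /* Smear arithmetic in the constant-offset path above, by way of
    * example (illustrative, not from the original source): a byte offset
    * of 20 loads the 16-byte-aligned block at offset 16 (20 & ~15) and
    * starts reading at smear = (20 % 16) / 4 = 1, i.e. the second dword
    * of the pulled constants.
    */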

   case ir_triop_lrp:
      emit_lrp(this->result, op[0], op[1], op[2]);
      break;
   }
}

void
fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
                                   const glsl_type *type, bool predicated)
{
   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      for (unsigned int i = 0; i < type->components(); i++) {
         l.type = brw_type_for_base_type(type);
         r.type = brw_type_for_base_type(type);

         if (predicated || !l.equals(r)) {
            fs_inst *inst = emit(MOV(l, r));
            inst->predicate = predicated ? BRW_PREDICATE_NORMAL : BRW_PREDICATE_NONE;
         }

         l.reg_offset++;
         r.reg_offset++;
      }
      break;
   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
         emit_assignment_writes(l, r, type->fields.array, predicated);
      }
      break;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
         emit_assignment_writes(l, r, type->fields.structure[i].type,
                                predicated);
      }
      break;

   case GLSL_TYPE_SAMPLER:
      break;

   case GLSL_TYPE_VOID:
   case GLSL_TYPE_ERROR:
   case GLSL_TYPE_INTERFACE:
      assert(!"not reached");
      break;
   }
}

/* If the RHS processing resulted in an instruction generating a
 * temporary value, and it would be easy to rewrite the instruction to
 * generate its result right into the LHS instead, do so.  This ends
 * up reliably removing instructions where it can be tricky to do so
 * later without real UD chain information.
 */
bool
fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
                                   fs_reg dst,
                                   fs_reg src,
                                   fs_inst *pre_rhs_inst,
                                   fs_inst *last_rhs_inst)
{
   /* Only attempt if we're doing a direct assignment. */
   if (ir->condition ||
       !(ir->lhs->type->is_scalar() ||
         (ir->lhs->type->is_vector() &&
          ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1)))
      return false;

   /* Make sure the last instruction generated our source reg. */
   fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst,
                                                    last_rhs_inst,
                                                    src);
   if (!modify)
      return false;

   /* If last_rhs_inst wrote a different number of components than our LHS,
    * we can't safely rewrite it.
    */
   if (virtual_grf_sizes[dst.reg] != modify->regs_written)
      return false;

   /* Success!  Rewrite the instruction. */
   modify->dst = dst;

   return true;
}
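
/* Illustration (not from the original source): for "v = a + b", the RHS
 * visit emits "ADD tmp, a, b" and the assignment would normally append
 * "MOV v, tmp".  Rewriting the ADD's destination to v makes that MOV
 * unnecessary before any copy propagation runs.
 */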

void
fs_visitor::visit(ir_assignment *ir)
{
   fs_reg l, r;
   fs_inst *inst;

   /* FINISHME: arrays on the lhs */
   ir->lhs->accept(this);
   l = this->result;

   fs_inst *pre_rhs_inst = (fs_inst *) this->instructions.get_tail();

   ir->rhs->accept(this);
   r = this->result;

   fs_inst *last_rhs_inst = (fs_inst *) this->instructions.get_tail();

   assert(l.file != BAD_FILE);
   assert(r.file != BAD_FILE);

   if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst))
      return;

   if (ir->condition) {
      emit_bool_to_cond_code(ir->condition);
   }

   if (ir->lhs->type->is_scalar() ||
       ir->lhs->type->is_vector()) {
      for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
         if (ir->write_mask & (1 << i)) {
            inst = emit(MOV(l, r));
            if (ir->condition)
               inst->predicate = BRW_PREDICATE_NORMAL;
            r.reg_offset++;
         }
         l.reg_offset++;
      }
   } else {
      emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL);
   }
}

fs_inst *
fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
                              fs_reg shadow_c, fs_reg lod, fs_reg dPdy)
{
   int mlen;
   int base_mrf = 1;
   bool simd16 = false;
   fs_reg orig_dst;

   /* g0 header. */
   mlen = 1;

   if (ir->shadow_comparitor) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
         coordinate.reg_offset++;
      }

      /* gen4's SIMD8 sampler always has the slots for u,v,r present.
       * The unused slots must be zeroed.
       */
      for (int i = ir->coordinate->type->vector_elements; i < 3; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)));
      }
      mlen += 3;

      if (ir->op == ir_tex) {
         /* There's no plain shadow compare message, so we use shadow
          * compare with a bias of 0.0.
          */
         emit(MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)));
         mlen++;
      } else if (ir->op == ir_txb || ir->op == ir_txl) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
         mlen++;
      } else {
         assert(!"Should not get here.");
      }

      emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
      mlen++;
   } else if (ir->op == ir_tex) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
         coordinate.reg_offset++;
      }
      /* zero the others. */
      for (int i = ir->coordinate->type->vector_elements; i < 3; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)));
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
      mlen += 3;
   } else if (ir->op == ir_txd) {
      fs_reg &dPdx = lod;

      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
         coordinate.reg_offset++;
      }
      /* the slots for u and v are always present, but r is optional */
      mlen += MAX2(ir->coordinate->type->vector_elements, 2);

      /*  P   = u, v, r
       * dPdx = dudx, dvdx, drdx
       * dPdy = dudy, dvdy, drdy
       *
       * 1-arg: Does not exist.
       *
       * 2-arg: dudx   dvdx   dudy   dvdy
       *        dPdx.x dPdx.y dPdy.x dPdy.y
       *        m4     m5     m6     m7
       *
       * 3-arg: dudx   dvdx   drdx   dudy   dvdy   drdy
       *        dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z
       *        m5     m6     m7     m8     m9     m10
       */
      for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdx));
         dPdx.reg_offset++;
      }
      mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2);

      for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdy));
         dPdy.reg_offset++;
      }
      mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2);
   } else if (ir->op == ir_txs) {
      /* There's no SIMD8 resinfo message on Gen4.  Use SIMD16 instead. */
      simd16 = true;
      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
      mlen += 2;
   } else {
      /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
       * instructions.  We'll need to do SIMD16 here.
       */
      simd16 = true;
      assert(ir->op == ir_txb || ir->op == ir_txl || ir->op == ir_txf);

      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type),
                  coordinate));
         coordinate.reg_offset++;
      }

      /* Initialize the rest of u/v/r with 0.0.  Empirically, this seems to
       * be necessary for TXF (ld), but seems wise to do for all messages.
       */
      for (int i = ir->coordinate->type->vector_elements; i < 3; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f)));
      }

      /* lod/bias appears after u/v/r. */
      mlen += 6;

      emit(MOV(fs_reg(MRF, base_mrf + mlen, lod.type), lod));
      mlen++;

      /* The unused upper half. */
      mlen++;
   }

   if (simd16) {
      /* Now, since we're doing simd16, the return is 2 interleaved
       * vec4s where the odd-indexed ones are junk. We'll need to move
       * this weirdness around to the expected layout.
       */
      orig_dst = dst;
      dst = fs_reg(GRF, virtual_grf_alloc(8),
                   (brw->is_g4x ?
                    brw_type_for_base_type(ir->type) :
                    BRW_REGISTER_TYPE_F));
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(SHADER_OPCODE_TEX, dst);
      break;
   case ir_txb:
      inst = emit(FS_OPCODE_TXB, dst);
      break;
   case ir_txl:
      inst = emit(SHADER_OPCODE_TXL, dst);
      break;
   case ir_txd:
      inst = emit(SHADER_OPCODE_TXD, dst);
      break;
   case ir_txs:
      inst = emit(SHADER_OPCODE_TXS, dst);
      break;
   case ir_txf:
      inst = emit(SHADER_OPCODE_TXF, dst);
      break;
   default:
      fail("unrecognized texture opcode");
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = true;
   inst->regs_written = simd16 ? 8 : 4;

   if (simd16) {
      for (int i = 0; i < 4; i++) {
         emit(MOV(orig_dst, dst));
         orig_dst.reg_offset++;
         dst.reg_offset += 2;
      }
   }

   return inst;
}
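
/* Layout note for the SIMD16 fixup above (illustrative, not from the
 * original source): the sampler writes eight registers with the real
 * channels at even offsets 0/2/4/6 and junk at the odd offsets; the
 * copy loop reads the even offsets and packs them into the four
 * registers of orig_dst.
 */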

/* gen5's sampler has slots for u, v, r, array index, then optional
 * parameters like shadow comparitor or LOD bias.  If optional
 * parameters aren't present, those base slots are optional and don't
 * need to be included in the message.
 *
 * We don't fill in the unnecessary slots regardless, which may look
 * surprising in the disassembly.
 */
fs_inst *
fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
                              fs_reg shadow_c, fs_reg lod, fs_reg lod2,
                              fs_reg sample_index)
{
   int mlen = 0;
   int base_mrf = 2;
   int reg_width = dispatch_width / 8;
   bool header_present = false;
   const int vector_elements =
      ir->coordinate ? ir->coordinate->type->vector_elements : 0;

   if (ir->offset != NULL && ir->op == ir_txf) {
      /* It appears that the ld instruction used for txf does its
       * address bounds check before adding in the offset.  To work
       * around this, just add the integer offset to the integer texel
       * coordinate, and don't put the offset in the header.
       */
      ir_constant *offset = ir->offset->as_constant();
      for (int i = 0; i < vector_elements; i++) {
         emit(ADD(fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type),
                  coordinate,
                  offset->value.i[i]));
         coordinate.reg_offset++;
      }
   } else {
      if (ir->offset) {
         /* The offsets set up by the ir_texture visitor are in the
          * m1 header, so we can't go headerless.
          */
         header_present = true;
         mlen++;
         base_mrf--;
      }

      for (int i = 0; i < vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type),
                  coordinate));
         coordinate.reg_offset++;
      }
   }
   mlen += vector_elements * reg_width;

   if (ir->shadow_comparitor) {
      mlen = MAX2(mlen, header_present + 4 * reg_width);

      emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
      mlen += reg_width;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(SHADER_OPCODE_TEX, dst);
      break;
   case ir_txb:
      mlen = MAX2(mlen, header_present + 4 * reg_width);
      emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
      mlen += reg_width;

      inst = emit(FS_OPCODE_TXB, dst);
      break;
   case ir_txl:
      mlen = MAX2(mlen, header_present + 4 * reg_width);
      emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
      mlen += reg_width;

      inst = emit(SHADER_OPCODE_TXL, dst);
      break;
   case ir_txd: {
      mlen = MAX2(mlen, header_present + 4 * reg_width); /* skip over 'ai' */

      /**
       *  P   =  u,    v,    r
       * dPdx = dudx, dvdx, drdx
       * dPdy = dudy, dvdy, drdy
       *
       * Load up these values:
       * - dudx   dudy   dvdx   dvdy   drdx   drdy
       * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z
       */
      for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
         lod.reg_offset++;
         mlen += reg_width;

         emit(MOV(fs_reg(MRF, base_mrf + mlen), lod2));
         lod2.reg_offset++;
         mlen += reg_width;
      }

      inst = emit(SHADER_OPCODE_TXD, dst);
      break;
   }
   case ir_txs:
      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
      mlen += reg_width;
      inst = emit(SHADER_OPCODE_TXS, dst);
      break;
   case ir_txf:
      mlen = header_present + 4 * reg_width;
      emit(MOV(fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD), lod));
      inst = emit(SHADER_OPCODE_TXF, dst);
      break;
   case ir_txf_ms:
      mlen = header_present + 4 * reg_width;

      /* lod */
      emit(MOV(fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD), fs_reg(0)));
      /* sample index */
      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), sample_index));
      mlen += reg_width;
      inst = emit(SHADER_OPCODE_TXF_MS, dst);
      break;
   case ir_lod:
      inst = emit(SHADER_OPCODE_LOD, dst);
      break;
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = header_present;
   inst->regs_written = 4;

   if (mlen > 11) {
      fail("Message length >11 disallowed by hardware\n");
   }

   return inst;
}

fs_inst *
fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
                              fs_reg shadow_c, fs_reg lod, fs_reg lod2,
                              fs_reg sample_index)
{
   int mlen = 0;
   int base_mrf = 2;
   int reg_width = dispatch_width / 8;
   bool header_present = false;
   int offsets[3];

   if (ir->offset && ir->op != ir_txf) {
      /* The offsets set up by the ir_texture visitor are in the
       * m1 header, so we can't go headerless.
       */
      header_present = true;
      mlen++;
      base_mrf--;
   }

   if (ir->shadow_comparitor) {
      emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
      mlen += reg_width;
   }

   /* Set up the LOD info */
   switch (ir->op) {
   case ir_tex:
   case ir_lod:
      break;
   case ir_txb:
      emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
      mlen += reg_width;
      break;
   case ir_txl:
      emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
      mlen += reg_width;
      break;
   case ir_txd: {
      if (dispatch_width == 16)
         fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");

      /* Load dPdx and the coordinate together:
       * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
       */
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen), coordinate));
         coordinate.reg_offset++;
         mlen += reg_width;

         /* For cube map array, the coordinate is (u,v,r,ai) but there are
          * only derivatives for (u, v, r).
          */
         if (i < ir->lod_info.grad.dPdx->type->vector_elements) {
            emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
            lod.reg_offset++;
            mlen += reg_width;

            emit(MOV(fs_reg(MRF, base_mrf + mlen), lod2));
            lod2.reg_offset++;
            mlen += reg_width;
         }
      }
      break;
   }
   case ir_txs:
      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
      mlen += reg_width;
      break;
   case ir_txf:
      /* It appears that the ld instruction used for txf does its
       * address bounds check before adding in the offset.  To work
       * around this, just add the integer offset to the integer texel
       * coordinate, and don't put the offset in the header.
       */
      if (ir->offset) {
         ir_constant *offset = ir->offset->as_constant();
         offsets[0] = offset->value.i[0];
         offsets[1] = offset->value.i[1];
         offsets[2] = offset->value.i[2];
      } else {
         memset(offsets, 0, sizeof(offsets));
      }

      /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */
      emit(ADD(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
               coordinate, offsets[0]));
      coordinate.reg_offset++;
      mlen += reg_width;

      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), lod));
      mlen += reg_width;

      for (int i = 1; i < ir->coordinate->type->vector_elements; i++) {
         emit(ADD(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
                  coordinate, offsets[i]));
         coordinate.reg_offset++;
         mlen += reg_width;
      }
      break;
   case ir_txf_ms:
      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), sample_index));
      mlen += reg_width;

      /* constant zero MCS; we arrange to never actually have a compressed
       * multisample surface here for now. TODO: issue ld_mcs to get this first,
       * if we ever support texturing from compressed multisample surfaces
       */
      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), fs_reg(0u)));
      mlen += reg_width;

      /* there is no offsetting for this message; just copy in the integer
       * texture coordinates
       */
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
                  coordinate));
         coordinate.reg_offset++;
         mlen += reg_width;
      }
      break;
   }

   /* Set up the coordinate (except for cases where it was done above) */
   if (ir->op != ir_txd && ir->op != ir_txs && ir->op != ir_txf && ir->op != ir_txf_ms) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen), coordinate));
         coordinate.reg_offset++;
         mlen += reg_width;
      }
   }

   /* Generate the SEND */
   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst); break;
   case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
   case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst); break;
   case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst); break;
   case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst); break;
   case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst); break;
   case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break;
   case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst); break;
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = header_present;
   inst->regs_written = 4;

   if (mlen > 11) {
      fail("Message length >11 disallowed by hardware\n");
   }

   return inst;
}

fs_reg
fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate,
                             bool is_rect, int sampler, int texunit)
{
   fs_inst *inst = NULL;
   bool needs_gl_clamp = true;
   fs_reg scale_x, scale_y;

   /* The 965 requires the EU to do the normalization of GL rectangle
    * texture coordinates.  We use the program parameter state
    * tracking to get the scaling factor.
    */
   if (is_rect &&
       (brw->gen < 6 ||
        (brw->gen >= 6 && (c->key.tex.gl_clamp_mask[0] & (1 << sampler) ||
                           c->key.tex.gl_clamp_mask[1] & (1 << sampler))))) {
      struct gl_program_parameter_list *params = fp->Base.Parameters;
      int tokens[STATE_LENGTH] = {
         STATE_INTERNAL,
         STATE_TEXRECT_SCALE,
         texunit,
         0,
         0
      };

      if (dispatch_width == 16) {
         fail("rectangle scale uniform setup not supported on 16-wide\n");
         return coordinate;
      }

      scale_x = fs_reg(UNIFORM, c->prog_data.nr_params);
      scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1);

      GLuint index = _mesa_add_state_reference(params,
                                               (gl_state_index *)tokens);
      c->prog_data.param[c->prog_data.nr_params++] =
         &fp->Base.Parameters->ParameterValues[index][0].f;
      c->prog_data.param[c->prog_data.nr_params++] =
         &fp->Base.Parameters->ParameterValues[index][1].f;
   }

   /* The 965 requires the EU to do the normalization of GL rectangle
    * texture coordinates.  We use the program parameter state
    * tracking to get the scaling factor.
    */
   if (brw->gen < 6 && is_rect) {
      fs_reg dst = fs_reg(this, ir->coordinate->type);
      fs_reg src = coordinate;
      coordinate = dst;

      emit(MUL(dst, src, scale_x));
      dst.reg_offset++;
      src.reg_offset++;
      emit(MUL(dst, src, scale_y));
   } else if (is_rect) {
      /* On gen6+, the sampler handles the rectangle coordinates
       * natively, without needing rescaling.  But that means we have
       * to do GL_CLAMP clamping at the [0, width], [0, height] scale,
       * not [0, 1] like the default case below.
       */
      needs_gl_clamp = false;

      for (int i = 0; i < 2; i++) {
         if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) {
            fs_reg chan = coordinate;
            chan.reg_offset += i;

            inst = emit(BRW_OPCODE_SEL, chan, chan, brw_imm_f(0.0));
            inst->conditional_mod = BRW_CONDITIONAL_G;

            /* Our parameter comes in as 1.0/width or 1.0/height,
             * because that's what people normally want for doing
             * texture rectangle handling.  We need width or height
             * for clamping, but we don't care enough to make a new
             * parameter type, so just invert back.
             */
            fs_reg limit = fs_reg(this, glsl_type::float_type);
            emit(MOV(limit, i == 0 ? scale_x : scale_y));
            emit(SHADER_OPCODE_RCP, limit, limit);

            inst = emit(BRW_OPCODE_SEL, chan, chan, limit);
            inst->conditional_mod = BRW_CONDITIONAL_L;
         }
      }
   }

   if (ir->coordinate && needs_gl_clamp) {
      for (unsigned int i = 0;
           i < MIN2(ir->coordinate->type->vector_elements, 3); i++) {
         if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) {
            fs_reg chan = coordinate;
            chan.reg_offset += i;

            fs_inst *inst = emit(MOV(chan, chan));
            inst->saturate = true;
         }
      }
   }
   return coordinate;
}
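
/* GL_CLAMP detail for the gen6+ rectangle path above (illustrative, not
 * from the original source): the uniform holds 1.0/width (or
 * 1.0/height), so the SHADER_OPCODE_RCP recovers the actual dimension
 * and the SEL.G / SEL.L pair clamps the coordinate to [0, width] or
 * [0, height], instead of the [0, 1] saturate in the default case.
 */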
  1421.  
  1422. void
  1423. fs_visitor::visit(ir_texture *ir)
  1424. {
  1425.    fs_inst *inst = NULL;
  1426.  
  1427.    int sampler =
  1428.       _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, &fp->Base);
  1429.    /* FINISHME: We're failing to recompile our programs when the sampler is
  1430.     * updated.  This only matters for the texture rectangle scale parameters
  1431.     * (pre-gen6, or gen6+ with GL_CLAMP).
  1432.     */
  1433.    int texunit = fp->Base.SamplerUnits[sampler];
  1434.  
  1435.    /* Should be lowered by do_lower_texture_projection */
  1436.    assert(!ir->projector);
  1437.  
  1438.    /* Generate code to compute all the subexpression trees.  This has to be
  1439.     * done before loading any values into MRFs for the sampler message since
  1440.     * generating these values may involve SEND messages that need the MRFs.
  1441.     */
  1442.    fs_reg coordinate;
  1443.    if (ir->coordinate) {
  1444.       ir->coordinate->accept(this);
  1445.  
  1446.       coordinate = rescale_texcoord(ir, this->result,
  1447.                                     ir->sampler->type->sampler_dimensionality ==
  1448.                                     GLSL_SAMPLER_DIM_RECT,
  1449.                                     sampler, texunit);
  1450.    }
  1451.  
  1452.    fs_reg shadow_comparitor;
  1453.    if (ir->shadow_comparitor) {
  1454.       ir->shadow_comparitor->accept(this);
  1455.       shadow_comparitor = this->result;
  1456.    }
  1457.  
  1458.    fs_reg lod, lod2, sample_index;
  1459.    switch (ir->op) {
  1460.    case ir_tex:
  1461.    case ir_lod:
  1462.       break;
  1463.    case ir_txb:
  1464.       ir->lod_info.bias->accept(this);
  1465.       lod = this->result;
  1466.       break;
  1467.    case ir_txd:
  1468.       ir->lod_info.grad.dPdx->accept(this);
  1469.       lod = this->result;
  1470.  
  1471.       ir->lod_info.grad.dPdy->accept(this);
  1472.       lod2 = this->result;
  1473.       break;
  1474.    case ir_txf:
  1475.    case ir_txl:
  1476.    case ir_txs:
  1477.       ir->lod_info.lod->accept(this);
  1478.       lod = this->result;
  1479.       break;
  1480.    case ir_txf_ms:
  1481.       ir->lod_info.sample_index->accept(this);
  1482.       sample_index = this->result;
  1483.       break;
  1484.    }
  1485.  
  1486.    /* Writemasking doesn't eliminate channels on SIMD8 texture
  1487.     * samples, so don't worry about them.
  1488.     */
  1489.    fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1));
  1490.  
  1491.    if (brw->gen >= 7) {
  1492.       inst = emit_texture_gen7(ir, dst, coordinate, shadow_comparitor,
  1493.                                lod, lod2, sample_index);
  1494.    } else if (brw->gen >= 5) {
  1495.       inst = emit_texture_gen5(ir, dst, coordinate, shadow_comparitor,
  1496.                                lod, lod2, sample_index);
  1497.    } else {
  1498.       inst = emit_texture_gen4(ir, dst, coordinate, shadow_comparitor,
  1499.                                lod, lod2);
  1500.    }
  1501.  
  1502.    /* The header is set up by generate_tex() when necessary. */
  1503.    inst->src[0] = reg_undef;
  1504.  
  1505.    if (ir->offset != NULL && ir->op != ir_txf)
  1506.       inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
  1507.  
  1508.    inst->sampler = sampler;
  1509.  
  1510.    if (ir->shadow_comparitor)
  1511.       inst->shadow_compare = true;
  1512.  
  1513.    /* Fix up the #layers for cube map arrays. */
  1514.    if (ir->op == ir_txs) {
  1515.       glsl_type const *type = ir->sampler->type;
  1516.       if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
  1517.           type->sampler_array) {
  1518.          fs_reg depth = dst;
  1519.          depth.reg_offset = 2;
  1520.          emit_math(SHADER_OPCODE_INT_QUOTIENT, depth, depth, fs_reg(6));
  1521.       }
  1522.    }
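   /* Editor's illustration of the fixup above: a cube map array with 4
    * layers is stored as 4 * 6 = 24 faces, so the hardware's size query
    * reports a depth of 24; dividing by 6 recovers the 4 layers that
    * GLSL's textureSize() is specified to return.
    */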
  1523.  
  1524.    swizzle_result(ir, dst, sampler);
  1525. }
  1526.  
  1527. /**
  1528.  * Swizzle the result of a texture lookup.  This is necessary for
  1529.  * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons.
  1530.  */
  1531. void
  1532. fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler)
  1533. {
  1534.    this->result = orig_val;
  1535.  
  1536.    if (ir->op == ir_txs || ir->op == ir_lod)
  1537.       return;
  1538.  
  1539.    if (ir->type == glsl_type::float_type) {
  1540.       /* Ignore DEPTH_TEXTURE_MODE swizzling. */
  1541.       assert(ir->sampler->type->sampler_shadow);
  1542.    } else if (c->key.tex.swizzles[sampler] != SWIZZLE_NOOP) {
  1543.       fs_reg swizzled_result = fs_reg(this, glsl_type::vec4_type);
  1544.  
  1545.       for (int i = 0; i < 4; i++) {
  1546.          int swiz = GET_SWZ(c->key.tex.swizzles[sampler], i);
  1547.          fs_reg l = swizzled_result;
  1548.          l.reg_offset += i;
  1549.  
  1550.          if (swiz == SWIZZLE_ZERO) {
  1551.             emit(MOV(l, fs_reg(0.0f)));
  1552.          } else if (swiz == SWIZZLE_ONE) {
  1553.             emit(MOV(l, fs_reg(1.0f)));
  1554.          } else {
  1555.             fs_reg r = orig_val;
  1556.             r.reg_offset += swiz;
  1557.             emit(MOV(l, r));
  1558.          }
  1559.       }
  1560.       this->result = swizzled_result;
  1561.    }
  1562. }
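
/* A minimal sketch (editor's addition, not part of the driver) of the
 * remapping the loop above performs, using plain arrays in place of
 * fs_regs.  The 3-bit GET_SWZ fields select a source channel (0..3) or
 * the constants SWIZZLE_ZERO (4) / SWIZZLE_ONE (5):
 */
#if 0
static void
apply_swizzle_example(const float in[4], float out[4], const int swz[4])
{
   for (int i = 0; i < 4; i++) {
      if (swz[i] == 4)           /* SWIZZLE_ZERO: force the channel to 0 */
         out[i] = 0.0f;
      else if (swz[i] == 5)      /* SWIZZLE_ONE: force the channel to 1 */
         out[i] = 1.0f;
      else                       /* SWIZZLE_X..SWIZZLE_W: copy a channel */
         out[i] = in[swz[i]];
   }
}
#endif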
  1563.  
  1564. void
  1565. fs_visitor::visit(ir_swizzle *ir)
  1566. {
  1567.    ir->val->accept(this);
  1568.    fs_reg val = this->result;
  1569.  
  1570.    if (ir->type->vector_elements == 1) {
  1571.       this->result.reg_offset += ir->mask.x;
  1572.       return;
  1573.    }
  1574.  
  1575.    fs_reg result = fs_reg(this, ir->type);
  1576.    this->result = result;
  1577.  
  1578.    for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
  1579.       fs_reg channel = val;
  1580.       int swiz = 0;
  1581.  
  1582.       switch (i) {
  1583.       case 0:
  1584.          swiz = ir->mask.x;
  1585.          break;
  1586.       case 1:
  1587.          swiz = ir->mask.y;
  1588.          break;
  1589.       case 2:
  1590.          swiz = ir->mask.z;
  1591.          break;
  1592.       case 3:
  1593.          swiz = ir->mask.w;
  1594.          break;
  1595.       }
  1596.  
  1597.       channel.reg_offset += swiz;
  1598.       emit(MOV(result, channel));
  1599.       result.reg_offset++;
  1600.    }
  1601. }
  1602.  
  1603. void
  1604. fs_visitor::visit(ir_discard *ir)
  1605. {
  1606.    assert(ir->condition == NULL); /* FINISHME */
  1607.  
  1608.    /* We track our discarded pixels in f0.1.  By predicating on it, we can
  1609.     * update just the flag bits that aren't yet discarded.  By emitting a
  1610.     * CMP of g0 != g0, all our currently executing channels will get turned
  1611.     * off.
  1612.     */
  1613.    fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
  1614.                                    BRW_REGISTER_TYPE_UW));
  1615.    fs_inst *cmp = emit(CMP(reg_null_f, some_reg, some_reg,
  1616.                            BRW_CONDITIONAL_NZ));
  1617.    cmp->predicate = BRW_PREDICATE_NORMAL;
  1618.    cmp->flag_subreg = 1;
  1619.  
  1620.    if (brw->gen >= 6) {
  1621.       /* For performance, after a discard, jump to the end of the shader.
  1622.        * However, many people will do foliage by discarding based on a
  1623.        * texture's alpha mask, and then continue on to texture with the
  1624.        * remaining pixels.  To avoid trashing the derivatives for those
  1625.        * texture samples, we'll only jump if all of the pixels in the subspan
  1626.        * have been discarded.
  1627.        */
  1628.       fs_inst *discard_jump = emit(FS_OPCODE_DISCARD_JUMP);
  1629.       discard_jump->flag_subreg = 1;
  1630.       discard_jump->predicate = BRW_PREDICATE_ALIGN1_ANY4H;
  1631.       discard_jump->predicate_inverse = true;
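      /* Editor's note: ALIGN1_ANY4H predication evaluates each aligned
       * group of four channels -- one 2x2 subspan -- and is true if any
       * of the four flag bits is set.  f0.1 has a set bit for each
       * still-live pixel, so with predicate_inverse the jump executes
       * only for subspans whose pixels have all been discarded.
       */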
  1632.    }
  1633. }
  1634.  
  1635. void
  1636. fs_visitor::visit(ir_constant *ir)
  1637. {
  1638.    /* Set this->result to reg at the bottom of the function because some code
  1639.     * paths apply this visitor to other fields, which would clobber the
  1640.     * value stored in this->result along the way.
  1641.     *
  1642.     * Make reg constant so that it doesn't get accidentally modified along the
  1643.     * way.  Yes, I actually had this problem. :(
  1644.     */
  1645.    const fs_reg reg(this, ir->type);
  1646.    fs_reg dst_reg = reg;
  1647.  
  1648.    if (ir->type->is_array()) {
  1649.       const unsigned size = type_size(ir->type->fields.array);
  1650.  
  1651.       for (unsigned i = 0; i < ir->type->length; i++) {
  1652.          ir->array_elements[i]->accept(this);
  1653.          fs_reg src_reg = this->result;
  1654.  
  1655.          dst_reg.type = src_reg.type;
  1656.          for (unsigned j = 0; j < size; j++) {
  1657.             emit(MOV(dst_reg, src_reg));
  1658.             src_reg.reg_offset++;
  1659.             dst_reg.reg_offset++;
  1660.          }
  1661.       }
  1662.    } else if (ir->type->is_record()) {
  1663.       foreach_list(node, &ir->components) {
  1664.          ir_constant *const field = (ir_constant *) node;
  1665.          const unsigned size = type_size(field->type);
  1666.  
  1667.          field->accept(this);
  1668.          fs_reg src_reg = this->result;
  1669.  
  1670.          dst_reg.type = src_reg.type;
  1671.          for (unsigned j = 0; j < size; j++) {
  1672.             emit(MOV(dst_reg, src_reg));
  1673.             src_reg.reg_offset++;
  1674.             dst_reg.reg_offset++;
  1675.          }
  1676.       }
  1677.    } else {
  1678.       const unsigned size = type_size(ir->type);
  1679.  
  1680.       for (unsigned i = 0; i < size; i++) {
  1681.          switch (ir->type->base_type) {
  1682.          case GLSL_TYPE_FLOAT:
  1683.             emit(MOV(dst_reg, fs_reg(ir->value.f[i])));
  1684.             break;
  1685.          case GLSL_TYPE_UINT:
  1686.             emit(MOV(dst_reg, fs_reg(ir->value.u[i])));
  1687.             break;
  1688.          case GLSL_TYPE_INT:
  1689.             emit(MOV(dst_reg, fs_reg(ir->value.i[i])));
  1690.             break;
  1691.          case GLSL_TYPE_BOOL:
  1692.             emit(MOV(dst_reg, fs_reg((int)ir->value.b[i])));
  1693.             break;
  1694.          default:
  1695.             assert(!"Non-float/uint/int/bool constant");
  1696.          }
  1697.          dst_reg.reg_offset++;
  1698.       }
  1699.    }
  1700.  
  1701.    this->result = reg;
  1702. }
  1703.  
  1704. void
  1705. fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
  1706. {
  1707.    ir_expression *expr = ir->as_expression();
  1708.  
  1709.    if (expr) {
  1710.       fs_reg op[2];
  1711.       fs_inst *inst;
  1712.  
  1713.       assert(expr->get_num_operands() <= 2);
  1714.       for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
  1715.          assert(expr->operands[i]->type->is_scalar());
  1716.  
  1717.          expr->operands[i]->accept(this);
  1718.          op[i] = this->result;
  1719.  
  1720.          resolve_ud_negate(&op[i]);
  1721.       }
  1722.  
  1723.       switch (expr->operation) {
  1724.       case ir_unop_logic_not:
  1725.          inst = emit(AND(reg_null_d, op[0], fs_reg(1)));
  1726.          inst->conditional_mod = BRW_CONDITIONAL_Z;
  1727.          break;
  1728.  
  1729.       case ir_binop_logic_xor:
  1730.       case ir_binop_logic_or:
  1731.       case ir_binop_logic_and:
  1732.          goto out;
  1733.  
  1734.       case ir_unop_f2b:
  1735.          if (brw->gen >= 6) {
  1736.             emit(CMP(reg_null_d, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
  1737.          } else {
  1738.             inst = emit(MOV(reg_null_f, op[0]));
  1739.             inst->conditional_mod = BRW_CONDITIONAL_NZ;
  1740.          }
  1741.          break;
  1742.  
  1743.       case ir_unop_i2b:
  1744.          if (brw->gen >= 6) {
  1745.             emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
  1746.          } else {
  1747.             inst = emit(MOV(reg_null_d, op[0]));
  1748.             inst->conditional_mod = BRW_CONDITIONAL_NZ;
  1749.          }
  1750.          break;
  1751.  
  1752.       case ir_binop_greater:
  1753.       case ir_binop_gequal:
  1754.       case ir_binop_less:
  1755.       case ir_binop_lequal:
  1756.       case ir_binop_equal:
  1757.       case ir_binop_all_equal:
  1758.       case ir_binop_nequal:
  1759.       case ir_binop_any_nequal:
  1760.          resolve_bool_comparison(expr->operands[0], &op[0]);
  1761.          resolve_bool_comparison(expr->operands[1], &op[1]);
  1762.  
  1763.          emit(CMP(reg_null_d, op[0], op[1],
  1764.                   brw_conditional_for_comparison(expr->operation)));
  1765.          break;
  1766.  
  1767.       default:
  1768.          assert(!"not reached");
  1769.          fail("bad cond code\n");
  1770.          break;
  1771.       }
  1772.       return;
  1773.    }
  1774.  
  1775. out:
  1776.    ir->accept(this);
  1777.  
  1778.    fs_inst *inst = emit(AND(reg_null_d, this->result, fs_reg(1)));
  1779.    inst->conditional_mod = BRW_CONDITIONAL_NZ;
  1780. }
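
/* Editor's note: the AND with 1 above matters because this backend keeps
 * GLSL booleans in 32-bit registers where only the low bit is defined (see
 * the comment on bool arguments in emit_if_gen6() below), so the value is
 * masked before the NZ test.
 */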
  1781.  
  1782. /**
  1783.  * Emit a gen6 IF statement with the comparison folded into the IF
  1784.  * instruction.
  1785.  */
  1786. void
  1787. fs_visitor::emit_if_gen6(ir_if *ir)
  1788. {
  1789.    ir_expression *expr = ir->condition->as_expression();
  1790.  
  1791.    if (expr) {
  1792.       fs_reg op[2];
  1793.       fs_inst *inst;
  1794.       fs_reg temp;
  1795.  
  1796.       assert(expr->get_num_operands() <= 2);
  1797.       for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
  1798.          assert(expr->operands[i]->type->is_scalar());
  1799.  
  1800.          expr->operands[i]->accept(this);
  1801.          op[i] = this->result;
  1802.       }
  1803.  
  1804.       switch (expr->operation) {
  1805.       case ir_unop_logic_not:
  1806.       case ir_binop_logic_xor:
  1807.       case ir_binop_logic_or:
  1808.       case ir_binop_logic_and:
  1809.          /* For operations on bool arguments, only the low bit of the bool is
  1810.           * valid, and the others are undefined.  Fall back to the condition
  1811.           * code path.
  1812.           */
  1813.          break;
  1814.  
  1815.       case ir_unop_f2b:
  1816.          inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0));
  1817.          inst->conditional_mod = BRW_CONDITIONAL_NZ;
  1818.          return;
  1819.  
  1820.       case ir_unop_i2b:
  1821.          emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
  1822.          return;
  1823.  
  1824.       case ir_binop_greater:
  1825.       case ir_binop_gequal:
  1826.       case ir_binop_less:
  1827.       case ir_binop_lequal:
  1828.       case ir_binop_equal:
  1829.       case ir_binop_all_equal:
  1830.       case ir_binop_nequal:
  1831.       case ir_binop_any_nequal:
  1832.          resolve_bool_comparison(expr->operands[0], &op[0]);
  1833.          resolve_bool_comparison(expr->operands[1], &op[1]);
  1834.  
  1835.          emit(IF(op[0], op[1],
  1836.                  brw_conditional_for_comparison(expr->operation)));
  1837.          return;
  1838.       default:
  1839.          assert(!"not reached");
  1840.          emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
  1841.          fail("bad condition\n");
  1842.          return;
  1843.       }
  1844.    }
  1845.  
  1846.    emit_bool_to_cond_code(ir->condition);
  1847.    fs_inst *inst = emit(BRW_OPCODE_IF);
  1848.    inst->predicate = BRW_PREDICATE_NORMAL;
  1849. }
  1850.  
  1851. void
  1852. fs_visitor::visit(ir_if *ir)
  1853. {
  1854.    if (brw->gen < 6 && dispatch_width == 16) {
  1855.       fail("Can't support (non-uniform) control flow on 16-wide\n");
  1856.    }
  1857.  
  1858.    /* Don't point the annotation at the if statement, because then the
  1859.     * entire then and else blocks get printed along with it.
  1860.     */
  1861.    this->base_ir = ir->condition;
  1862.  
  1863.    if (brw->gen == 6) {
  1864.       emit_if_gen6(ir);
  1865.    } else {
  1866.       emit_bool_to_cond_code(ir->condition);
  1867.  
  1868.       emit(IF(BRW_PREDICATE_NORMAL));
  1869.    }
  1870.  
  1871.    foreach_list(node, &ir->then_instructions) {
  1872.       ir_instruction *ir = (ir_instruction *)node;
  1873.       this->base_ir = ir;
  1874.  
  1875.       ir->accept(this);
  1876.    }
  1877.  
  1878.    if (!ir->else_instructions.is_empty()) {
  1879.       emit(BRW_OPCODE_ELSE);
  1880.  
  1881.       foreach_list(node, &ir->else_instructions) {
  1882.          ir_instruction *ir = (ir_instruction *)node;
  1883.          this->base_ir = ir;
  1884.  
  1885.          ir->accept(this);
  1886.       }
  1887.    }
  1888.  
  1889.    emit(BRW_OPCODE_ENDIF);
  1890. }
  1891.  
  1892. void
  1893. fs_visitor::visit(ir_loop *ir)
  1894. {
  1895.    fs_reg counter = reg_undef;
  1896.  
  1897.    if (brw->gen < 6 && dispatch_width == 16) {
  1898.       fail("Can't support (non-uniform) control flow on 16-wide\n");
  1899.    }
  1900.  
  1901.    if (ir->counter) {
  1902.       this->base_ir = ir->counter;
  1903.       ir->counter->accept(this);
  1904.       counter = *(variable_storage(ir->counter));
  1905.  
  1906.       if (ir->from) {
  1907.          this->base_ir = ir->from;
  1908.          ir->from->accept(this);
  1909.  
  1910.          emit(MOV(counter, this->result));
  1911.       }
  1912.    }
  1913.  
  1914.    this->base_ir = NULL;
  1915.    emit(BRW_OPCODE_DO);
  1916.  
  1917.    if (ir->to) {
  1918.       this->base_ir = ir->to;
  1919.       ir->to->accept(this);
  1920.  
  1921.       emit(CMP(reg_null_d, counter, this->result,
  1922.                brw_conditional_for_comparison(ir->cmp)));
  1923.  
  1924.       fs_inst *inst = emit(BRW_OPCODE_BREAK);
  1925.       inst->predicate = BRW_PREDICATE_NORMAL;
  1926.    }
  1927.  
  1928.    foreach_list(node, &ir->body_instructions) {
  1929.       ir_instruction *ir = (ir_instruction *)node;
  1930.  
  1931.       this->base_ir = ir;
  1932.       ir->accept(this);
  1933.    }
  1934.  
  1935.    if (ir->increment) {
  1936.       this->base_ir = ir->increment;
  1937.       ir->increment->accept(this);
  1938.       emit(ADD(counter, counter, this->result));
  1939.    }
  1940.  
  1941.    this->base_ir = NULL;
  1942.    emit(BRW_OPCODE_WHILE);
  1943. }
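
/* Editor's sketch of the instruction sequence the visit above produces for
 * a counted loop:
 *
 *    MOV  counter, <from>
 *    DO
 *       CMP  null, counter, <to>, <cmp>
 *       (+f0) BREAK
 *       ... body ...
 *       ADD  counter, counter, <increment>
 *    WHILE
 *
 * The exit test runs at the top of every iteration and the increment at
 * the bottom, matching C for-loop semantics.
 */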
  1944.  
  1945. void
  1946. fs_visitor::visit(ir_loop_jump *ir)
  1947. {
  1948.    switch (ir->mode) {
  1949.    case ir_loop_jump::jump_break:
  1950.       emit(BRW_OPCODE_BREAK);
  1951.       break;
  1952.    case ir_loop_jump::jump_continue:
  1953.       emit(BRW_OPCODE_CONTINUE);
  1954.       break;
  1955.    }
  1956. }
  1957.  
  1958. void
  1959. fs_visitor::visit(ir_call *ir)
  1960. {
  1961.    assert(!"FINISHME");
  1962. }
  1963.  
  1964. void
  1965. fs_visitor::visit(ir_return *ir)
  1966. {
  1967.    assert(!"FINISHME");
  1968. }
  1969.  
  1970. void
  1971. fs_visitor::visit(ir_function *ir)
  1972. {
  1973.    /* Ignore function bodies other than main() -- we shouldn't see calls to
  1974.     * them since they should all be inlined by the time we get here.
  1975.     */
  1976.    if (strcmp(ir->name, "main") == 0) {
  1977.       const ir_function_signature *sig;
  1978.       exec_list empty;
  1979.  
  1980.       sig = ir->matching_signature(&empty);
  1981.  
  1982.       assert(sig);
  1983.  
  1984.       foreach_list(node, &sig->body) {
  1985.          ir_instruction *ir = (ir_instruction *)node;
  1986.          this->base_ir = ir;
  1987.  
  1988.          ir->accept(this);
  1989.       }
  1990.    }
  1991. }
  1992.  
  1993. void
  1994. fs_visitor::visit(ir_function_signature *ir)
  1995. {
  1996.    assert(!"not reached");
  1997.    (void)ir;
  1998. }
  1999.  
  2000. fs_inst *
  2001. fs_visitor::emit(fs_inst inst)
  2002. {
  2003.    fs_inst *list_inst = new(mem_ctx) fs_inst;
  2004.    *list_inst = inst;
  2005.    emit(list_inst);
  2006.    return list_inst;
  2007. }
  2008.  
  2009. fs_inst *
  2010. fs_visitor::emit(fs_inst *inst)
  2011. {
  2012.    if (force_uncompressed_stack > 0)
  2013.       inst->force_uncompressed = true;
  2014.    else if (force_sechalf_stack > 0)
  2015.       inst->force_sechalf = true;
  2016.  
  2017.    inst->annotation = this->current_annotation;
  2018.    inst->ir = this->base_ir;
  2019.  
  2020.    this->instructions.push_tail(inst);
  2021.  
  2022.    return inst;
  2023. }
  2024.  
  2025. void
  2026. fs_visitor::emit(exec_list list)
  2027. {
  2028.    foreach_list_safe(node, &list) {
  2029.       fs_inst *inst = (fs_inst *)node;
  2030.       inst->remove();
  2031.       emit(inst);
  2032.    }
  2033. }
  2034.  
  2035. /** Emits a dummy fragment shader that writes solid magenta, for bringup purposes. */
  2036. void
  2037. fs_visitor::emit_dummy_fs()
  2038. {
  2039.    int reg_width = dispatch_width / 8;
  2040.  
  2041.    /* Everyone's favorite color. */
  2042.    emit(MOV(fs_reg(MRF, 2 + 0 * reg_width), fs_reg(1.0f)));
  2043.    emit(MOV(fs_reg(MRF, 2 + 1 * reg_width), fs_reg(0.0f)));
  2044.    emit(MOV(fs_reg(MRF, 2 + 2 * reg_width), fs_reg(1.0f)));
  2045.    emit(MOV(fs_reg(MRF, 2 + 3 * reg_width), fs_reg(0.0f)));
  2046.  
  2047.    fs_inst *write;
  2048.    write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0));
  2049.    write->base_mrf = 2;
  2050.    write->mlen = 4 * reg_width;
  2051.    write->eot = true;
  2052. }
  2053.  
  2054. /* The register location here is relative to the start of the URB
  2055.  * data.  It will get adjusted to a real hardware location before
  2056.  * generate_code() runs.
  2057.  */
  2058. struct brw_reg
  2059. fs_visitor::interp_reg(int location, int channel)
  2060. {
  2061.    int regnr = urb_setup[location] * 2 + channel / 2;
  2062.    int stride = (channel & 1) * 4;
  2063.  
  2064.    assert(urb_setup[location] != -1);
  2065.  
  2066.    return brw_vec1_grf(regnr, stride);
  2067. }
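
/* Worked example (editor's illustration): for a varying with
 * urb_setup[location] == 1 and channel == 3 (the w component),
 * regnr = 1 * 2 + 1 = 3 and stride = 4, i.e. w lives at sub-register 4 of
 * the second register of that varying's two-register URB slot.
 */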
  2068.  
  2069. /** Emits the interpolation for the varying inputs. */
  2070. void
  2071. fs_visitor::emit_interpolation_setup_gen4()
  2072. {
  2073.    this->current_annotation = "compute pixel centers";
  2074.    this->pixel_x = fs_reg(this, glsl_type::uint_type);
  2075.    this->pixel_y = fs_reg(this, glsl_type::uint_type);
  2076.    this->pixel_x.type = BRW_REGISTER_TYPE_UW;
  2077.    this->pixel_y.type = BRW_REGISTER_TYPE_UW;
  2078.  
  2079.    emit(FS_OPCODE_PIXEL_X, this->pixel_x);
  2080.    emit(FS_OPCODE_PIXEL_Y, this->pixel_y);
  2081.  
  2082.    this->current_annotation = "compute pixel deltas from v0";
  2083.    if (brw->has_pln) {
  2084.       this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
  2085.          fs_reg(this, glsl_type::vec2_type);
  2086.       this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
  2087.          this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC];
  2088.       this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg_offset++;
  2089.    } else {
  2090.       this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
  2091.          fs_reg(this, glsl_type::float_type);
  2092.       this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
  2093.          fs_reg(this, glsl_type::float_type);
  2094.    }
  2095.    emit(ADD(this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
  2096.             this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0)))));
  2097.    emit(ADD(this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
  2098.             this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1)))));
  2099.  
  2100.    this->current_annotation = "compute pos.w and 1/pos.w";
  2101.    /* Compute wpos.w.  It's always in our setup, since it's needed to
  2102.     * interpolate the other attributes.
  2103.     */
  2104.    this->wpos_w = fs_reg(this, glsl_type::float_type);
  2105.    emit(FS_OPCODE_LINTERP, wpos_w,
  2106.         this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
  2107.         this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
  2108.         interp_reg(VARYING_SLOT_POS, 3));
  2109.    /* Compute the pixel 1/W value from wpos.w. */
  2110.    this->pixel_w = fs_reg(this, glsl_type::float_type);
  2111.    emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
  2112.    this->current_annotation = NULL;
  2113. }
  2114.  
  2115. /** Emits the interpolation for the varying inputs. */
  2116. void
  2117. fs_visitor::emit_interpolation_setup_gen6()
  2118. {
  2119.    struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
  2120.  
  2121.    /* If the pixel centers get used, the setup is the same as for gen4. */
  2122.    this->current_annotation = "compute pixel centers";
  2123.    fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type);
  2124.    fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
  2125.    int_pixel_x.type = BRW_REGISTER_TYPE_UW;
  2126.    int_pixel_y.type = BRW_REGISTER_TYPE_UW;
  2127.    emit(ADD(int_pixel_x,
  2128.             fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
  2129.             fs_reg(brw_imm_v(0x10101010))));
  2130.    emit(ADD(int_pixel_y,
  2131.             fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
  2132.             fs_reg(brw_imm_v(0x11001100))));
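   /* Editor's note: brw_imm_v packs eight signed 4-bit immediates.
    * Reading nibbles from the low end, 0x10101010 is <0,1,0,1,0,1,0,1>
    * and 0x11001100 is <0,0,1,1,0,0,1,1>: the X and Y offsets of the
    * pixels within a pair of 2x2 subspans, added to the subspan origins
    * held in g1.
    */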
  2133.  
  2134.    /* As of gen6, we can no longer mix float and int sources.  We have
  2135.     * to turn the integer pixel centers into floats for their actual
  2136.     * use.
  2137.     */
  2138.    this->pixel_x = fs_reg(this, glsl_type::float_type);
  2139.    this->pixel_y = fs_reg(this, glsl_type::float_type);
  2140.    emit(MOV(this->pixel_x, int_pixel_x));
  2141.    emit(MOV(this->pixel_y, int_pixel_y));
  2142.  
  2143.    this->current_annotation = "compute pos.w";
  2144.    this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
  2145.    this->wpos_w = fs_reg(this, glsl_type::float_type);
  2146.    emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
  2147.  
  2148.    for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) {
  2149.       uint8_t reg = c->barycentric_coord_reg[i];
  2150.       this->delta_x[i] = fs_reg(brw_vec8_grf(reg, 0));
  2151.       this->delta_y[i] = fs_reg(brw_vec8_grf(reg + 1, 0));
  2152.    }
  2153.  
  2154.    this->current_annotation = NULL;
  2155. }
  2156.  
  2157. void
  2158. fs_visitor::emit_color_write(int target, int index, int first_color_mrf)
  2159. {
  2160.    int reg_width = dispatch_width / 8;
  2161.    fs_inst *inst;
  2162.    fs_reg color = outputs[target];
  2163.    fs_reg mrf;
  2164.  
  2165.    /* If there's no color data to be written, skip it. */
  2166.    if (color.file == BAD_FILE)
  2167.       return;
  2168.  
  2169.    color.reg_offset += index;
  2170.  
  2171.    if (dispatch_width == 8 || brw->gen >= 6) {
  2172.       /* SIMD8 write looks like:
  2173.        * m + 0: r0
  2174.        * m + 1: r1
  2175.        * m + 2: g0
  2176.        * m + 3: g1
  2177.        *
  2178.        * gen6 SIMD16 DP write looks like:
  2179.        * m + 0: r0
  2180.        * m + 1: r1
  2181.        * m + 2: g0
  2182.        * m + 3: g1
  2183.        * m + 4: b0
  2184.        * m + 5: b1
  2185.        * m + 6: a0
  2186.        * m + 7: a1
  2187.        */
  2188.       inst = emit(MOV(fs_reg(MRF, first_color_mrf + index * reg_width,
  2189.                              color.type),
  2190.                       color));
  2191.       inst->saturate = c->key.clamp_fragment_color;
  2192.    } else {
  2193.       /* pre-gen6 SIMD16 single source DP write looks like:
  2194.        * m + 0: r0
  2195.        * m + 1: g0
  2196.        * m + 2: b0
  2197.        * m + 3: a0
  2198.        * m + 4: r1
  2199.        * m + 5: g1
  2200.        * m + 6: b1
  2201.        * m + 7: a1
  2202.        */
  2203.       if (brw->has_compr4) {
  2204.          /* By setting the high bit of the MRF register number, we
  2205.           * indicate that we want COMPR4 mode - instead of doing the
  2206.           * usual destination + 1 for the second half we get
  2207.           * destination + 4.
  2208.           */
  2209.          inst = emit(MOV(fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index,
  2210.                                 color.type),
  2211.                          color));
  2212.          inst->saturate = c->key.clamp_fragment_color;
  2213.       } else {
  2214.          push_force_uncompressed();
  2215.          inst = emit(MOV(fs_reg(MRF, first_color_mrf + index, color.type),
  2216.                          color));
  2217.          inst->saturate = c->key.clamp_fragment_color;
  2218.          pop_force_uncompressed();
  2219.  
  2220.          push_force_sechalf();
  2221.          color.sechalf = true;
  2222.          inst = emit(MOV(fs_reg(MRF, first_color_mrf + index + 4, color.type),
  2223.                          color));
  2224.          inst->saturate = c->key.clamp_fragment_color;
  2225.          pop_force_sechalf();
  2226.          color.sechalf = false;
  2227.       }
  2228.    }
  2229. }
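
/* Worked example (editor's illustration): in a pre-gen6 SIMD16 write with
 * COMPR4 and first_color_mrf = 2, the red channel (index 0) is emitted as
 * a single compressed MOV to m2 whose second half lands in m2 + 4 = m6,
 * which is exactly the r0/r1 placement in the layout shown above.
 */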
  2230.  
  2231. void
  2232. fs_visitor::emit_fb_writes()
  2233. {
  2234.    this->current_annotation = "FB write header";
  2235.    bool header_present = true;
  2236.    /* We can potentially have a message length of up to 15, so we have to set
  2237.     * base_mrf to either 0 or 1 in order to fit in m0..m15.
  2238.     */
  2239.    int base_mrf = 1;
  2240.    int nr = base_mrf;
  2241.    int reg_width = dispatch_width / 8;
  2242.    bool do_dual_src = this->dual_src_output.file != BAD_FILE;
  2243.    bool src0_alpha_to_render_target = false;
  2244.  
  2245.    if (dispatch_width == 16 && do_dual_src) {
  2246.       fail("GL_ARB_blend_func_extended not yet supported in 16-wide.\n");
  2247.       do_dual_src = false;
  2248.    }
  2249.  
  2250.    /* From the Sandy Bridge PRM, volume 4, page 198:
  2251.     *
  2252.     *     "Dispatched Pixel Enables. One bit per pixel indicating
  2253.     *      which pixels were originally enabled when the thread was
  2254.     *      dispatched. This field is only required for the end-of-
  2255.     *      thread message and on all dual-source messages."
  2256.     */
  2257.    if (brw->gen >= 6 &&
  2258.        !this->fp->UsesKill &&
  2259.        !do_dual_src &&
  2260.        c->key.nr_color_regions == 1) {
  2261.       header_present = false;
  2262.    }
  2263.  
  2264.    if (header_present) {
  2265.       src0_alpha_to_render_target = brw->gen >= 6 &&
  2266.                                     !do_dual_src &&
  2267.                                     c->key.replicate_alpha;
  2268.       /* m2, m3 header */
  2269.       nr += 2;
  2270.    }
  2271.  
  2272.    if (c->aa_dest_stencil_reg) {
  2273.       push_force_uncompressed();
  2274.       emit(MOV(fs_reg(MRF, nr++),
  2275.                fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0))));
  2276.       pop_force_uncompressed();
  2277.    }
  2278.  
  2279.    /* Reserve space for color. It'll be filled in per MRT below. */
  2280.    int color_mrf = nr;
  2281.    nr += 4 * reg_width;
  2282.    if (do_dual_src)
  2283.       nr += 4;
  2284.    if (src0_alpha_to_render_target)
  2285.       nr += reg_width;
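   /* Editor's illustration of the accounting so far: for a SIMD8
    * single-source write with a header and no AA, depth, or stencil
    * payload, nr is now 1 (base_mrf) + 2 (header) + 4 (one reg per color
    * channel) = 7, so the final message length is nr - base_mrf = 6.
    */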
  2286.  
  2287.    if (c->source_depth_to_render_target) {
  2288.       if (brw->gen == 6 && dispatch_width == 16) {
  2289.          /* For outputting oDepth on gen6, SIMD8 writes have to be
  2290.           * used.  This would require 8-wide moves of each half to
  2291.           * message regs, kind of like pre-gen5 SIMD16 FB writes.
  2292.           * Just bail on doing so for now.
  2293.           */
  2294.          fail("Missing support for simd16 depth writes on gen6\n");
  2295.       }
  2296.  
  2297.       if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
  2298.          /* Hand over gl_FragDepth. */
  2299.          assert(this->frag_depth.file != BAD_FILE);
  2300.          emit(MOV(fs_reg(MRF, nr), this->frag_depth));
  2301.       } else {
  2302.          /* Pass through the payload depth. */
  2303.          emit(MOV(fs_reg(MRF, nr),
  2304.                   fs_reg(brw_vec8_grf(c->source_depth_reg, 0))));
  2305.       }
  2306.       nr += reg_width;
  2307.    }
  2308.  
  2309.    if (c->dest_depth_reg) {
  2310.       emit(MOV(fs_reg(MRF, nr),
  2311.                fs_reg(brw_vec8_grf(c->dest_depth_reg, 0))));
  2312.       nr += reg_width;
  2313.    }
  2314.  
  2315.    if (do_dual_src) {
  2316.       fs_reg src0 = this->outputs[0];
  2317.       fs_reg src1 = this->dual_src_output;
  2318.  
  2319.       this->current_annotation = ralloc_asprintf(this->mem_ctx,
  2320.                                                  "FB write src0");
  2321.       for (int i = 0; i < 4; i++) {
  2322.          fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + i, src0.type), src0));
  2323.          src0.reg_offset++;
  2324.          inst->saturate = c->key.clamp_fragment_color;
  2325.       }
  2326.  
  2327.       this->current_annotation = ralloc_asprintf(this->mem_ctx,
  2328.                                                  "FB write src1");
  2329.       for (int i = 0; i < 4; i++) {
  2330.          fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + 4 + i, src1.type),
  2331.                                   src1));
  2332.          src1.reg_offset++;
  2333.          inst->saturate = c->key.clamp_fragment_color;
  2334.       }
  2335.  
  2336.       if (INTEL_DEBUG & DEBUG_SHADER_TIME)
  2337.          emit_shader_time_end();
  2338.  
  2339.       fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
  2340.       inst->target = 0;
  2341.       inst->base_mrf = base_mrf;
  2342.       inst->mlen = nr - base_mrf;
  2343.       inst->eot = true;
  2344.       inst->header_present = header_present;
  2345.  
  2346.       c->prog_data.dual_src_blend = true;
  2347.       this->current_annotation = NULL;
  2348.       return;
  2349.    }
  2350.  
  2351.    for (int target = 0; target < c->key.nr_color_regions; target++) {
  2352.       this->current_annotation = ralloc_asprintf(this->mem_ctx,
  2353.                                                  "FB write target %d",
  2354.                                                  target);
  2355.       /* If src0_alpha_to_render_target is true, include source zero alpha
  2356.        * data in the RenderTargetWrite message for targets > 0.
  2357.        */
  2358.       int write_color_mrf = color_mrf;
  2359.       if (src0_alpha_to_render_target && target != 0) {
  2360.          fs_inst *inst;
  2361.          fs_reg color = outputs[0];
  2362.          color.reg_offset += 3;
  2363.  
  2364.          inst = emit(MOV(fs_reg(MRF, write_color_mrf, color.type),
  2365.                          color));
  2366.          inst->saturate = c->key.clamp_fragment_color;
  2367.          write_color_mrf = color_mrf + reg_width;
  2368.       }
  2369.  
  2370.       for (unsigned i = 0; i < this->output_components[target]; i++)
  2371.          emit_color_write(target, i, write_color_mrf);
  2372.  
  2373.       bool eot = false;
  2374.       if (target == c->key.nr_color_regions - 1) {
  2375.          eot = true;
  2376.  
  2377.          if (INTEL_DEBUG & DEBUG_SHADER_TIME)
  2378.             emit_shader_time_end();
  2379.       }
  2380.  
  2381.       fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
  2382.       inst->target = target;
  2383.       inst->base_mrf = base_mrf;
  2384.       if (src0_alpha_to_render_target && target == 0)
  2385.          inst->mlen = nr - base_mrf - reg_width;
  2386.       else
  2387.          inst->mlen = nr - base_mrf;
  2388.       inst->eot = eot;
  2389.       inst->header_present = header_present;
  2390.    }
  2391.  
  2392.    if (c->key.nr_color_regions == 0) {
  2393.       /* Even if there are no color buffers enabled, we still need to send
  2394.        * alpha out the pipeline to our null renderbuffer to support
  2395.        * alpha-testing, alpha-to-coverage, and so on.
  2396.        */
  2397.       emit_color_write(0, 3, color_mrf);
  2398.  
  2399.       if (INTEL_DEBUG & DEBUG_SHADER_TIME)
  2400.          emit_shader_time_end();
  2401.  
  2402.       fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
  2403.       inst->base_mrf = base_mrf;
  2404.       inst->mlen = nr - base_mrf;
  2405.       inst->eot = true;
  2406.       inst->header_present = header_present;
  2407.    }
  2408.  
  2409.    this->current_annotation = NULL;
  2410. }
  2411.  
  2412. void
  2413. fs_visitor::resolve_ud_negate(fs_reg *reg)
  2414. {
  2415.    if (reg->type != BRW_REGISTER_TYPE_UD ||
  2416.        !reg->negate)
  2417.       return;
  2418.  
  2419.    fs_reg temp = fs_reg(this, glsl_type::uint_type);
  2420.    emit(MOV(temp, *reg));
  2421.    *reg = temp;
  2422. }
  2423.  
  2424. void
  2425. fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg)
  2426. {
  2427.    if (rvalue->type != glsl_type::bool_type)
  2428.       return;
  2429.  
  2430.    fs_reg temp = fs_reg(this, glsl_type::bool_type);
  2431.    emit(AND(temp, *reg, fs_reg(1)));
  2432.    *reg = temp;
  2433. }
  2434.  
  2435. fs_visitor::fs_visitor(struct brw_context *brw,
  2436.                        struct brw_wm_compile *c,
  2437.                        struct gl_shader_program *shader_prog,
  2438.                        struct gl_fragment_program *fp,
  2439.                        unsigned dispatch_width)
  2440.    : dispatch_width(dispatch_width)
  2441. {
  2442.    this->c = c;
  2443.    this->brw = brw;
  2444.    this->fp = fp;
  2445.    this->shader_prog = shader_prog;
  2446.    this->ctx = &brw->ctx;
  2447.    this->mem_ctx = ralloc_context(NULL);
  2448.    if (shader_prog)
  2449.       shader = (struct brw_shader *)
  2450.          shader_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
  2451.    else
  2452.       shader = NULL;
  2453.    this->failed = false;
  2454.    this->variable_ht = hash_table_ctor(0,
  2455.                                        hash_table_pointer_hash,
  2456.                                        hash_table_pointer_compare);
  2457.  
  2458.    memset(this->outputs, 0, sizeof(this->outputs));
  2459.    memset(this->output_components, 0, sizeof(this->output_components));
  2460.    this->first_non_payload_grf = 0;
  2461.    this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
  2462.  
  2463.    this->current_annotation = NULL;
  2464.    this->base_ir = NULL;
  2465.  
  2466.    this->virtual_grf_sizes = NULL;
  2467.    this->virtual_grf_count = 0;
  2468.    this->virtual_grf_array_size = 0;
  2469.    this->virtual_grf_start = NULL;
  2470.    this->virtual_grf_end = NULL;
  2471.    this->live_intervals_valid = false;
  2472.  
  2473.    this->params_remap = NULL;
  2474.    this->nr_params_remap = 0;
  2475.  
  2476.    this->force_uncompressed_stack = 0;
  2477.    this->force_sechalf_stack = 0;
  2478.  
  2479.    memset(&this->param_size, 0, sizeof(this->param_size));
  2480. }
  2481.  
  2482. fs_visitor::~fs_visitor()
  2483. {
  2484.    ralloc_free(this->mem_ctx);
  2485.    hash_table_dtor(this->variable_ht);
  2486. }
  2487.