/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "glsl/ir.h"
#include "glsl/ir_optimization.h"
#include "glsl/nir/glsl_to_nir.h"
#include "program/prog_to_nir.h"
#include "brw_fs.h"
#include "brw_nir.h"
void
fs_visitor::emit_nir_code()
{
   nir_shader *nir = prog->nir;

   /* emit the arrays used for inputs and outputs - load/store intrinsics will
    * be converted to reads/writes of these arrays
    */

   if (nir->num_inputs > 0) {
      nir_inputs = vgrf(nir->num_inputs);
      nir_setup_inputs(nir);
   }

   if (nir->num_outputs > 0) {
      nir_outputs = vgrf(nir->num_outputs);
      nir_setup_outputs(nir);
   }

   if (nir->num_uniforms > 0) {
      nir_setup_uniforms(nir);
   }

   nir_emit_system_values(nir);

   nir_globals = ralloc_array(mem_ctx, fs_reg, nir->reg_alloc);
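   /* Each NIR register takes num_array_elems * num_components consecutive
    * VGRF components; a non-array register counts as a single element.
    */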
   foreach_list_typed(nir_register, reg, node, &nir->registers) {
      unsigned array_elems =
         reg->num_array_elems == 0 ? 1 : reg->num_array_elems;
      unsigned size = array_elems * reg->num_components;
      nir_globals[reg->index] = vgrf(size);
   }

   /* get the main function and emit it */
   nir_foreach_overload(nir, overload) {
      assert(strcmp(overload->function->name, "main") == 0);
      assert(overload->impl);
      nir_emit_impl(overload->impl);
   }
}

void
fs_visitor::nir_setup_inputs(nir_shader *shader)
{
   foreach_list_typed(nir_variable, var, node, &shader->inputs) {
      enum brw_reg_type type = brw_type_for_base_type(var->type);
      fs_reg input = offset(nir_inputs, var->data.driver_location);

      fs_reg reg;
      switch (stage) {
      case MESA_SHADER_VERTEX: {
         /* Our ATTR file is indexed by VERT_ATTRIB_*, which is the value
          * stored in nir_variable::location.
          *
          * However, NIR's load_input intrinsics use a different index - an
          * offset into a single contiguous array containing all inputs.
          * This index corresponds to the nir_variable::driver_location field.
          *
          * So, we need to copy from fs_reg(ATTR, var->location) to
          * offset(nir_inputs, var->data.driver_location).
          */
         unsigned components = var->type->without_array()->components();
         unsigned array_length = var->type->is_array() ? var->type->length : 1;
         for (unsigned i = 0; i < array_length; i++) {
            for (unsigned j = 0; j < components; j++) {
               emit(MOV(retype(offset(input, components * i + j), type),
                        offset(fs_reg(ATTR, var->data.location + i, type), j)));
            }
         }
         break;
      }
      case MESA_SHADER_GEOMETRY:
      case MESA_SHADER_COMPUTE:
         unreachable("fs_visitor not used for these stages yet.");
         break;
      case MESA_SHADER_FRAGMENT:
         if (var->data.location == VARYING_SLOT_POS) {
            reg = *emit_fragcoord_interpolation(var->data.pixel_center_integer,
                                                var->data.origin_upper_left);
            emit_percomp(MOV(input, reg), 0xF);
         } else {
            emit_general_interpolation(input, var->name, var->type,
                                       (glsl_interp_qualifier) var->data.interpolation,
                                       var->data.location, var->data.centroid,
                                       var->data.sample);
         }
         break;
      }
   }
}

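/* Hook each NIR output variable up to the backend's fixed output slots.
 * For the FS, every output falls into one of the cases handled below:
 * the dual-source blend output (index == 1), gl_FragColor (broadcast to
 * all color regions), gl_FragDepth, gl_SampleMask, or a gl_FragData /
 * user-defined color output.
 */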
void
fs_visitor::nir_setup_outputs(nir_shader *shader)
{
   brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;

   foreach_list_typed(nir_variable, var, node, &shader->outputs) {
      fs_reg reg = offset(nir_outputs, var->data.driver_location);

      int vector_elements =
         var->type->is_array() ? var->type->fields.array->vector_elements
                               : var->type->vector_elements;

      if (stage == MESA_SHADER_VERTEX) {
         for (int i = 0; i < ALIGN(type_size(var->type), 4) / 4; i++) {
            int output = var->data.location + i;
            this->outputs[output] = offset(reg, 4 * i);
            this->output_components[output] = vector_elements;
         }
      } else if (var->data.index > 0) {
         assert(var->data.location == FRAG_RESULT_DATA0);
         assert(var->data.index == 1);
         this->dual_src_output = reg;
         this->do_dual_src = true;
      } else if (var->data.location == FRAG_RESULT_COLOR) {
         /* Writing gl_FragColor outputs to all color regions. */
         for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
            this->outputs[i] = reg;
            this->output_components[i] = 4;
         }
      } else if (var->data.location == FRAG_RESULT_DEPTH) {
         this->frag_depth = reg;
      } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
         this->sample_mask = reg;
      } else {
         /* gl_FragData or a user-defined FS output */
         assert(var->data.location >= FRAG_RESULT_DATA0 &&
                var->data.location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);

         /* General color output. */
         for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
            int output = var->data.location - FRAG_RESULT_DATA0 + i;
            this->outputs[output] = offset(reg, vector_elements * i);
            this->output_components[output] = vector_elements;
         }
      }
   }
}

void
fs_visitor::nir_setup_uniforms(nir_shader *shader)
{
   uniforms = shader->num_uniforms;
   num_direct_uniforms = shader->num_direct_uniforms;

   /* We split the uniform register file in half.  The first half is
    * entirely direct uniforms.  The second half is indirect.
    */
   param_size[0] = num_direct_uniforms;
   if (shader->num_uniforms > num_direct_uniforms)
      param_size[num_direct_uniforms] = shader->num_uniforms - num_direct_uniforms;

   if (dispatch_width != 8)
      return;

   if (shader_prog) {
      foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
         /* UBO's and atomics don't take up space in the uniform file */
         if (var->interface_type != NULL || var->type->contains_atomic())
            continue;

         if (strncmp(var->name, "gl_", 3) == 0)
            nir_setup_builtin_uniform(var);
         else
            nir_setup_uniform(var);
      }
   } else {
      /* prog_to_nir doesn't create uniform variables; set param up directly. */
      for (unsigned p = 0; p < prog->Parameters->NumParameters; p++) {
         for (unsigned int i = 0; i < 4; i++) {
            stage_prog_data->param[4 * p + i] =
               &prog->Parameters->ParameterValues[p][i];
         }
      }
   }
}

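/* Illustrative example of the prefix matching done below: given
 *
 *    struct S { vec4 v; float f; };
 *    uniform S s;
 *
 * the uniform storage list contains separate entries for the leaves,
 * e.g. "s.v" and "s.f".  Both start with the variable name "s" followed
 * by '.', so both contribute their component slots to this variable's
 * params; the '[' case similarly catches elements of a uniform array.
 */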
void
fs_visitor::nir_setup_uniform(nir_variable *var)
{
   int namelen = strlen(var->name);

   /* The data for our (non-builtin) uniforms is stored in a series of
    * gl_uniform_driver_storage structs for each subcomponent that
    * glGetUniformLocation() could name.  We know it's been set up in the
    * same order we'd walk the type, so walk the list of storage and find
    * anything with our name, or the prefix of a component that starts with
    * our name.
    */
   unsigned index = var->data.driver_location;
   for (unsigned u = 0; u < shader_prog->NumUserUniformStorage; u++) {
      struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];

      if (strncmp(var->name, storage->name, namelen) != 0 ||
          (storage->name[namelen] != 0 &&
           storage->name[namelen] != '.' &&
           storage->name[namelen] != '[')) {
         continue;
      }

      unsigned slots = storage->type->component_slots();
      if (storage->array_elements)
         slots *= storage->array_elements;

      for (unsigned i = 0; i < slots; i++) {
         stage_prog_data->param[index++] = &storage->storage[i];
      }
   }

   /* Make sure we actually initialized the right amount of stuff here. */
   assert(var->data.driver_location + var->type->component_slots() == index);
}

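/* Illustrative note: the swizzle walk below adds one param per unique
 * component.  A scalar state value comes through with an xxxx swizzle, so
 * only one param is added; a vec4 comes through as xyzw and adds four.
 */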
void
fs_visitor::nir_setup_builtin_uniform(nir_variable *var)
{
   const nir_state_slot *const slots = var->state_slots;
   assert(var->state_slots != NULL);

   unsigned uniform_index = var->data.driver_location;
   for (unsigned int i = 0; i < var->num_state_slots; i++) {
      /* This state reference has already been set up by ir_to_mesa, but we'll
       * get the same index back here.
       */
      int index = _mesa_add_state_reference(this->prog->Parameters,
                                            (gl_state_index *)slots[i].tokens);

      /* Add each of the unique swizzles of the element as a parameter.
       * This'll end up matching the expected layout of the
       * array/matrix/structure we're trying to fill in.
       */
      int last_swiz = -1;
      for (unsigned int j = 0; j < 4; j++) {
         int swiz = GET_SWZ(slots[i].swizzle, j);
         if (swiz == last_swiz)
            break;
         last_swiz = swiz;

         stage_prog_data->param[uniform_index++] =
            &prog->Parameters->ParameterValues[index][swiz];
      }
   }
}

static bool
emit_system_values_block(nir_block *block, void *void_visitor)
{
   fs_visitor *v = (fs_visitor *)void_visitor;
   fs_reg *reg;

   nir_foreach_instr(block, instr) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_load_vertex_id:
         unreachable("should be lowered by lower_vertex_id().");

      case nir_intrinsic_load_vertex_id_zero_base:
         assert(v->stage == MESA_SHADER_VERTEX);
         reg = &v->nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
         if (reg->file == BAD_FILE)
            *reg = *v->emit_vs_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
         break;

      case nir_intrinsic_load_base_vertex:
         assert(v->stage == MESA_SHADER_VERTEX);
         reg = &v->nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
         if (reg->file == BAD_FILE)
            *reg = *v->emit_vs_system_value(SYSTEM_VALUE_BASE_VERTEX);
         break;

      case nir_intrinsic_load_instance_id:
         assert(v->stage == MESA_SHADER_VERTEX);
         reg = &v->nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
         if (reg->file == BAD_FILE)
            *reg = *v->emit_vs_system_value(SYSTEM_VALUE_INSTANCE_ID);
         break;

      case nir_intrinsic_load_sample_pos:
         assert(v->stage == MESA_SHADER_FRAGMENT);
         reg = &v->nir_system_values[SYSTEM_VALUE_SAMPLE_POS];
         if (reg->file == BAD_FILE)
            *reg = *v->emit_samplepos_setup();
         break;

      case nir_intrinsic_load_sample_id:
         assert(v->stage == MESA_SHADER_FRAGMENT);
         reg = &v->nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
         if (reg->file == BAD_FILE)
            *reg = *v->emit_sampleid_setup();
         break;

      case nir_intrinsic_load_sample_mask_in:
         assert(v->stage == MESA_SHADER_FRAGMENT);
         assert(v->devinfo->gen >= 7);
         reg = &v->nir_system_values[SYSTEM_VALUE_SAMPLE_MASK_IN];
         if (reg->file == BAD_FILE)
            *reg = fs_reg(retype(brw_vec8_grf(v->payload.sample_mask_in_reg, 0),
                                 BRW_REGISTER_TYPE_D));
         break;

      default:
         break;
      }
   }

   return true;
}

void
fs_visitor::nir_emit_system_values(nir_shader *shader)
{
   nir_system_values = ralloc_array(mem_ctx, fs_reg, SYSTEM_VALUE_MAX);
   nir_foreach_overload(shader, overload) {
      assert(strcmp(overload->function->name, "main") == 0);
      assert(overload->impl);
      nir_foreach_block(overload->impl, emit_system_values_block, this);
   }
}

void
fs_visitor::nir_emit_impl(nir_function_impl *impl)
{
   nir_locals = reralloc(mem_ctx, nir_locals, fs_reg, impl->reg_alloc);
   foreach_list_typed(nir_register, reg, node, &impl->registers) {
      unsigned array_elems =
         reg->num_array_elems == 0 ? 1 : reg->num_array_elems;
      unsigned size = array_elems * reg->num_components;
      nir_locals[reg->index] = vgrf(size);
   }

   nir_emit_cf_list(&impl->body);
}

void
fs_visitor::nir_emit_cf_list(exec_list *list)
{
   exec_list_validate(list);
   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_if:
         nir_emit_if(nir_cf_node_as_if(node));
         break;

      case nir_cf_node_loop:
         nir_emit_loop(nir_cf_node_as_loop(node));
         break;

      case nir_cf_node_block:
         nir_emit_block(nir_cf_node_as_block(node));
         break;

      default:
         unreachable("Invalid CFG node block");
      }
   }
}

void
fs_visitor::nir_emit_if(nir_if *if_stmt)
{
   /* first, put the condition into f0 */
   fs_inst *inst = emit(MOV(reg_null_d,
                            retype(get_nir_src(if_stmt->condition),
                                   BRW_REGISTER_TYPE_D)));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;

   emit(IF(BRW_PREDICATE_NORMAL));

   nir_emit_cf_list(&if_stmt->then_list);

   /* note: if the else is empty, dead CF elimination will remove it */
   emit(BRW_OPCODE_ELSE);

   nir_emit_cf_list(&if_stmt->else_list);

   emit(BRW_OPCODE_ENDIF);

   if (!try_replace_with_sel() && devinfo->gen < 6) {
      no16("Can't support (non-uniform) control flow on SIMD16\n");
   }
}

void
fs_visitor::nir_emit_loop(nir_loop *loop)
{
   if (devinfo->gen < 6) {
      no16("Can't support (non-uniform) control flow on SIMD16\n");
   }

   emit(BRW_OPCODE_DO);

   nir_emit_cf_list(&loop->body);

   emit(BRW_OPCODE_WHILE);
}

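/* Note that NIR loops carry no loop condition of their own: they are
 * infinite loops whose exits are explicit break/continue jump
 * instructions inside the body, handled via nir_emit_jump().
 */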
void
fs_visitor::nir_emit_block(nir_block *block)
{
   nir_foreach_instr(block, instr) {
      nir_emit_instr(instr);
   }
}

void
fs_visitor::nir_emit_instr(nir_instr *instr)
{
   this->base_ir = instr;

   switch (instr->type) {
   case nir_instr_type_alu:
      nir_emit_alu(nir_instr_as_alu(instr));
      break;

   case nir_instr_type_intrinsic:
      nir_emit_intrinsic(nir_instr_as_intrinsic(instr));
      break;

   case nir_instr_type_tex:
      nir_emit_texture(nir_instr_as_tex(instr));
      break;

   case nir_instr_type_load_const:
      /* We can hit these, but we do nothing now and use them as
       * immediates later.
       */
      break;

   case nir_instr_type_jump:
      nir_emit_jump(nir_instr_as_jump(instr));
      break;

   default:
      unreachable("unknown instruction type");
   }

   this->base_ir = NULL;
}

static brw_reg_type
brw_type_for_nir_type(nir_alu_type type)
{
   switch (type) {
   case nir_type_unsigned:
      return BRW_REGISTER_TYPE_UD;
   case nir_type_bool:
   case nir_type_int:
      return BRW_REGISTER_TYPE_D;
   case nir_type_float:
      return BRW_REGISTER_TYPE_F;
   default:
      unreachable("unknown type");
   }

   return BRW_REGISTER_TYPE_F;
}

bool
fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
                                         const fs_reg &result)
{
   if (instr->src[0].src.is_ssa ||
       !instr->src[0].src.reg.reg ||
       !instr->src[0].src.reg.reg->parent_instr)
      return false;

   if (instr->src[0].src.reg.reg->parent_instr->type !=
       nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *src0 =
      nir_instr_as_intrinsic(instr->src[0].src.reg.reg->parent_instr);

   if (src0->intrinsic != nir_intrinsic_load_front_face)
      return false;

   nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src);
   if (!value1 || fabsf(value1->f[0]) != 1.0f)
      return false;

   nir_const_value *value2 = nir_src_as_const_value(instr->src[2].src);
   if (!value2 || fabsf(value2->f[0]) != 1.0f)
      return false;

   fs_reg tmp = vgrf(glsl_type::int_type);

   if (devinfo->gen >= 6) {
      /* Bit 15 of g0.0 is 0 if the polygon is front facing. */
      fs_reg g0 = fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_W));

      /* For (gl_FrontFacing ? 1.0 : -1.0), emit:
       *
       *    or(8)  tmp.1<2>W  g0.0<0,1,0>W  0x00003f80W
       *    and(8) dst<1>D    tmp<8,8,1>D   0xbf800000D
       *
       * and negate g0.0<0,1,0>W for (gl_FrontFacing ? -1.0 : 1.0).
       *
       * This negation looks like it's safe in practice, because bits 0:4 will
       * surely be TRIANGLES
       */

      if (value1->f[0] == -1.0f) {
         g0.negate = true;
      }

      tmp.type = BRW_REGISTER_TYPE_W;
      tmp.subreg_offset = 2;
      tmp.stride = 2;

      fs_inst *or_inst = emit(OR(tmp, g0, fs_reg(0x3f80)));
      or_inst->src[1].type = BRW_REGISTER_TYPE_UW;

      tmp.type = BRW_REGISTER_TYPE_D;
      tmp.subreg_offset = 0;
      tmp.stride = 1;
   } else {
      /* Bit 31 of g1.6 is 0 if the polygon is front facing. */
      fs_reg g1_6 = fs_reg(retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_D));

      /* For (gl_FrontFacing ? 1.0 : -1.0), emit:
       *
       *    or(8)  tmp<1>D  g1.6<0,1,0>D  0x3f800000D
       *    and(8) dst<1>D  tmp<8,8,1>D   0xbf800000D
       *
       * and negate g1.6<0,1,0>D for (gl_FrontFacing ? -1.0 : 1.0).
       *
       * This negation looks like it's safe in practice, because bits 0:4 will
       * surely be TRIANGLES
       */

      if (value1->f[0] == -1.0f) {
         g1_6.negate = true;
      }

      emit(OR(tmp, g1_6, fs_reg(0x3f800000)));
   }
   emit(AND(retype(result, BRW_REGISTER_TYPE_D), tmp, fs_reg(0xbf800000)));

   return true;
}

void
fs_visitor::nir_emit_alu(nir_alu_instr *instr)
{
   struct brw_wm_prog_key *fs_key = (struct brw_wm_prog_key *) this->key;
   fs_inst *inst;

   fs_reg result = get_nir_dest(instr->dest.dest);
   result.type = brw_type_for_nir_type(nir_op_infos[instr->op].output_type);

   fs_reg op[4];
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
      op[i] = get_nir_src(instr->src[i].src);
      op[i].type = brw_type_for_nir_type(nir_op_infos[instr->op].input_types[i]);
      op[i].abs = instr->src[i].abs;
      op[i].negate = instr->src[i].negate;
   }

   /* We get a bunch of mov's out of the from_ssa pass and they may still
    * be vectorized.  We'll handle them as a special case.  We'll also
    * handle vecN here because it's basically the same thing.
    */
   switch (instr->op) {
   case nir_op_imov:
   case nir_op_fmov:
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4: {
      fs_reg temp = result;
      bool need_extra_copy = false;
      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
         if (!instr->src[i].src.is_ssa &&
             instr->dest.dest.reg.reg == instr->src[i].src.reg.reg) {
            need_extra_copy = true;
            temp = retype(vgrf(4), result.type);
            break;
         }
      }

      for (unsigned i = 0; i < 4; i++) {
         if (!(instr->dest.write_mask & (1 << i)))
            continue;

         if (instr->op == nir_op_imov || instr->op == nir_op_fmov) {
            inst = emit(MOV(offset(temp, i),
                        offset(op[0], instr->src[0].swizzle[i])));
         } else {
            inst = emit(MOV(offset(temp, i),
                        offset(op[i], instr->src[i].swizzle[0])));
         }
         inst->saturate = instr->dest.saturate;
      }

      /* In this case the source and destination registers were the same,
       * so we need to insert an extra set of moves in order to deal with
       * any swizzling.
       */
      if (need_extra_copy) {
         for (unsigned i = 0; i < 4; i++) {
            if (!(instr->dest.write_mask & (1 << i)))
               continue;

            emit(MOV(offset(result, i), offset(temp, i)));
         }
      }
      return;
   }
   default:
      break;
   }

   /* At this point, we have dealt with any instruction that operates on
    * more than a single channel.  Therefore, we can just adjust the source
    * and destination registers for that channel and emit the instruction.
    */
   unsigned channel = 0;
   if (nir_op_infos[instr->op].output_size == 0) {
      /* Since NIR is doing the scalarizing for us, we should only ever see
       * vectorized operations with a single channel.
       */
      assert(_mesa_bitcount(instr->dest.write_mask) == 1);
      channel = ffs(instr->dest.write_mask) - 1;

      result = offset(result, channel);
   }

   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
      assert(nir_op_infos[instr->op].input_sizes[i] < 2);
      op[i] = offset(op[i], instr->src[i].swizzle[channel]);
   }

   switch (instr->op) {
   case nir_op_i2f:
   case nir_op_u2f:
      inst = emit(MOV(result, op[0]));
      inst->saturate = instr->dest.saturate;
      break;

   case nir_op_f2i:
   case nir_op_f2u:
      emit(MOV(result, op[0]));
      break;

   case nir_op_fsign: {
      /* AND(val, 0x80000000) gives the sign bit.
       *
       * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
       * zero.
       */
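      /* Illustrative sequence for a scalar float val (hypothetical register
       * names):
       *
       *    cmp.nz.f0  null  val  0.0f          ; flag = (val != 0.0)
       *    and        dst   val  0x80000000u   ; isolate the sign bit
       *    (+f0) or   dst   dst  0x3f800000u   ; OR in 1.0 where val != 0
       *
       * giving +1.0, -1.0, or +/-0.0 when val is zero.
       */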
      emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));

      fs_reg result_int = retype(result, BRW_REGISTER_TYPE_UD);
      op[0].type = BRW_REGISTER_TYPE_UD;
      result.type = BRW_REGISTER_TYPE_UD;
      emit(AND(result_int, op[0], fs_reg(0x80000000u)));

      inst = emit(OR(result_int, result_int, fs_reg(0x3f800000u)));
      inst->predicate = BRW_PREDICATE_NORMAL;
      if (instr->dest.saturate) {
         inst = emit(MOV(result, result));
         inst->saturate = true;
      }
      break;
   }

   case nir_op_isign:
      /*  ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
       *               -> non-negative val generates 0x00000000.
       *  Predicated OR sets 1 if val is positive.
       */
      emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_G));
      emit(ASR(result, op[0], fs_reg(31)));
      inst = emit(OR(result, result, fs_reg(1)));
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;

   case nir_op_frcp:
      inst = emit_math(SHADER_OPCODE_RCP, result, op[0]);
      inst->saturate = instr->dest.saturate;
      break;

   case nir_op_fexp2:
      inst = emit_math(SHADER_OPCODE_EXP2, result, op[0]);
      inst->saturate = instr->dest.saturate;
      break;

   case nir_op_flog2:
      inst = emit_math(SHADER_OPCODE_LOG2, result, op[0]);
      inst->saturate = instr->dest.saturate;
      break;

   case nir_op_fsin:
      inst = emit_math(SHADER_OPCODE_SIN, result, op[0]);
      inst->saturate = instr->dest.saturate;
      break;

   case nir_op_fcos:
      inst = emit_math(SHADER_OPCODE_COS, result, op[0]);
      inst->saturate = instr->dest.saturate;
      break;

   case nir_op_fddx:
      if (fs_key->high_quality_derivatives) {
         inst = emit(FS_OPCODE_DDX_FINE, result, op[0]);
      } else {
         inst = emit(FS_OPCODE_DDX_COARSE, result, op[0]);
      }
      inst->saturate = instr->dest.saturate;
      break;
   case nir_op_fddx_fine:
      inst = emit(FS_OPCODE_DDX_FINE, result, op[0]);
      inst->saturate = instr->dest.saturate;
      break;
   case nir_op_fddx_coarse:
      inst = emit(FS_OPCODE_DDX_COARSE, result, op[0]);
      inst->saturate = instr->dest.saturate;
      break;
   case nir_op_fddy:
      if (fs_key->high_quality_derivatives) {
         inst = emit(FS_OPCODE_DDY_FINE, result, op[0],
                     fs_reg(fs_key->render_to_fbo));
      } else {
         inst = emit(FS_OPCODE_DDY_COARSE, result, op[0],
                     fs_reg(fs_key->render_to_fbo));
      }
      inst->saturate = instr->dest.saturate;
      break;
   case nir_op_fddy_fine:
      inst = emit(FS_OPCODE_DDY_FINE, result, op[0],
                  fs_reg(fs_key->render_to_fbo));
      inst->saturate = instr->dest.saturate;
      break;
   case nir_op_fddy_coarse:
      inst = emit(FS_OPCODE_DDY_COARSE, result, op[0],
                  fs_reg(fs_key->render_to_fbo));
      inst->saturate = instr->dest.saturate;
      break;

   case nir_op_fadd:
   case nir_op_iadd:
      inst = emit(ADD(result, op[0], op[1]));
      inst->saturate = instr->dest.saturate;
      break;

   case nir_op_fmul:
      inst = emit(MUL(result, op[0], op[1]));
      inst->saturate = instr->dest.saturate;
      break;

   case nir_op_imul:
      emit(MUL(result, op[0], op[1]));
      break;

   case nir_op_imul_high:
   case nir_op_umul_high: {
      if (devinfo->gen >= 7)
         no16("SIMD16 explicit accumulator operands unsupported\n");

      struct brw_reg acc = retype(brw_acc_reg(dispatch_width), result.type);

      fs_inst *mul = emit(MUL(acc, op[0], op[1]));
      emit(MACH(result, op[0], op[1]));

      /* Until Gen8, integer multiplies read 32-bits from one source, and
       * 16-bits from the other, relying on the MACH instruction to
       * generate the high bits of the result.
       *
       * On Gen8, the multiply instruction does a full 32x32-bit multiply,
       * but in order to do a 64x64-bit multiply we have to simulate the
       * previous behavior and then use a MACH instruction.
       *
       * FINISHME: Don't use source modifiers on src1.
       */
      if (devinfo->gen >= 8) {
         assert(mul->src[1].type == BRW_REGISTER_TYPE_D ||
                mul->src[1].type == BRW_REGISTER_TYPE_UD);
         if (mul->src[1].type == BRW_REGISTER_TYPE_D) {
            mul->src[1].type = BRW_REGISTER_TYPE_W;
            mul->src[1].stride = 2;
         } else {
            mul->src[1].type = BRW_REGISTER_TYPE_UW;
            mul->src[1].stride = 2;
         }
      }
      break;
   }

   case nir_op_idiv:
   case nir_op_udiv:
      emit_math(SHADER_OPCODE_INT_QUOTIENT, result, op[0], op[1]);
      break;

   case nir_op_uadd_carry: {
      if (devinfo->gen >= 7)
         no16("SIMD16 explicit accumulator operands unsupported\n");

      struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
                                  BRW_REGISTER_TYPE_UD);

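      /* ADDC computes op[0] + op[1] and leaves the per-channel carry bits
       * in the accumulator; the MOV below copies them out as the result.
       */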
      emit(ADDC(reg_null_ud, op[0], op[1]));
      emit(MOV(result, fs_reg(acc)));
      break;
   }

   case nir_op_usub_borrow: {
      if (devinfo->gen >= 7)
         no16("SIMD16 explicit accumulator operands unsupported\n");

      struct brw_reg acc = retype(brw_acc_reg(dispatch_width),
                                  BRW_REGISTER_TYPE_UD);

      emit(SUBB(reg_null_ud, op[0], op[1]));
      emit(MOV(result, fs_reg(acc)));
      break;
   }

   case nir_op_umod:
      emit_math(SHADER_OPCODE_INT_REMAINDER, result, op[0], op[1]);
      break;

   case nir_op_flt:
   case nir_op_ilt:
   case nir_op_ult:
      emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_L));
      break;

   case nir_op_fge:
   case nir_op_ige:
   case nir_op_uge:
      emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_GE));
      break;

   case nir_op_feq:
   case nir_op_ieq:
      emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_Z));
      break;

   case nir_op_fne:
   case nir_op_ine:
      emit(CMP(result, op[0], op[1], BRW_CONDITIONAL_NZ));
      break;

   case nir_op_inot:
      if (devinfo->gen >= 8) {
         resolve_source_modifiers(&op[0]);
      }
      emit(NOT(result, op[0]));
      break;
   case nir_op_ixor:
      if (devinfo->gen >= 8) {
         resolve_source_modifiers(&op[0]);
         resolve_source_modifiers(&op[1]);
      }
      emit(XOR(result, op[0], op[1]));
      break;
   case nir_op_ior:
      if (devinfo->gen >= 8) {
         resolve_source_modifiers(&op[0]);
         resolve_source_modifiers(&op[1]);
      }
      emit(OR(result, op[0], op[1]));
      break;
   case nir_op_iand:
      if (devinfo->gen >= 8) {
         resolve_source_modifiers(&op[0]);
         resolve_source_modifiers(&op[1]);
      }
      emit(AND(result, op[0], op[1]));
      break;

   case nir_op_fdot2:
   case nir_op_fdot3:
   case nir_op_fdot4:
   case nir_op_bany2:
   case nir_op_bany3:
   case nir_op_bany4:
   case nir_op_ball2:
   case nir_op_ball3:
   case nir_op_ball4:
   case nir_op_ball_fequal2:
   case nir_op_ball_iequal2:
   case nir_op_ball_fequal3:
   case nir_op_ball_iequal3:
   case nir_op_ball_fequal4:
   case nir_op_ball_iequal4:
   case nir_op_bany_fnequal2:
   case nir_op_bany_inequal2:
   case nir_op_bany_fnequal3:
   case nir_op_bany_inequal3:
   case nir_op_bany_fnequal4:
   case nir_op_bany_inequal4:
      unreachable("Lowered by nir_lower_alu_reductions");

   case nir_op_fnoise1_1:
   case nir_op_fnoise1_2:
   case nir_op_fnoise1_3:
   case nir_op_fnoise1_4:
   case nir_op_fnoise2_1:
   case nir_op_fnoise2_2:
   case nir_op_fnoise2_3:
   case nir_op_fnoise2_4:
   case nir_op_fnoise3_1:
   case nir_op_fnoise3_2:
   case nir_op_fnoise3_3:
   case nir_op_fnoise3_4:
   case nir_op_fnoise4_1:
   case nir_op_fnoise4_2:
   case nir_op_fnoise4_3:
   case nir_op_fnoise4_4:
      unreachable("not reached: should be handled by lower_noise");

   case nir_op_ldexp:
      unreachable("not reached: should be handled by ldexp_to_arith()");

   case nir_op_fsqrt:
      inst = emit_math(SHADER_OPCODE_SQRT, result, op[0]);
      inst->saturate = instr->dest.saturate;
      break;

   case nir_op_frsq:
      inst = emit_math(SHADER_OPCODE_RSQ, result, op[0]);
      inst->saturate = instr->dest.saturate;
      break;

   case nir_op_b2i:
      emit(AND(result, op[0], fs_reg(1)));
      break;
   case nir_op_b2f:
      emit(AND(retype(result, BRW_REGISTER_TYPE_UD), op[0], fs_reg(0x3f800000u)));
      break;

   case nir_op_f2b:
      emit(CMP(result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
      break;
   case nir_op_i2b:
      emit(CMP(result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
      break;

   case nir_op_ftrunc:
      inst = emit(RNDZ(result, op[0]));
      inst->saturate = instr->dest.saturate;
      break;

   case nir_op_fceil: {
      op[0].negate = !op[0].negate;
      fs_reg temp = vgrf(glsl_type::float_type);
      emit(RNDD(temp, op[0]));
      temp.negate = true;
      inst = emit(MOV(result, temp));
      inst->saturate = instr->dest.saturate;
      break;
   }
   case nir_op_ffloor:
      inst = emit(RNDD(result, op[0]));
      inst->saturate = instr->dest.saturate;
      break;
   case nir_op_ffract:
      inst = emit(FRC(result, op[0]));
      inst->saturate = instr->dest.saturate;
      break;
   case nir_op_fround_even:
      inst = emit(RNDE(result, op[0]));
      inst->saturate = instr->dest.saturate;
      break;

   case nir_op_fmin:
   case nir_op_imin:
   case nir_op_umin:
      if (devinfo->gen >= 6) {
         inst = emit(BRW_OPCODE_SEL, result, op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_L;
      } else {
         emit(CMP(reg_null_d, op[0], op[1], BRW_CONDITIONAL_L));
         inst = emit(SEL(result, op[0], op[1]));
         inst->predicate = BRW_PREDICATE_NORMAL;
      }
      inst->saturate = instr->dest.saturate;
      break;

   case nir_op_fmax:
   case nir_op_imax:
   case nir_op_umax:
      if (devinfo->gen >= 6) {
         inst = emit(BRW_OPCODE_SEL, result, op[0], op[1]);
         inst->conditional_mod = BRW_CONDITIONAL_GE;
      } else {
         emit(CMP(reg_null_d, op[0], op[1], BRW_CONDITIONAL_GE));
         inst = emit(SEL(result, op[0], op[1]));
         inst->predicate = BRW_PREDICATE_NORMAL;
      }
      inst->saturate = instr->dest.saturate;
      break;

   case nir_op_pack_snorm_2x16:
   case nir_op_pack_snorm_4x8:
   case nir_op_pack_unorm_2x16:
   case nir_op_pack_unorm_4x8:
   case nir_op_unpack_snorm_2x16:
   case nir_op_unpack_snorm_4x8:
   case nir_op_unpack_unorm_2x16:
   case nir_op_unpack_unorm_4x8:
   case nir_op_unpack_half_2x16:
   case nir_op_pack_half_2x16:
      unreachable("not reached: should be handled by lower_packing_builtins");

   case nir_op_unpack_half_2x16_split_x:
      inst = emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, result, op[0]);
      inst->saturate = instr->dest.saturate;
      break;
   case nir_op_unpack_half_2x16_split_y:
      inst = emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, result, op[0]);
      inst->saturate = instr->dest.saturate;
      break;

   case nir_op_fpow:
      inst = emit_math(SHADER_OPCODE_POW, result, op[0], op[1]);
      inst->saturate = instr->dest.saturate;
      break;

   case nir_op_bitfield_reverse:
      emit(BFREV(result, op[0]));
      break;

   case nir_op_bit_count:
      emit(CBIT(result, op[0]));
      break;

   case nir_op_ufind_msb:
   case nir_op_ifind_msb: {
      emit(FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]));

      /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
       * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
       * subtract the result from 31 to convert the MSB count into an LSB count.
       */

      emit(CMP(reg_null_d, result, fs_reg(-1), BRW_CONDITIONAL_NZ));
      fs_reg neg_result(result);
      neg_result.negate = true;
      inst = emit(ADD(result, neg_result, fs_reg(31)));
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;
   }

   case nir_op_find_lsb:
      emit(FBL(result, op[0]));
      break;

   case nir_op_ubitfield_extract:
   case nir_op_ibitfield_extract:
      emit(BFE(result, op[2], op[1], op[0]));
      break;
   case nir_op_bfm:
      emit(BFI1(result, op[0], op[1]));
      break;
   case nir_op_bfi:
      emit(BFI2(result, op[0], op[1], op[2]));
      break;

   case nir_op_bitfield_insert:
      unreachable("not reached: should be handled by "
                  "lower_instructions::bitfield_insert_to_bfm_bfi");

   case nir_op_ishl:
      emit(SHL(result, op[0], op[1]));
      break;
   case nir_op_ishr:
      emit(ASR(result, op[0], op[1]));
      break;
   case nir_op_ushr:
      emit(SHR(result, op[0], op[1]));
      break;

   case nir_op_pack_half_2x16_split:
      emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, result, op[0], op[1]);
      break;

   case nir_op_ffma:
      inst = emit(MAD(result, op[2], op[1], op[0]));
      inst->saturate = instr->dest.saturate;
      break;

   case nir_op_flrp:
      inst = emit_lrp(result, op[0], op[1], op[2]);
      inst->saturate = instr->dest.saturate;
      break;

   case nir_op_bcsel:
      if (optimize_frontfacing_ternary(instr, result))
         return;

      emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
      inst = emit(SEL(result, op[1], op[2]));
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;

   default:
      unreachable("unhandled instruction");
   }

   /* If we need to do a boolean resolve, replace the result with -(x & 1)
    * to sign extend the low bit to 0/~0
    */
   if (devinfo->gen <= 5 &&
       (instr->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) == BRW_NIR_BOOLEAN_NEEDS_RESOLVE) {
      fs_reg masked = vgrf(glsl_type::int_type);
      emit(AND(masked, result, fs_reg(1)));
      masked.negate = true;
      emit(MOV(retype(result, BRW_REGISTER_TYPE_D), masked));
   }
}

static fs_reg
fs_reg_for_nir_reg(fs_visitor *v, nir_register *nir_reg,
                   unsigned base_offset, nir_src *indirect)
{
   fs_reg reg;
   if (nir_reg->is_global)
      reg = v->nir_globals[nir_reg->index];
   else
      reg = v->nir_locals[nir_reg->index];

   reg = offset(reg, base_offset * nir_reg->num_components);
   if (indirect) {
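      /* Scale the indirect index from whole NIR array elements to the
       * units reladdr is measured in; the dispatch_width / 8 factor is
       * presumably because each logical component occupies one hardware
       * register per 8 channels.
       */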
      int multiplier = nir_reg->num_components * (v->dispatch_width / 8);

      reg.reladdr = new(v->mem_ctx) fs_reg(v->vgrf(glsl_type::int_type));
      v->emit(v->MUL(*reg.reladdr, v->get_nir_src(*indirect),
                     fs_reg(multiplier)));
   }

   return reg;
}

fs_reg
fs_visitor::get_nir_src(nir_src src)
{
   if (src.is_ssa) {
      assert(src.ssa->parent_instr->type == nir_instr_type_load_const);
      nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr);
      fs_reg reg = vgrf(src.ssa->num_components);
      reg.type = BRW_REGISTER_TYPE_D;

      for (unsigned i = 0; i < src.ssa->num_components; ++i)
         emit(MOV(offset(reg, i), fs_reg(load->value.i[i])));

      return reg;
   } else {
      fs_reg reg = fs_reg_for_nir_reg(this, src.reg.reg, src.reg.base_offset,
                                      src.reg.indirect);

      /* to avoid floating-point denorm flushing problems, set the type by
       * default to D - instructions that need floating point semantics will set
       * this to F if they need to
       */
      return retype(reg, BRW_REGISTER_TYPE_D);
   }
}

fs_reg
fs_visitor::get_nir_dest(nir_dest dest)
{
   return fs_reg_for_nir_reg(this, dest.reg.reg, dest.reg.base_offset,
                             dest.reg.indirect);
}

void
fs_visitor::emit_percomp(fs_inst *inst, unsigned wr_mask)
{
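   /* Clone the template instruction once per enabled component of wr_mask,
    * bumping the destination (and any GRF sources) to the matching
    * component each time.
    */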
   for (unsigned i = 0; i < 4; i++) {
      if (!((wr_mask >> i) & 1))
         continue;

      fs_inst *new_inst = new(mem_ctx) fs_inst(*inst);
      new_inst->dst = offset(new_inst->dst, i);
      for (unsigned j = 0; j < new_inst->sources; j++)
         if (inst->src[j].file == GRF)
            new_inst->src[j] = offset(new_inst->src[j], i);

      emit(new_inst);
   }
}

void
fs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
{
   fs_reg dest;
   if (nir_intrinsic_infos[instr->intrinsic].has_dest)
      dest = get_nir_dest(instr->dest);

   bool has_indirect = false;

   switch (instr->intrinsic) {
   case nir_intrinsic_discard:
   case nir_intrinsic_discard_if: {
      /* We track our discarded pixels in f0.1.  By predicating on it, we can
       * update just the flag bits that aren't yet discarded.  If there's no
       * condition, we emit a CMP of g0 != g0, so all currently executing
       * channels will get turned off.
       */
      fs_inst *cmp;
      if (instr->intrinsic == nir_intrinsic_discard_if) {
         cmp = emit(CMP(reg_null_f, get_nir_src(instr->src[0]),
                        fs_reg(0), BRW_CONDITIONAL_Z));
      } else {
         fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
                                         BRW_REGISTER_TYPE_UW));
         cmp = emit(CMP(reg_null_f, some_reg, some_reg, BRW_CONDITIONAL_NZ));
      }
      cmp->predicate = BRW_PREDICATE_NORMAL;
      cmp->flag_subreg = 1;

      if (devinfo->gen >= 6) {
         emit_discard_jump();
      }
      break;
   }

   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_dec:
   case nir_intrinsic_atomic_counter_read: {
      unsigned surf_index = prog_data->binding_table.abo_start +
                            (unsigned) instr->const_index[0];
      fs_reg offset = fs_reg(get_nir_src(instr->src[0]));

      switch (instr->intrinsic) {
         case nir_intrinsic_atomic_counter_inc:
            emit_untyped_atomic(BRW_AOP_INC, surf_index, dest, offset,
                                fs_reg(), fs_reg());
            break;
         case nir_intrinsic_atomic_counter_dec:
            emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dest, offset,
                                fs_reg(), fs_reg());
            break;
         case nir_intrinsic_atomic_counter_read:
            emit_untyped_surface_read(surf_index, dest, offset);
            break;
         default:
            unreachable("Unreachable");
      }
      break;
   }

   case nir_intrinsic_load_front_face:
      emit(MOV(retype(dest, BRW_REGISTER_TYPE_D),
               *emit_frontfacing_interpolation()));
      break;

   case nir_intrinsic_load_vertex_id:
      unreachable("should be lowered by lower_vertex_id()");

   case nir_intrinsic_load_vertex_id_zero_base: {
      fs_reg vertex_id = nir_system_values[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE];
      assert(vertex_id.file != BAD_FILE);
      dest.type = vertex_id.type;
      emit(MOV(dest, vertex_id));
      break;
   }

   case nir_intrinsic_load_base_vertex: {
      fs_reg base_vertex = nir_system_values[SYSTEM_VALUE_BASE_VERTEX];
      assert(base_vertex.file != BAD_FILE);
      dest.type = base_vertex.type;
      emit(MOV(dest, base_vertex));
      break;
   }

   case nir_intrinsic_load_instance_id: {
      fs_reg instance_id = nir_system_values[SYSTEM_VALUE_INSTANCE_ID];
      assert(instance_id.file != BAD_FILE);
      dest.type = instance_id.type;
      emit(MOV(dest, instance_id));
      break;
   }

   case nir_intrinsic_load_sample_mask_in: {
      fs_reg sample_mask_in = nir_system_values[SYSTEM_VALUE_SAMPLE_MASK_IN];
      assert(sample_mask_in.file != BAD_FILE);
      dest.type = sample_mask_in.type;
      emit(MOV(dest, sample_mask_in));
      break;
   }

   case nir_intrinsic_load_sample_pos: {
      fs_reg sample_pos = nir_system_values[SYSTEM_VALUE_SAMPLE_POS];
      assert(sample_pos.file != BAD_FILE);
      dest.type = sample_pos.type;
      emit(MOV(dest, sample_pos));
      emit(MOV(offset(dest, 1), offset(sample_pos, 1)));
      break;
   }

   case nir_intrinsic_load_sample_id: {
      fs_reg sample_id = nir_system_values[SYSTEM_VALUE_SAMPLE_ID];
      assert(sample_id.file != BAD_FILE);
      dest.type = sample_id.type;
      emit(MOV(dest, sample_id));
      break;
   }

   case nir_intrinsic_load_uniform_indirect:
      has_indirect = true;
      /* fallthrough */
   case nir_intrinsic_load_uniform: {
      unsigned index = instr->const_index[0];

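      /* Uniform indices below num_direct_uniforms address the direct half
       * of the split uniform file (see nir_setup_uniforms); anything past
       * that is rebased into the indirect half.
       */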
  1340.       fs_reg uniform_reg;
  1341.       if (index < num_direct_uniforms) {
  1342.          uniform_reg = fs_reg(UNIFORM, 0);
  1343.       } else {
  1344.          uniform_reg = fs_reg(UNIFORM, num_direct_uniforms);
  1345.          index -= num_direct_uniforms;
  1346.       }
  1347.  
  1348.       for (int i = 0; i < instr->const_index[1]; i++) {
  1349.          for (unsigned j = 0; j < instr->num_components; j++) {
  1350.             fs_reg src = offset(retype(uniform_reg, dest.type), index);
  1351.             if (has_indirect)
  1352.                src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
  1353.             index++;
  1354.  
  1355.             emit(MOV(dest, src));
  1356.             dest = offset(dest, 1);
  1357.          }
  1358.       }
  1359.       break;
  1360.    }
  1361.  
  1362.    case nir_intrinsic_load_ubo_indirect:
  1363.       has_indirect = true;
  1364.       /* fallthrough */
  1365.    case nir_intrinsic_load_ubo: {
  1366.       nir_const_value *const_index = nir_src_as_const_value(instr->src[0]);
  1367.       fs_reg surf_index;
  1368.  
  1369.       if (const_index) {
  1370.          surf_index = fs_reg(stage_prog_data->binding_table.ubo_start +
  1371.                              const_index->u[0]);
  1372.       } else {
  1373.          /* The block index is not a constant. Evaluate the index expression
  1374.           * per-channel and add the base UBO index; we have to select a value
  1375.           * from any live channel.
  1376.           */
  1377.          surf_index = vgrf(glsl_type::uint_type);
  1378.          emit(ADD(surf_index, get_nir_src(instr->src[0]),
  1379.                   fs_reg(stage_prog_data->binding_table.ubo_start)));
  1380.          emit_uniformize(surf_index, surf_index);
  1381.  
  1382.          /* Assume this may touch any UBO. It would be nice to provide
  1383.           * a tighter bound, but the array information is already lowered away.
  1384.           */
  1385.          brw_mark_surface_used(prog_data,
  1386.                                stage_prog_data->binding_table.ubo_start +
  1387.                                shader_prog->NumUniformBlocks - 1);
  1388.       }
  1389.  
  1390.       if (has_indirect) {
  1391.          /* Turn the byte offset into a dword offset. */
  1392.          fs_reg base_offset = vgrf(glsl_type::int_type);
  1393.          emit(SHR(base_offset, retype(get_nir_src(instr->src[1]),
  1394.                                  BRW_REGISTER_TYPE_D),
  1395.                   fs_reg(2)));
  1396.  
  1397.          unsigned vec4_offset = instr->const_index[0] / 4;
  1398.          for (int i = 0; i < instr->num_components; i++)
  1399.             emit(VARYING_PULL_CONSTANT_LOAD(offset(dest, i), surf_index,
  1400.                                             base_offset, vec4_offset + i));
  1401.       } else {
  1402.          fs_reg packed_consts = vgrf(glsl_type::float_type);
  1403.          packed_consts.type = dest.type;
  1404.  
  1405.          fs_reg const_offset_reg((unsigned) instr->const_index[0] & ~15);
  1406.          emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
  1407.               surf_index, const_offset_reg);
  1408.  
  1409.          for (unsigned i = 0; i < instr->num_components; i++) {
  1410.             packed_consts.set_smear(instr->const_index[0] % 16 / 4 + i);
  1411.  
  1412.             /* The std140 packing rules don't allow vectors to cross 16-byte
  1413.              * boundaries, and a reg is 32 bytes.
  1414.              */
  1415.             assert(packed_consts.subreg_offset < 32);
  1416.  
  1417.             emit(MOV(dest, packed_consts));
  1418.             dest = offset(dest, 1);
  1419.          }
  1420.       }
  1421.       break;
  1422.    }
  1423.  
  1424.    case nir_intrinsic_load_input_indirect:
  1425.       has_indirect = true;
  1426.       /* fallthrough */
  1427.    case nir_intrinsic_load_input: {
  1428.       unsigned index = 0;
  1429.       for (int i = 0; i < instr->const_index[1]; i++) {
  1430.          for (unsigned j = 0; j < instr->num_components; j++) {
  1431.             fs_reg src = offset(retype(nir_inputs, dest.type),
  1432.                                 instr->const_index[0] + index);
  1433.             if (has_indirect)
  1434.                src.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[0]));
  1435.             index++;
  1436.  
  1437.             emit(MOV(dest, src));
  1438.             dest = offset(dest, 1);
  1439.          }
  1440.       }
  1441.       break;
  1442.    }
  1443.  
  1444.    /* Handle ARB_gpu_shader5 interpolation intrinsics
  1445.     *
  1446.     * It's worth a quick word of explanation as to why we handle the full
  1447.     * variable-based interpolation intrinsic rather than a lowered version
  1448.     * with an offset, like we do for other inputs.  We have to do that
  1449.     * because the way we set up inputs doesn't allow us to reuse the
  1450.     * already-interpolated values.  At the beginning of the shader, we walk
  1451.     * all of the input variables, do the initial interpolation, and store
  1452.     * the results in the nir_inputs array based on each variable's location
  1453.     * as determined in nir_lower_io.  If an input isn't used, dead code
  1454.     * elimination cleans it up and everything works fine.  However, the
  1455.     * ARB_gpu_shader5 interpolation intrinsics need to reinterpolate the
  1456.     * input differently.  An intrinsic that carried just an index would
  1457.     * only give us the offset into the nir_inputs array, which is useless
  1458.     * because that value is post-interpolation and we need the
  1459.     * pre-interpolation value.  In order to get at the actual bits coming
  1460.     * from the vertex fetching hardware, we need the variable.
  1461.     */
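           /* These intrinsics correspond to ARB_gpu_shader5's GLSL built-ins,
            * e.g.
            *
            *    vec4 v = interpolateAtOffset(some_input, vec2(0.25, -0.25));
            *
            * which reaches us as nir_intrinsic_interp_var_at_offset.
            */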
  1462.    case nir_intrinsic_interp_var_at_centroid:
  1463.    case nir_intrinsic_interp_var_at_sample:
  1464.    case nir_intrinsic_interp_var_at_offset: {
  1465.       /* In SIMD16 mode, the pixel interpolator returns coords interleaved
  1466.        * 8 channels at a time, same as the barycentric coords presented in
  1467.        * the FS payload.  This requires a bit of extra work to support.
  1468.        */
  1469.       no16("interpolate_at_* not yet supported in SIMD16 mode.");
  1470.  
  1471.       fs_reg dst_xy = vgrf(2);
  1472.  
  1473.       /* For most messages, we need one reg of ignored data; the hardware
  1474.        * requires mlen==1 even when there is no payload.  In the per-slot
  1475.        * offset case, we'll replace this with the proper source data.
  1476.        */
  1477.       fs_reg src = vgrf(glsl_type::float_type);
  1478.       int mlen = 1;     /* one reg unless overridden */
  1479.       fs_inst *inst;
  1480.  
  1481.       switch (instr->intrinsic) {
  1482.       case nir_intrinsic_interp_var_at_centroid:
  1483.          inst = emit(FS_OPCODE_INTERPOLATE_AT_CENTROID, dst_xy, src, fs_reg(0u));
  1484.          break;
  1485.  
  1486.       case nir_intrinsic_interp_var_at_sample: {
  1487.          /* XXX: We should probably handle non-constant sample IDs */
  1488.          nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
  1489.          assert(const_sample);
  1490.          unsigned msg_data = const_sample ? const_sample->i[0] << 4 : 0;
  1491.          inst = emit(FS_OPCODE_INTERPOLATE_AT_SAMPLE, dst_xy, src,
  1492.                      fs_reg(msg_data));
  1493.          break;
  1494.       }
  1495.  
  1496.       case nir_intrinsic_interp_var_at_offset: {
  1497.          nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
  1498.  
  1499.          if (const_offset) {
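                    /* Offsets are encoded as signed S0.4 fixed point, e.g.
                     * +0.25 -> (int)(0.25 * 16) == 4, i.e. 4/16.  +0.5 would
                     * encode as 8, which a 4-bit signed field reads back as
                     * -8/16, hence the clamp to 7 (the non-constant path
                     * below applies the same clamp).
                     */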
  1500.             unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf;
  1501.             unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf;
  1502.  
  1503.             inst = emit(FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET, dst_xy, src,
  1504.                         fs_reg(off_x | (off_y << 4)));
  1505.          } else {
  1506.             src = vgrf(glsl_type::ivec2_type);
  1507.             fs_reg offset_src = retype(get_nir_src(instr->src[0]),
  1508.                                        BRW_REGISTER_TYPE_F);
  1509.             for (int i = 0; i < 2; i++) {
  1510.                fs_reg temp = vgrf(glsl_type::float_type);
  1511.                emit(MUL(temp, offset(offset_src, i), fs_reg(16.0f)));
  1512.                fs_reg itemp = vgrf(glsl_type::int_type);
  1513.                emit(MOV(itemp, temp));  /* float to int */
  1514.  
  1515.                /* Clamp the upper end of the range to +7/16.
  1516.                 * ARB_gpu_shader5 requires that we support a maximum offset
  1517.                 * of +0.5, which isn't representable in a S0.4 value -- if
  1518.                 * we didn't clamp it, we'd end up with -8/16, which is the
  1519.                 * opposite of what the shader author wanted.
  1520.                 *
  1521.                 * This is legal due to ARB_gpu_shader5's quantization
  1522.                 * rules:
  1523.                 *
  1524.                 * "Not all values of <offset> may be supported; x and y
  1525.                 * offsets may be rounded to fixed-point values with the
  1526.                 * number of fraction bits given by the
  1527.                 * implementation-dependent constant
  1528.                 * FRAGMENT_INTERPOLATION_OFFSET_BITS"
  1529.                 */
  1530.  
  1531.                emit(BRW_OPCODE_SEL, offset(src, i), itemp, fs_reg(7))
  1532.                    ->conditional_mod = BRW_CONDITIONAL_L; /* = min(itemp, 7) */
  1533.             }
  1534.  
  1535.             mlen = 2;
  1536.             inst = emit(FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET, dst_xy, src,
  1537.                         fs_reg(0u));
  1538.          }
  1539.          break;
  1540.       }
  1541.  
  1542.       default:
  1543.          unreachable("Invalid intrinsic");
  1544.       }
  1545.  
  1546.       inst->mlen = mlen;
  1547.       inst->regs_written = 2; /* 2 floats per slot returned */
  1548.       inst->pi_noperspective = instr->variables[0]->var->data.interpolation ==
  1549.                                INTERP_QUALIFIER_NOPERSPECTIVE;
  1550.  
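              /* Evaluate each component's plane equation at the barycentrics
               * we just computed; interp_reg() supplies the per-attribute
               * interpolation setup data.
               */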
  1551.       for (unsigned j = 0; j < instr->num_components; j++) {
  1552.          fs_reg src = interp_reg(instr->variables[0]->var->data.location, j);
  1553.          src.type = dest.type;
  1554.  
  1555.          emit(FS_OPCODE_LINTERP, dest, dst_xy, src);
  1556.          dest = offset(dest, 1);
  1557.       }
  1558.       break;
  1559.    }
  1560.  
  1561.    case nir_intrinsic_store_output_indirect:
  1562.       has_indirect = true;
  1563.       /* fallthrough */
  1564.    case nir_intrinsic_store_output: {
  1565.       fs_reg src = get_nir_src(instr->src[0]);
  1566.       unsigned index = 0;
  1567.       for (int i = 0; i < instr->const_index[1]; i++) {
  1568.          for (unsigned j = 0; j < instr->num_components; j++) {
  1569.             fs_reg new_dest = offset(retype(nir_outputs, src.type),
  1570.                                      instr->const_index[0] + index);
  1571.             if (has_indirect)
  1572.                new_dest.reladdr = new(mem_ctx) fs_reg(get_nir_src(instr->src[1]));
  1573.             index++;
  1574.             emit(MOV(new_dest, src));
  1575.             src = offset(src, 1);
  1576.          }
  1577.       }
  1578.       break;
  1579.    }
  1580.  
  1581.    default:
  1582.       unreachable("unknown intrinsic");
  1583.    }
  1584. }
  1585.  
  1586. void
  1587. fs_visitor::nir_emit_texture(nir_tex_instr *instr)
  1588. {
  1589.    unsigned sampler = instr->sampler_index;
  1590.    fs_reg sampler_reg(sampler);
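           /* Default to the immediate sampler index; this is replaced below
            * with a computed value if nir_tex_src_sampler_offset is present
            * (indirect indexing into a sampler array).
            */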
  1591.  
  1592.    /* FINISHME: We're failing to recompile our programs when the sampler is
  1593.     * updated.  This only matters for the texture rectangle scale parameters
  1594.     * (pre-gen6, or gen6+ with GL_CLAMP).
  1595.     */
  1596.    int texunit = prog->SamplerUnits[sampler];
  1597.  
  1598.    int gather_component = instr->component;
  1599.  
  1600.    bool is_rect = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;
  1601.  
  1602.    bool is_cube_array = instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
  1603.                         instr->is_array;
  1604.  
  1605.    int lod_components = 0, offset_components = 0;
  1606.  
  1607.    fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, tex_offset;
  1608.  
  1609.    for (unsigned i = 0; i < instr->num_srcs; i++) {
  1610.       fs_reg src = get_nir_src(instr->src[i].src);
  1611.       switch (instr->src[i].src_type) {
  1612.       case nir_tex_src_bias:
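                 /* A LOD bias is carried in the same variable as an explicit
                  * LOD; emit_texture() tells the two apart by opcode (ir_txb
                  * vs. ir_txl).
                  */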
  1613.          lod = retype(src, BRW_REGISTER_TYPE_F);
  1614.          break;
  1615.       case nir_tex_src_comparitor:
  1616.          shadow_comparitor = retype(src, BRW_REGISTER_TYPE_F);
  1617.          break;
  1618.       case nir_tex_src_coord:
  1619.          switch (instr->op) {
  1620.          case nir_texop_txf:
  1621.          case nir_texop_txf_ms:
  1622.             coordinate = retype(src, BRW_REGISTER_TYPE_D);
  1623.             break;
  1624.          default:
  1625.             coordinate = retype(src, BRW_REGISTER_TYPE_F);
  1626.             break;
  1627.          }
  1628.          break;
  1629.       case nir_tex_src_ddx:
  1630.          lod = retype(src, BRW_REGISTER_TYPE_F);
  1631.          lod_components = nir_tex_instr_src_size(instr, i);
  1632.          break;
  1633.       case nir_tex_src_ddy:
  1634.          lod2 = retype(src, BRW_REGISTER_TYPE_F);
  1635.          break;
  1636.       case nir_tex_src_lod:
  1637.          switch (instr->op) {
  1638.          case nir_texop_txs:
  1639.             lod = retype(src, BRW_REGISTER_TYPE_UD);
  1640.             break;
  1641.          case nir_texop_txf:
  1642.             lod = retype(src, BRW_REGISTER_TYPE_D);
  1643.             break;
  1644.          default:
  1645.             lod = retype(src, BRW_REGISTER_TYPE_F);
  1646.             break;
  1647.          }
  1648.          break;
  1649.       case nir_tex_src_ms_index:
  1650.          sample_index = retype(src, BRW_REGISTER_TYPE_UD);
  1651.          break;
  1652.       case nir_tex_src_offset:
  1653.          tex_offset = retype(src, BRW_REGISTER_TYPE_D);
  1654.          if (instr->is_array)
  1655.             offset_components = instr->coord_components - 1;
  1656.          else
  1657.             offset_components = instr->coord_components;
  1658.          break;
  1659.       case nir_tex_src_projector:
  1660.          unreachable("should be lowered");
  1661.  
  1662.       case nir_tex_src_sampler_offset: {
  1663.          /* Figure out the highest possible sampler index and mark it as used */
  1664.          uint32_t max_used = sampler + instr->sampler_array_size - 1;
  1665.          if (instr->op == nir_texop_tg4 && devinfo->gen < 8) {
  1666.             max_used += stage_prog_data->binding_table.gather_texture_start;
  1667.          } else {
  1668.             max_used += stage_prog_data->binding_table.texture_start;
  1669.          }
  1670.          brw_mark_surface_used(prog_data, max_used);
  1671.  
  1672.          /* Emit code to evaluate the actual indexing expression */
  1673.          sampler_reg = vgrf(glsl_type::uint_type);
  1674.          emit(ADD(sampler_reg, src, fs_reg(sampler)));
  1675.          emit_uniformize(sampler_reg, sampler_reg);
  1676.          break;
  1677.       }
  1678.  
  1679.       default:
  1680.          unreachable("unknown texture source");
  1681.       }
  1682.    }
  1683.  
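           /* A texelFetch() from a multisampled surface that uses the
            * compressed MCS layout needs the MCS value fetched up front and
            * passed along with the texel fetch; an immediate 0 stands in for
            * "not compressed".
            */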
  1684.    if (instr->op == nir_texop_txf_ms) {
  1685.       if (devinfo->gen >= 7 &&
  1686.           key_tex->compressed_multisample_layout_mask & (1 << sampler)) {
  1687.          mcs = emit_mcs_fetch(coordinate, instr->coord_components, sampler_reg);
  1688.       } else {
  1689.          mcs = fs_reg(0u);
  1690.       }
  1691.    }
  1692.  
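           /* A nonzero constant offset can't coexist with a variable offset
            * (the assert below checks this); brw_texture_offset() packs the
            * three components into the single immediate dword the sampler
            * message header expects.
            */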
  1693.    for (unsigned i = 0; i < 3; i++) {
  1694.       if (instr->const_offset[i] != 0) {
  1695.          assert(offset_components == 0);
  1696.          tex_offset = fs_reg(brw_texture_offset(instr->const_offset, 3));
  1697.          break;
  1698.       }
  1699.    }
  1700.  
  1701.    enum glsl_base_type dest_base_type;
  1702.    switch (instr->dest_type) {
  1703.    case nir_type_float:
  1704.       dest_base_type = GLSL_TYPE_FLOAT;
  1705.       break;
  1706.    case nir_type_int:
  1707.       dest_base_type = GLSL_TYPE_INT;
  1708.       break;
  1709.    case nir_type_unsigned:
  1710.       dest_base_type = GLSL_TYPE_UINT;
  1711.       break;
  1712.    default:
  1713.       unreachable("bad type");
  1714.    }
  1715.  
  1716.    const glsl_type *dest_type =
  1717.       glsl_type::get_instance(dest_base_type, nir_tex_instr_dest_size(instr),
  1718.                               1);
  1719.  
  1720.    ir_texture_opcode op;
  1721.    switch (instr->op) {
  1722.    case nir_texop_lod: op = ir_lod; break;
  1723.    case nir_texop_query_levels: op = ir_query_levels; break;
  1724.    case nir_texop_tex: op = ir_tex; break;
  1725.    case nir_texop_tg4: op = ir_tg4; break;
  1726.    case nir_texop_txb: op = ir_txb; break;
  1727.    case nir_texop_txd: op = ir_txd; break;
  1728.    case nir_texop_txf: op = ir_txf; break;
  1729.    case nir_texop_txf_ms: op = ir_txf_ms; break;
  1730.    case nir_texop_txl: op = ir_txl; break;
  1731.    case nir_texop_txs: op = ir_txs; break;
  1732.    default:
  1733.       unreachable("unknown texture opcode");
  1734.    }
  1735.  
  1736.    emit_texture(op, dest_type, coordinate, instr->coord_components,
  1737.                 shadow_comparitor, lod, lod2, lod_components, sample_index,
  1738.                 tex_offset, mcs, gather_component,
  1739.                 is_cube_array, is_rect, sampler, sampler_reg, texunit);
  1740.  
  1741.    fs_reg dest = get_nir_dest(instr->dest);
  1742.    dest.type = this->result.type;
  1743.    unsigned num_components = nir_tex_instr_dest_size(instr);
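           /* (1 << n) - 1 builds a writemask covering the low n components,
            * e.g. 0b111 for a vec3 result.
            */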
  1744.    emit_percomp(MOV(dest, this->result), (1 << num_components) - 1);
  1745. }
  1746.  
  1747. void
  1748. fs_visitor::nir_emit_jump(nir_jump_instr *instr)
  1749. {
  1750.    switch (instr->type) {
  1751.    case nir_jump_break:
  1752.       emit(BRW_OPCODE_BREAK);
  1753.       break;
  1754.    case nir_jump_continue:
  1755.       emit(BRW_OPCODE_CONTINUE);
  1756.       break;
  1757.    case nir_jump_return:
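              /* Returns should have been lowered away before the backend
               * sees the shader; treat one as a bug.
               */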
  1758.    default:
  1759.       unreachable("unknown jump");
  1760.    }
  1761. }
  1762.