Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  /*
   * Copyright © 2014-2015 Broadcom
   * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
   *
   * Permission is hereby granted, free of charge, to any person obtaining a
   * copy of this software and associated documentation files (the "Software"),
   * to deal in the Software without restriction, including without limitation
   * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   * and/or sell copies of the Software, and to permit persons to whom the
   * Software is furnished to do so, subject to the following conditions:
   *
   * The above copyright notice and this permission notice (including the next
   * paragraph) shall be included in all copies or substantial portions of the
   * Software.
   *
   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
   * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
   * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
   * IN THE SOFTWARE.
   */
  24.  
  25. #include "util/ralloc.h"
  26. #include "glsl/nir/nir.h"
  27. #include "glsl/nir/nir_builder.h"
  28. #include "glsl/list.h"
  29. #include "glsl/shader_enums.h"
  30.  
  31. #include "nir/tgsi_to_nir.h"
  32. #include "tgsi/tgsi_parse.h"
  33. #include "tgsi/tgsi_dump.h"
  34. #include "tgsi/tgsi_info.h"
  35. #include "tgsi/tgsi_scan.h"
  36.  
  /* Expands to an anonymous unsigned[4] compound literal holding the four
   * TGSI_SWIZZLE_* enumerators selected (by token pasting) with X, Y, Z, W.
   */
  #define SWIZ(X, Y, Z, W)                                  \
     (unsigned[4]){ TGSI_SWIZZLE_##X, TGSI_SWIZZLE_##Y,     \
                    TGSI_SWIZZLE_##Z, TGSI_SWIZZLE_##W }
  43.  
  44. struct ttn_reg_info {
  45.    /** nir register containing this TGSI index. */
  46.    nir_register *reg;
  47.    nir_variable *var;
  48.    /** Offset (in vec4s) from the start of var for this TGSI index. */
  49.    int offset;
  50. };
  51.  
  52. struct ttn_compile {
  53.    union tgsi_full_token *token;
  54.    nir_builder build;
  55.    struct tgsi_shader_info *scan;
  56.  
  57.    struct ttn_reg_info *output_regs;
  58.    struct ttn_reg_info *temp_regs;
  59.    nir_ssa_def **imm_defs;
  60.  
  61.    nir_register *addr_reg;
  62.  
  63.    /**
  64.     * Stack of cf_node_lists where instructions should be pushed as we pop
  65.     * back out of the control flow stack.
  66.     *
  67.     * For each IF/ELSE/ENDIF block, if_stack[if_stack_pos] has where the else
  68.     * instructions should be placed, and if_stack[if_stack_pos - 1] has where
  69.     * the next instructions outside of the if/then/else block go.
  70.     */
  71.    struct exec_list **if_stack;
  72.    unsigned if_stack_pos;
  73.  
  74.    /**
  75.     * Stack of cf_node_lists where instructions should be pushed as we pop
  76.     * back out of the control flow stack.
  77.     *
  78.     * loop_stack[loop_stack_pos - 1] contains the cf_node_list for the outside
  79.     * of the loop.
  80.     */
  81.    struct exec_list **loop_stack;
  82.    unsigned loop_stack_pos;
  83.  
  84.    /* How many TGSI_FILE_IMMEDIATE vec4s have been parsed so far. */
  85.    unsigned next_imm;
  86. };
  87.  
  /* Four-component swizzle of src; x/y/z/w are TGSI swizzle names (X, Y, Z, W). */
  #define ttn_swizzle(b, src, x, y, z, w)                   \
     nir_swizzle(b, src, SWIZ(x, y, z, w), 4, false)

  /* Single-component extract of channel swiz (X, Y, Z or W) from src. */
  #define ttn_channel(b, src, swiz)                         \
     nir_swizzle(b, src, SWIZ(swiz, swiz, swiz, swiz), 1, false)
  92.  
  93. static nir_ssa_def *
  94. ttn_src_for_dest(nir_builder *b, nir_alu_dest *dest)
  95. {
  96.    nir_alu_src src;
  97.    memset(&src, 0, sizeof(src));
  98.  
  99.    if (dest->dest.is_ssa)
  100.       src.src = nir_src_for_ssa(&dest->dest.ssa);
  101.    else {
  102.       assert(!dest->dest.reg.indirect);
  103.       src.src = nir_src_for_reg(dest->dest.reg.reg);
  104.       src.src.reg.base_offset = dest->dest.reg.base_offset;
  105.    }
  106.  
  107.    for (int i = 0; i < 4; i++)
  108.       src.swizzle[i] = i;
  109.  
  110.    return nir_fmov_alu(b, src, 4);
  111. }
  112.  
  113. static void
  114. ttn_emit_declaration(struct ttn_compile *c)
  115. {
  116.    nir_builder *b = &c->build;
  117.    struct tgsi_full_declaration *decl = &c->token->FullDeclaration;
  118.    unsigned array_size = decl->Range.Last - decl->Range.First + 1;
  119.    unsigned file = decl->Declaration.File;
  120.    unsigned i;
  121.  
  122.    if (file == TGSI_FILE_TEMPORARY) {
  123.       if (decl->Declaration.Array) {
  124.          /* for arrays, we create variables instead of registers: */
  125.          nir_variable *var = rzalloc(b->shader, nir_variable);
  126.  
  127.          var->type = glsl_array_type(glsl_vec4_type(), array_size);
  128.          var->data.mode = nir_var_global;
  129.          var->name = ralloc_asprintf(var, "arr_%d", decl->Array.ArrayID);
  130.  
  131.          exec_list_push_tail(&b->shader->globals, &var->node);
  132.  
  133.          for (i = 0; i < array_size; i++) {
  134.             /* point all the matching slots to the same var,
  135.              * with appropriate offset set, mostly just so
  136.              * we know what to do when tgsi does a non-indirect
  137.              * access
  138.              */
  139.             c->temp_regs[decl->Range.First + i].reg = NULL;
  140.             c->temp_regs[decl->Range.First + i].var = var;
  141.             c->temp_regs[decl->Range.First + i].offset = i;
  142.          }
  143.       } else {
  144.          for (i = 0; i < array_size; i++) {
  145.             nir_register *reg = nir_local_reg_create(b->impl);
  146.             reg->num_components = 4;
  147.             c->temp_regs[decl->Range.First + i].reg = reg;
  148.             c->temp_regs[decl->Range.First + i].var = NULL;
  149.             c->temp_regs[decl->Range.First + i].offset = 0;
  150.          }
  151.       }
  152.    } else if (file == TGSI_FILE_ADDRESS) {
  153.       c->addr_reg = nir_local_reg_create(b->impl);
  154.       c->addr_reg->num_components = 4;
  155.    } else if (file == TGSI_FILE_SYSTEM_VALUE) {
  156.       /* Nothing to record for system values. */
  157.    } else if (file == TGSI_FILE_SAMPLER) {
  158.       /* Nothing to record for samplers. */
  159.    } else {
  160.       nir_variable *var;
  161.       assert(file == TGSI_FILE_INPUT ||
  162.              file == TGSI_FILE_OUTPUT ||
  163.              file == TGSI_FILE_CONSTANT);
  164.  
  165.       /* nothing to do for UBOs: */
  166.       if ((file == TGSI_FILE_CONSTANT) && decl->Declaration.Dimension)
  167.          return;
  168.  
  169.       var = rzalloc(b->shader, nir_variable);
  170.       var->data.driver_location = decl->Range.First;
  171.  
  172.       var->type = glsl_vec4_type();
  173.       if (array_size > 1)
  174.          var->type = glsl_array_type(var->type, array_size);
  175.  
  176.       switch (file) {
  177.       case TGSI_FILE_INPUT:
  178.          var->data.read_only = true;
  179.          var->data.mode = nir_var_shader_in;
  180.          var->name = ralloc_asprintf(var, "in_%d", decl->Range.First);
  181.  
  182.          /* We should probably translate to a VERT_ATTRIB_* or VARYING_SLOT_*
  183.           * instead, but nothing in NIR core is looking at the value
  184.           * currently, and this is less change to drivers.
  185.           */
  186.          var->data.location = decl->Semantic.Name;
  187.          var->data.index = decl->Semantic.Index;
  188.  
  189.          /* We definitely need to translate the interpolation field, because
  190.           * nir_print will decode it.
  191.           */
  192.          switch (decl->Interp.Interpolate) {
  193.          case TGSI_INTERPOLATE_CONSTANT:
  194.             var->data.interpolation = INTERP_QUALIFIER_FLAT;
  195.             break;
  196.          case TGSI_INTERPOLATE_LINEAR:
  197.             var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE;
  198.             break;
  199.          case TGSI_INTERPOLATE_PERSPECTIVE:
  200.             var->data.interpolation = INTERP_QUALIFIER_SMOOTH;
  201.             break;
  202.          }
  203.  
  204.          exec_list_push_tail(&b->shader->inputs, &var->node);
  205.          break;
  206.       case TGSI_FILE_OUTPUT: {
  207.          /* Since we can't load from outputs in the IR, we make temporaries
  208.           * for the outputs and emit stores to the real outputs at the end of
  209.           * the shader.
  210.           */
  211.          nir_register *reg = nir_local_reg_create(b->impl);
  212.          reg->num_components = 4;
  213.          if (array_size > 1)
  214.             reg->num_array_elems = array_size;
  215.  
  216.          var->data.mode = nir_var_shader_out;
  217.          var->name = ralloc_asprintf(var, "out_%d", decl->Range.First);
  218.  
  219.          var->data.location = decl->Semantic.Name;
  220.          var->data.index = decl->Semantic.Index;
  221.  
  222.          for (i = 0; i < array_size; i++) {
  223.             c->output_regs[decl->Range.First + i].offset = i;
  224.             c->output_regs[decl->Range.First + i].reg = reg;
  225.          }
  226.  
  227.          exec_list_push_tail(&b->shader->outputs, &var->node);
  228.       }
  229.          break;
  230.       case TGSI_FILE_CONSTANT:
  231.          var->data.mode = nir_var_uniform;
  232.          var->name = ralloc_asprintf(var, "uniform_%d", decl->Range.First);
  233.  
  234.          exec_list_push_tail(&b->shader->uniforms, &var->node);
  235.          break;
  236.       default:
  237.          unreachable("bad declaration file");
  238.          return;
  239.       }
  240.  
  241.    }
  242. }
  243.  
  244. static void
  245. ttn_emit_immediate(struct ttn_compile *c)
  246. {
  247.    nir_builder *b = &c->build;
  248.    struct tgsi_full_immediate *tgsi_imm = &c->token->FullImmediate;
  249.    nir_load_const_instr *load_const;
  250.    int i;
  251.  
  252.    load_const = nir_load_const_instr_create(b->shader, 4);
  253.    c->imm_defs[c->next_imm] = &load_const->def;
  254.    c->next_imm++;
  255.  
  256.    for (i = 0; i < 4; i++)
  257.       load_const->value.u[i] = tgsi_imm->u[i].Uint;
  258.  
  259.    nir_instr_insert_after_cf_list(b->cf_node_list, &load_const->instr);
  260. }
  261.  
  262. static nir_src
  263. ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect);
  264.  
  265. /* generate either a constant or indirect deref chain for accessing an
  266.  * array variable.
  267.  */
  268. static nir_deref_var *
  269. ttn_array_deref(struct ttn_compile *c, nir_intrinsic_instr *instr,
  270.                 nir_variable *var, unsigned offset,
  271.                 struct tgsi_ind_register *indirect)
  272. {
  273.    nir_deref_var *deref = nir_deref_var_create(instr, var);
  274.    nir_deref_array *arr = nir_deref_array_create(deref);
  275.  
  276.    arr->base_offset = offset;
  277.    arr->deref.type = glsl_get_array_element(var->type);
  278.  
  279.    if (indirect) {
  280.       arr->deref_array_type = nir_deref_array_type_indirect;
  281.       arr->indirect = ttn_src_for_indirect(c, indirect);
  282.    } else {
  283.       arr->deref_array_type = nir_deref_array_type_direct;
  284.    }
  285.  
  286.    deref->deref.child = &arr->deref;
  287.  
  288.    return deref;
  289. }
  290.  
  291. static nir_src
  292. ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
  293.                            struct tgsi_ind_register *indirect,
  294.                            struct tgsi_dimension *dim,
  295.                            struct tgsi_ind_register *dimind)
  296. {
  297.    nir_builder *b = &c->build;
  298.    nir_src src;
  299.  
  300.    memset(&src, 0, sizeof(src));
  301.  
  302.    switch (file) {
  303.    case TGSI_FILE_TEMPORARY:
  304.       if (c->temp_regs[index].var) {
  305.          unsigned offset = c->temp_regs[index].offset;
  306.          nir_variable *var = c->temp_regs[index].var;
  307.          nir_intrinsic_instr *load;
  308.  
  309.          load = nir_intrinsic_instr_create(b->shader,
  310.                                            nir_intrinsic_load_var);
  311.          load->num_components = 4;
  312.          load->variables[0] = ttn_array_deref(c, load, var, offset, indirect);
  313.  
  314.          nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
  315.          nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
  316.  
  317.          src = nir_src_for_ssa(&load->dest.ssa);
  318.  
  319.       } else {
  320.          assert(!indirect);
  321.          src.reg.reg = c->temp_regs[index].reg;
  322.       }
  323.       assert(!dim);
  324.       break;
  325.  
  326.    case TGSI_FILE_ADDRESS:
  327.       src.reg.reg = c->addr_reg;
  328.       assert(!dim);
  329.       break;
  330.  
  331.    case TGSI_FILE_IMMEDIATE:
  332.       src = nir_src_for_ssa(c->imm_defs[index]);
  333.       assert(!indirect);
  334.       assert(!dim);
  335.       break;
  336.  
  337.    case TGSI_FILE_SYSTEM_VALUE: {
  338.       nir_intrinsic_instr *load;
  339.       nir_intrinsic_op op;
  340.       unsigned ncomp = 1;
  341.  
  342.       assert(!indirect);
  343.       assert(!dim);
  344.  
  345.       switch (c->scan->system_value_semantic_name[index]) {
  346.       case TGSI_SEMANTIC_VERTEXID_NOBASE:
  347.          op = nir_intrinsic_load_vertex_id_zero_base;
  348.          break;
  349.       case TGSI_SEMANTIC_VERTEXID:
  350.          op = nir_intrinsic_load_vertex_id;
  351.          break;
  352.       case TGSI_SEMANTIC_BASEVERTEX:
  353.          op = nir_intrinsic_load_base_vertex;
  354.          break;
  355.       case TGSI_SEMANTIC_INSTANCEID:
  356.          op = nir_intrinsic_load_instance_id;
  357.          break;
  358.       default:
  359.          unreachable("bad system value");
  360.       }
  361.  
  362.       load = nir_intrinsic_instr_create(b->shader, op);
  363.       load->num_components = ncomp;
  364.  
  365.       nir_ssa_dest_init(&load->instr, &load->dest, ncomp, NULL);
  366.       nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
  367.  
  368.       src = nir_src_for_ssa(&load->dest.ssa);
  369.       break;
  370.    }
  371.  
  372.    case TGSI_FILE_INPUT:
  373.    case TGSI_FILE_CONSTANT: {
  374.       nir_intrinsic_instr *load;
  375.       nir_intrinsic_op op;
  376.       unsigned srcn = 0;
  377.  
  378.       switch (file) {
  379.       case TGSI_FILE_INPUT:
  380.          op = indirect ? nir_intrinsic_load_input_indirect :
  381.                          nir_intrinsic_load_input;
  382.          assert(!dim);
  383.          break;
  384.       case TGSI_FILE_CONSTANT:
  385.          if (dim) {
  386.             op = indirect ? nir_intrinsic_load_ubo_indirect :
  387.                             nir_intrinsic_load_ubo;
  388.             /* convert index from vec4 to byte: */
  389.             index *= 16;
  390.          } else {
  391.             op = indirect ? nir_intrinsic_load_uniform_indirect :
  392.                             nir_intrinsic_load_uniform;
  393.          }
  394.          break;
  395.       default:
  396.          unreachable("No other load files supported");
  397.          break;
  398.       }
  399.  
  400.       load = nir_intrinsic_instr_create(b->shader, op);
  401.  
  402.       load->num_components = 4;
  403.       load->const_index[0] = index;
  404.       load->const_index[1] = 1;
  405.       if (dim) {
  406.          if (dimind) {
  407.             load->src[srcn] =
  408.                ttn_src_for_file_and_index(c, dimind->File, dimind->Index,
  409.                                           NULL, NULL, NULL);
  410.          } else {
  411.             /* UBOs start at index 1 in TGSI: */
  412.             load->src[srcn] =
  413.                nir_src_for_ssa(nir_imm_int(b, dim->Index - 1));
  414.          }
  415.          srcn++;
  416.       }
  417.       if (indirect) {
  418.          load->src[srcn] = ttn_src_for_indirect(c, indirect);
  419.          if (dim) {
  420.             assert(load->src[srcn].is_ssa);
  421.             /* we also need to covert vec4 to byte here too: */
  422.             load->src[srcn] =
  423.                nir_src_for_ssa(nir_ishl(b, load->src[srcn].ssa,
  424.                                         nir_imm_int(b, 4)));
  425.          }
  426.          srcn++;
  427.       }
  428.       nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
  429.       nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
  430.  
  431.       src = nir_src_for_ssa(&load->dest.ssa);
  432.       break;
  433.    }
  434.  
  435.    default:
  436.       unreachable("bad src file");
  437.    }
  438.  
  439.  
  440.    return src;
  441. }
  442.  
  443. static nir_src
  444. ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect)
  445. {
  446.    nir_builder *b = &c->build;
  447.    nir_alu_src src;
  448.    memset(&src, 0, sizeof(src));
  449.    for (int i = 0; i < 4; i++)
  450.       src.swizzle[i] = indirect->Swizzle;
  451.    src.src = ttn_src_for_file_and_index(c,
  452.                                         indirect->File,
  453.                                         indirect->Index,
  454.                                         NULL, NULL, NULL);
  455.    return nir_src_for_ssa(nir_imov_alu(b, src, 1));
  456. }
  457.  
  458. static nir_alu_dest
  459. ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
  460. {
  461.    struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register;
  462.    nir_alu_dest dest;
  463.    unsigned index = tgsi_dst->Index;
  464.  
  465.    memset(&dest, 0, sizeof(dest));
  466.  
  467.    if (tgsi_dst->File == TGSI_FILE_TEMPORARY) {
  468.       if (c->temp_regs[index].var) {
  469.           nir_builder *b = &c->build;
  470.           nir_intrinsic_instr *load;
  471.           struct tgsi_ind_register *indirect =
  472.                 tgsi_dst->Indirect ? &tgsi_fdst->Indirect : NULL;
  473.           nir_register *reg;
  474.  
  475.          /* this works, because TGSI will give us a base offset
  476.           * (in case of indirect index) that points back into
  477.           * the array.  Access can be direct or indirect, we
  478.           * don't really care.  Just create a one-shot dst reg
  479.           * that will get store_var'd back into the array var
  480.           * at the end of ttn_emit_instruction()
  481.           */
  482.          reg = nir_local_reg_create(c->build.impl);
  483.          reg->num_components = 4;
  484.          dest.dest.reg.reg = reg;
  485.          dest.dest.reg.base_offset = 0;
  486.  
  487.          /* since the alu op might not write to all components
  488.           * of the temporary, we must first do a load_var to
  489.           * get the previous array elements into the register.
  490.           * This is one area that NIR could use a bit of
  491.           * improvement (or opt pass to clean up the mess
  492.           * once things are scalarized)
  493.           */
  494.  
  495.          load = nir_intrinsic_instr_create(c->build.shader,
  496.                                            nir_intrinsic_load_var);
  497.          load->num_components = 4;
  498.          load->variables[0] =
  499.                ttn_array_deref(c, load, c->temp_regs[index].var,
  500.                                c->temp_regs[index].offset,
  501.                                indirect);
  502.  
  503.          load->dest = nir_dest_for_reg(reg);
  504.  
  505.          nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
  506.       } else {
  507.          assert(!tgsi_dst->Indirect);
  508.          dest.dest.reg.reg = c->temp_regs[index].reg;
  509.          dest.dest.reg.base_offset = c->temp_regs[index].offset;
  510.       }
  511.    } else if (tgsi_dst->File == TGSI_FILE_OUTPUT) {
  512.       dest.dest.reg.reg = c->output_regs[index].reg;
  513.       dest.dest.reg.base_offset = c->output_regs[index].offset;
  514.    } else if (tgsi_dst->File == TGSI_FILE_ADDRESS) {
  515.       assert(index == 0);
  516.       dest.dest.reg.reg = c->addr_reg;
  517.    }
  518.  
  519.    dest.write_mask = tgsi_dst->WriteMask;
  520.    dest.saturate = false;
  521.  
  522.    if (tgsi_dst->Indirect && (tgsi_dst->File != TGSI_FILE_TEMPORARY)) {
  523.       nir_src *indirect = ralloc(c->build.shader, nir_src);
  524.       *indirect = ttn_src_for_indirect(c, &tgsi_fdst->Indirect);
  525.       dest.dest.reg.indirect = indirect;
  526.    }
  527.  
  528.    return dest;
  529. }
  530.  
  531. static nir_variable *
  532. ttn_get_var(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
  533. {
  534.    struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register;
  535.    unsigned index = tgsi_dst->Index;
  536.  
  537.    if (tgsi_dst->File == TGSI_FILE_TEMPORARY) {
  538.       /* we should not have an indirect when there is no var! */
  539.       if (!c->temp_regs[index].var)
  540.          assert(!tgsi_dst->Indirect);
  541.       return c->temp_regs[index].var;
  542.    }
  543.  
  544.    return NULL;
  545. }
  546.  
  547. static nir_ssa_def *
  548. ttn_get_src(struct ttn_compile *c, struct tgsi_full_src_register *tgsi_fsrc)
  549. {
  550.    nir_builder *b = &c->build;
  551.    struct tgsi_src_register *tgsi_src = &tgsi_fsrc->Register;
  552.    unsigned tgsi_opcode = c->token->FullInstruction.Instruction.Opcode;
  553.    unsigned tgsi_src_type = tgsi_opcode_infer_src_type(tgsi_opcode);
  554.    bool src_is_float = !(tgsi_src_type == TGSI_TYPE_SIGNED ||
  555.                          tgsi_src_type == TGSI_TYPE_UNSIGNED);
  556.    nir_alu_src src;
  557.  
  558.    memset(&src, 0, sizeof(src));
  559.  
  560.    if (tgsi_src->File == TGSI_FILE_NULL) {
  561.       return nir_imm_float(b, 0.0);
  562.    } else if (tgsi_src->File == TGSI_FILE_SAMPLER) {
  563.       /* Only the index of the sampler gets used in texturing, and it will
  564.        * handle looking that up on its own instead of using the nir_alu_src.
  565.        */
  566.       assert(!tgsi_src->Indirect);
  567.       return NULL;
  568.    } else {
  569.       struct tgsi_ind_register *ind = NULL;
  570.       struct tgsi_dimension *dim = NULL;
  571.       struct tgsi_ind_register *dimind = NULL;
  572.       if (tgsi_src->Indirect)
  573.          ind = &tgsi_fsrc->Indirect;
  574.       if (tgsi_src->Dimension) {
  575.          dim = &tgsi_fsrc->Dimension;
  576.          if (dim->Indirect)
  577.             dimind = &tgsi_fsrc->DimIndirect;
  578.       }
  579.       src.src = ttn_src_for_file_and_index(c,
  580.                                            tgsi_src->File,
  581.                                            tgsi_src->Index,
  582.                                            ind, dim, dimind);
  583.    }
  584.  
  585.    src.swizzle[0] = tgsi_src->SwizzleX;
  586.    src.swizzle[1] = tgsi_src->SwizzleY;
  587.    src.swizzle[2] = tgsi_src->SwizzleZ;
  588.    src.swizzle[3] = tgsi_src->SwizzleW;
  589.  
  590.    nir_ssa_def *def = nir_fmov_alu(b, src, 4);
  591.  
  592.    if (tgsi_src->Absolute) {
  593.       if (src_is_float)
  594.          def = nir_fabs(b, def);
  595.       else
  596.          def = nir_iabs(b, def);
  597.    }
  598.  
  599.    if (tgsi_src->Negate) {
  600.       if (src_is_float)
  601.          def = nir_fneg(b, def);
  602.       else
  603.          def = nir_ineg(b, def);
  604.    }
  605.  
  606.    return def;
  607. }
  608.  
  609. static void
  610. ttn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  611. {
  612.    unsigned num_srcs = nir_op_infos[op].num_inputs;
  613.    nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
  614.    unsigned i;
  615.  
  616.    for (i = 0; i < num_srcs; i++)
  617.       instr->src[i].src = nir_src_for_ssa(src[i]);
  618.  
  619.    instr->dest = dest;
  620.    nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
  621. }
  622.  
  623. static void
  624. ttn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
  625.                      nir_ssa_def *def, unsigned write_mask)
  626. {
  627.    if (!(dest.write_mask & write_mask))
  628.       return;
  629.  
  630.    nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_imov);
  631.    mov->dest = dest;
  632.    mov->dest.write_mask &= write_mask;
  633.    mov->src[0].src = nir_src_for_ssa(def);
  634.    for (unsigned i = def->num_components; i < 4; i++)
  635.       mov->src[0].swizzle[i] = def->num_components - 1;
  636.    nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr);
  637. }
  638.  
  639. static void
  640. ttn_move_dest(nir_builder *b, nir_alu_dest dest, nir_ssa_def *def)
  641. {
  642.    ttn_move_dest_masked(b, dest, def, TGSI_WRITEMASK_XYZW);
  643. }
  644.  
  645. static void
  646. ttn_arl(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  647. {
  648.    ttn_move_dest(b, dest, nir_f2i(b, nir_ffloor(b, src[0])));
  649. }
  650.  
  651. /* EXP - Approximate Exponential Base 2
  652.  *  dst.x = 2^{\lfloor src.x\rfloor}
  653.  *  dst.y = src.x - \lfloor src.x\rfloor
  654.  *  dst.z = 2^{src.x}
  655.  *  dst.w = 1.0
  656.  */
  657. static void
  658. ttn_exp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  659. {
  660.    nir_ssa_def *srcx = ttn_channel(b, src[0], X);
  661.  
  662.    ttn_move_dest_masked(b, dest, nir_fexp2(b, nir_ffloor(b, srcx)),
  663.                         TGSI_WRITEMASK_X);
  664.    ttn_move_dest_masked(b, dest, nir_fsub(b, srcx, nir_ffloor(b, srcx)),
  665.                         TGSI_WRITEMASK_Y);
  666.    ttn_move_dest_masked(b, dest, nir_fexp2(b, srcx), TGSI_WRITEMASK_Z);
  667.    ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
  668. }
  669.  
  670. /* LOG - Approximate Logarithm Base 2
  671.  *  dst.x = \lfloor\log_2{|src.x|}\rfloor
  672.  *  dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
  673.  *  dst.z = \log_2{|src.x|}
  674.  *  dst.w = 1.0
  675.  */
  676. static void
  677. ttn_log(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  678. {
  679.    nir_ssa_def *abs_srcx = nir_fabs(b, ttn_channel(b, src[0], X));
  680.    nir_ssa_def *log2 = nir_flog2(b, abs_srcx);
  681.  
  682.    ttn_move_dest_masked(b, dest, nir_ffloor(b, log2), TGSI_WRITEMASK_X);
  683.    ttn_move_dest_masked(b, dest,
  684.                         nir_fdiv(b, abs_srcx, nir_fexp2(b, nir_ffloor(b, log2))),
  685.                         TGSI_WRITEMASK_Y);
  686.    ttn_move_dest_masked(b, dest, nir_flog2(b, abs_srcx), TGSI_WRITEMASK_Z);
  687.    ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
  688. }
  689.  
  690. /* DST - Distance Vector
  691.  *   dst.x = 1.0
  692.  *   dst.y = src0.y \times src1.y
  693.  *   dst.z = src0.z
  694.  *   dst.w = src1.w
  695.  */
  696. static void
  697. ttn_dst(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  698. {
  699.    ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_X);
  700.    ttn_move_dest_masked(b, dest, nir_fmul(b, src[0], src[1]), TGSI_WRITEMASK_Y);
  701.    ttn_move_dest_masked(b, dest, nir_fmov(b, src[0]), TGSI_WRITEMASK_Z);
  702.    ttn_move_dest_masked(b, dest, nir_fmov(b, src[1]), TGSI_WRITEMASK_W);
  703. }
  704.  
  705. /* LIT - Light Coefficients
  706.  *  dst.x = 1.0
  707.  *  dst.y = max(src.x, 0.0)
  708.  *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
  709.  *  dst.w = 1.0
  710.  */
  711. static void
  712. ttn_lit(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  713. {
  714.    ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_XW);
  715.  
  716.    ttn_move_dest_masked(b, dest, nir_fmax(b, ttn_channel(b, src[0], X),
  717.                                           nir_imm_float(b, 0.0)), TGSI_WRITEMASK_Y);
  718.  
  719.    if (dest.write_mask & TGSI_WRITEMASK_Z) {
  720.       nir_ssa_def *src0_y = ttn_channel(b, src[0], Y);
  721.       nir_ssa_def *wclamp = nir_fmax(b, nir_fmin(b, ttn_channel(b, src[0], W),
  722.                                                  nir_imm_float(b, 128.0)),
  723.                                      nir_imm_float(b, -128.0));
  724.       nir_ssa_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
  725.                                   wclamp);
  726.  
  727.       ttn_move_dest_masked(b, dest,
  728.                            nir_bcsel(b,
  729.                                      nir_fge(b,
  730.                                              nir_imm_float(b, 0.0),
  731.                                              ttn_channel(b, src[0], X)),
  732.                                      nir_imm_float(b, 0.0),
  733.                                      pow),
  734.                            TGSI_WRITEMASK_Z);
  735.    }
  736. }
  737.  
  738. /* SCS - Sine Cosine
  739.  *   dst.x = \cos{src.x}
  740.  *   dst.y = \sin{src.x}
  741.  *   dst.z = 0.0
  742.  *   dst.w = 1.0
  743.  */
  744. static void
  745. ttn_scs(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  746. {
  747.    ttn_move_dest_masked(b, dest, nir_fcos(b, ttn_channel(b, src[0], X)),
  748.                         TGSI_WRITEMASK_X);
  749.    ttn_move_dest_masked(b, dest, nir_fsin(b, ttn_channel(b, src[0], X)),
  750.                         TGSI_WRITEMASK_Y);
  751.    ttn_move_dest_masked(b, dest, nir_imm_float(b, 0.0), TGSI_WRITEMASK_Z);
  752.    ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
  753. }
  754.  
  755. static void
  756. ttn_sle(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  757. {
  758.    ttn_move_dest(b, dest, nir_sge(b, src[1], src[0]));
  759. }
  760.  
  761. static void
  762. ttn_sgt(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  763. {
  764.    ttn_move_dest(b, dest, nir_slt(b, src[1], src[0]));
  765. }
  766.  
  767. static void
  768. ttn_clamp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  769. {
  770.    ttn_move_dest(b, dest, nir_fmin(b, nir_fmax(b, src[0], src[1]), src[2]));
  771. }
  772.  
  773. static void
  774. ttn_xpd(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  775. {
  776.    ttn_move_dest_masked(b, dest,
  777.                         nir_fsub(b,
  778.                                  nir_fmul(b,
  779.                                           ttn_swizzle(b, src[0], Y, Z, X, X),
  780.                                           ttn_swizzle(b, src[1], Z, X, Y, X)),
  781.                                  nir_fmul(b,
  782.                                           ttn_swizzle(b, src[1], Y, Z, X, X),
  783.                                           ttn_swizzle(b, src[0], Z, X, Y, X))),
  784.                         TGSI_WRITEMASK_XYZ);
  785.    ttn_move_dest_masked(b, dest, nir_imm_float(b, 1.0), TGSI_WRITEMASK_W);
  786. }
  787.  
  788. static void
  789. ttn_dp2a(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  790. {
  791.    ttn_move_dest(b, dest,
  792.                  ttn_channel(b, nir_fadd(b, nir_fdot2(b, src[0], src[1]),
  793.                                          src[2]),
  794.                              X));
  795. }
  796.  
  797. static void
  798. ttn_dp2(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  799. {
  800.    ttn_move_dest(b, dest, nir_fdot2(b, src[0], src[1]));
  801. }
  802.  
  803. static void
  804. ttn_dp3(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  805. {
  806.    ttn_move_dest(b, dest, nir_fdot3(b, src[0], src[1]));
  807. }
  808.  
  809. static void
  810. ttn_dp4(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  811. {
  812.    ttn_move_dest(b, dest, nir_fdot4(b, src[0], src[1]));
  813. }
  814.  
  815. static void
  816. ttn_dph(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  817. {
  818.    ttn_move_dest(b, dest, nir_fadd(b, nir_fdot3(b, src[0], src[1]),
  819.                                    ttn_channel(b, src[1], W)));
  820. }
  821.  
  822. static void
  823. ttn_umad(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  824. {
  825.    ttn_move_dest(b, dest, nir_iadd(b, nir_imul(b, src[0], src[1]), src[2]));
  826. }
  827.  
  828. static void
  829. ttn_arr(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  830. {
  831.    ttn_move_dest(b, dest, nir_ffloor(b, nir_fadd(b, src[0], nir_imm_float(b, 0.5))));
  832. }
  833.  
  834. static void
  835. ttn_cmp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  836. {
  837.    ttn_move_dest(b, dest, nir_bcsel(b,
  838.                                     nir_flt(b, src[0], nir_imm_float(b, 0.0)),
  839.                                     src[1], src[2]));
  840. }
  841.  
  842. static void
  843. ttn_ucmp(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  844. {
  845.    ttn_move_dest(b, dest, nir_bcsel(b,
  846.                                     nir_ine(b, src[0], nir_imm_int(b, 0)),
  847.                                     src[1], src[2]));
  848. }
  849.  
  850. static void
  851. ttn_kill(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  852. {
  853.    nir_intrinsic_instr *discard =
  854.       nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard);
  855.    nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr);
  856. }
  857.  
  858. static void
  859. ttn_kill_if(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
  860. {
  861.    nir_ssa_def *cmp = nir_bany4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)));
  862.    nir_intrinsic_instr *discard =
  863.       nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
  864.    discard->src[0] = nir_src_for_ssa(cmp);
  865.    nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr);
  866. }
  867.  
  868. static void
  869. ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint)
  870. {
  871.    nir_builder *b = &c->build;
  872.  
  873.    /* Save the outside-of-the-if-statement node list. */
  874.    c->if_stack[c->if_stack_pos] = b->cf_node_list;
  875.    c->if_stack_pos++;
  876.  
  877.    src = ttn_channel(b, src, X);
  878.  
  879.    nir_if *if_stmt = nir_if_create(b->shader);
  880.    if (is_uint) {
  881.       if_stmt->condition = nir_src_for_ssa(nir_ine(b, src, nir_imm_int(b, 0)));
  882.    } else {
  883.       if_stmt->condition = nir_src_for_ssa(nir_fne(b, src, nir_imm_int(b, 0)));
  884.    }
  885.    nir_cf_node_insert_end(b->cf_node_list, &if_stmt->cf_node);
  886.  
  887.    nir_builder_insert_after_cf_list(b, &if_stmt->then_list);
  888.  
  889.    c->if_stack[c->if_stack_pos] = &if_stmt->else_list;
  890.    c->if_stack_pos++;
  891. }
  892.  
  893. static void
  894. ttn_else(struct ttn_compile *c)
  895. {
  896.    nir_builder *b = &c->build;
  897.  
  898.    nir_builder_insert_after_cf_list(b, c->if_stack[c->if_stack_pos - 1]);
  899. }
  900.  
  901. static void
  902. ttn_endif(struct ttn_compile *c)
  903. {
  904.    nir_builder *b = &c->build;
  905.  
  906.    c->if_stack_pos -= 2;
  907.    nir_builder_insert_after_cf_list(b, c->if_stack[c->if_stack_pos]);
  908. }
  909.  
  910. static void
  911. ttn_bgnloop(struct ttn_compile *c)
  912. {
  913.    nir_builder *b = &c->build;
  914.  
  915.    /* Save the outside-of-the-loop node list. */
  916.    c->loop_stack[c->loop_stack_pos] = b->cf_node_list;
  917.    c->loop_stack_pos++;
  918.  
  919.    nir_loop *loop = nir_loop_create(b->shader);
  920.    nir_cf_node_insert_end(b->cf_node_list, &loop->cf_node);
  921.  
  922.    nir_builder_insert_after_cf_list(b, &loop->body);
  923. }
  924.  
  925. static void
  926. ttn_cont(nir_builder *b)
  927. {
  928.    nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_continue);
  929.    nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
  930. }
  931.  
  932. static void
  933. ttn_brk(nir_builder *b)
  934. {
  935.    nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_break);
  936.    nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
  937. }
  938.  
  939. static void
  940. ttn_endloop(struct ttn_compile *c)
  941. {
  942.    nir_builder *b = &c->build;
  943.  
  944.    c->loop_stack_pos--;
  945.    nir_builder_insert_after_cf_list(b, c->loop_stack[c->loop_stack_pos]);
  946. }
  947.  
  948. static void
  949. setup_texture_info(nir_tex_instr *instr, unsigned texture)
  950. {
  951.    switch (texture) {
  952.    case TGSI_TEXTURE_1D:
  953.       instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
  954.       break;
  955.    case TGSI_TEXTURE_1D_ARRAY:
  956.       instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
  957.       instr->is_array = true;
  958.       break;
  959.    case TGSI_TEXTURE_SHADOW1D:
  960.       instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
  961.       instr->is_shadow = true;
  962.       break;
  963.    case TGSI_TEXTURE_SHADOW1D_ARRAY:
  964.       instr->sampler_dim = GLSL_SAMPLER_DIM_1D;
  965.       instr->is_shadow = true;
  966.       instr->is_array = true;
  967.       break;
  968.    case TGSI_TEXTURE_2D:
  969.       instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
  970.       break;
  971.    case TGSI_TEXTURE_2D_ARRAY:
  972.       instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
  973.       instr->is_array = true;
  974.       break;
  975.    case TGSI_TEXTURE_2D_MSAA:
  976.       instr->sampler_dim = GLSL_SAMPLER_DIM_MS;
  977.       break;
  978.    case TGSI_TEXTURE_2D_ARRAY_MSAA:
  979.       instr->sampler_dim = GLSL_SAMPLER_DIM_MS;
  980.       instr->is_array = true;
  981.       break;
  982.    case TGSI_TEXTURE_SHADOW2D:
  983.       instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
  984.       instr->is_shadow = true;
  985.       break;
  986.    case TGSI_TEXTURE_SHADOW2D_ARRAY:
  987.       instr->sampler_dim = GLSL_SAMPLER_DIM_2D;
  988.       instr->is_shadow = true;
  989.       instr->is_array = true;
  990.       break;
  991.    case TGSI_TEXTURE_3D:
  992.       instr->sampler_dim = GLSL_SAMPLER_DIM_3D;
  993.       break;
  994.    case TGSI_TEXTURE_CUBE:
  995.       instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
  996.       break;
  997.    case TGSI_TEXTURE_CUBE_ARRAY:
  998.       instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
  999.       instr->is_array = true;
  1000.       break;
  1001.    case TGSI_TEXTURE_SHADOWCUBE:
  1002.       instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
  1003.       instr->is_shadow = true;
  1004.       break;
  1005.    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
  1006.       instr->sampler_dim = GLSL_SAMPLER_DIM_CUBE;
  1007.       instr->is_shadow = true;
  1008.       instr->is_array = true;
  1009.       break;
  1010.    case TGSI_TEXTURE_RECT:
  1011.       instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
  1012.       break;
  1013.    case TGSI_TEXTURE_SHADOWRECT:
  1014.       instr->sampler_dim = GLSL_SAMPLER_DIM_RECT;
  1015.       instr->is_shadow = true;
  1016.       break;
  1017.    default:
  1018.       fprintf(stderr, "Unknown TGSI texture target %d\n", texture);
  1019.       abort();
  1020.    }
  1021. }
  1022.  
  1023. static void
  1024. ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
  1025. {
  1026.    nir_builder *b = &c->build;
  1027.    struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
  1028.    nir_tex_instr *instr;
  1029.    nir_texop op;
  1030.    unsigned num_srcs, samp = 1, i;
  1031.  
  1032.    switch (tgsi_inst->Instruction.Opcode) {
  1033.    case TGSI_OPCODE_TEX:
  1034.       op = nir_texop_tex;
  1035.       num_srcs = 1;
  1036.       break;
  1037.    case TGSI_OPCODE_TXP:
  1038.       op = nir_texop_tex;
  1039.       num_srcs = 2;
  1040.       break;
  1041.    case TGSI_OPCODE_TXB:
  1042.       op = nir_texop_txb;
  1043.       num_srcs = 2;
  1044.       break;
  1045.    case TGSI_OPCODE_TXL:
  1046.       op = nir_texop_txl;
  1047.       num_srcs = 2;
  1048.       break;
  1049.    case TGSI_OPCODE_TXL2:
  1050.       op = nir_texop_txl;
  1051.       num_srcs = 2;
  1052.       samp = 2;
  1053.       break;
  1054.    case TGSI_OPCODE_TXF:
  1055.       op = nir_texop_txf;
  1056.       num_srcs = 2;
  1057.       break;
  1058.    case TGSI_OPCODE_TXD:
  1059.       op = nir_texop_txd;
  1060.       num_srcs = 3;
  1061.       samp = 3;
  1062.       break;
  1063.  
  1064.    default:
  1065.       fprintf(stderr, "unknown TGSI tex op %d\n", tgsi_inst->Instruction.Opcode);
  1066.       abort();
  1067.    }
  1068.  
  1069.    if (tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D ||
  1070.        tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW1D_ARRAY ||
  1071.        tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D ||
  1072.        tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
  1073.        tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWRECT ||
  1074.        tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
  1075.        tgsi_inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
  1076.       num_srcs++;
  1077.    }
  1078.  
  1079.    num_srcs += tgsi_inst->Texture.NumOffsets;
  1080.  
  1081.    instr = nir_tex_instr_create(b->shader, num_srcs);
  1082.    instr->op = op;
  1083.  
  1084.    setup_texture_info(instr, tgsi_inst->Texture.Texture);
  1085.  
  1086.    switch (instr->sampler_dim) {
  1087.    case GLSL_SAMPLER_DIM_1D:
  1088.    case GLSL_SAMPLER_DIM_BUF:
  1089.       instr->coord_components = 1;
  1090.       break;
  1091.    case GLSL_SAMPLER_DIM_2D:
  1092.    case GLSL_SAMPLER_DIM_RECT:
  1093.    case GLSL_SAMPLER_DIM_EXTERNAL:
  1094.    case GLSL_SAMPLER_DIM_MS:
  1095.       instr->coord_components = 2;
  1096.       break;
  1097.    case GLSL_SAMPLER_DIM_3D:
  1098.    case GLSL_SAMPLER_DIM_CUBE:
  1099.       instr->coord_components = 3;
  1100.       break;
  1101.    }
  1102.  
  1103.    if (instr->is_array)
  1104.       instr->coord_components++;
  1105.  
  1106.    assert(tgsi_inst->Src[samp].Register.File == TGSI_FILE_SAMPLER);
  1107.    instr->sampler_index = tgsi_inst->Src[samp].Register.Index;
  1108.  
  1109.    unsigned src_number = 0;
  1110.  
  1111.    instr->src[src_number].src =
  1112.       nir_src_for_ssa(nir_swizzle(b, src[0], SWIZ(X, Y, Z, W),
  1113.                                   instr->coord_components, false));
  1114.    instr->src[src_number].src_type = nir_tex_src_coord;
  1115.    src_number++;
  1116.  
  1117.    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXP) {
  1118.       instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
  1119.       instr->src[src_number].src_type = nir_tex_src_projector;
  1120.       src_number++;
  1121.    }
  1122.  
  1123.    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXB) {
  1124.       instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
  1125.       instr->src[src_number].src_type = nir_tex_src_bias;
  1126.       src_number++;
  1127.    }
  1128.  
  1129.    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL) {
  1130.       instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
  1131.       instr->src[src_number].src_type = nir_tex_src_lod;
  1132.       src_number++;
  1133.    }
  1134.  
  1135.    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXL2) {
  1136.       instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
  1137.       instr->src[src_number].src_type = nir_tex_src_lod;
  1138.       src_number++;
  1139.    }
  1140.  
  1141.    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
  1142.       instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
  1143.       instr->src[src_number].src_type = nir_tex_src_lod;
  1144.       src_number++;
  1145.    }
  1146.  
  1147.    if (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
  1148.       instr->src[src_number].src =
  1149.          nir_src_for_ssa(nir_swizzle(b, src[1], SWIZ(X, Y, Z, W),
  1150.               instr->coord_components, false));
  1151.       instr->src[src_number].src_type = nir_tex_src_ddx;
  1152.       src_number++;
  1153.       instr->src[src_number].src =
  1154.          nir_src_for_ssa(nir_swizzle(b, src[2], SWIZ(X, Y, Z, W),
  1155.               instr->coord_components, false));
  1156.       instr->src[src_number].src_type = nir_tex_src_ddy;
  1157.       src_number++;
  1158.    }
  1159.  
  1160.    if (instr->is_shadow) {
  1161.       if (instr->coord_components < 3)
  1162.          instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z));
  1163.       else
  1164.          instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
  1165.  
  1166.       instr->src[src_number].src_type = nir_tex_src_comparitor;
  1167.       src_number++;
  1168.    }
  1169.  
  1170.    for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++) {
  1171.       struct tgsi_texture_offset *tex_offset = &tgsi_inst->TexOffsets[i];
  1172.       /* since TexOffset ins't using tgsi_full_src_register we get to
  1173.        * do some extra gymnastics:
  1174.        */
  1175.       nir_alu_src src;
  1176.  
  1177.       memset(&src, 0, sizeof(src));
  1178.  
  1179.       src.src = ttn_src_for_file_and_index(c,
  1180.                                            tex_offset->File,
  1181.                                            tex_offset->Index,
  1182.                                            NULL, NULL, NULL);
  1183.  
  1184.       src.swizzle[0] = tex_offset->SwizzleX;
  1185.       src.swizzle[1] = tex_offset->SwizzleY;
  1186.       src.swizzle[2] = tex_offset->SwizzleZ;
  1187.       src.swizzle[3] = TGSI_SWIZZLE_W;
  1188.  
  1189.       instr->src[src_number].src_type = nir_tex_src_offset;
  1190.       instr->src[src_number].src = nir_src_for_ssa(
  1191.          nir_fmov_alu(b, src, nir_tex_instr_src_size(instr, src_number)));
  1192.       src_number++;
  1193.    }
  1194.  
  1195.    assert(src_number == num_srcs);
  1196.  
  1197.    nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL);
  1198.    nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
  1199.  
  1200.    /* Resolve the writemask on the texture op. */
  1201.    ttn_move_dest(b, dest, &instr->dest.ssa);
  1202. }
  1203.  
  1204. /* TGSI_OPCODE_TXQ is actually two distinct operations:
  1205.  *
  1206.  *     dst.x = texture\_width(unit, lod)
  1207.  *     dst.y = texture\_height(unit, lod)
  1208.  *     dst.z = texture\_depth(unit, lod)
  1209.  *     dst.w = texture\_levels(unit)
  1210.  *
  1211.  * dst.xyz map to NIR txs opcode, and dst.w maps to query_levels
  1212.  */
  1213. static void
  1214. ttn_txq(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
  1215. {
  1216.    nir_builder *b = &c->build;
  1217.    struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
  1218.    nir_tex_instr *txs, *qlv;
  1219.  
  1220.    txs = nir_tex_instr_create(b->shader, 1);
  1221.    txs->op = nir_texop_txs;
  1222.    setup_texture_info(txs, tgsi_inst->Texture.Texture);
  1223.  
  1224.    qlv = nir_tex_instr_create(b->shader, 0);
  1225.    qlv->op = nir_texop_query_levels;
  1226.    setup_texture_info(qlv, tgsi_inst->Texture.Texture);
  1227.  
  1228.    assert(tgsi_inst->Src[1].Register.File == TGSI_FILE_SAMPLER);
  1229.    txs->sampler_index = tgsi_inst->Src[1].Register.Index;
  1230.    qlv->sampler_index = tgsi_inst->Src[1].Register.Index;
  1231.  
  1232.    /* only single src, the lod: */
  1233.    txs->src[0].src = nir_src_for_ssa(ttn_channel(b, src[0], X));
  1234.    txs->src[0].src_type = nir_tex_src_lod;
  1235.  
  1236.    nir_ssa_dest_init(&txs->instr, &txs->dest, 3, NULL);
  1237.    nir_instr_insert_after_cf_list(b->cf_node_list, &txs->instr);
  1238.  
  1239.    nir_ssa_dest_init(&qlv->instr, &qlv->dest, 1, NULL);
  1240.    nir_instr_insert_after_cf_list(b->cf_node_list, &qlv->instr);
  1241.  
  1242.    ttn_move_dest_masked(b, dest, &txs->dest.ssa, TGSI_WRITEMASK_XYZ);
  1243.    ttn_move_dest_masked(b, dest, &qlv->dest.ssa, TGSI_WRITEMASK_W);
  1244. }
  1245.  
  1246. static const nir_op op_trans[TGSI_OPCODE_LAST] = {
  1247.    [TGSI_OPCODE_ARL] = 0,
  1248.    [TGSI_OPCODE_MOV] = nir_op_fmov,
  1249.    [TGSI_OPCODE_LIT] = 0,
  1250.    [TGSI_OPCODE_RCP] = nir_op_frcp,
  1251.    [TGSI_OPCODE_RSQ] = nir_op_frsq,
  1252.    [TGSI_OPCODE_EXP] = 0,
  1253.    [TGSI_OPCODE_LOG] = 0,
  1254.    [TGSI_OPCODE_MUL] = nir_op_fmul,
  1255.    [TGSI_OPCODE_ADD] = nir_op_fadd,
  1256.    [TGSI_OPCODE_DP3] = 0,
  1257.    [TGSI_OPCODE_DP4] = 0,
  1258.    [TGSI_OPCODE_DST] = 0,
  1259.    [TGSI_OPCODE_MIN] = nir_op_fmin,
  1260.    [TGSI_OPCODE_MAX] = nir_op_fmax,
  1261.    [TGSI_OPCODE_SLT] = nir_op_slt,
  1262.    [TGSI_OPCODE_SGE] = nir_op_sge,
  1263.    [TGSI_OPCODE_MAD] = nir_op_ffma,
  1264.    [TGSI_OPCODE_SUB] = nir_op_fsub,
  1265.    [TGSI_OPCODE_LRP] = 0,
  1266.    [TGSI_OPCODE_SQRT] = nir_op_fsqrt,
  1267.    [TGSI_OPCODE_DP2A] = 0,
  1268.    [TGSI_OPCODE_FRC] = nir_op_ffract,
  1269.    [TGSI_OPCODE_CLAMP] = 0,
  1270.    [TGSI_OPCODE_FLR] = nir_op_ffloor,
  1271.    [TGSI_OPCODE_ROUND] = nir_op_fround_even,
  1272.    [TGSI_OPCODE_EX2] = nir_op_fexp2,
  1273.    [TGSI_OPCODE_LG2] = nir_op_flog2,
  1274.    [TGSI_OPCODE_POW] = nir_op_fpow,
  1275.    [TGSI_OPCODE_XPD] = 0,
  1276.    [TGSI_OPCODE_ABS] = nir_op_fabs,
  1277.    [TGSI_OPCODE_DPH] = 0,
  1278.    [TGSI_OPCODE_COS] = nir_op_fcos,
  1279.    [TGSI_OPCODE_DDX] = nir_op_fddx,
  1280.    [TGSI_OPCODE_DDY] = nir_op_fddy,
  1281.    [TGSI_OPCODE_KILL] = 0,
  1282.    [TGSI_OPCODE_PK2H] = 0, /* XXX */
  1283.    [TGSI_OPCODE_PK2US] = 0, /* XXX */
  1284.    [TGSI_OPCODE_PK4B] = 0, /* XXX */
  1285.    [TGSI_OPCODE_PK4UB] = 0, /* XXX */
  1286.    [TGSI_OPCODE_SEQ] = nir_op_seq,
  1287.    [TGSI_OPCODE_SGT] = 0,
  1288.    [TGSI_OPCODE_SIN] = nir_op_fsin,
  1289.    [TGSI_OPCODE_SLE] = 0,
  1290.    [TGSI_OPCODE_TEX] = 0,
  1291.    [TGSI_OPCODE_TXD] = 0,
  1292.    [TGSI_OPCODE_TXP] = 0,
  1293.    [TGSI_OPCODE_UP2H] = 0, /* XXX */
  1294.    [TGSI_OPCODE_UP2US] = 0, /* XXX */
  1295.    [TGSI_OPCODE_UP4B] = 0, /* XXX */
  1296.    [TGSI_OPCODE_UP4UB] = 0, /* XXX */
  1297.    [TGSI_OPCODE_ARR] = 0,
  1298.  
  1299.    /* No function calls, yet. */
  1300.    [TGSI_OPCODE_CAL] = 0, /* XXX */
  1301.    [TGSI_OPCODE_RET] = 0, /* XXX */
  1302.  
  1303.    [TGSI_OPCODE_SSG] = nir_op_fsign,
  1304.    [TGSI_OPCODE_CMP] = 0,
  1305.    [TGSI_OPCODE_SCS] = 0,
  1306.    [TGSI_OPCODE_TXB] = 0,
  1307.    [TGSI_OPCODE_DIV] = nir_op_fdiv,
  1308.    [TGSI_OPCODE_DP2] = 0,
  1309.    [TGSI_OPCODE_DP2A] = 0,
  1310.    [TGSI_OPCODE_TXL] = 0,
  1311.  
  1312.    [TGSI_OPCODE_BRK] = 0,
  1313.    [TGSI_OPCODE_IF] = 0,
  1314.    [TGSI_OPCODE_UIF] = 0,
  1315.    [TGSI_OPCODE_ELSE] = 0,
  1316.    [TGSI_OPCODE_ENDIF] = 0,
  1317.  
  1318.    [TGSI_OPCODE_DDX_FINE] = nir_op_fddx_fine,
  1319.    [TGSI_OPCODE_DDY_FINE] = nir_op_fddy_fine,
  1320.  
  1321.    [TGSI_OPCODE_PUSHA] = 0, /* XXX */
  1322.    [TGSI_OPCODE_POPA] = 0, /* XXX */
  1323.  
  1324.    [TGSI_OPCODE_CEIL] = nir_op_fceil,
  1325.    [TGSI_OPCODE_I2F] = nir_op_i2f,
  1326.    [TGSI_OPCODE_NOT] = nir_op_inot,
  1327.    [TGSI_OPCODE_TRUNC] = nir_op_ftrunc,
  1328.    [TGSI_OPCODE_SHL] = nir_op_ishl,
  1329.    [TGSI_OPCODE_AND] = nir_op_iand,
  1330.    [TGSI_OPCODE_OR] = nir_op_ior,
  1331.    [TGSI_OPCODE_MOD] = nir_op_umod,
  1332.    [TGSI_OPCODE_XOR] = nir_op_ixor,
  1333.    [TGSI_OPCODE_SAD] = 0, /* XXX */
  1334.    [TGSI_OPCODE_TXF] = 0,
  1335.    [TGSI_OPCODE_TXQ] = 0,
  1336.  
  1337.    [TGSI_OPCODE_CONT] = 0,
  1338.  
  1339.    [TGSI_OPCODE_EMIT] = 0, /* XXX */
  1340.    [TGSI_OPCODE_ENDPRIM] = 0, /* XXX */
  1341.  
  1342.    [TGSI_OPCODE_BGNLOOP] = 0,
  1343.    [TGSI_OPCODE_BGNSUB] = 0, /* XXX: no function calls */
  1344.    [TGSI_OPCODE_ENDLOOP] = 0,
  1345.    [TGSI_OPCODE_ENDSUB] = 0, /* XXX: no function calls */
  1346.  
  1347.    [TGSI_OPCODE_TXQ_LZ] = 0,
  1348.    [TGSI_OPCODE_NOP] = 0,
  1349.    [TGSI_OPCODE_FSEQ] = nir_op_feq,
  1350.    [TGSI_OPCODE_FSGE] = nir_op_fge,
  1351.    [TGSI_OPCODE_FSLT] = nir_op_flt,
  1352.    [TGSI_OPCODE_FSNE] = nir_op_fne,
  1353.  
  1354.    /* No control flow yet */
  1355.    [TGSI_OPCODE_CALLNZ] = 0, /* XXX */
  1356.    [TGSI_OPCODE_BREAKC] = 0, /* not emitted by glsl_to_tgsi.cpp */
  1357.  
  1358.    [TGSI_OPCODE_KILL_IF] = 0,
  1359.  
  1360.    [TGSI_OPCODE_END] = 0,
  1361.  
  1362.    [TGSI_OPCODE_F2I] = nir_op_f2i,
  1363.    [TGSI_OPCODE_IDIV] = nir_op_idiv,
  1364.    [TGSI_OPCODE_IMAX] = nir_op_imax,
  1365.    [TGSI_OPCODE_IMIN] = nir_op_imin,
  1366.    [TGSI_OPCODE_INEG] = nir_op_ineg,
  1367.    [TGSI_OPCODE_ISGE] = nir_op_ige,
  1368.    [TGSI_OPCODE_ISHR] = nir_op_ishr,
  1369.    [TGSI_OPCODE_ISLT] = nir_op_ilt,
  1370.    [TGSI_OPCODE_F2U] = nir_op_f2u,
  1371.    [TGSI_OPCODE_U2F] = nir_op_u2f,
  1372.    [TGSI_OPCODE_UADD] = nir_op_iadd,
  1373.    [TGSI_OPCODE_UDIV] = nir_op_udiv,
  1374.    [TGSI_OPCODE_UMAD] = 0,
  1375.    [TGSI_OPCODE_UMAX] = nir_op_umax,
  1376.    [TGSI_OPCODE_UMIN] = nir_op_umin,
  1377.    [TGSI_OPCODE_UMOD] = nir_op_umod,
  1378.    [TGSI_OPCODE_UMUL] = nir_op_imul,
  1379.    [TGSI_OPCODE_USEQ] = nir_op_ieq,
  1380.    [TGSI_OPCODE_USGE] = nir_op_uge,
  1381.    [TGSI_OPCODE_USHR] = nir_op_ushr,
  1382.    [TGSI_OPCODE_USLT] = nir_op_ult,
  1383.    [TGSI_OPCODE_USNE] = nir_op_ine,
  1384.  
  1385.    [TGSI_OPCODE_SWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */
  1386.    [TGSI_OPCODE_CASE] = 0, /* not emitted by glsl_to_tgsi.cpp */
  1387.    [TGSI_OPCODE_DEFAULT] = 0, /* not emitted by glsl_to_tgsi.cpp */
  1388.    [TGSI_OPCODE_ENDSWITCH] = 0, /* not emitted by glsl_to_tgsi.cpp */
  1389.  
  1390.    /* XXX: SAMPLE opcodes */
  1391.  
  1392.    [TGSI_OPCODE_UARL] = nir_op_imov,
  1393.    [TGSI_OPCODE_UCMP] = 0,
  1394.    [TGSI_OPCODE_IABS] = nir_op_iabs,
  1395.    [TGSI_OPCODE_ISSG] = nir_op_isign,
  1396.  
  1397.    /* XXX: atomics */
  1398.  
  1399.    [TGSI_OPCODE_TEX2] = 0,
  1400.    [TGSI_OPCODE_TXB2] = 0,
  1401.    [TGSI_OPCODE_TXL2] = 0,
  1402.  
  1403.    [TGSI_OPCODE_IMUL_HI] = nir_op_imul_high,
  1404.    [TGSI_OPCODE_UMUL_HI] = nir_op_umul_high,
  1405.  
  1406.    [TGSI_OPCODE_TG4] = 0,
  1407.    [TGSI_OPCODE_LODQ] = 0, /* XXX */
  1408.  
  1409.    [TGSI_OPCODE_IBFE] = nir_op_ibitfield_extract,
  1410.    [TGSI_OPCODE_UBFE] = nir_op_ubitfield_extract,
  1411.    [TGSI_OPCODE_BFI] = nir_op_bitfield_insert,
  1412.    [TGSI_OPCODE_BREV] = nir_op_bitfield_reverse,
  1413.    [TGSI_OPCODE_POPC] = nir_op_bit_count,
  1414.    [TGSI_OPCODE_LSB] = nir_op_find_lsb,
  1415.    [TGSI_OPCODE_IMSB] = nir_op_ifind_msb,
  1416.    [TGSI_OPCODE_UMSB] = nir_op_ifind_msb, /* XXX: signed vs unsigned */
  1417.  
  1418.    [TGSI_OPCODE_INTERP_CENTROID] = 0, /* XXX */
  1419.    [TGSI_OPCODE_INTERP_SAMPLE] = 0, /* XXX */
  1420.    [TGSI_OPCODE_INTERP_OFFSET] = 0, /* XXX */
  1421. };
  1422.  
  1423. static void
  1424. ttn_emit_instruction(struct ttn_compile *c)
  1425. {
  1426.    nir_builder *b = &c->build;
  1427.    struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
  1428.    unsigned i;
  1429.    unsigned tgsi_op = tgsi_inst->Instruction.Opcode;
  1430.    struct tgsi_full_dst_register *tgsi_dst = &tgsi_inst->Dst[0];
  1431.  
  1432.    if (tgsi_op == TGSI_OPCODE_END)
  1433.       return;
  1434.  
  1435.    nir_ssa_def *src[TGSI_FULL_MAX_SRC_REGISTERS];
  1436.    for (i = 0; i < TGSI_FULL_MAX_SRC_REGISTERS; i++) {
  1437.       src[i] = ttn_get_src(c, &tgsi_inst->Src[i]);
  1438.    }
  1439.    nir_alu_dest dest = ttn_get_dest(c, tgsi_dst);
  1440.  
  1441.    switch (tgsi_op) {
  1442.    case TGSI_OPCODE_RSQ:
  1443.       ttn_move_dest(b, dest, nir_frsq(b, ttn_channel(b, src[0], X)));
  1444.       break;
  1445.  
  1446.    case TGSI_OPCODE_SQRT:
  1447.       ttn_move_dest(b, dest, nir_fsqrt(b, ttn_channel(b, src[0], X)));
  1448.       break;
  1449.  
  1450.    case TGSI_OPCODE_RCP:
  1451.       ttn_move_dest(b, dest, nir_frcp(b, ttn_channel(b, src[0], X)));
  1452.       break;
  1453.  
  1454.    case TGSI_OPCODE_EX2:
  1455.       ttn_move_dest(b, dest, nir_fexp2(b, ttn_channel(b, src[0], X)));
  1456.       break;
  1457.  
  1458.    case TGSI_OPCODE_LG2:
  1459.       ttn_move_dest(b, dest, nir_flog2(b, ttn_channel(b, src[0], X)));
  1460.       break;
  1461.  
  1462.    case TGSI_OPCODE_POW:
  1463.       ttn_move_dest(b, dest, nir_fpow(b,
  1464.                                       ttn_channel(b, src[0], X),
  1465.                                       ttn_channel(b, src[1], X)));
  1466.       break;
  1467.  
  1468.    case TGSI_OPCODE_COS:
  1469.       ttn_move_dest(b, dest, nir_fcos(b, ttn_channel(b, src[0], X)));
  1470.       break;
  1471.  
  1472.    case TGSI_OPCODE_SIN:
  1473.       ttn_move_dest(b, dest, nir_fsin(b, ttn_channel(b, src[0], X)));
  1474.       break;
  1475.  
  1476.    case TGSI_OPCODE_ARL:
  1477.       ttn_arl(b, op_trans[tgsi_op], dest, src);
  1478.       break;
  1479.  
  1480.    case TGSI_OPCODE_EXP:
  1481.       ttn_exp(b, op_trans[tgsi_op], dest, src);
  1482.       break;
  1483.  
  1484.    case TGSI_OPCODE_LOG:
  1485.       ttn_log(b, op_trans[tgsi_op], dest, src);
  1486.       break;
  1487.  
  1488.    case TGSI_OPCODE_DST:
  1489.       ttn_dst(b, op_trans[tgsi_op], dest, src);
  1490.       break;
  1491.  
  1492.    case TGSI_OPCODE_LIT:
  1493.       ttn_lit(b, op_trans[tgsi_op], dest, src);
  1494.       break;
  1495.  
  1496.    case TGSI_OPCODE_CLAMP:
  1497.       ttn_clamp(b, op_trans[tgsi_op], dest, src);
  1498.       break;
  1499.  
  1500.    case TGSI_OPCODE_XPD:
  1501.       ttn_xpd(b, op_trans[tgsi_op], dest, src);
  1502.       break;
  1503.  
  1504.    case TGSI_OPCODE_DP2:
  1505.       ttn_dp2(b, op_trans[tgsi_op], dest, src);
  1506.       break;
  1507.  
  1508.    case TGSI_OPCODE_DP3:
  1509.       ttn_dp3(b, op_trans[tgsi_op], dest, src);
  1510.       break;
  1511.  
  1512.    case TGSI_OPCODE_DP4:
  1513.       ttn_dp4(b, op_trans[tgsi_op], dest, src);
  1514.       break;
  1515.  
  1516.    case TGSI_OPCODE_DP2A:
  1517.       ttn_dp2a(b, op_trans[tgsi_op], dest, src);
  1518.       break;
  1519.  
  1520.    case TGSI_OPCODE_DPH:
  1521.       ttn_dph(b, op_trans[tgsi_op], dest, src);
  1522.       break;
  1523.  
  1524.    case TGSI_OPCODE_UMAD:
  1525.       ttn_umad(b, op_trans[tgsi_op], dest, src);
  1526.       break;
  1527.  
  1528.    case TGSI_OPCODE_LRP:
  1529.       ttn_move_dest(b, dest, nir_flrp(b, src[2], src[1], src[0]));
  1530.       break;
  1531.  
  1532.    case TGSI_OPCODE_KILL:
  1533.       ttn_kill(b, op_trans[tgsi_op], dest, src);
  1534.       break;
  1535.  
  1536.    case TGSI_OPCODE_ARR:
  1537.       ttn_arr(b, op_trans[tgsi_op], dest, src);
  1538.       break;
  1539.  
  1540.    case TGSI_OPCODE_CMP:
  1541.       ttn_cmp(b, op_trans[tgsi_op], dest, src);
  1542.       break;
  1543.  
  1544.    case TGSI_OPCODE_UCMP:
  1545.       ttn_ucmp(b, op_trans[tgsi_op], dest, src);
  1546.       break;
  1547.  
  1548.    case TGSI_OPCODE_SCS:
  1549.       ttn_scs(b, op_trans[tgsi_op], dest, src);
  1550.       break;
  1551.  
  1552.    case TGSI_OPCODE_SGT:
  1553.       ttn_sgt(b, op_trans[tgsi_op], dest, src);
  1554.       break;
  1555.  
  1556.    case TGSI_OPCODE_SLE:
  1557.       ttn_sle(b, op_trans[tgsi_op], dest, src);
  1558.       break;
  1559.  
  1560.    case TGSI_OPCODE_KILL_IF:
  1561.       ttn_kill_if(b, op_trans[tgsi_op], dest, src);
  1562.       break;
  1563.  
  1564.    case TGSI_OPCODE_TEX:
  1565.    case TGSI_OPCODE_TXP:
  1566.    case TGSI_OPCODE_TXL:
  1567.    case TGSI_OPCODE_TXB:
  1568.    case TGSI_OPCODE_TXD:
  1569.    case TGSI_OPCODE_TXL2:
  1570.    case TGSI_OPCODE_TXB2:
  1571.    case TGSI_OPCODE_TXQ_LZ:
  1572.    case TGSI_OPCODE_TXF:
  1573.    case TGSI_OPCODE_TG4:
  1574.       ttn_tex(c, dest, src);
  1575.       break;
  1576.  
  1577.    case TGSI_OPCODE_TXQ:
  1578.       ttn_txq(c, dest, src);
  1579.       break;
  1580.  
  1581.    case TGSI_OPCODE_NOP:
  1582.       break;
  1583.  
  1584.    case TGSI_OPCODE_IF:
  1585.       ttn_if(c, src[0], false);
  1586.       break;
  1587.  
  1588.    case TGSI_OPCODE_UIF:
  1589.       ttn_if(c, src[0], true);
  1590.       break;
  1591.  
  1592.    case TGSI_OPCODE_ELSE:
  1593.       ttn_else(c);
  1594.       break;
  1595.  
  1596.    case TGSI_OPCODE_ENDIF:
  1597.       ttn_endif(c);
  1598.       break;
  1599.  
  1600.    case TGSI_OPCODE_BGNLOOP:
  1601.       ttn_bgnloop(c);
  1602.       break;
  1603.  
  1604.    case TGSI_OPCODE_BRK:
  1605.       ttn_brk(b);
  1606.       break;
  1607.  
  1608.    case TGSI_OPCODE_CONT:
  1609.       ttn_cont(b);
  1610.       break;
  1611.  
  1612.    case TGSI_OPCODE_ENDLOOP:
  1613.       ttn_endloop(c);
  1614.       break;
  1615.  
  1616.    default:
  1617.       if (op_trans[tgsi_op] != 0 || tgsi_op == TGSI_OPCODE_MOV) {
  1618.          ttn_alu(b, op_trans[tgsi_op], dest, src);
  1619.       } else {
  1620.          fprintf(stderr, "unknown TGSI opcode: %s\n",
  1621.                  tgsi_get_opcode_name(tgsi_op));
  1622.          abort();
  1623.       }
  1624.       break;
  1625.    }
  1626.  
  1627.    if (tgsi_inst->Instruction.Saturate) {
  1628.       assert(tgsi_inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE);
  1629.       assert(!dest.dest.is_ssa);
  1630.       ttn_move_dest(b, dest, nir_fsat(b, ttn_src_for_dest(b, &dest)));
  1631.    }
  1632.  
  1633.    /* if the dst has a matching var, append store_global to move
  1634.     * output from reg to var
  1635.     */
  1636.    nir_variable *var = ttn_get_var(c, tgsi_dst);
  1637.    if (var) {
  1638.       unsigned index = tgsi_dst->Register.Index;
  1639.       unsigned offset = c->temp_regs[index].offset;
  1640.       nir_intrinsic_instr *store =
  1641.          nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
  1642.       struct tgsi_ind_register *indirect = tgsi_dst->Register.Indirect ?
  1643.                                            &tgsi_dst->Indirect : NULL;
  1644.  
  1645.       store->num_components = 4;
  1646.       store->variables[0] = ttn_array_deref(c, store, var, offset, indirect);
  1647.       store->src[0] = nir_src_for_reg(dest.dest.reg.reg);
  1648.  
  1649.       nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
  1650.    }
  1651. }
  1652.  
  1653. /**
  1654.  * Puts a NIR intrinsic to store of each TGSI_FILE_OUTPUT value to the output
  1655.  * variables at the end of the shader.
  1656.  *
  1657.  * We don't generate these incrementally as the TGSI_FILE_OUTPUT values are
  1658.  * written, because there's no output load intrinsic, which means we couldn't
  1659.  * handle writemasks.
  1660.  */
  1661. static void
  1662. ttn_add_output_stores(struct ttn_compile *c)
  1663. {
  1664.    nir_builder *b = &c->build;
  1665.  
  1666.    foreach_list_typed(nir_variable, var, node, &b->shader->outputs) {
  1667.       unsigned array_len = MAX2(glsl_get_length(var->type), 1);
  1668.       unsigned i;
  1669.  
  1670.       for (i = 0; i < array_len; i++) {
  1671.          nir_intrinsic_instr *store =
  1672.             nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
  1673.          store->num_components = 4;
  1674.          store->const_index[0] = var->data.driver_location + i;
  1675.          store->const_index[1] = 1;
  1676.          store->src[0].reg.reg = c->output_regs[var->data.driver_location].reg;
  1677.          nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
  1678.       }
  1679.    }
  1680. }
  1681.  
  1682. struct nir_shader *
  1683. tgsi_to_nir(const void *tgsi_tokens,
  1684.             const nir_shader_compiler_options *options)
  1685. {
  1686.    struct tgsi_parse_context parser;
  1687.    struct tgsi_shader_info scan;
  1688.    struct ttn_compile *c;
  1689.    struct nir_shader *s;
  1690.    int ret;
  1691.  
  1692.    c = rzalloc(NULL, struct ttn_compile);
  1693.    s = nir_shader_create(NULL, options);
  1694.  
  1695.    nir_function *func = nir_function_create(s, "main");
  1696.    nir_function_overload *overload = nir_function_overload_create(func);
  1697.    nir_function_impl *impl = nir_function_impl_create(overload);
  1698.  
  1699.    nir_builder_init(&c->build, impl);
  1700.    nir_builder_insert_after_cf_list(&c->build, &impl->body);
  1701.  
  1702.    tgsi_scan_shader(tgsi_tokens, &scan);
  1703.    c->scan = &scan;
  1704.  
  1705.    s->num_inputs = scan.file_max[TGSI_FILE_INPUT] + 1;
  1706.    s->num_uniforms = scan.const_file_max[0] + 1;
  1707.    s->num_outputs = scan.file_max[TGSI_FILE_OUTPUT] + 1;
  1708.  
  1709.    c->output_regs = rzalloc_array(c, struct ttn_reg_info,
  1710.                                   scan.file_max[TGSI_FILE_OUTPUT] + 1);
  1711.    c->temp_regs = rzalloc_array(c, struct ttn_reg_info,
  1712.                                 scan.file_max[TGSI_FILE_TEMPORARY] + 1);
  1713.    c->imm_defs = rzalloc_array(c, nir_ssa_def *,
  1714.                                scan.file_max[TGSI_FILE_IMMEDIATE] + 1);
  1715.  
  1716.    c->if_stack = rzalloc_array(c, struct exec_list *,
  1717.                                (scan.opcode_count[TGSI_OPCODE_IF] +
  1718.                                 scan.opcode_count[TGSI_OPCODE_UIF]) * 2);
  1719.    c->loop_stack = rzalloc_array(c, struct exec_list *,
  1720.                                  scan.opcode_count[TGSI_OPCODE_BGNLOOP]);
  1721.  
  1722.    ret = tgsi_parse_init(&parser, tgsi_tokens);
  1723.    assert(ret == TGSI_PARSE_OK);
  1724.  
  1725.    while (!tgsi_parse_end_of_tokens(&parser)) {
  1726.       tgsi_parse_token(&parser);
  1727.       c->token = &parser.FullToken;
  1728.  
  1729.       switch (parser.FullToken.Token.Type) {
  1730.       case TGSI_TOKEN_TYPE_DECLARATION:
  1731.          ttn_emit_declaration(c);
  1732.          break;
  1733.  
  1734.       case TGSI_TOKEN_TYPE_INSTRUCTION:
  1735.          ttn_emit_instruction(c);
  1736.          break;
  1737.  
  1738.       case TGSI_TOKEN_TYPE_IMMEDIATE:
  1739.          ttn_emit_immediate(c);
  1740.          break;
  1741.       }
  1742.    }
  1743.  
  1744.    tgsi_parse_free(&parser);
  1745.  
  1746.    ttn_add_output_stores(c);
  1747.  
  1748.    ralloc_free(c);
  1749.    return s;
  1750. }
  1751.