Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2009 VMware, Inc.
  4.  * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas.
  5.  * All Rights Reserved.
  6.  *
  7.  * Permission is hereby granted, free of charge, to any person obtaining a
  8.  * copy of this software and associated documentation files (the
  9.  * "Software"), to deal in the Software without restriction, including
  10.  * without limitation the rights to use, copy, modify, merge, publish,
  11.  * distribute, sub license, and/or sell copies of the Software, and to
  12.  * permit persons to whom the Software is furnished to do so, subject to
  13.  * the following conditions:
  14.  *
  15.  * The above copyright notice and this permission notice (including the
  16.  * next paragraph) shall be included in all copies or substantial portions
  17.  * of the Software.
  18.  *
  19.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22.  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  23.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26.  *
  27.  **************************************************************************/
  28.  
  29. /**
  30.  * @file
  31.  * TGSI to LLVM IR translation -- SoA.
  32.  *
  33.  * @author Jose Fonseca <jfonseca@vmware.com>
  34.  *
  35.  * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
  36.  * Brian Paul, and others.
  37.  */
  38.  
  39. #include "pipe/p_config.h"
  40. #include "pipe/p_shader_tokens.h"
  41. #include "util/u_debug.h"
  42. #include "util/u_math.h"
  43. #include "util/u_memory.h"
  44. #include "tgsi/tgsi_dump.h"
  45. #include "tgsi/tgsi_exec.h"
  46. #include "tgsi/tgsi_info.h"
  47. #include "tgsi/tgsi_parse.h"
  48. #include "tgsi/tgsi_util.h"
  49. #include "tgsi/tgsi_scan.h"
  50. #include "lp_bld_tgsi_action.h"
  51. #include "lp_bld_type.h"
  52. #include "lp_bld_const.h"
  53. #include "lp_bld_arit.h"
  54. #include "lp_bld_bitarit.h"
  55. #include "lp_bld_gather.h"
  56. #include "lp_bld_init.h"
  57. #include "lp_bld_logic.h"
  58. #include "lp_bld_swizzle.h"
  59. #include "lp_bld_flow.h"
  60. #include "lp_bld_quad.h"
  61. #include "lp_bld_tgsi.h"
  62. #include "lp_bld_limits.h"
  63. #include "lp_bld_debug.h"
  64. #include "lp_bld_printf.h"
  65. #include "lp_bld_sample.h"
  66. #include "lp_bld_struct.h"
  67.  
  68. #define DUMP_GS_EMITS 0
  69.  
  70. static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
  71. {
  72.    LLVMTypeRef int_type = LLVMInt32TypeInContext(bld->gallivm->context);
  73.    LLVMBuilderRef builder = bld->gallivm->builder;
  74.  
  75.    mask->bld = bld;
  76.    mask->has_mask = FALSE;
  77.    mask->ret_in_main = FALSE;
  78.    mask->cond_stack_size = 0;
  79.    mask->loop_stack_size = 0;
  80.    mask->call_stack_size = 0;
  81.    mask->switch_stack_size = 0;
  82.  
  83.    mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
  84.    mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
  85.          mask->cond_mask = mask->switch_mask =
  86.          LLVMConstAllOnes(mask->int_vec_type);
  87.  
  88.    mask->loop_limiter = lp_build_alloca(bld->gallivm, int_type, "looplimiter");
  89.  
  90.    LLVMBuildStore(
  91.       builder,
  92.       LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
  93.       mask->loop_limiter);
  94. }
  95.  
/**
 * Recompute the combined execution mask from all active partial masks
 * (cond / loop break+continue / switch / return) and refresh has_mask.
 * Called after any single partial mask changes.
 */
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   if (mask->loop_stack_size) {
      /*for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (mask->switch_stack_size) {
      /* inside a switch, only channels enabled by case matching run */
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->switch_mask,
                                     "switchmask");
   }

   if (mask->call_stack_size || mask->ret_in_main) {
      /* channels that executed RET stay disabled until the sub/main ends */
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   /* has_mask tells stores whether a select-on-store is needed at all */
   mask->has_mask = (mask->cond_stack_size > 0 ||
                     mask->loop_stack_size > 0 ||
                     mask->call_stack_size > 0 ||
                     mask->switch_stack_size > 0 ||
                     mask->ret_in_main);
}
  135.  
  136. static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
  137.                                    LLVMValueRef val)
  138. {
  139.    LLVMBuilderRef builder = mask->bld->gallivm->builder;
  140.  
  141.    assert(mask->cond_stack_size < LP_MAX_TGSI_NESTING);
  142.    if (mask->cond_stack_size == 0) {
  143.       assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
  144.    }
  145.    mask->cond_stack[mask->cond_stack_size++] = mask->cond_mask;
  146.    assert(LLVMTypeOf(val) == mask->int_vec_type);
  147.    mask->cond_mask = LLVMBuildAnd(builder,
  148.                                   mask->cond_mask,
  149.                                   val,
  150.                                   "");
  151.    lp_exec_mask_update(mask);
  152. }
  153.  
  154. static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
  155. {
  156.    LLVMBuilderRef builder = mask->bld->gallivm->builder;
  157.    LLVMValueRef prev_mask;
  158.    LLVMValueRef inv_mask;
  159.  
  160.    assert(mask->cond_stack_size);
  161.    prev_mask = mask->cond_stack[mask->cond_stack_size - 1];
  162.    if (mask->cond_stack_size == 1) {
  163.       assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
  164.    }
  165.  
  166.    inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");
  167.  
  168.    mask->cond_mask = LLVMBuildAnd(builder,
  169.                                   inv_mask,
  170.                                   prev_mask, "");
  171.    lp_exec_mask_update(mask);
  172. }
  173.  
  174. static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
  175. {
  176.    assert(mask->cond_stack_size);
  177.    mask->cond_mask = mask->cond_stack[--mask->cond_stack_size];
  178.    lp_exec_mask_update(mask);
  179. }
  180.  
/**
 * BGNLOOP: save the enclosing loop state, emit the loop-header basic
 * block and branch into it.  The break mask lives in an alloca
 * (break_var) so its value survives the loop back-edge.
 */
static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   if (mask->loop_stack_size == 0) {
      /* outermost loop: all loop-related state must be pristine */
      assert(mask->loop_block == NULL);
      assert(mask->cont_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_mask == LLVMConstAllOnes(mask->int_vec_type));
      assert(mask->break_var == NULL);
   }

   assert(mask->loop_stack_size < LP_MAX_TGSI_NESTING);

   /* loops and switches share one break-type stack; remember what BRK
    * targeted before this loop */
   mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size] =
      mask->break_type;
   mask->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;

   mask->loop_stack[mask->loop_stack_size].loop_block = mask->loop_block;
   mask->loop_stack[mask->loop_stack_size].cont_mask = mask->cont_mask;
   mask->loop_stack[mask->loop_stack_size].break_mask = mask->break_mask;
   mask->loop_stack[mask->loop_stack_size].break_var = mask->break_var;
   ++mask->loop_stack_size;

   /* break mask must persist across iterations -> keep it in memory */
   mask->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, mask->break_mask, mask->break_var);

   mask->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");

   /* fall through into the loop header block */
   LLVMBuildBr(builder, mask->loop_block);
   LLVMPositionBuilderAtEnd(builder, mask->loop_block);

   /* reload break mask at the top of each iteration */
   mask->break_mask = LLVMBuildLoad(builder, mask->break_var, "");

   lp_exec_mask_update(mask);
}
  216.  
/**
 * BRK: disable the currently-executing channels in either the loop
 * break mask or the switch mask, depending on what the break targets.
 * Also handles terminating deferred-default execution (see
 * lp_exec_endswitch for the pc re-purposing this relies on).
 */
static void lp_exec_break(struct lp_exec_mask *mask,
                          struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   if (mask->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      /* clear channels that take the break out of the break mask */
      LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                            mask->exec_mask,
                                            "break");

      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      exec_mask, "break_full");
   }
   else {
      /* peek at the next instruction: a break immediately followed by
       * ENDSWITCH/CASE is unconditional for all channels */
      unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
      boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
                              opcode == TGSI_OPCODE_CASE);


      if (mask->switch_in_default) {
         /*
          * stop default execution but only if this is an unconditional switch.
          * (The condition here is not perfect since dead code after break is
          * allowed but should be sufficient since false negatives are just
          * unoptimized - so we don't have to pre-evaluate that).
          */
         if(break_always && mask->switch_pc) {
            /* jump back to where the deferred default was entered from */
            bld_base->pc = mask->switch_pc;
            return;
         }
      }

      if (break_always) {
         /* no channel survives an unconditional break */
         mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
      }
      else {
         LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                               mask->exec_mask,
                                               "break");
         mask->switch_mask = LLVMBuildAnd(builder,
                                          mask->switch_mask,
                                          exec_mask, "break_switch");
      }
   }

   lp_exec_mask_update(mask);
}
  265.  
  266. static void lp_exec_break_condition(struct lp_exec_mask *mask,
  267.                                     LLVMValueRef cond)
  268. {
  269.    LLVMBuilderRef builder = mask->bld->gallivm->builder;
  270.    LLVMValueRef cond_mask = LLVMBuildAnd(builder,
  271.                                          mask->exec_mask,
  272.                                          cond, "cond_mask");
  273.    cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond");
  274.  
  275.    if (mask->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
  276.       mask->break_mask = LLVMBuildAnd(builder,
  277.                                       mask->break_mask,
  278.                                       cond_mask, "breakc_full");
  279.    }
  280.    else {
  281.       mask->switch_mask = LLVMBuildAnd(builder,
  282.                                        mask->switch_mask,
  283.                                        cond_mask, "breakc_switch");
  284.    }
  285.  
  286.    lp_exec_mask_update(mask);
  287. }
  288.  
  289. static void lp_exec_continue(struct lp_exec_mask *mask)
  290. {
  291.    LLVMBuilderRef builder = mask->bld->gallivm->builder;
  292.    LLVMValueRef exec_mask = LLVMBuildNot(builder,
  293.                                          mask->exec_mask,
  294.                                          "");
  295.  
  296.    mask->cont_mask = LLVMBuildAnd(builder,
  297.                                   mask->cont_mask,
  298.                                   exec_mask, "");
  299.  
  300.    lp_exec_mask_update(mask);
  301. }
  302.  
  303.  
/**
 * ENDLOOP: emit the loop back-edge.  Loops back to the header while any
 * channel is still active AND the iteration limiter has not expired,
 * then pops the saved loop state.
 */
static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMBasicBlockRef endloop;
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   /* one wide integer covering the whole mask vector, for a single
    * "any channel set?" compare */
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond, i2cond, icond, limiter;

   assert(mask->break_mask);

   /*
    * Restore the cont_mask, but don't pop
    */
   assert(mask->loop_stack_size);
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, mask->break_var);

   /* Decrement the loop limiter */
   limiter = LLVMBuildLoad(builder, mask->loop_limiter, "");

   limiter = LLVMBuildSub(
      builder,
      limiter,
      LLVMConstInt(int_type, 1, false),
      "");

   LLVMBuildStore(builder, limiter, mask->loop_limiter);

   /* i1cond = (mask != 0) */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "i1cond");

   /* i2cond = (looplimiter > 0) */
   i2cond = LLVMBuildICmp(
      builder,
      LLVMIntSGT,
      limiter,
      LLVMConstNull(int_type), "i2cond");

   /* if( i1cond && i2cond ) */
   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   /* back-edge to the loop header, or fall through to endloop */
   LLVMBuildCondBr(builder,
                   icond, mask->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   /* pop the loop state saved by lp_exec_bgnloop */
   assert(mask->loop_stack_size);
   --mask->loop_stack_size;
   mask->loop_block = mask->loop_stack[mask->loop_stack_size].loop_block;
   mask->cont_mask = mask->loop_stack[mask->loop_stack_size].cont_mask;
   mask->break_mask = mask->loop_stack[mask->loop_stack_size].break_mask;
   mask->break_var = mask->loop_stack[mask->loop_stack_size].break_var;
   mask->break_type = mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size];

   lp_exec_mask_update(mask);
}
  375.  
  376. static void lp_exec_switch(struct lp_exec_mask *mask,
  377.                            LLVMValueRef switchval)
  378. {
  379.    mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size] =
  380.       mask->break_type;
  381.    mask->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
  382.  
  383.    mask->switch_stack[mask->switch_stack_size].switch_val = mask->switch_val;
  384.    mask->switch_stack[mask->switch_stack_size].switch_mask = mask->switch_mask;
  385.    mask->switch_stack[mask->switch_stack_size].switch_mask_default = mask->switch_mask_default;
  386.    mask->switch_stack[mask->switch_stack_size].switch_in_default = mask->switch_in_default;
  387.    mask->switch_stack[mask->switch_stack_size].switch_pc = mask->switch_pc;
  388.    mask->switch_stack_size++;
  389.  
  390.    mask->switch_val = switchval;
  391.    mask->switch_mask = LLVMConstNull(mask->int_vec_type);
  392.    mask->switch_mask_default = LLVMConstNull(mask->int_vec_type);
  393.    mask->switch_in_default = false;
  394.    mask->switch_pc = 0;
  395.  
  396.    lp_exec_mask_update(mask);
  397. }
  398.  
/**
 * ENDSWITCH: if a DEFAULT was deferred (it was not the last statement
 * and had no fallthrough into it), enable its channels now and jump the
 * pc back to re-execute the skipped code; otherwise pop the switch
 * state saved by lp_exec_switch.
 */
static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   /* check if there's deferred default if so do it now */
   if (mask->switch_pc && !mask->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
      /* default runs on channels matched by no case */
      defaultmask = LLVMBuildNot(builder, mask->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      mask->switch_in_default = true;

      lp_exec_mask_update(mask);

      /* switch_pc was recorded by lp_exec_default, right after DEFAULT */
      assert(bld_base->instructions[mask->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      tmp_pc = bld_base->pc;
      bld_base->pc = mask->switch_pc;
      /*
       * re-purpose switch_pc to point to here again, since we stop execution of
       * the deferred default after next break.
       */
      mask->switch_pc = tmp_pc - 1;

      return;
   }

   else if (mask->switch_pc && mask->switch_in_default) {
      /* just returned from executing the deferred default */
      assert(bld_base->pc == mask->switch_pc + 1);
   }

   /* pop state saved by lp_exec_switch */
   mask->switch_stack_size--;
   mask->switch_val = mask->switch_stack[mask->switch_stack_size].switch_val;
   mask->switch_mask = mask->switch_stack[mask->switch_stack_size].switch_mask;
   mask->switch_mask_default = mask->switch_stack[mask->switch_stack_size].switch_mask_default;
   mask->switch_in_default = mask->switch_stack[mask->switch_stack_size].switch_in_default;
   mask->switch_pc = mask->switch_stack[mask->switch_stack_size].switch_pc;

   mask->break_type = mask->break_type_stack[mask->loop_stack_size + mask->switch_stack_size];

   lp_exec_mask_update(mask);
}
  444.  
  445. static void lp_exec_case(struct lp_exec_mask *mask,
  446.                          LLVMValueRef caseval)
  447. {
  448.    LLVMBuilderRef builder = mask->bld->gallivm->builder;
  449.  
  450.    LLVMValueRef casemask, prevmask;
  451.  
  452.    /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
  453.    if (!mask->switch_in_default) {
  454.       prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
  455.       casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, mask->switch_val);
  456.       mask->switch_mask_default = LLVMBuildOr(builder, casemask,
  457.                                               mask->switch_mask_default, "sw_default_mask");
  458.       casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
  459.       mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
  460.  
  461.       lp_exec_mask_update(mask);
  462.    }
  463. }
  464.  
  465. /*
  466.  * Analyse default statement in a switch.
  467.  * \return true if default is last statement, false otherwise
  468.  * \param default_pc_start contains pc of instruction to jump to
  469.  *                         if default wasn't last but there's no
  470.  *                         fallthrough into default.
  471.  */
  472. static boolean default_analyse_is_last(struct lp_exec_mask *mask,
  473.                                        struct lp_build_tgsi_context * bld_base,
  474.                                        int *default_pc_start)
  475. {
  476.    unsigned pc = bld_base->pc;
  477.    unsigned curr_switch_stack = mask->switch_stack_size;
  478.  
  479.    /* skip over case statements which are together with default */
  480.    while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
  481.       pc++;
  482.    }
  483.  
  484.    while (pc != -1 && pc < bld_base->num_instructions) {
  485.       unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
  486.       switch (opcode) {
  487.       case TGSI_OPCODE_CASE:
  488.          if (curr_switch_stack == mask->switch_stack_size) {
  489.             *default_pc_start = pc - 1;
  490.             return false;
  491.          }
  492.          break;
  493.       case TGSI_OPCODE_SWITCH:
  494.          curr_switch_stack++;
  495.          break;
  496.       case TGSI_OPCODE_ENDSWITCH:
  497.          if (curr_switch_stack == mask->switch_stack_size) {
  498.             *default_pc_start = pc - 1;
  499.             return true;
  500.          }
  501.          curr_switch_stack--;
  502.          break;
  503.       }
  504.       pc++;
  505.    }
  506.    /* should never arrive here */
  507.    assert(0);
  508.    return true;
  509. }
  510.  
  511. static void lp_exec_default(struct lp_exec_mask *mask,
  512.                             struct lp_build_tgsi_context * bld_base)
  513. {
  514.    LLVMBuilderRef builder = mask->bld->gallivm->builder;
  515.  
  516.    int default_exec_pc;
  517.    boolean default_is_last;
  518.  
  519.    /*
  520.     * This is a messy opcode, because it may not be always at the end and
  521.     * there can be fallthrough in and out of it.
  522.     */
  523.  
  524.    default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
  525.    /*
  526.     * If it is last statement in switch (note that case statements appearing
  527.     * "at the same time" as default don't change that) everything is just fine,
  528.     * update switch mask and go on. This means we can handle default with
  529.     * fallthrough INTO it without overhead, if it is last.
  530.     */
  531.    if (default_is_last) {
  532.       LLVMValueRef prevmask, defaultmask;
  533.       prevmask = mask->switch_stack[mask->switch_stack_size - 1].switch_mask;
  534.       defaultmask = LLVMBuildNot(builder, mask->switch_mask_default, "sw_default_mask");
  535.       defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
  536.       mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
  537.       mask->switch_in_default = true;
  538.  
  539.       lp_exec_mask_update(mask);
  540.    }
  541.    else {
  542.       /*
  543.        * Technically, "case" immediately before default isn't really a
  544.        * fallthrough, however we still have to count them as such as we
  545.        * already have updated the masks.
  546.        * If that happens in practice could add a switch optimizer pass
  547.        * which just gets rid of all case statements appearing together with
  548.        * default (or could do switch analysis at switch start time instead).
  549.        */
  550.       unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
  551.       boolean ft_into = (opcode != TGSI_OPCODE_BRK ||
  552.                          opcode != TGSI_OPCODE_SWITCH);
  553.       /*
  554.        * If it is not last statement and there was no fallthrough into it,
  555.        * we record the PC and continue execution at next case (again, those
  556.        * case encountered at the same time don't count). At endswitch
  557.        * time, we update switchmask, and go back executing the code we skipped
  558.        * until the next break (possibly re-executing some code with changed mask
  559.        * if there was a fallthrough out of default).
  560.        * Finally, if it is not last statement and there was a fallthrough into it,
  561.        * do the same as with the former case, except instead of skipping the code
  562.        * just execute it without updating the mask, then go back and re-execute.
  563.        */
  564.       mask->switch_pc = bld_base->pc;
  565.       if (!ft_into) {
  566.          bld_base->pc = default_exec_pc;
  567.       }
  568.    }
  569. }
  570.  
  571.  
  572. /* stores val into an address pointed to by dst_ptr.
  573.  * mask->exec_mask is used to figure out which bits of val
  574.  * should be stored into the address
  575.  * (0 means don't store this bit, 1 means do store).
  576.  */
/* stores val into an address pointed to by dst_ptr.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               struct lp_build_context *bld_store,
                               LLVMValueRef pred,
                               LLVMValueRef val,
                               LLVMValueRef dst_ptr)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   assert(lp_check_value(bld_store->type, val));
   assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
   assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));

   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
      } else {
         pred = mask->exec_mask;
      }
   }

   if (pred) {
      LLVMValueRef res, dst;

      /* masked store: load-select-store per vector */
      dst = LLVMBuildLoad(builder, dst_ptr, "");
      res = lp_build_select(bld_store, pred, val, dst);
      LLVMBuildStore(builder, res, dst_ptr);
   } else
      /* no active mask or predicate: plain unconditional store */
      LLVMBuildStore(builder, val, dst_ptr);
}
  607.  
  608. static void lp_exec_mask_call(struct lp_exec_mask *mask,
  609.                               int func,
  610.                               int *pc)
  611. {
  612.    assert(mask->call_stack_size < LP_MAX_TGSI_NESTING);
  613.    mask->call_stack[mask->call_stack_size].pc = *pc;
  614.    mask->call_stack[mask->call_stack_size].ret_mask = mask->ret_mask;
  615.    mask->call_stack_size++;
  616.    *pc = func;
  617. }
  618.  
/**
 * RET: disable the returning channels in ret_mask.  A return from an
 * unnested main() terminates interpretation outright (pc = -1); a return
 * in main inside control flow sets ret_in_main so the mask sticks.
 */
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask;

   if (mask->cond_stack_size == 0 &&
       mask->loop_stack_size == 0 &&
       mask->switch_stack_size == 0 &&
       mask->call_stack_size == 0) {
      /* returning from main() */
      *pc = -1;
      return;
   }

   if (mask->call_stack_size == 0) {
      /*
       * This requires special handling since we need to ensure
       * we don't drop the mask even if we have no call stack
       * (e.g. after a ret in a if clause after the endif)
       */
      mask->ret_in_main = TRUE;
   }

   /* channels executing the RET drop out of the return mask */
   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}
  652.  
/* BGNSUB needs no mask work: the call state is saved/restored entirely
 * by lp_exec_mask_call (CAL) and lp_exec_mask_endsub (ENDSUB). */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
  656.  
  657. static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
  658. {
  659.    assert(mask->call_stack_size);
  660.    mask->call_stack_size--;
  661.    *pc = mask->call_stack[mask->call_stack_size].pc;
  662.    mask->ret_mask = mask->call_stack[mask->call_stack_size].ret_mask;
  663.    lp_exec_mask_update(mask);
  664. }
  665.  
  666.  
  667. /**
  668.  * Return pointer to a temporary register channel (src or dest).
  669.  * Note that indirect addressing cannot be handled here.
  670.  * \param index  which temporary register
  671.  * \param chan  which channel of the temp register.
  672.  */
  673. LLVMValueRef
  674. lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
  675.              unsigned index,
  676.              unsigned chan)
  677. {
  678.    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
  679.    assert(chan < 4);
  680.    if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
  681.       LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
  682.       return LLVMBuildGEP(builder, bld->temps_array, &lindex, 1, "");
  683.    }
  684.    else {
  685.       return bld->temps[index][chan];
  686.    }
  687. }
  688.  
  689. /**
  690.  * Return pointer to a output register channel (src or dest).
  691.  * Note that indirect addressing cannot be handled here.
  692.  * \param index  which output register
  693.  * \param chan  which channel of the output register.
  694.  */
  695. LLVMValueRef
  696. lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
  697.                unsigned index,
  698.                unsigned chan)
  699. {
  700.    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
  701.    assert(chan < 4);
  702.    if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
  703.       LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm,
  704.                                                  index * 4 + chan);
  705.       return LLVMBuildGEP(builder, bld->outputs_array, &lindex, 1, "");
  706.    }
  707.    else {
  708.       return bld->outputs[index][chan];
  709.    }
  710. }
  711.  
  712. /*
  713.  * If we have indirect addressing in outputs copy our alloca array
  714.  * to the outputs slots specified by the caller to make sure
  715.  * our outputs are delivered consistently via the same interface.
  716.  */
  717. static void
  718. gather_outputs(struct lp_build_tgsi_soa_context * bld)
  719. {
  720.    if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
  721.       unsigned index, chan;
  722.       assert(bld->bld_base.info->num_outputs <=
  723.              bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
  724.       for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
  725.          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
  726.             bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
  727.          }
  728.       }
  729.    }
  730. }
  731.  
  732. /**
  733.  * Gather vector.
  734.  * XXX the lp_build_gather() function should be capable of doing this
  735.  * with a little work.
  736.  */
  737. static LLVMValueRef
  738. build_gather(struct lp_build_context *bld,
  739.              LLVMValueRef base_ptr,
  740.              LLVMValueRef indexes)
  741. {
  742.    LLVMBuilderRef builder = bld->gallivm->builder;
  743.    LLVMValueRef res = bld->undef;
  744.    unsigned i;
  745.  
  746.    /*
  747.     * Loop over elements of index_vec, load scalar value, insert it into 'res'.
  748.     */
  749.    for (i = 0; i < bld->type.length; i++) {
  750.       LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
  751.       LLVMValueRef index = LLVMBuildExtractElement(builder,
  752.                                                    indexes, ii, "");
  753.       LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr,
  754.                                              &index, 1, "gather_ptr");
  755.       LLVMValueRef scalar = LLVMBuildLoad(builder, scalar_ptr, "");
  756.  
  757.       res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
  758.    }
  759.  
  760.    return res;
  761. }
  762.  
  763.  
  764. /**
  765.  * Scatter/store vector.
  766.  */
  767. static void
  768. emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
  769.                   LLVMValueRef base_ptr,
  770.                   LLVMValueRef indexes,
  771.                   LLVMValueRef values,
  772.                   struct lp_exec_mask *mask,
  773.                   LLVMValueRef pred)
  774. {
  775.    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
  776.    LLVMBuilderRef builder = gallivm->builder;
  777.    unsigned i;
  778.  
  779.    /* Mix the predicate and execution mask */
  780.    if (mask->has_mask) {
  781.       if (pred) {
  782.          pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
  783.       }
  784.       else {
  785.          pred = mask->exec_mask;
  786.       }
  787.    }
  788.  
  789.    /*
  790.     * Loop over elements of index_vec, store scalar value.
  791.     */
  792.    for (i = 0; i < bld->bld_base.base.type.length; i++) {
  793.       LLVMValueRef ii = lp_build_const_int32(gallivm, i);
  794.       LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
  795.       LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
  796.       LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
  797.       LLVMValueRef scalar_pred = pred ?
  798.          LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
  799.  
  800.       if (0)
  801.          lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
  802.                          ii, val, index, scalar_ptr);
  803.  
  804.       if (scalar_pred) {
  805.          LLVMValueRef real_val, dst_val;
  806.          dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
  807.          real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
  808.          LLVMBuildStore(builder, real_val, scalar_ptr);
  809.       }
  810.       else {
  811.          LLVMBuildStore(builder, val, scalar_ptr);
  812.       }
  813.    }
  814. }
  815.  
  816.  
/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, add the base index and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_ind_register *indirect_reg)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
   /* Component of the indirect register holding the relative index.
    * NOTE(review): an older comment here claimed "always use X
    * component", but the register's own Swizzle field is honored. */
   unsigned swizzle = indirect_reg->Swizzle;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   /* This file must have been flagged for indirect addressing. */
   assert(bld->indirect_files & (1 << reg_file));

   /* Splat the constant base register index across all lanes. */
   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);

   assert(swizzle < 4);
   switch (indirect_reg->File) {
   case TGSI_FILE_ADDRESS:
      rel = LLVMBuildLoad(builder,
                          bld->addr[indirect_reg->Index][swizzle],
                          "load addr reg");
      /* ADDR LLVM values already have LLVM integer type. */
      break;
   case TGSI_FILE_TEMPORARY:
      rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
      rel = LLVMBuildLoad(builder, rel, "load temp reg");
      /* TEMP LLVM values always have LLVM float type, but for indirection, the
       * value actually stored is expected to be an integer */
      rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
      break;
   default:
      assert(0);
      rel = uint_bld->zero;
   }

   /* index = base + rel, per lane */
   index = lp_build_add(uint_bld, base, rel);

   /* Clamp against the declared size of the register file so an
    * out-of-range indirect index cannot address past the allocated
    * storage. */
   max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
                                      uint_bld->type,
                                      bld->bld_base.info->file_max[reg_file]);

   /* Unsigned min implements the clamp; requires an unsigned type. */
   assert(!uint_bld->type.sign);
   index = lp_build_min(uint_bld, index, max_index);

   return index;
}
  872.  
  873. static struct lp_build_context *
  874. stype_to_fetch(struct lp_build_tgsi_context * bld_base,
  875.                enum tgsi_opcode_type stype)
  876. {
  877.    struct lp_build_context *bld_fetch;
  878.  
  879.    switch (stype) {
  880.    case TGSI_TYPE_FLOAT:
  881.    case TGSI_TYPE_UNTYPED:
  882.       bld_fetch = &bld_base->base;
  883.       break;
  884.    case TGSI_TYPE_UNSIGNED:
  885.       bld_fetch = &bld_base->uint_bld;
  886.       break;
  887.    case TGSI_TYPE_SIGNED:
  888.       bld_fetch = &bld_base->int_bld;
  889.       break;
  890.    case TGSI_TYPE_VOID:
  891.    case TGSI_TYPE_DOUBLE:
  892.    default:
  893.       assert(0);
  894.       bld_fetch = NULL;
  895.       break;
  896.    }
  897.    return bld_fetch;
  898. }
  899.  
/*
 * Fetch one channel of a constant-buffer register as a vector.
 * Direct accesses broadcast a single scalar load; indirect accesses
 * gather a (possibly different) scalar per lane.
 */
static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef indirect_index = NULL;
   unsigned dimension = 0;
   LLVMValueRef dimension_index;
   LLVMValueRef consts_ptr;
   LLVMValueRef res;

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0);

   /* 2D constants: the Dimension selects which constant buffer. */
   if (reg->Register.Dimension) {
      assert(!reg->Dimension.Indirect);
      dimension = reg->Dimension.Index;
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
   }

   dimension_index = lp_build_const_int32(gallivm, dimension);
   consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, dimension_index);

   if (reg->Register.Indirect) {
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   }

   if (reg->Register.Indirect) {
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec;  /* index into the const buffer */

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      /* Gather values from the constant buffer */
      res = build_gather(&bld_base->base, consts_ptr, index_vec);
   }
   else {
      LLVMValueRef index;  /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;

      /* Direct access: one scalar load broadcast to all lanes. */
      index = lp_build_const_int32(gallivm, reg->Register.Index*4 + swizzle);

      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                &index, 1, "");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
      res = lp_build_broadcast_scalar(&bld_base->base, scalar);
   }

   /* Constants are stored as floats; reinterpret the bits when an
    * integer-typed value was requested. */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }
   return res;
}
  966.  
/*
 * Fetch one channel of an immediate register as a vector.
 * Direct accesses return the prebuilt immediate vector; indirect
 * accesses gather per-lane from the imms_array alloca.
 */
static LLVMValueRef
emit_fetch_immediate(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *float_bld = &bld_base->base;
   LLVMValueRef res = NULL;
   LLVMValueRef indirect_index = NULL;

   if (reg->Register.Indirect) {
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   }

   if (reg->Register.Indirect) {
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(bld->bld_base.base.gallivm,
                                uint_bld->type, swizzle);
      LLVMValueRef length_vec =
         lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
                                bld->bld_base.base.type.length);
      LLVMValueRef index_vec;  /* linearized index into the immediates array */
      LLVMValueRef imms_array;
      LLVMValueRef pixel_offsets;
      LLVMValueRef offsets[LP_MAX_VECTOR_LENGTH];
      LLVMTypeRef float4_ptr_type;
      int i;

      /* build pixel offset vector: {0, 1, 2, 3, ...} */
      for (i = 0; i < float_bld->type.length; i++) {
         offsets[i] = lp_build_const_int32(gallivm, i);
      }
      pixel_offsets = LLVMConstVector(offsets, float_bld->type.length);

      /* index_vec = (indirect_index * 4 + swizzle) * length + pixel_offsets */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
      index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
      index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);

      /* cast imms_array pointer to float* */
      float4_ptr_type = LLVMPointerType(
         LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
      imms_array = LLVMBuildBitCast(builder, bld->imms_array,
                                    float4_ptr_type, "");

      /* Gather values from the immediates array */
      res = build_gather(&bld_base->base, imms_array, index_vec);
   }
   else {
      /* Direct access: the immediate vector was prebuilt at declaration. */
      res = bld->immediates[reg->Register.Index][swizzle];
   }

   /* Immediates are stored as floats; reinterpret the bits when an
    * integer-typed value was requested. */
   if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }
   return res;
}
  1035.  
/*
 * Fetch one channel of a shader input register as a vector.
 * Direct accesses read the prebuilt input vector (or the inputs alloca
 * when inputs use indirect addressing anywhere); indirect accesses
 * gather per-lane from the inputs_array alloca.
 */
static LLVMValueRef
emit_fetch_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef indirect_index = NULL;
   LLVMValueRef res;

   if (reg->Register.Indirect) {
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   }

   if (reg->Register.Indirect) {
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
      LLVMValueRef length_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, bld->bld_base.base.type.length);
      LLVMValueRef index_vec;  /* linearized index into the inputs array */
      LLVMValueRef inputs_array;
      LLVMTypeRef float4_ptr_type;

      /* index_vec = (indirect_index * 4 + swizzle) * length
       * NOTE(review): unlike the temporary/immediate fetch paths, no
       * per-pixel offset is added here, so each lane reads element 0 of
       * the selected input vector -- confirm this is intended. */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
      index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

      /* cast inputs_array pointer to float* */
      float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array,
                                         float4_ptr_type, "");

      /* Gather values from the inputs array */
      res = build_gather(&bld_base->base, inputs_array, index_vec);
   } else {
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
         /* Inputs live in the alloca array; load the requested slot. */
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                        reg->Register.Index * 4 + swizzle);
         LLVMValueRef input_ptr =  LLVMBuildGEP(builder,
                                                bld->inputs_array, &lindex, 1, "");
         res = LLVMBuildLoad(builder, input_ptr, "");
      }
      else {
         res = bld->inputs[reg->Register.Index][swizzle];
      }
   }

   assert(res);

   /* Inputs are stored as floats; reinterpret the bits when an
    * integer-typed value was requested. */
   if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}
  1101.  
  1102.  
  1103. static LLVMValueRef
  1104. emit_fetch_gs_input(
  1105.    struct lp_build_tgsi_context * bld_base,
  1106.    const struct tgsi_full_src_register * reg,
  1107.    enum tgsi_opcode_type stype,
  1108.    unsigned swizzle)
  1109. {
  1110.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  1111.    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
  1112.    LLVMBuilderRef builder = gallivm->builder;
  1113.    LLVMValueRef attrib_index = NULL;
  1114.    LLVMValueRef vertex_index = NULL;
  1115.    LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
  1116.    LLVMValueRef res;
  1117.  
  1118.    if (reg->Register.Indirect) {
  1119.       attrib_index = get_indirect_index(bld,
  1120.                                           reg->Register.File,
  1121.                                           reg->Register.Index,
  1122.                                           &reg->Indirect);
  1123.    } else {
  1124.       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
  1125.    }
  1126.    
  1127.    if (reg->Dimension.Indirect) {
  1128.       vertex_index = get_indirect_index(bld,
  1129.                                         reg->Register.File,
  1130.                                         reg->Dimension.Index,
  1131.                                         &reg->DimIndirect);
  1132.    } else {
  1133.       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
  1134.    }
  1135.  
  1136.    res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
  1137.                                     reg->Dimension.Indirect,
  1138.                                     vertex_index, attrib_index,
  1139.                                     swizzle_index);
  1140.  
  1141.    assert(res);
  1142.  
  1143.    if (stype == TGSI_TYPE_UNSIGNED) {
  1144.       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
  1145.    } else if (stype == TGSI_TYPE_SIGNED) {
  1146.       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
  1147.    }
  1148.  
  1149.    return res;
  1150. }
  1151.  
/*
 * Fetch one channel of a temporary register as a vector.
 * Direct accesses load the per-channel alloca; indirect accesses gather
 * per-lane from the temps_array alloca.
 */
static LLVMValueRef
emit_fetch_temporary(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *float_bld = &bld_base->base;
   LLVMValueRef indirect_index = NULL;
   LLVMValueRef res;

   if (reg->Register.Indirect) {
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   }

   if (reg->Register.Indirect) {
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, swizzle);
      LLVMValueRef length_vec =
         lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type,
                                bld->bld_base.base.type.length);
      LLVMValueRef index_vec;  /* linearized index into the temps array */
      LLVMValueRef temps_array;
      LLVMValueRef pixel_offsets;
      LLVMValueRef offsets[LP_MAX_VECTOR_LENGTH];
      LLVMTypeRef float4_ptr_type;
      int i;

      /* build pixel offset vector: {0, 1, 2, 3, ...} */
      for (i = 0; i < float_bld->type.length; i++) {
         offsets[i] = lp_build_const_int32(gallivm, i);
      }
      pixel_offsets = LLVMConstVector(offsets, float_bld->type.length);

      /* index_vec = (indirect_index * 4 + swizzle) * length + pixel_offsets */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
      index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
      index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);

      /* cast temps_array pointer to float* */
      float4_ptr_type = LLVMPointerType(LLVMFloatTypeInContext(bld->bld_base.base.gallivm->context), 0);
      temps_array = LLVMBuildBitCast(builder, bld->temps_array,
                                     float4_ptr_type, "");

      /* Gather values from the temporary register array */
      res = build_gather(&bld_base->base, temps_array, index_vec);
   }
   else {
      LLVMValueRef temp_ptr;
      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
      res = LLVMBuildLoad(builder, temp_ptr, "");
   }

   /* Temporaries are stored as floats; reinterpret the bits when an
    * integer-typed value was requested. */
   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
  1220.  
  1221. static LLVMValueRef
  1222. emit_fetch_system_value(
  1223.    struct lp_build_tgsi_context * bld_base,
  1224.    const struct tgsi_full_src_register * reg,
  1225.    enum tgsi_opcode_type stype,
  1226.    unsigned swizzle)
  1227. {
  1228.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  1229.    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
  1230.    const struct tgsi_shader_info *info = bld->bld_base.info;
  1231.    LLVMBuilderRef builder = gallivm->builder;
  1232.    LLVMValueRef res;
  1233.    enum tgsi_opcode_type atype; // Actual type of the value
  1234.  
  1235.    assert(!reg->Register.Indirect);
  1236.  
  1237.    switch (info->system_value_semantic_name[reg->Register.Index]) {
  1238.    case TGSI_SEMANTIC_INSTANCEID:
  1239.       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
  1240.       atype = TGSI_TYPE_UNSIGNED;
  1241.       break;
  1242.  
  1243.    case TGSI_SEMANTIC_VERTEXID:
  1244.       res = bld->system_values.vertex_id;
  1245.       atype = TGSI_TYPE_UNSIGNED;
  1246.       break;
  1247.  
  1248.    case TGSI_SEMANTIC_PRIMID:
  1249.       res = bld->system_values.prim_id;
  1250.       atype = TGSI_TYPE_UNSIGNED;
  1251.       break;
  1252.  
  1253.    default:
  1254.       assert(!"unexpected semantic in emit_fetch_system_value");
  1255.       res = bld_base->base.zero;
  1256.       atype = TGSI_TYPE_FLOAT;
  1257.       break;
  1258.    }
  1259.  
  1260.    if (atype != stype) {
  1261.       if (stype == TGSI_TYPE_FLOAT) {
  1262.          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
  1263.       } else if (stype == TGSI_TYPE_UNSIGNED) {
  1264.          res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
  1265.       } else if (stype == TGSI_TYPE_SIGNED) {
  1266.          res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
  1267.       }
  1268.    }
  1269.  
  1270.    return res;
  1271. }
  1272.  
  1273. /**
  1274.  * Register fetch with derivatives.
  1275.  */
  1276. static void
  1277. emit_fetch_deriv(
  1278.    struct lp_build_tgsi_soa_context *bld,
  1279.    LLVMValueRef src,
  1280.    LLVMValueRef *res,
  1281.    LLVMValueRef *ddx,
  1282.    LLVMValueRef *ddy)
  1283. {
  1284.    if(res)
  1285.       *res = src;
  1286.  
  1287.    /* TODO: use interpolation coeffs for inputs */
  1288.  
  1289.    if(ddx)
  1290.       *ddx = lp_build_ddx(&bld->bld_base.base, src);
  1291.  
  1292.    if(ddy)
  1293.       *ddy = lp_build_ddy(&bld->bld_base.base, src);
  1294. }
  1295.  
  1296.  
/**
 * Predicate.
 *
 * Fill pred[0..3] with per-channel predicate masks for the instruction,
 * or NULLs when the instruction is not predicated. Each mask is built by
 * comparing the predicate register channel against zero, optionally
 * negated; channels sharing a swizzle source are computed only once.
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};  /* per-source-channel cache */
   LLVMValueRef value;
   unsigned chan;

   /* Unpredicated instruction: no masks at all. */
   if (!inst->Instruction.Predicate) {
      TGSI_FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   TGSI_FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->bld_base.base.gallivm,
                                  bld->bld_base.base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->bld_base.base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}
  1363.  
/**
 * Register store.
 *
 * Store one channel of 'value' into destination register 'index' of the
 * instruction, applying saturation, the optional per-channel predicate
 * and the current execution mask.
 */
static void
emit_store_chan(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_instruction *inst,
   unsigned index,
   unsigned chan_index,
   LLVMValueRef pred,
   LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   const struct tgsi_full_dst_register *reg = &inst->Dst[index];
   struct lp_build_context *float_bld = &bld_base->base;
   struct lp_build_context *int_bld = &bld_base->int_bld;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef indirect_index = NULL;
   enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);

   /*
    * Apply saturation.
    *
    * It is always assumed to be float.
    */
   switch( inst->Instruction.Saturate ) {
   case TGSI_SAT_NONE:
      break;

   case TGSI_SAT_ZERO_ONE:
      /* clamp to [0, 1] */
      assert(dtype == TGSI_TYPE_FLOAT ||
             dtype == TGSI_TYPE_UNTYPED);
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      value = lp_build_max(float_bld, value, float_bld->zero);
      value = lp_build_min(float_bld, value, float_bld->one);
      break;

   case TGSI_SAT_MINUS_PLUS_ONE:
      /* clamp to [-1, 1] */
      assert(dtype == TGSI_TYPE_FLOAT ||
             dtype == TGSI_TYPE_UNTYPED);
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      value = lp_build_max(float_bld, value, lp_build_const_vec(gallivm, float_bld->type, -1.0));
      value = lp_build_min(float_bld, value, float_bld->one);
      break;

   default:
      assert(0);
   }

   /* Resolve (and clamp) the destination index if indirectly addressed. */
   if (reg->Register.Indirect) {
      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);
   } else {
      assert(reg->Register.Index <=
                             bld_base->info->file_max[reg->Register.File]);
   }

   switch( reg->Register.File ) {
   case TGSI_FILE_OUTPUT:
      /* Outputs are always stored as floats */
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");

      if (reg->Register.Indirect) {
         /* Indirect destination: scatter each lane into its own slot of
          * the outputs alloca array. */
         LLVMValueRef chan_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, float_bld->type.length);
         LLVMValueRef index_vec;  /* indexes into the temp registers */
         LLVMValueRef outputs_array;
         LLVMValueRef pixel_offsets;
         LLVMTypeRef float_ptr_type;
         int i;

         /* build pixel offset vector: {0, 1, 2, 3, ...} */
         pixel_offsets = uint_bld->undef;
         for (i = 0; i < float_bld->type.length; i++) {
            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
                                                   ii, ii, "");
         }

         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);

         float_ptr_type =
            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         outputs_array = LLVMBuildBitCast(builder, bld->outputs_array,
                                          float_ptr_type, "");

         /* Scatter store values into temp registers */
         emit_mask_scatter(bld, outputs_array, index_vec, value,
                           &bld->exec_mask, pred);
      }
      else {
         /* Direct destination: masked store into the output alloca. */
         LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
                                                  chan_index);
         lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr);
      }
      break;

   case TGSI_FILE_TEMPORARY:
      /* Temporaries are always stored as floats */
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");

      if (reg->Register.Indirect) {
         /* Indirect destination: scatter each lane into its own slot of
          * the temps alloca array. */
         LLVMValueRef chan_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type, chan_index);
         LLVMValueRef length_vec =
            lp_build_const_int_vec(gallivm, uint_bld->type,
                                   float_bld->type.length);
         LLVMValueRef index_vec;  /* indexes into the temp registers */
         LLVMValueRef temps_array;
         LLVMValueRef pixel_offsets;
         LLVMTypeRef float_ptr_type;
         int i;

         /* build pixel offset vector: {0, 1, 2, 3, ...} */
         pixel_offsets = uint_bld->undef;
         for (i = 0; i < float_bld->type.length; i++) {
            LLVMValueRef ii = lp_build_const_int32(gallivm, i);
            pixel_offsets = LLVMBuildInsertElement(builder, pixel_offsets,
                                                   ii, ii, "");
         }

         /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
         index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
         index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
         index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);

         float_ptr_type =
            LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
         temps_array = LLVMBuildBitCast(builder, bld->temps_array,
                                        float_ptr_type, "");

         /* Scatter store values into temp registers */
         emit_mask_scatter(bld, temps_array, index_vec, value,
                           &bld->exec_mask, pred);
      }
      else {
         LLVMValueRef temp_ptr;
         temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index,
                                        chan_index);
         lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr);
      }
      break;

   case TGSI_FILE_ADDRESS:
      assert(dtype == TGSI_TYPE_SIGNED);
      assert(LLVMTypeOf(value) == int_bld->vec_type);
      /* NOTE(review): this bitcast is a no-op given the assert above;
       * kept for symmetry with the other cases. */
      value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, int_bld, pred, value,
                         bld->addr[reg->Register.Index][chan_index]);
      break;

   case TGSI_FILE_PREDICATE:
      assert(LLVMTypeOf(value) == float_bld->vec_type);
      value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value,
                         bld->preds[reg->Register.Index][chan_index]);
      break;

   default:
      assert( 0 );
   }

   /* dtype is referenced only by asserts in release builds */
   (void)dtype;
}
  1539.  
  1540. static void
  1541. emit_store(
  1542.    struct lp_build_tgsi_context * bld_base,
  1543.    const struct tgsi_full_instruction * inst,
  1544.    const struct tgsi_opcode_info * info,
  1545.    LLVMValueRef dst[4])
  1546.  
  1547. {
  1548.    unsigned chan_index;
  1549.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  1550.  
  1551.    if(info->num_dst) {
  1552.       LLVMValueRef pred[TGSI_NUM_CHANNELS];
  1553.  
  1554.       emit_fetch_predicate( bld, inst, pred );
  1555.  
  1556.       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  1557.          emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
  1558.       }
  1559.    }
  1560. }
  1561.  
  1562. /**
  1563.  * High-level instruction translators.
  1564.  */
  1565.  
/**
 * Translate a classic TGSI texture opcode (TEX and its TXB/TXL/TXD/TXP
 * variants) into a texel fetch through the bound sampler generator.
 *
 * \param bld       SoA shader translation context
 * \param inst      the TGSI instruction being translated
 * \param modifier  which modifier the opcode carries
 *                  (lod bias / explicit lod / explicit derivatives / projection)
 * \param texel     output: the four SoA texel channel values
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum lp_build_tex_modifier modifier,
          LLVMValueRef *texel)
{
   unsigned unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef oow = NULL;
   LLVMValueRef coords[4];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_derivatives *deriv_ptr = NULL;
   boolean scalar_lod;
   unsigned num_coords, num_derivs, num_offsets;
   unsigned i;

   /* No sampler generator supplied: return undefs so code generation
    * for the rest of the shader can still proceed. */
   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /* Per-target component counts.  Shadow targets carry the comparison
    * value as an extra coordinate; array targets carry the layer. */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      num_coords = 2;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D:
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      num_coords = 3;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
   case TGSI_TEXTURE_2D_ARRAY:
      num_coords = 3;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_coords = 3;
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      num_coords = 4;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOWCUBE:
      num_coords = 4;
      num_offsets = 2;
      num_derivs = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* Note lod and especially projected are illegal in a LOT of cases */
   /* Bias and explicit lod are taken from channel .w (index 3) of the
    * coordinate operand, which is only free when the target uses fewer
    * than 4 coordinates. */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
      assert(num_coords < 4);
      lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
      explicit_lod = NULL;
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      assert(num_coords < 4);
      lod_bias = NULL;
      explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   /* TXP: divide the coordinates by .w, done as multiply-by-reciprocal. */
   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
      assert(num_coords < 4);
      oow = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
      oow = lp_build_rcp(&bld->bld_base.base, oow);
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
   }
   /* Pad unused coordinate slots with undef. */
   for (i = num_coords; i < 4; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* TXD: explicit derivatives come from src1/src2, which pushes the
    * sampler operand to src3; all other forms have it at src1. */
   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 1, dim );
         derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 2, dim );
      }
      deriv_ptr = &derivs;
      unit = inst->Src[3].Register.Index;
   } else {
      unit = inst->Src[1].Register.Index;
   }

   /* some advanced gather instructions (txgo) would require 4 offsets */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
      }
   }

   /* TODO: use scalar lod if explicit_lod, lod_bias or derivs are broadcasted scalars */
   scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;

   /* Classic opcodes use a single index for both texture and sampler unit. */
   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->bld_base.base.gallivm,
                                  bld->bld_base.base.type,
                                  FALSE,
                                  unit, unit,
                                  coords,
                                  offsets,
                                  deriv_ptr,
                                  lod_bias, explicit_lod, scalar_lod,
                                  texel);
}
  1711.  
/**
 * Translate a new-style SAMPLE* opcode into a texel fetch.
 *
 * Unlike the classic TEX opcodes, texture and sampler units are separate
 * operands (src1 and src2) and the texture target comes from the declared
 * sampler view rather than from the instruction itself.
 *
 * \param bld       SoA shader translation context
 * \param inst      the TGSI instruction being translated
 * \param modifier  lod bias / explicit lod / lod zero / explicit derivatives
 * \param compare   TRUE for shadow-compare forms (comparison value in src3.x)
 * \param texel     output: the four SoA texel channel values
 */
static void
emit_sample(struct lp_build_tgsi_soa_context *bld,
            const struct tgsi_full_instruction *inst,
            enum lp_build_tex_modifier modifier,
            boolean compare,
            LLVMValueRef *texel)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   unsigned texture_unit, sampler_unit;
   LLVMValueRef lod_bias, explicit_lod;
   LLVMValueRef coords[4];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_derivatives *deriv_ptr = NULL;
   boolean scalar_lod;
   unsigned num_coords, num_offsets, num_derivs;
   unsigned i;

   /* No sampler generator supplied: return undefs so code generation
    * for the rest of the shader can still proceed. */
   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /*
    * unlike old-style tex opcodes the texture/sampler indices
    * always come from src1 and src2 respectively.
    */
   texture_unit = inst->Src[1].Register.Index;
   sampler_unit = inst->Src[2].Register.Index;

   /*
    * Note inst->Texture.Texture will contain the number of offsets,
    * however the target information is NOT there and comes from the
    * declared sampler views instead.
    */
   switch (bld->sv[texture_unit].Resource) {
   case TGSI_TEXTURE_1D:
      num_coords = 1;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      num_coords = 2;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      num_coords = 3;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_coords = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_coords = 3;
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      num_coords = 4;
      num_offsets = 2;
      num_derivs = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* Bias / lod come from src3.x for the SAMPLE forms. */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
      lod_bias = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
      explicit_lod = NULL;
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      lod_bias = NULL;
      explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
      lod_bias = NULL;
      /* XXX might be better to explicitly pass the level zero information */
      explicit_lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
   }
   else {
      lod_bias = NULL;
      explicit_lod = NULL;
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
   }
   /* Pad unused coordinate slots with undef. */
   for (i = num_coords; i < 4; i++) {
      coords[i] = bld->bld_base.base.undef;
   }
   /*
    * XXX: whack shadow comparison value into place.
    * Should probably fix the interface for separate value
    * (it will not work for cube arrays if it is part of coords).
    */
   if (compare) {
      unsigned c_coord = num_coords > 2 ? 3 : 2;
      assert(num_coords < 4);
      coords[c_coord] = lp_build_emit_fetch( &bld->bld_base, inst, 3, 0 );
   }

   /* Explicit derivatives come from src3 (ddx) and src4 (ddy). */
   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      unsigned dim;
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 3, dim );
         derivs.ddy[dim] = lp_build_emit_fetch( &bld->bld_base, inst, 4, dim );
      }
      deriv_ptr = &derivs;
   }

   /* some advanced gather instructions (txgo) would require 4 offsets */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
      }
   }

   /* TODO: use scalar lod if explicit_lod, lod_bias or derivs are broadcasted scalars */
   scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->bld_base.base.gallivm,
                                  bld->bld_base.base.type,
                                  FALSE,
                                  texture_unit, sampler_unit,
                                  coords,
                                  offsets,
                                  deriv_ptr,
                                  lod_bias, explicit_lod, scalar_lod,
                                  texel);
}
  1858.  
/**
 * Translate a texel-fetch opcode (TXF when is_samplei == FALSE,
 * SAMPLE_I when TRUE) into a fetch through the sampler generator.
 *
 * Coordinates are integer vectors; the lod, where present, comes from
 * src0.w.  NOTE(review): the TRUE passed to emit_fetch_texel below
 * presumably marks this as a raw (unfiltered) fetch — confirm against
 * the emit_fetch_texel prototype.
 *
 * \param bld         SoA shader translation context
 * \param inst        the TGSI instruction being translated
 * \param texel       output: the four SoA texel channel values
 * \param is_samplei  TRUE for SAMPLE_I (target from declared sampler view),
 *                    FALSE for TXF (target from the instruction)
 */
static void
emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
                   const struct tgsi_full_instruction *inst,
                   LLVMValueRef *texel,
                   boolean is_samplei)
{
   unsigned unit, target;
   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
   LLVMValueRef explicit_lod = NULL;
   LLVMValueRef coords[3];
   LLVMValueRef offsets[3] = { NULL };
   boolean scalar_lod;
   unsigned num_coords;
   unsigned dims;
   unsigned i;

   /* No sampler generator supplied: return undefs so code generation
    * for the rest of the shader can still proceed. */
   if (!bld->sampler) {
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = coord_undef;
      }
      return;
   }

   unit = inst->Src[1].Register.Index;

   if (is_samplei) {
      target = bld->sv[unit].Resource;
   }
   else {
      target = inst->Texture.Texture;
   }

   /* num_coords includes the array layer, dims does not. */
   switch (target) {
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_BUFFER:
      num_coords = 1;
      dims = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      num_coords = 2;
      dims = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_coords = 2;
      dims = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      num_coords = 3;
      dims = 2;
      break;
   case TGSI_TEXTURE_3D:
      num_coords = 3;
      dims = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* always have lod except for buffers ? */
   if (target != TGSI_TEXTURE_BUFFER) {
      explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 3 );
   }

   for (i = 0; i < num_coords; i++) {
      coords[i] = lp_build_emit_fetch( &bld->bld_base, inst, 0, i );
   }
   /* Pad unused coordinate slots with undef. */
   for (i = num_coords; i < 3; i++) {
      coords[i] = coord_undef;
   }

   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      for (dim = 0; dim < dims; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim );
      }
   }

   /* TODO: use scalar lod if explicit_lod is broadcasted scalar */
   scalar_lod = bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT;

   bld->sampler->emit_fetch_texel(bld->sampler,
                                  bld->bld_base.base.gallivm,
                                  bld->bld_base.base.type,
                                  TRUE,
                                  unit, unit,
                                  coords,
                                  offsets,
                                  NULL,
                                  NULL, explicit_lod, scalar_lod,
                                  texel);
}
  1953.  
  1954. static void
  1955. emit_size_query( struct lp_build_tgsi_soa_context *bld,
  1956.                  const struct tgsi_full_instruction *inst,
  1957.                  LLVMValueRef *sizes_out,
  1958.                  boolean is_sviewinfo)
  1959. {
  1960.    LLVMValueRef explicit_lod;
  1961.    unsigned has_lod;
  1962.    unsigned i;
  1963.    unsigned unit = inst->Src[1].Register.Index;
  1964.    unsigned target;
  1965.  
  1966.    if (is_sviewinfo) {
  1967.       target = bld->sv[unit].Resource;
  1968.    }
  1969.    else {
  1970.       target = inst->Texture.Texture;
  1971.    }
  1972.    switch (target) {
  1973.    case TGSI_TEXTURE_BUFFER:
  1974.    case TGSI_TEXTURE_RECT:
  1975.    case TGSI_TEXTURE_SHADOWRECT:
  1976.       has_lod = 0;
  1977.       break;
  1978.    default:
  1979.       has_lod = 1;
  1980.       break;
  1981.    }
  1982.  
  1983.    if (!bld->sampler) {
  1984.       _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
  1985.       for (i = 0; i < 4; i++)
  1986.          sizes_out[i] = bld->bld_base.int_bld.undef;
  1987.       return;
  1988.    }
  1989.  
  1990.    if (has_lod)
  1991.       explicit_lod = lp_build_emit_fetch( &bld->bld_base, inst, 0, 0 );
  1992.    else
  1993.       explicit_lod = NULL;
  1994.  
  1995.    bld->sampler->emit_size_query(bld->sampler,
  1996.                                  bld->bld_base.base.gallivm,
  1997.                                  bld->bld_base.int_bld.type,
  1998.                                  unit,
  1999.                                  is_sviewinfo,
  2000.                                  explicit_lod,
  2001.                                  sizes_out);
  2002. }
  2003.  
  2004. static boolean
  2005. near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
  2006.                    int pc)
  2007. {
  2008.    int i;
  2009.  
  2010.    for (i = 0; i < 5; i++) {
  2011.       unsigned opcode;
  2012.  
  2013.       if (pc + i >= bld->bld_base.info->num_instructions)
  2014.          return TRUE;
  2015.  
  2016.       opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
  2017.  
  2018.       if (opcode == TGSI_OPCODE_END)
  2019.          return TRUE;
  2020.  
  2021.       if (opcode == TGSI_OPCODE_TEX ||
  2022.           opcode == TGSI_OPCODE_TXP ||
  2023.           opcode == TGSI_OPCODE_TXD ||
  2024.           opcode == TGSI_OPCODE_TXB ||
  2025.           opcode == TGSI_OPCODE_TXL ||
  2026.           opcode == TGSI_OPCODE_TXF ||
  2027.           opcode == TGSI_OPCODE_TXQ ||
  2028.           opcode == TGSI_OPCODE_CAL ||
  2029.           opcode == TGSI_OPCODE_CALLNZ ||
  2030.           opcode == TGSI_OPCODE_IF ||
  2031.           opcode == TGSI_OPCODE_UIF ||
  2032.           opcode == TGSI_OPCODE_BGNLOOP ||
  2033.           opcode == TGSI_OPCODE_SWITCH)
  2034.          return FALSE;
  2035.    }
  2036.  
  2037.    return TRUE;
  2038. }
  2039.  
  2040.  
  2041.  
  2042. /**
  2043.  * Kill fragment if any of the src register values are negative.
  2044.  */
  2045. static void
  2046. emit_kill_if(
  2047.    struct lp_build_tgsi_soa_context *bld,
  2048.    const struct tgsi_full_instruction *inst,
  2049.    int pc)
  2050. {
  2051.    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
  2052.    const struct tgsi_full_src_register *reg = &inst->Src[0];
  2053.    LLVMValueRef terms[TGSI_NUM_CHANNELS];
  2054.    LLVMValueRef mask;
  2055.    unsigned chan_index;
  2056.  
  2057.    memset(&terms, 0, sizeof terms);
  2058.  
  2059.    TGSI_FOR_EACH_CHANNEL( chan_index ) {
  2060.       unsigned swizzle;
  2061.  
  2062.       /* Unswizzle channel */
  2063.       swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
  2064.  
  2065.       /* Check if the component has not been already tested. */
  2066.       assert(swizzle < TGSI_NUM_CHANNELS);
  2067.       if( !terms[swizzle] )
  2068.          /* TODO: change the comparison operator instead of setting the sign */
  2069.          terms[swizzle] =  lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
  2070.    }
  2071.  
  2072.    mask = NULL;
  2073.    TGSI_FOR_EACH_CHANNEL( chan_index ) {
  2074.       if(terms[chan_index]) {
  2075.          LLVMValueRef chan_mask;
  2076.  
  2077.          /*
  2078.           * If term < 0 then mask = 0 else mask = ~0.
  2079.           */
  2080.          chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
  2081.  
  2082.          if(mask)
  2083.             mask = LLVMBuildAnd(builder, mask, chan_mask, "");
  2084.          else
  2085.             mask = chan_mask;
  2086.       }
  2087.    }
  2088.  
  2089.    if(mask) {
  2090.       lp_build_mask_update(bld->mask, mask);
  2091.  
  2092.       if (!near_end_of_shader(bld, pc))
  2093.          lp_build_mask_check(bld->mask);
  2094.    }
  2095. }
  2096.  
  2097.  
  2098. /**
  2099.  * Unconditional fragment kill.
  2100.  * The only predication is the execution mask which will apply if
  2101.  * we're inside a loop or conditional.
  2102.  */
  2103. static void
  2104. emit_kill(struct lp_build_tgsi_soa_context *bld,
  2105.           int pc)
  2106. {
  2107.    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
  2108.    LLVMValueRef mask;
  2109.  
  2110.    /* For those channels which are "alive", disable fragment shader
  2111.     * execution.
  2112.     */
  2113.    if (bld->exec_mask.has_mask) {
  2114.       mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
  2115.    }
  2116.    else {
  2117.       LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
  2118.       mask = zero;
  2119.    }
  2120.  
  2121.    lp_build_mask_update(bld->mask, mask);
  2122.  
  2123.    if (!near_end_of_shader(bld, pc))
  2124.       lp_build_mask_check(bld->mask);
  2125. }
  2126.  
  2127.  
  2128. /**
  2129.  * Emit code which will dump the value of all the temporary registers
  2130.  * to stdout.
  2131.  */
  2132. static void
  2133. emit_dump_temps(struct lp_build_tgsi_soa_context *bld)
  2134. {
  2135.    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
  2136.    LLVMBuilderRef builder = gallivm->builder;
  2137.    LLVMValueRef temp_ptr;
  2138.    LLVMValueRef i0 = lp_build_const_int32(gallivm, 0);
  2139.    LLVMValueRef i1 = lp_build_const_int32(gallivm, 1);
  2140.    LLVMValueRef i2 = lp_build_const_int32(gallivm, 2);
  2141.    LLVMValueRef i3 = lp_build_const_int32(gallivm, 3);
  2142.    int index;
  2143.    int n = bld->bld_base.info->file_max[TGSI_FILE_TEMPORARY];
  2144.  
  2145.    for (index = 0; index < n; index++) {
  2146.       LLVMValueRef idx = lp_build_const_int32(gallivm, index);
  2147.       LLVMValueRef v[4][4], res;
  2148.       int chan;
  2149.  
  2150.       lp_build_printf(gallivm, "TEMP[%d]:\n", idx);
  2151.  
  2152.       for (chan = 0; chan < 4; chan++) {
  2153.          temp_ptr = lp_get_temp_ptr_soa(bld, index, chan);
  2154.          res = LLVMBuildLoad(builder, temp_ptr, "");
  2155.          v[chan][0] = LLVMBuildExtractElement(builder, res, i0, "");
  2156.          v[chan][1] = LLVMBuildExtractElement(builder, res, i1, "");
  2157.          v[chan][2] = LLVMBuildExtractElement(builder, res, i2, "");
  2158.          v[chan][3] = LLVMBuildExtractElement(builder, res, i3, "");
  2159.       }
  2160.  
  2161.       lp_build_printf(gallivm, "  X: %f %f %f %f\n",
  2162.                       v[0][0], v[0][1], v[0][2], v[0][3]);
  2163.       lp_build_printf(gallivm, "  Y: %f %f %f %f\n",
  2164.                       v[1][0], v[1][1], v[1][2], v[1][3]);
  2165.       lp_build_printf(gallivm, "  Z: %f %f %f %f\n",
  2166.                       v[2][0], v[2][1], v[2][2], v[2][3]);
  2167.       lp_build_printf(gallivm, "  W: %f %f %f %f\n",
  2168.                       v[3][0], v[3][1], v[3][2], v[3][3]);
  2169.    }
  2170. }
  2171.  
  2172.  
  2173.  
  2174. void
  2175. lp_emit_declaration_soa(
  2176.    struct lp_build_tgsi_context *bld_base,
  2177.    const struct tgsi_full_declaration *decl)
  2178. {
  2179.    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
  2180.    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
  2181.    LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
  2182.    const unsigned first = decl->Range.First;
  2183.    const unsigned last = decl->Range.Last;
  2184.    unsigned idx, i;
  2185.  
  2186.    for (idx = first; idx <= last; ++idx) {
  2187.       assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
  2188.       switch (decl->Declaration.File) {
  2189.       case TGSI_FILE_TEMPORARY:
  2190.          assert(idx < LP_MAX_TGSI_TEMPS);
  2191.          if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
  2192.             for (i = 0; i < TGSI_NUM_CHANNELS; i++)
  2193.                bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
  2194.          }
  2195.          break;
  2196.  
  2197.       case TGSI_FILE_OUTPUT:
  2198.          if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
  2199.             for (i = 0; i < TGSI_NUM_CHANNELS; i++)
  2200.                bld->outputs[idx][i] = lp_build_alloca(gallivm,
  2201.                                                       vec_type, "output");
  2202.          }
  2203.          break;
  2204.  
  2205.       case TGSI_FILE_ADDRESS:
  2206.          /* ADDR registers are only allocated with an integer LLVM IR type,
  2207.           * as they are guaranteed to always have integers.
  2208.           * XXX: Not sure if this exception is worthwhile (or the whole idea of
  2209.           * an ADDR register for that matter).
  2210.           */
  2211.          assert(idx < LP_MAX_TGSI_ADDRS);
  2212.          for (i = 0; i < TGSI_NUM_CHANNELS; i++)
  2213.             bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
  2214.          break;
  2215.  
  2216.       case TGSI_FILE_PREDICATE:
  2217.          assert(idx < LP_MAX_TGSI_PREDS);
  2218.          for (i = 0; i < TGSI_NUM_CHANNELS; i++)
  2219.             bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
  2220.                                                  "predicate");
  2221.          break;
  2222.  
  2223.       case TGSI_FILE_SAMPLER_VIEW:
  2224.          /*
  2225.           * The target stored here MUST match whatever there actually
  2226.           * is in the set sampler views (what about return type?).
  2227.           */
  2228.          assert(idx < PIPE_MAX_SHADER_SAMPLER_VIEWS);
  2229.          bld->sv[idx] = decl->SamplerView;
  2230.          break;
  2231.  
  2232.       default:
  2233.          /* don't need to declare other vars */
  2234.          break;
  2235.       }
  2236.    }
  2237. }
  2238.  
  2239.  
  2240. void lp_emit_immediate_soa(
  2241.    struct lp_build_tgsi_context *bld_base,
  2242.    const struct tgsi_full_immediate *imm)
  2243. {
  2244.    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
  2245.    struct gallivm_state * gallivm = bld_base->base.gallivm;
  2246.  
  2247.    /* simply copy the immediate values into the next immediates[] slot */
  2248.    unsigned i;
  2249.    const uint size = imm->Immediate.NrTokens - 1;
  2250.    assert(size <= 4);
  2251.    assert(bld->num_immediates < LP_MAX_TGSI_IMMEDIATES);
  2252.    switch (imm->Immediate.DataType) {
  2253.    case TGSI_IMM_FLOAT32:
  2254.       for( i = 0; i < size; ++i )
  2255.          bld->immediates[bld->num_immediates][i] =
  2256.             lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
  2257.  
  2258.       break;
  2259.    case TGSI_IMM_UINT32:
  2260.       for( i = 0; i < size; ++i ) {
  2261.          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
  2262.          bld->immediates[bld->num_immediates][i] =
  2263.             LLVMConstBitCast(tmp, bld_base->base.vec_type);
  2264.       }
  2265.  
  2266.       break;
  2267.    case TGSI_IMM_INT32:
  2268.       for( i = 0; i < size; ++i ) {
  2269.          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
  2270.          bld->immediates[bld->num_immediates][i] =
  2271.             LLVMConstBitCast(tmp, bld_base->base.vec_type);
  2272.       }
  2273.            
  2274.       break;
  2275.    }
  2276.    for( i = size; i < 4; ++i )
  2277.       bld->immediates[bld->num_immediates][i] = bld_base->base.undef;
  2278.  
  2279.    if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
  2280.       unsigned index = bld->num_immediates;
  2281.       struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
  2282.       LLVMBuilderRef builder = gallivm->builder;
  2283.       for (i = 0; i < 4; ++i ) {
  2284.          LLVMValueRef lindex = lp_build_const_int32(
  2285.             bld->bld_base.base.gallivm, index * 4 + i);
  2286.          LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
  2287.                                              bld->imms_array, &lindex, 1, "");
  2288.          LLVMBuildStore(builder,
  2289.                         bld->immediates[index][i],
  2290.                         imm_ptr);
  2291.       }
  2292.    }
  2293.  
  2294.    bld->num_immediates++;
  2295. }
  2296.  
  2297. static void
  2298. ddx_emit(
  2299.    const struct lp_build_tgsi_action * action,
  2300.    struct lp_build_tgsi_context * bld_base,
  2301.    struct lp_build_emit_data * emit_data)
  2302. {
  2303.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2304.  
  2305.    emit_fetch_deriv(bld, emit_data->args[0], NULL,
  2306.                     &emit_data->output[emit_data->chan], NULL);
  2307. }
  2308.  
  2309. static void
  2310. ddy_emit(
  2311.    const struct lp_build_tgsi_action * action,
  2312.    struct lp_build_tgsi_context * bld_base,
  2313.    struct lp_build_emit_data * emit_data)
  2314. {
  2315.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2316.  
  2317.    emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
  2318.                     &emit_data->output[emit_data->chan]);
  2319. }
  2320.  
  2321. static void
  2322. kill_emit(
  2323.    const struct lp_build_tgsi_action * action,
  2324.    struct lp_build_tgsi_context * bld_base,
  2325.    struct lp_build_emit_data * emit_data)
  2326. {
  2327.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2328.  
  2329.    emit_kill(bld, bld_base->pc - 1);
  2330. }
  2331.  
  2332. static void
  2333. kill_if_emit(
  2334.    const struct lp_build_tgsi_action * action,
  2335.    struct lp_build_tgsi_context * bld_base,
  2336.    struct lp_build_emit_data * emit_data)
  2337. {
  2338.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2339.  
  2340.    emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
  2341. }
  2342.  
  2343. static void
  2344. tex_emit(
  2345.    const struct lp_build_tgsi_action * action,
  2346.    struct lp_build_tgsi_context * bld_base,
  2347.    struct lp_build_emit_data * emit_data)
  2348. {
  2349.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2350.  
  2351.    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, emit_data->output);
  2352. }
  2353.  
  2354. static void
  2355. txb_emit(
  2356.    const struct lp_build_tgsi_action * action,
  2357.    struct lp_build_tgsi_context * bld_base,
  2358.    struct lp_build_emit_data * emit_data)
  2359. {
  2360.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2361.  
  2362.    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
  2363.             emit_data->output);
  2364. }
  2365.  
  2366. static void
  2367. txd_emit(
  2368.    const struct lp_build_tgsi_action * action,
  2369.    struct lp_build_tgsi_context * bld_base,
  2370.    struct lp_build_emit_data * emit_data)
  2371. {
  2372.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2373.  
  2374.    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
  2375.             emit_data->output);
  2376. }
  2377.  
  2378. static void
  2379. txl_emit(
  2380.    const struct lp_build_tgsi_action * action,
  2381.    struct lp_build_tgsi_context * bld_base,
  2382.    struct lp_build_emit_data * emit_data)
  2383. {
  2384.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2385.  
  2386.    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
  2387.             emit_data->output);
  2388. }
  2389.  
  2390. static void
  2391. txp_emit(
  2392.    const struct lp_build_tgsi_action * action,
  2393.    struct lp_build_tgsi_context * bld_base,
  2394.    struct lp_build_emit_data * emit_data)
  2395. {
  2396.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2397.  
  2398.    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
  2399.             emit_data->output);
  2400. }
  2401.  
  2402. static void
  2403. txq_emit(
  2404.    const struct lp_build_tgsi_action * action,
  2405.    struct lp_build_tgsi_context * bld_base,
  2406.    struct lp_build_emit_data * emit_data)
  2407. {
  2408.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2409.  
  2410.    emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
  2411. }
  2412.  
  2413. static void
  2414. txf_emit(
  2415.    const struct lp_build_tgsi_action * action,
  2416.    struct lp_build_tgsi_context * bld_base,
  2417.    struct lp_build_emit_data * emit_data)
  2418. {
  2419.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2420.  
  2421.    emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
  2422. }
  2423.  
  2424. static void
  2425. sample_i_emit(
  2426.    const struct lp_build_tgsi_action * action,
  2427.    struct lp_build_tgsi_context * bld_base,
  2428.    struct lp_build_emit_data * emit_data)
  2429. {
  2430.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2431.  
  2432.    emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
  2433. }
  2434.  
  2435. static void
  2436. sample_emit(
  2437.    const struct lp_build_tgsi_action * action,
  2438.    struct lp_build_tgsi_context * bld_base,
  2439.    struct lp_build_emit_data * emit_data)
  2440. {
  2441.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2442.  
  2443.    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
  2444.                FALSE, emit_data->output);
  2445. }
  2446.  
  2447. static void
  2448. sample_b_emit(
  2449.    const struct lp_build_tgsi_action * action,
  2450.    struct lp_build_tgsi_context * bld_base,
  2451.    struct lp_build_emit_data * emit_data)
  2452. {
  2453.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2454.  
  2455.    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
  2456.                FALSE, emit_data->output);
  2457. }
  2458.  
  2459. static void
  2460. sample_c_emit(
  2461.    const struct lp_build_tgsi_action * action,
  2462.    struct lp_build_tgsi_context * bld_base,
  2463.    struct lp_build_emit_data * emit_data)
  2464. {
  2465.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2466.  
  2467.    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
  2468.                TRUE, emit_data->output);
  2469. }
  2470.  
  2471. static void
  2472. sample_c_lz_emit(
  2473.    const struct lp_build_tgsi_action * action,
  2474.    struct lp_build_tgsi_context * bld_base,
  2475.    struct lp_build_emit_data * emit_data)
  2476. {
  2477.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2478.  
  2479.    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
  2480.                TRUE, emit_data->output);
  2481. }
  2482.  
  2483. static void
  2484. sample_d_emit(
  2485.    const struct lp_build_tgsi_action * action,
  2486.    struct lp_build_tgsi_context * bld_base,
  2487.    struct lp_build_emit_data * emit_data)
  2488. {
  2489.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2490.  
  2491.    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
  2492.                FALSE, emit_data->output);
  2493. }
  2494.  
  2495. static void
  2496. sample_l_emit(
  2497.    const struct lp_build_tgsi_action * action,
  2498.    struct lp_build_tgsi_context * bld_base,
  2499.    struct lp_build_emit_data * emit_data)
  2500. {
  2501.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2502.  
  2503.    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
  2504.                FALSE, emit_data->output);
  2505. }
  2506.  
  2507. static void
  2508. sviewinfo_emit(
  2509.    const struct lp_build_tgsi_action * action,
  2510.    struct lp_build_tgsi_context * bld_base,
  2511.    struct lp_build_emit_data * emit_data)
  2512. {
  2513.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2514.  
  2515.    emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
  2516. }
  2517.  
  2518. static LLVMValueRef
  2519. mask_to_one_vec(struct lp_build_tgsi_context *bld_base)
  2520. {
  2521.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2522.    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
  2523.    LLVMValueRef one_vec = bld_base->int_bld.one;
  2524.    struct lp_exec_mask *exec_mask = &bld->exec_mask;
  2525.  
  2526.    if (exec_mask->has_mask) {
  2527.       one_vec = LLVMBuildAnd(builder, one_vec, exec_mask->exec_mask, "");
  2528.    }
  2529.    one_vec = LLVMBuildAnd(builder, one_vec,
  2530.                           lp_build_mask_value(bld->mask), "");
  2531.    return one_vec;
  2532. }
  2533.  
  2534. static void
  2535. increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
  2536.                           LLVMValueRef ptr,
  2537.                           LLVMValueRef mask)
  2538. {
  2539.    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
  2540.  
  2541.    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
  2542.    
  2543.    current_vec = LLVMBuildAdd(builder, current_vec, mask, "");
  2544.    
  2545.    LLVMBuildStore(builder, current_vec, ptr);
  2546. }
  2547.  
  2548. static void
  2549. clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
  2550.                              LLVMValueRef ptr,
  2551.                              LLVMValueRef mask)
  2552. {
  2553.    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
  2554.  
  2555.    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
  2556.    LLVMValueRef full_mask = lp_build_cmp(&bld_base->uint_bld,
  2557.                                          PIPE_FUNC_NOTEQUAL,
  2558.                                          mask,
  2559.                                          bld_base->uint_bld.zero);
  2560.  
  2561.    current_vec = lp_build_select(&bld_base->uint_bld,
  2562.                                  full_mask,
  2563.                                  bld_base->uint_bld.zero,
  2564.                                  current_vec);
  2565.    
  2566.    LLVMBuildStore(builder, current_vec, ptr);
  2567. }
  2568.  
  2569. static LLVMValueRef
  2570. clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
  2571.                                   LLVMValueRef current_mask_vec,
  2572.                                   LLVMValueRef total_emitted_vertices_vec)
  2573. {
  2574.    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
  2575.    struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
  2576.    LLVMValueRef max_mask = lp_build_cmp(uint_bld, PIPE_FUNC_LESS,
  2577.                                         total_emitted_vertices_vec,
  2578.                                         bld->max_output_vertices_vec);
  2579.  
  2580.    return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
  2581. }
  2582.  
/* GS EMIT: hand the current output register values to the geometry shader
 * interface as one new vertex, on every active lane that has not yet
 * reached the declared maximum output vertex count. */
static void
emit_vertex(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->emit_vertex) {
      /* 1 in active lanes, 0 in inactive ones. */
      LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
      LLVMValueRef total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      /* Drop lanes that already hit max_output_vertices. */
      masked_ones = clamp_mask_to_max_output_vertices(bld, masked_ones,
                                                      total_emitted_vertices_vec);
      /* Presumably collects the output values into bld->outputs before the
       * interface consumes them — confirm against gather_outputs(). */
      gather_outputs(bld);
      bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
                                 bld->outputs,
                                 total_emitted_vertices_vec);
      /* Advance both the per-primitive and the overall vertex counters,
       * but only on the lanes that actually emitted. */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                masked_ones);
      increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
                                masked_ones);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex masked ones = ",
                           masked_ones);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex emitted = ",
                           total_emitted_vertices_vec);
#endif
   }
}
  2616.  
  2617.  
/* Close the primitive currently being built on the lanes selected by
 * masked_ones: report the per-primitive vertex count to the gs interface,
 * bump the emitted-primitive counter, and reset the per-primitive vertex
 * counter on those lanes. */
static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,
                     LLVMValueRef masked_ones)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->end_primitive) {
      LLVMValueRef emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
     
      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
                                   emitted_vertices_vec,
                                   emitted_prims_vec);

#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim masked ones = ",
                           masked_ones);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts1 = ",
                           emitted_vertices_vec);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted prims1 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_prims_vec_ptr, ""));
#endif
      /* One more primitive finished on the active lanes... */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
                                masked_ones);
      /* ...and their per-primitive vertex count restarts at zero. */
      clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                   masked_ones);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts2 = ",
                           LLVMBuildLoad(builder,
                                         bld->emitted_vertices_vec_ptr, ""));
#endif
   }

}
  2660.  
/* GS ENDPRIM: finish the current primitive, but only on lanes that are
 * both active and actually have unflushed vertices pending. */
static void
end_primitive(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);

   if (bld->gs_iface->end_primitive) {
      LLVMBuilderRef builder = bld_base->base.gallivm->builder;
      LLVMValueRef masked_ones = mask_to_one_vec(bld_base);
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      /* Per-lane count of vertices in the primitive being built. */
      LLVMValueRef emitted_verts = LLVMBuildLoad(
         builder, bld->emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                                               emitted_verts,
                                               uint_bld->zero);
      /* We need to combine the current execution mask with the mask
         telling us which, if any, execution slots actually have
         unemitted primitives, this way we make sure that end_primitives
         executes only on the paths that have unflushed vertices */
      masked_ones = LLVMBuildAnd(builder, masked_ones, emitted_mask, "");
     
      end_primitive_masked(bld_base, masked_ones);
   }
}
  2687.  
  2688. static void
  2689. cal_emit(
  2690.    const struct lp_build_tgsi_action * action,
  2691.    struct lp_build_tgsi_context * bld_base,
  2692.    struct lp_build_emit_data * emit_data)
  2693. {
  2694.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2695.  
  2696.    lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
  2697.                      &bld_base->pc);
  2698. }
  2699.  
  2700. static void
  2701. ret_emit(
  2702.    const struct lp_build_tgsi_action * action,
  2703.    struct lp_build_tgsi_context * bld_base,
  2704.    struct lp_build_emit_data * emit_data)
  2705. {
  2706.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2707.  
  2708.    lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
  2709. }
  2710.  
  2711. static void
  2712. brk_emit(
  2713.    const struct lp_build_tgsi_action * action,
  2714.    struct lp_build_tgsi_context * bld_base,
  2715.    struct lp_build_emit_data * emit_data)
  2716. {
  2717.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2718.  
  2719.    lp_exec_break(&bld->exec_mask, bld_base);
  2720. }
  2721.  
  2722. static void
  2723. breakc_emit(
  2724.    const struct lp_build_tgsi_action * action,
  2725.    struct lp_build_tgsi_context * bld_base,
  2726.    struct lp_build_emit_data * emit_data)
  2727. {
  2728.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2729.    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
  2730.    struct lp_build_context *uint_bld = &bld_base->uint_bld;
  2731.    LLVMValueRef unsigned_cond =
  2732.       LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
  2733.    LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
  2734.                                     unsigned_cond,
  2735.                                     uint_bld->zero);
  2736.  
  2737.    lp_exec_break_condition(&bld->exec_mask, cond);
  2738. }
  2739.  
  2740. static void
  2741. if_emit(
  2742.    const struct lp_build_tgsi_action * action,
  2743.    struct lp_build_tgsi_context * bld_base,
  2744.    struct lp_build_emit_data * emit_data)
  2745. {
  2746.    LLVMValueRef tmp;
  2747.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2748.  
  2749.    tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
  2750.                       emit_data->args[0], bld->bld_base.base.zero);
  2751.    lp_exec_mask_cond_push(&bld->exec_mask, tmp);
  2752. }
  2753.  
  2754. static void
  2755. uif_emit(
  2756.    const struct lp_build_tgsi_action * action,
  2757.    struct lp_build_tgsi_context * bld_base,
  2758.    struct lp_build_emit_data * emit_data)
  2759. {
  2760.    LLVMValueRef tmp;
  2761.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2762.    struct lp_build_context *uint_bld = &bld_base->uint_bld;
  2763.  
  2764.    tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
  2765.                       emit_data->args[0], uint_bld->zero);
  2766.    lp_exec_mask_cond_push(&bld->exec_mask, tmp);
  2767. }
  2768.  
  2769. static void
  2770. case_emit(
  2771.    const struct lp_build_tgsi_action * action,
  2772.    struct lp_build_tgsi_context * bld_base,
  2773.    struct lp_build_emit_data * emit_data)
  2774. {
  2775.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2776.  
  2777.    lp_exec_case(&bld->exec_mask, emit_data->args[0]);
  2778. }
  2779.  
  2780. static void
  2781. default_emit(
  2782.    const struct lp_build_tgsi_action * action,
  2783.    struct lp_build_tgsi_context * bld_base,
  2784.    struct lp_build_emit_data * emit_data)
  2785. {
  2786.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2787.  
  2788.    lp_exec_default(&bld->exec_mask, bld_base);
  2789. }
  2790.  
  2791. static void
  2792. switch_emit(
  2793.    const struct lp_build_tgsi_action * action,
  2794.    struct lp_build_tgsi_context * bld_base,
  2795.    struct lp_build_emit_data * emit_data)
  2796. {
  2797.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2798.  
  2799.    lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
  2800. }
  2801.  
  2802. static void
  2803. endswitch_emit(
  2804.    const struct lp_build_tgsi_action * action,
  2805.    struct lp_build_tgsi_context * bld_base,
  2806.    struct lp_build_emit_data * emit_data)
  2807. {
  2808.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2809.  
  2810.    lp_exec_endswitch(&bld->exec_mask, bld_base);
  2811. }
  2812.  
  2813. static void
  2814. bgnloop_emit(
  2815.    const struct lp_build_tgsi_action * action,
  2816.    struct lp_build_tgsi_context * bld_base,
  2817.    struct lp_build_emit_data * emit_data)
  2818. {
  2819.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2820.  
  2821.    lp_exec_bgnloop(&bld->exec_mask);
  2822. }
  2823.  
  2824. static void
  2825. bgnsub_emit(
  2826.    const struct lp_build_tgsi_action * action,
  2827.    struct lp_build_tgsi_context * bld_base,
  2828.    struct lp_build_emit_data * emit_data)
  2829. {
  2830.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2831.  
  2832.    lp_exec_mask_bgnsub(&bld->exec_mask);
  2833. }
  2834.  
  2835. static void
  2836. else_emit(
  2837.    const struct lp_build_tgsi_action * action,
  2838.    struct lp_build_tgsi_context * bld_base,
  2839.    struct lp_build_emit_data * emit_data)
  2840. {
  2841.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2842.  
  2843.    lp_exec_mask_cond_invert(&bld->exec_mask);
  2844. }
  2845.  
  2846. static void
  2847. endif_emit(
  2848.    const struct lp_build_tgsi_action * action,
  2849.    struct lp_build_tgsi_context * bld_base,
  2850.    struct lp_build_emit_data * emit_data)
  2851. {
  2852.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2853.  
  2854.    lp_exec_mask_cond_pop(&bld->exec_mask);
  2855. }
  2856.  
  2857. static void
  2858. endloop_emit(
  2859.    const struct lp_build_tgsi_action * action,
  2860.    struct lp_build_tgsi_context * bld_base,
  2861.    struct lp_build_emit_data * emit_data)
  2862. {
  2863.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2864.  
  2865.    lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
  2866. }
  2867.  
  2868. static void
  2869. endsub_emit(
  2870.    const struct lp_build_tgsi_action * action,
  2871.    struct lp_build_tgsi_context * bld_base,
  2872.    struct lp_build_emit_data * emit_data)
  2873. {
  2874.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2875.  
  2876.    lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
  2877. }
  2878.  
  2879. static void
  2880. cont_emit(
  2881.    const struct lp_build_tgsi_action * action,
  2882.    struct lp_build_tgsi_context * bld_base,
  2883.    struct lp_build_emit_data * emit_data)
  2884. {
  2885.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2886.  
  2887.    lp_exec_continue(&bld->exec_mask);
  2888. }
  2889.  
  2890. /* XXX: Refactor and move it to lp_bld_tgsi_action.c
  2891.  *
  2892.  * XXX: What do the comments about xmm registers mean?  Maybe they are left over
  2893.  * from old code, but there is no garauntee that LLVM will use those registers
  2894.  * for this code.
  2895.  *
  2896.  * XXX: There should be no calls to lp_build_emit_fetch in this function.  This
  2897.  * should be handled by the emit_data->fetch_args function. */
  2898. static void
  2899. nrm_emit(
  2900.    const struct lp_build_tgsi_action * action,
  2901.    struct lp_build_tgsi_context * bld_base,
  2902.    struct lp_build_emit_data * emit_data)
  2903. {
  2904.    LLVMValueRef tmp0, tmp1;
  2905.    LLVMValueRef tmp4 = NULL;
  2906.    LLVMValueRef tmp5 = NULL;
  2907.    LLVMValueRef tmp6 = NULL;
  2908.    LLVMValueRef tmp7 = NULL;
  2909.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2910.  
  2911.    uint dims = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
  2912.  
  2913.   if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) ||
  2914.       TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y) ||
  2915.       TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z) ||
  2916.       (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 4)) {
  2917.  
  2918.       /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
  2919.  
  2920.       /* xmm4 = src.x */
  2921.       /* xmm0 = src.x * src.x */
  2922.       tmp0 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_X);
  2923.       if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
  2924.          tmp4 = tmp0;
  2925.       }
  2926.       tmp0 = lp_build_mul( &bld->bld_base.base, tmp0, tmp0);
  2927.  
  2928.       /* xmm5 = src.y */
  2929.       /* xmm0 = xmm0 + src.y * src.y */
  2930.       tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Y);
  2931.       if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
  2932.          tmp5 = tmp1;
  2933.       }
  2934.       tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
  2935.       tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
  2936.  
  2937.       /* xmm6 = src.z */
  2938.       /* xmm0 = xmm0 + src.z * src.z */
  2939.       tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_Z);
  2940.       if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
  2941.          tmp6 = tmp1;
  2942.       }
  2943.       tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
  2944.       tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
  2945.  
  2946.       if (dims == 4) {
  2947.          /* xmm7 = src.w */
  2948.          /* xmm0 = xmm0 + src.w * src.w */
  2949.          tmp1 = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, TGSI_CHAN_W);
  2950.          if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W)) {
  2951.             tmp7 = tmp1;
  2952.          }
  2953.          tmp1 = lp_build_mul( &bld->bld_base.base, tmp1, tmp1);
  2954.          tmp0 = lp_build_add( &bld->bld_base.base, tmp0, tmp1);
  2955.       }
  2956.       /* xmm1 = 1 / sqrt(xmm0) */
  2957.       tmp1 = lp_build_rsqrt( &bld->bld_base.base, tmp0);
  2958.        /* dst.x = xmm1 * src.x */
  2959.       if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X)) {
  2960.          emit_data->output[TGSI_CHAN_X] = lp_build_mul( &bld->bld_base.base, tmp4, tmp1);
  2961.       }
  2962.       /* dst.y = xmm1 * src.y */
  2963.       if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Y)) {
  2964.          emit_data->output[TGSI_CHAN_Y] = lp_build_mul( &bld->bld_base.base, tmp5, tmp1);
  2965.       }
  2966.  
  2967.       /* dst.z = xmm1 * src.z */
  2968.       if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_Z)) {
  2969.          emit_data->output[TGSI_CHAN_Z] = lp_build_mul( &bld->bld_base.base, tmp6, tmp1);
  2970.       }
  2971.       /* dst.w = xmm1 * src.w */
  2972.       if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_X) && dims == 4) {
  2973.          emit_data->output[TGSI_CHAN_W] = lp_build_mul( &bld->bld_base.base, tmp7, tmp1);
  2974.       }
  2975.    }
  2976.  
  2977.    /* dst.w = 1.0 */
  2978.    if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data->inst, TGSI_CHAN_W) && dims == 3) {
  2979.        emit_data->output[TGSI_CHAN_W] = bld->bld_base.base.one;
  2980.    }
  2981. }
  2982.  
/* Set up per-invocation storage before translating the shader body:
 * alloca-backed register arrays for every indirectly addressed file,
 * plus the geometry shader emit counters (zero-initialized). */
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   /* Indirectly addressed temporaries are kept in memory rather than as
    * SSA values; sized 4 channel vectors per register (file_max is the
    * highest register index, hence the +4 for its 4 channels). */
   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                         bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
      bld->temps_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type, array_size,
                                              "temp_array");
   }

   /* Same for indirectly addressed outputs. */
   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                            bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                bld_base->base.vec_type, array_size,
                                                "output_array");
   }

   /* And for indirectly addressed immediates. */
   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                         bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
      bld->imms_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type, array_size,
                                              "imms_array");
   }

   /* If we have indirect addressing in inputs we need to copy them into
    * our alloca array to be able to iterate over them */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size = lp_build_const_int32(gallivm,
            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                               vec_type, array_size,
                                               "input_array");

      assert(bld_base->info->num_inputs
                        <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      /* Store each defined input channel at slot index*4 + chan. */
      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            /* Channels the caller did not provide stay uninitialized. */
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   /* Geometry shaders track emitted primitives/vertices in allocas so the
    * counters can be updated per-lane; start them all at zero. */
   if (bld->gs_iface) {
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }
}
  3066.  
/* Finish translation: geometry shaders flush any pending primitive and
 * report final counts through the gs interface; other stages hand the
 * output values back to the caller. */
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (0) {
      /* for debugging */
      emit_dump_temps(bld);
   }

   /* If we have indirect addressing in outputs we need to copy our alloca array
    * to the outputs slots specified by the caller */
   if (bld->gs_iface) {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* implicit end_primitives, needed in case there are any unflushed
         vertices in the cache */
      end_primitive(NULL, bld_base, NULL);
     
      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      /* Report the final per-lane vertex and primitive totals. */
      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 &bld->bld_base,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec);
   } else {
      /* Presumably copies output values (possibly from the indirect
       * alloca array) into the caller-visible outputs — confirm against
       * gather_outputs(). */
      gather_outputs(bld);
   }
}
  3099.  
/**
 * Translate a TGSI token stream into LLVM IR using SoA (structure-of-arrays)
 * register layout.
 *
 * \param gallivm        gallivm/LLVM context to build IR into
 * \param tokens         the TGSI shader to translate
 * \param type           vector type describing the SoA lanes (width/length)
 * \param mask           execution-mask context (may control lane kill/skip)
 * \param consts_ptr     pointer to the constant buffer
 * \param system_values  values for TGSI system-value registers
 * \param inputs         per-attrib, per-channel input values
 * \param outputs        per-attrib, per-channel output value slots (written)
 * \param sampler        texture sampling code generator
 * \param info           shader info from tgsi_scan (properties, indirection)
 * \param gs_iface       geometry-shader interface, or NULL for non-GS shaders
 */
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  struct lp_type type,
                  struct lp_build_mask_context *mask,
                  LLVMValueRef consts_ptr,
                  const struct lp_bld_tgsi_system_values *system_values,
                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info,
                  const struct lp_build_tgsi_gs_iface *gs_iface)
{
   struct lp_build_tgsi_soa_context bld;

   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   /* res_type: signed variant of 'type', same width/length.
    * NOTE(review): built here but not obviously consumed in this function;
    * presumably retained for historical reasons — confirm before removing. */
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   /* scalar (single-element) context, e.g. for per-element addressing */
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   bld.mask = mask;
   bld.inputs = inputs;
   bld.outputs = outputs;
   bld.consts_ptr = consts_ptr;
   bld.sampler = sampler;
   bld.bld_base.info = info;
   bld.indirect_files = info->indirect_files;

   /* Register the SoA-specific fetch/store callbacks used by the common
    * TGSI-to-LLVM machinery. */
   bld.bld_base.soa = TRUE;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
   bld.bld_base.emit_store = emit_store;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions */
   lp_set_default_actions_cpu(&bld.bld_base);

   /* Override the defaults with SoA-specific implementations for control
    * flow, derivatives, kill, and texture sampling opcodes. */
   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_NRM].emit = nrm_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_NRM4].emit = nrm_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;

   if (gs_iface) {
      /* There's no specific value for this because it should always
       * be set, but apps using ext_geometry_shader4 quite often
       * were forgetting so we're using MAX_VERTEX_VARYING from
       * that spec even though we could debug_assert if it's not
       * set, but that's a lot uglier. */
      uint max_output_vertices = 32;
      uint i = 0;
      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      /* Pick up the shader-declared vertex cap, if present, overriding
       * the fallback default above. */
      for (i = 0; i < info->num_properties; ++i) {
         if (info->properties[i].name ==
             TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
            max_output_vertices = info->properties[i].data[0];
         }
      }
      /* Splat the cap across all lanes for per-lane comparisons. */
      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.uint_bld.type,
                                max_output_vertices);
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *system_values;

   /* Run the shared token walker; it calls back into the hooks set up
    * above (prologue, declarations, opcode emits, epilogue). */
   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   if (0) {
      /* for debugging: dump the TGSI source and generated function */
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   if (0) {
      /* for debugging: dump the whole LLVM module */
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);

   }
}
  3242.