/**************************************************************************
 *
 * Copyright 2009 VMware, Inc.
 * Copyright 2007-2008 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * @file
 * TGSI to LLVM IR translation -- SoA.
 *
 * @author Jose Fonseca <jfonseca@vmware.com>
 *
 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
 * Brian Paul, and others.
 */

#include "pipe/p_config.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_exec.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_strings.h"
#include "lp_bld_tgsi_action.h"
#include "lp_bld_type.h"
#include "lp_bld_const.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_gather.h"
#include "lp_bld_init.h"
#include "lp_bld_logic.h"
#include "lp_bld_swizzle.h"
#include "lp_bld_flow.h"
#include "lp_bld_quad.h"
#include "lp_bld_tgsi.h"
#include "lp_bld_limits.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"
#include "lp_bld_sample.h"
#include "lp_bld_struct.h"

/* SM 4.0 says that subroutines can nest 32 deep and
 * we need one more for our main function */
#define LP_MAX_NUM_FUNCS 33

#define DUMP_GS_EMITS 0

/*
 * If non-zero, the generated LLVM IR will print intermediate results on every
 * TGSI instruction.
 *
 * TODO:
 * - take execution masks into consideration
 * - debug control-flow instructions
 */
#define DEBUG_EXECUTION 0


/*
 * Emit code to print a register value.
 */
static void
emit_dump_reg(struct gallivm_state *gallivm,
              unsigned file,
              unsigned index,
              unsigned chan,
              LLVMValueRef value)
{
   char buf[32];

   util_snprintf(buf, sizeof buf, "    %s[%u].%c = ",
                 tgsi_file_name(file),
                 index, "xyzw"[chan]);

   lp_build_print_value(gallivm, buf, value);
}
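
/*
 * When DEBUG_EXECUTION is enabled this produces one line per register
 * channel written, e.g. "    TEMP[0].x = " followed by the vector value.
 */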

/*
 * Return the context for the current function.
 * (always 'main', if the shader doesn't contain any function calls)
 */
static INLINE struct function_ctx *
func_ctx(struct lp_exec_mask *mask)
{
   assert(mask->function_stack_size > 0);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
   return &mask->function_stack[mask->function_stack_size - 1];
}

/*
 * Returns true if we're in a loop.
 * The check is global: it returns true even if there's no loop inside
 * the current function, as long as we were inside a loop in one of the
 * functions from which this one was called.
 */
static INLINE boolean
mask_has_loop(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->loop_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}

/*
 * Returns true if we're inside a switch statement.
 * The check is global: it returns true even if there's no switch in
 * the current function, as long as we were inside a switch in one of
 * the functions from which this one was called.
 */
static INLINE boolean
mask_has_switch(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->switch_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}

/*
 * Returns true if we're inside a conditional.
 * The check is global: it returns true even if there's no conditional in
 * the current function, as long as we were inside a conditional in one of
 * the functions from which this one was called.
 */
static INLINE boolean
mask_has_cond(struct lp_exec_mask *mask)
{
   int i;
   for (i = mask->function_stack_size - 1; i >= 0; --i) {
      const struct function_ctx *ctx = &mask->function_stack[i];
      if (ctx->cond_stack_size > 0)
         return TRUE;
   }
   return FALSE;
}


/*
 * Initialize a function context at the specified index.
 */
static void
lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
{
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = &mask->function_stack[function_idx];

   ctx->cond_stack_size = 0;
   ctx->loop_stack_size = 0;
   ctx->switch_stack_size = 0;

   if (function_idx == 0) {
      ctx->ret_mask = mask->ret_mask;
   }

   ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
                                       int_type, "looplimiter");
   LLVMBuildStore(
      builder,
      LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
      ctx->loop_limiter);
}

static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
{
   mask->bld = bld;
   mask->has_mask = FALSE;
   mask->ret_in_main = FALSE;
   /* For the main function */
   mask->function_stack_size = 1;

   mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type);
   mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask =
         mask->cond_mask = mask->switch_mask =
         LLVMConstAllOnes(mask->int_vec_type);

   mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS,
                                 sizeof(mask->function_stack[0]));
   lp_exec_mask_function_init(mask, 0);
}

static void
lp_exec_mask_fini(struct lp_exec_mask *mask)
{
   FREE(mask->function_stack);
}

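/*
 * Note on the mask model (summarizing what lp_exec_mask_update() below
 * computes): the effective per-lane execution mask is the AND of the
 * individual component masks,
 *
 *    exec_mask = cond_mask & cont_mask & break_mask   (loop masks, if in a loop)
 *              & switch_mask                          (if in a switch)
 *              & ret_mask                             (if a return may be masked)
 *
 * Each component mask starts out as all-ones, so straight-line code in
 * main runs fully unmasked.
 */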
static void lp_exec_mask_update(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   boolean has_loop_mask = mask_has_loop(mask);
   boolean has_cond_mask = mask_has_cond(mask);
   boolean has_switch_mask = mask_has_switch(mask);
   boolean has_ret_mask = mask->function_stack_size > 1 ||
         mask->ret_in_main;

   if (has_loop_mask) {
      /* for loops we need to update the entire mask at runtime */
      LLVMValueRef tmp;
      assert(mask->break_mask);
      tmp = LLVMBuildAnd(builder,
                         mask->cont_mask,
                         mask->break_mask,
                         "maskcb");
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->cond_mask,
                                     tmp,
                                     "maskfull");
   } else
      mask->exec_mask = mask->cond_mask;

   if (has_switch_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->switch_mask,
                                     "switchmask");
   }

   if (has_ret_mask) {
      mask->exec_mask = LLVMBuildAnd(builder,
                                     mask->exec_mask,
                                     mask->ret_mask,
                                     "callmask");
   }

   mask->has_mask = (has_cond_mask ||
                     has_loop_mask ||
                     has_switch_mask ||
                     has_ret_mask);
}

static void lp_exec_mask_cond_push(struct lp_exec_mask *mask,
                                   LLVMValueRef val)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) {
      ctx->cond_stack_size++;
      return;
   }
   if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) {
      assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type));
   }
   ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask;
   assert(LLVMTypeOf(val) == mask->int_vec_type);
   mask->cond_mask = LLVMBuildAnd(builder,
                                  mask->cond_mask,
                                  val,
                                  "");
   lp_exec_mask_update(mask);
}

static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef prev_mask;
   LLVMValueRef inv_mask;

   assert(ctx->cond_stack_size);
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1];
   if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) {
      assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type));
   }

   inv_mask = LLVMBuildNot(builder, mask->cond_mask, "");

   mask->cond_mask = LLVMBuildAnd(builder,
                                  inv_mask,
                                  prev_mask, "");
   lp_exec_mask_update(mask);
}

static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask)
{
   struct function_ctx *ctx = func_ctx(mask);
   assert(ctx->cond_stack_size);
   --ctx->cond_stack_size;
   if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING)
      return;
   mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size];
   lp_exec_mask_update(mask);
}

static void lp_exec_bgnloop(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) {
      ++ctx->loop_stack_size;
      return;
   }

   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP;

   ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block;
   ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask;
   ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var;
   ++ctx->loop_stack_size;

   ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, "");
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop");

   LLVMBuildBr(builder, ctx->loop_block);
   LLVMPositionBuilderAtEnd(builder, ctx->loop_block);

   mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, "");

   lp_exec_mask_update(mask);
}
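
/*
 * Note: the break mask lives in an alloca (break_var) rather than an SSA
 * value because it has to survive the loop back-edge: it is stored before
 * branching to the loop header and re-loaded at the top of each iteration
 * (LLVM's mem2reg pass is expected to turn this into phi nodes).
 */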

static void lp_exec_break(struct lp_exec_mask *mask,
                          struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                            mask->exec_mask,
                                            "break");

      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      exec_mask, "break_full");
   }
   else {
      unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
      boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
                              opcode == TGSI_OPCODE_CASE);

      if (ctx->switch_in_default) {
         /*
          * Stop default execution, but only if this is an unconditional break.
          * (The condition here is not perfect, since dead code after a break
          * is allowed, but it should be sufficient since false negatives are
          * just unoptimized - so we don't have to pre-evaluate that.)
          */
         if (break_always && ctx->switch_pc) {
            bld_base->pc = ctx->switch_pc;
            return;
         }
      }

      if (break_always) {
         mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
      }
      else {
         LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                               mask->exec_mask,
                                               "break");
         mask->switch_mask = LLVMBuildAnd(builder,
                                          mask->switch_mask,
                                          exec_mask, "break_switch");
      }
   }

   lp_exec_mask_update(mask);
}

static void lp_exec_break_condition(struct lp_exec_mask *mask,
                                    LLVMValueRef cond)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef cond_mask = LLVMBuildAnd(builder,
                                         mask->exec_mask,
                                         cond, "cond_mask");
   cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond");

   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      cond_mask, "breakc_full");
   }
   else {
      mask->switch_mask = LLVMBuildAnd(builder,
                                       mask->switch_mask,
                                       cond_mask, "breakc_switch");
   }

   lp_exec_mask_update(mask);
}

static void lp_exec_continue(struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                         mask->exec_mask,
                                         "");

   mask->cont_mask = LLVMBuildAnd(builder,
                                  mask->cont_mask,
                                  exec_mask, "");

   lp_exec_mask_update(mask);
}


static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMBasicBlockRef endloop;
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond, i2cond, icond, limiter;

   assert(mask->break_mask);

   assert(ctx->loop_stack_size);
   if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      --ctx->loop_stack_size;
      return;
   }

   /*
    * Restore the cont_mask, but don't pop
    */
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   /* Decrement the loop limiter */
   limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");

   limiter = LLVMBuildSub(
      builder,
      limiter,
      LLVMConstInt(int_type, 1, false),
      "");

   LLVMBuildStore(builder, limiter, ctx->loop_limiter);

   /* i1cond = (mask != 0) */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "i1cond");

   /* i2cond = (looplimiter > 0) */
   i2cond = LLVMBuildICmp(
      builder,
      LLVMIntSGT,
      limiter,
      LLVMConstNull(int_type), "i2cond");

   /* if( i1cond && i2cond ) */
   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   LLVMBuildCondBr(builder,
                   icond, ctx->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   assert(ctx->loop_stack_size);
   --ctx->loop_stack_size;
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
   mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
   ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
   ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
         ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
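
/*
 * The back-edge condition built above: keep looping while any lane is still
 * active (i1cond) AND the iteration limiter is still positive (i2cond).
 * The limiter, initialized to LP_MAX_TGSI_LOOP_ITERATIONS in
 * lp_exec_mask_function_init(), guarantees the generated code eventually
 * exits even for a shader whose loop never terminates.
 */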

static void lp_exec_switch(struct lp_exec_mask *mask,
                           LLVMValueRef switchval)
{
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
       ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size++;
      return;
   }

   ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
      ctx->break_type;
   ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;

   ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
   ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
   ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
   ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
   ctx->switch_stack_size++;

   mask->switch_mask = LLVMConstNull(mask->int_vec_type);
   ctx->switch_val = switchval;
   ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
   ctx->switch_in_default = false;
   ctx->switch_pc = 0;

   lp_exec_mask_update(mask);
}

static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size--;
      return;
   }

   /* check if there's a deferred default; if so, execute it now */
   if (ctx->switch_pc && !ctx->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);

      assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      tmp_pc = bld_base->pc;
      bld_base->pc = ctx->switch_pc;
      /*
       * Re-purpose switch_pc to point back here, since we stop execution of
       * the deferred default after the next break.
       */
      ctx->switch_pc = tmp_pc - 1;

      return;
   }
   else if (ctx->switch_pc && ctx->switch_in_default) {
      assert(bld_base->pc == ctx->switch_pc + 1);
   }

   ctx->switch_stack_size--;
   mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
   ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
   ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
   ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
   ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;

   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}

static void lp_exec_case(struct lp_exec_mask *mask,
                         LLVMValueRef caseval)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   LLVMValueRef casemask, prevmask;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
   if (!ctx->switch_in_default) {
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
      ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
                                             ctx->switch_mask_default, "sw_default_mask");
      casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");

      lp_exec_mask_update(mask);
   }
}

/*
 * Analyse the default statement in a switch.
 * \return true if default is the last statement, false otherwise
 * \param default_pc_start contains the pc of the instruction to jump to
 *                         if default wasn't last but there's no
 *                         fallthrough into default.
 */
static boolean default_analyse_is_last(struct lp_exec_mask *mask,
                                       struct lp_build_tgsi_context * bld_base,
                                       int *default_pc_start)
{
   unsigned pc = bld_base->pc;
   struct function_ctx *ctx = func_ctx(mask);
   unsigned curr_switch_stack = ctx->switch_stack_size;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return false;
   }

   /* skip over case statements which appear together with default */
   while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
      pc++;
   }

   while (pc != -1 && pc < bld_base->num_instructions) {
      unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
      switch (opcode) {
      case TGSI_OPCODE_CASE:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return false;
         }
         break;
      case TGSI_OPCODE_SWITCH:
         curr_switch_stack++;
         break;
      case TGSI_OPCODE_ENDSWITCH:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return true;
         }
         curr_switch_stack--;
         break;
      }
      pc++;
   }
   /* should never arrive here */
   assert(0);
   return true;
}

static void lp_exec_default(struct lp_exec_mask *mask,
                            struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   int default_exec_pc;
   boolean default_is_last;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /*
    * This is a messy opcode, because it may not always be at the end and
    * there can be fallthrough in and out of it.
    */

   default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
   /*
    * If it is the last statement in the switch (note that case statements
    * appearing "at the same time" as default don't change that) everything
    * is just fine: update the switch mask and go on. This means we can
    * handle default with fallthrough INTO it without overhead, if it is last.
    */
   if (default_is_last) {
      LLVMValueRef prevmask, defaultmask;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);
   }
   else {
      /*
       * Technically, a "case" immediately before default isn't really a
       * fallthrough, however we still have to count it as such, as we
       * have already updated the masks.
       * If that happens in practice, we could add a switch optimizer pass
       * which just gets rid of all case statements appearing together with
       * default (or could do switch analysis at switch start time instead).
       */
      unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
      boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
                         opcode != TGSI_OPCODE_SWITCH);
      /*
       * If it is not the last statement and there was no fallthrough into it,
       * we record the PC and continue execution at the next case (again, cases
       * encountered at the same time don't count). At endswitch time, we
       * update the switch mask and go back to execute the code we skipped,
       * until the next break (possibly re-executing some code with a changed
       * mask if there was a fallthrough out of default).
       * Finally, if it is not the last statement and there was a fallthrough
       * into it, do the same as in the former case, except instead of skipping
       * the code just execute it without updating the mask, then go back and
       * re-execute.
       */
      ctx->switch_pc = bld_base->pc;
      if (!ft_into) {
         bld_base->pc = default_exec_pc;
      }
   }
}
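
/*
 * Illustrative sketch (not part of the original source) of the deferred
 * default scheme above, for a default that is neither last nor fallen into:
 *
 *    SWITCH s
 *    CASE 0    ...a...  BRK
 *    DEFAULT            <- record switch_pc, skip ahead to the next CASE
 *    ...d...   BRK
 *    CASE 1    ...b...  BRK
 *    ENDSWITCH          <- compute the default mask, jump back to ...d...,
 *                          then return here at the unconditional break
 */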


/* stores val into an address pointed to by dst_ptr.
 * mask->exec_mask is used to figure out which bits of val
 * should be stored into the address
 * (0 means don't store this bit, 1 means do store).
 */
static void lp_exec_mask_store(struct lp_exec_mask *mask,
                               struct lp_build_context *bld_store,
                               LLVMValueRef pred,
                               LLVMValueRef val,
                               LLVMValueRef dst_ptr)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;

   assert(lp_check_value(bld_store->type, val));
   assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind);
   assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val));

   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
      } else {
         pred = mask->exec_mask;
      }
   }

   if (pred) {
      LLVMValueRef res, dst;

      dst = LLVMBuildLoad(builder, dst_ptr, "");
      res = lp_build_select(bld_store, pred, val, dst);
      LLVMBuildStore(builder, res, dst_ptr);
   } else
      LLVMBuildStore(builder, val, dst_ptr);
}
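
/*
 * Note that the masked store above is a read-modify-write: load the old
 * destination value, lp_build_select() between new and old per lane, then
 * store the result back; no native masked store instruction is emitted.
 */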

static void lp_exec_mask_call(struct lp_exec_mask *mask,
                              int func,
                              int *pc)
{
   if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
      return;
   }

   lp_exec_mask_function_init(mask, mask->function_stack_size);
   mask->function_stack[mask->function_stack_size].pc = *pc;
   mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
   mask->function_stack_size++;
   *pc = func;
}

static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef exec_mask;

   if (ctx->cond_stack_size == 0 &&
       ctx->loop_stack_size == 0 &&
       ctx->switch_stack_size == 0 &&
       mask->function_stack_size == 1) {
      /* returning from main() */
      *pc = -1;
      return;
   }

   if (mask->function_stack_size == 1) {
      /*
       * This requires special handling since we need to ensure
       * we don't drop the mask even if we have no call stack
       * (e.g. after a ret in an if clause after the endif)
       */
      mask->ret_in_main = TRUE;
   }

   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   lp_exec_mask_update(mask);
}

static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}

static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
{
   struct function_ctx *ctx;

   assert(mask->function_stack_size > 1);
   assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);

   ctx = func_ctx(mask);
   mask->function_stack_size--;

   *pc = ctx->pc;
   mask->ret_mask = ctx->ret_mask;

   lp_exec_mask_update(mask);
}


static LLVMValueRef
get_file_ptr(struct lp_build_tgsi_soa_context *bld,
             unsigned file,
             unsigned index,
             unsigned chan)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
   LLVMValueRef var_of_array;

   switch (file) {
   case TGSI_FILE_TEMPORARY:
      array_of_vars = bld->temps;
      var_of_array = bld->temps_array;
      break;
   case TGSI_FILE_OUTPUT:
      array_of_vars = bld->outputs;
      var_of_array = bld->outputs_array;
      break;
   default:
      assert(0);
      return NULL;
   }

   assert(chan < 4);

   if (bld->indirect_files & (1 << file)) {
      LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
      return LLVMBuildGEP(builder, var_of_array, &lindex, 1, "");
   }
   else {
      assert(index <= bld->bld_base.info->file_max[file]);
      return array_of_vars[index][chan];
   }
}


/**
 * Return pointer to a temporary register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which temporary register
 * \param chan  which channel of the temp register.
 */
LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
             unsigned index,
             unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
}

/**
 * Return pointer to an output register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * \param index  which output register
 * \param chan  which channel of the output register.
 */
LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
               unsigned index,
               unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
}

/*
 * If we have indirect addressing in outputs, copy our alloca array
 * to the output slots specified by the caller to make sure
 * our outputs are delivered consistently via the same interface.
 */
static void
gather_outputs(struct lp_build_tgsi_soa_context * bld)
{
   if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
      unsigned index, chan;
      assert(bld->bld_base.info->num_outputs <=
             bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
      for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
         }
      }
   }
}

/**
 * Gather vector.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_context *bld_base,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef overflow_mask)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *bld = &bld_base->base;
   LLVMValueRef res = bld->undef;
   unsigned i;

   /*
    * overflow_mask is a vector telling us which channels
    * in the vector overflowed. We use the overflow behavior for
    * constant buffers which is defined as:
    * Out of bounds access to constant buffer returns 0 in all
    * components. Out of bounds behavior is always with respect
    * to the size of the buffer bound at that slot.
    */

   if (overflow_mask) {
      /*
       * We avoid per-element control flow here (also due to llvm going crazy,
       * though I suspect it's better anyway since overflow is likely rare).
       * Note that since we still fetch from buffers even if num_elements was
       * zero (in this case we'll fetch from index zero) the jit func callers
       * MUST provide valid fake constant buffers of size 4x32 (the values do
       * not matter), otherwise we'd still need (not per element though)
       * control flow.
       */
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
   }

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder,
                                                   indexes, ii, "");
      LLVMValueRef scalar_ptr, scalar;

      scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                &index, 1, "gather_ptr");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
   }

   if (overflow_mask) {
      res = lp_build_select(bld, overflow_mask, bld->zero, res);
   }

   return res;
}


/**
 * Scatter/store vector.
 */
static void
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask,
                  LLVMValueRef pred)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;

   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
      }
      else {
         pred = mask->exec_mask;
      }
   }

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->bld_base.base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      if (0)
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         LLVMValueRef real_val, dst_val;
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}


/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, add the base index and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_ind_register *indirect_reg)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
   /* use the component of the address register selected by the swizzle (normally X) */
   unsigned swizzle = indirect_reg->Swizzle;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   assert(bld->indirect_files & (1 << reg_file));

   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);

   assert(swizzle < 4);
   switch (indirect_reg->File) {
   case TGSI_FILE_ADDRESS:
      rel = LLVMBuildLoad(builder,
                          bld->addr[indirect_reg->Index][swizzle],
                          "load addr reg");
      /* ADDR LLVM values already have LLVM integer type. */
      break;
   case TGSI_FILE_TEMPORARY:
      rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
      rel = LLVMBuildLoad(builder, rel, "load temp reg");
      /* TEMP LLVM values always have LLVM float type, but for indirection, the
       * value actually stored is expected to be an integer */
      rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
      break;
   default:
      assert(0);
      rel = uint_bld->zero;
   }

   index = lp_build_add(uint_bld, base, rel);

   /*
    * emit_fetch_constant handles constant buffer overflow so this code
    * is pointless for them.
    * Furthermore the D3D10 spec in section 6.5 says:
    * If the constant buffer bound to a slot is larger than the size
    * declared in the shader for that slot, implementations are allowed
    * to return incorrect data (not necessarily 0) for indices that are
    * larger than the declared size but smaller than the buffer size.
    */
   if (reg_file != TGSI_FILE_CONSTANT) {
      max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
                                         uint_bld->type,
                                         bld->bld_base.info->file_max[reg_file]);

      assert(!uint_bld->type.sign);
      index = lp_build_min(uint_bld, index, max_index);
   }

   return index;
}
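
/*
 * Example (hypothetical TGSI source, for illustration): for TEMP[ADDR[0].x + 5],
 * 'base' is the splatted constant 5, 'rel' the value loaded from the address
 * register, and their sum is clamped against file_max so an out-of-range
 * index cannot read past the register file (constant buffers are exempt
 * since emit_fetch_constant() handles overflow itself).
 */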

static struct lp_build_context *
stype_to_fetch(struct lp_build_tgsi_context * bld_base,
               enum tgsi_opcode_type stype)
{
   struct lp_build_context *bld_fetch;

   switch (stype) {
   case TGSI_TYPE_FLOAT:
   case TGSI_TYPE_UNTYPED:
      bld_fetch = &bld_base->base;
      break;
   case TGSI_TYPE_UNSIGNED:
      bld_fetch = &bld_base->uint_bld;
      break;
   case TGSI_TYPE_SIGNED:
      bld_fetch = &bld_base->int_bld;
      break;
   case TGSI_TYPE_VOID:
   case TGSI_TYPE_DOUBLE:
   default:
      assert(0);
      bld_fetch = NULL;
      break;
   }
   return bld_fetch;
}

static LLVMValueRef
get_soa_array_offsets(struct lp_build_context *uint_bld,
                      LLVMValueRef indirect_index,
                      unsigned chan_index,
                      boolean need_perelement_offset)
{
   struct gallivm_state *gallivm = uint_bld->gallivm;
   LLVMValueRef chan_vec =
      lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
   LLVMValueRef length_vec =
      lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
   LLVMValueRef index_vec;

   /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
   index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
   index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
   index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

   if (need_perelement_offset) {
      LLVMValueRef pixel_offsets;
      int i;
      /* build pixel offset vector: {0, 1, 2, 3, ...} */
      pixel_offsets = uint_bld->undef;
      for (i = 0; i < uint_bld->type.length; i++) {
         LLVMValueRef ii = lp_build_const_int32(gallivm, i);
         pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
                                                ii, ii, "");
      }
      index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
   }
   return index_vec;
}
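
/*
 * Worked example (assumed values, for illustration): with a vector length
 * of 8, a uniform indirect_index of 2, chan_index 1 and per-element offsets,
 * the result is (2*4 + 1)*8 + {0..7} = {72..79}, i.e. the eight consecutive
 * floats holding channel y of register 2 in the SoA array layout.
 */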

static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   unsigned dimension = 0;
   LLVMValueRef consts_ptr;
   LLVMValueRef num_consts;
   LLVMValueRef res;

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0);

   if (reg->Register.Dimension) {
      assert(!reg->Dimension.Indirect);
      dimension = reg->Dimension.Index;
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
   }

   consts_ptr = bld->consts[dimension];
   num_consts = bld->consts_sizes[dimension];

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec;  /* index into the const buffer */
      LLVMValueRef overflow_mask;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      /* Construct a boolean vector telling us which channels
       * overflow the bound constant buffer */
      overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                       indirect_index, num_consts);

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      /* Gather values from the constant buffer */
      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask);
   }
   else {
      LLVMValueRef index;  /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;

      index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);

      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                &index, 1, "");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
      res = lp_build_broadcast_scalar(&bld_base->base, scalar);
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
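
/*
 * E.g. a direct (non-indirect) fetch of CONST[3].y loads the scalar at
 * element 3*4 + 1 of the constant buffer and broadcasts it to all lanes,
 * while the indirect path gathers per lane and zeroes out-of-bounds lanes
 * via overflow_mask (see build_gather() above).
 */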

static LLVMValueRef
emit_fetch_immediate(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res = NULL;

   if (bld->use_immediates_array || reg->Register.Indirect) {
      LLVMValueRef imms_array;
      LLVMTypeRef fptr_type;

      /* cast imms_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef indirect_index;
         LLVMValueRef index_vec;  /* index into the immediate register array */

         indirect_index = get_indirect_index(bld,
                                             reg->Register.File,
                                             reg->Register.Index,
                                             &reg->Indirect);
         /*
          * Unlike for other reg classes, adding pixel offsets is unnecessary -
          * immediates are stored as full vectors (FIXME??? - might be better
          * to store them the same as constants) but all elements are the same
          * in any case.
          */
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle,
                                           FALSE);

         /* Gather values from the immediate register array */
         res = build_gather(bld_base, imms_array, index_vec, NULL);
      } else {
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                        reg->Register.Index * 4 + swizzle);
         LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
                                              bld->imms_array, &lindex, 1, "");
         res = LLVMBuildLoad(builder, imms_ptr, "");
      }
   }
   else {
      res = bld->immediates[reg->Register.Index][swizzle];
   }

   if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }
   return res;
}

static LLVMValueRef
emit_fetch_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec;  /* index into the input reg array */
      LLVMValueRef inputs_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);

      /* cast inputs_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");

      /* Gather values from the input register array */
      res = build_gather(bld_base, inputs_array, index_vec, NULL);
   } else {
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                        reg->Register.Index * 4 + swizzle);
         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
                                               bld->inputs_array, &lindex, 1, "");
         res = LLVMBuildLoad(builder, input_ptr, "");
      }
      else {
         res = bld->inputs[reg->Register.Index][swizzle];
      }
   }

   assert(res);

   if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}
  1379.  
  1380.  
  1381. static LLVMValueRef
  1382. emit_fetch_gs_input(
  1383.    struct lp_build_tgsi_context * bld_base,
  1384.    const struct tgsi_full_src_register * reg,
  1385.    enum tgsi_opcode_type stype,
  1386.    unsigned swizzle)
  1387. {
  1388.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  1389.    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
  1390.    const struct tgsi_shader_info *info = bld->bld_base.info;
  1391.    LLVMBuilderRef builder = gallivm->builder;
  1392.    LLVMValueRef attrib_index = NULL;
  1393.    LLVMValueRef vertex_index = NULL;
  1394.    LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
  1395.    LLVMValueRef res;
  1396.  
  1397.    if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
  1398.       /* This is really a system value, not a regular input. */
  1399.       assert(!reg->Register.Indirect);
  1400.       assert(!reg->Dimension.Indirect);
  1401.       res = bld->system_values.prim_id;
  1402.       if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
  1403.          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
  1404.       }
  1405.       return res;
  1406.    }
  1407.  
  1408.    if (reg->Register.Indirect) {
  1409.       attrib_index = get_indirect_index(bld,
  1410.                                         reg->Register.File,
  1411.                                         reg->Register.Index,
  1412.                                         &reg->Indirect);
  1413.    } else {
  1414.       attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
  1415.    }
  1416.    
  1417.    if (reg->Dimension.Indirect) {
  1418.       vertex_index = get_indirect_index(bld,
  1419.                                         reg->Register.File,
  1420.                                         reg->Dimension.Index,
  1421.                                         &reg->DimIndirect);
  1422.    } else {
  1423.       vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
  1424.    }
  1425.  
  1426.    res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
  1427.                                     reg->Dimension.Indirect,
  1428.                                     vertex_index,
  1429.                                     reg->Register.Indirect,
  1430.                                     attrib_index,
  1431.                                     swizzle_index);
  1432.  
  1433.    assert(res);
  1434.  
  1435.    if (stype == TGSI_TYPE_UNSIGNED) {
  1436.       res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
  1437.    } else if (stype == TGSI_TYPE_SIGNED) {
  1438.       res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
  1439.    }
  1440.  
  1441.    return res;
  1442. }
  1443.  
  1444. static LLVMValueRef
  1445. emit_fetch_temporary(
  1446.    struct lp_build_tgsi_context * bld_base,
  1447.    const struct tgsi_full_src_register * reg,
  1448.    enum tgsi_opcode_type stype,
  1449.    unsigned swizzle)
  1450. {
  1451.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  1452.    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
  1453.    LLVMBuilderRef builder = gallivm->builder;
  1454.    LLVMValueRef res;
  1455.  
  1456.    if (reg->Register.Indirect) {
  1457.       LLVMValueRef indirect_index;
  1458.       LLVMValueRef index_vec;  /* index into the temp reg array */
  1459.       LLVMValueRef temps_array;
  1460.       LLVMTypeRef fptr_type;
  1461.  
  1462.       indirect_index = get_indirect_index(bld,
  1463.                                           reg->Register.File,
  1464.                                           reg->Register.Index,
  1465.                                           &reg->Indirect);
  1466.  
  1467.       index_vec = get_soa_array_offsets(&bld_base->uint_bld,
  1468.                                         indirect_index,
  1469.                                         swizzle,
  1470.                                         TRUE);
  1471.  
  1472.       /* cast temps_array pointer to float* */
  1473.       fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
  1474.       temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
  1475.  
  1476.       /* Gather values from the temporary register array */
  1477.       res = build_gather(bld_base, temps_array, index_vec, NULL);
  1478.    }
  1479.    else {
  1480.       LLVMValueRef temp_ptr;
  1481.       temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
  1482.       res = LLVMBuildLoad(builder, temp_ptr, "");
  1483.    }
  1484.  
  1485.    if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
  1486.       struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
  1487.       res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
  1488.    }
  1489.  
  1490.    return res;
  1491. }
  1492.  
  1493. static LLVMValueRef
  1494. emit_fetch_system_value(
  1495.    struct lp_build_tgsi_context * bld_base,
  1496.    const struct tgsi_full_src_register * reg,
  1497.    enum tgsi_opcode_type stype,
  1498.    unsigned swizzle)
  1499. {
  1500.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  1501.    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
  1502.    const struct tgsi_shader_info *info = bld->bld_base.info;
  1503.    LLVMBuilderRef builder = gallivm->builder;
  1504.    LLVMValueRef res;
  1505.    enum tgsi_opcode_type atype; /* actual type of the value */
  1506.  
  1507.    assert(!reg->Register.Indirect);
  1508.  
  1509.    switch (info->system_value_semantic_name[reg->Register.Index]) {
  1510.    case TGSI_SEMANTIC_INSTANCEID:
  1511.       res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
  1512.       atype = TGSI_TYPE_UNSIGNED;
  1513.       break;
  1514.  
  1515.    case TGSI_SEMANTIC_VERTEXID:
  1516.       res = bld->system_values.vertex_id;
  1517.       atype = TGSI_TYPE_UNSIGNED;
  1518.       break;
  1519.  
  1520.    case TGSI_SEMANTIC_VERTEXID_NOBASE:
  1521.       res = bld->system_values.vertex_id_nobase;
  1522.       atype = TGSI_TYPE_UNSIGNED;
  1523.       break;
  1524.  
  1525.    case TGSI_SEMANTIC_BASEVERTEX:
  1526.       res = bld->system_values.basevertex;
  1527.       atype = TGSI_TYPE_UNSIGNED;
  1528.       break;
  1529.  
  1530.    case TGSI_SEMANTIC_PRIMID:
  1531.       res = bld->system_values.prim_id;
  1532.       atype = TGSI_TYPE_UNSIGNED;
  1533.       break;
  1534.  
  1535.    default:
  1536.       assert(!"unexpected semantic in emit_fetch_system_value");
  1537.       res = bld_base->base.zero;
  1538.       atype = TGSI_TYPE_FLOAT;
  1539.       break;
  1540.    }
  1541.  
  1542.    if (atype != stype) {
  1543.       if (stype == TGSI_TYPE_FLOAT) {
  1544.          res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
  1545.       } else if (stype == TGSI_TYPE_UNSIGNED) {
  1546.          res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
  1547.       } else if (stype == TGSI_TYPE_SIGNED) {
  1548.          res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
  1549.       }
  1550.    }
  1551.  
  1552.    return res;
  1553. }
  1554.  
  1555. /**
  1556.  * Register fetch with derivatives.
  1557.  */
  1558. static void
  1559. emit_fetch_deriv(
  1560.    struct lp_build_tgsi_soa_context *bld,
  1561.    LLVMValueRef src,
  1562.    LLVMValueRef *res,
  1563.    LLVMValueRef *ddx,
  1564.    LLVMValueRef *ddy)
  1565. {
  1566.    if(res)
  1567.       *res = src;
  1568.  
  1569.    /* TODO: use interpolation coeffs for inputs */
  1570.  
  1571.    if(ddx)
  1572.       *ddx = lp_build_ddx(&bld->bld_base.base, src);
  1573.  
  1574.    if(ddy)
  1575.       *ddy = lp_build_ddy(&bld->bld_base.base, src);
  1576. }
  1577.  
  1578.  
  1579. /**
  1580.  * Fetch the per-channel predicate masks for the given instruction.
  1581.  */
  1582. static void
  1583. emit_fetch_predicate(
  1584.    struct lp_build_tgsi_soa_context *bld,
  1585.    const struct tgsi_full_instruction *inst,
  1586.    LLVMValueRef *pred)
  1587. {
  1588.    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
  1589.    unsigned index;
  1590.    unsigned char swizzles[4];
  1591.    LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};
  1592.    LLVMValueRef value;
  1593.    unsigned chan;
  1594.  
  1595.    if (!inst->Instruction.Predicate) {
  1596.       TGSI_FOR_EACH_CHANNEL( chan ) {
  1597.          pred[chan] = NULL;
  1598.       }
  1599.       return;
  1600.    }
  1601.  
  1602.    swizzles[0] = inst->Predicate.SwizzleX;
  1603.    swizzles[1] = inst->Predicate.SwizzleY;
  1604.    swizzles[2] = inst->Predicate.SwizzleZ;
  1605.    swizzles[3] = inst->Predicate.SwizzleW;
  1606.  
  1607.    index = inst->Predicate.Index;
  1608.    assert(index < LP_MAX_TGSI_PREDS);
  1609.  
  1610.    TGSI_FOR_EACH_CHANNEL( chan ) {
  1611.       unsigned swizzle = swizzles[chan];
  1612.  
  1613.       /*
  1614.        * Only fetch the predicate register channels that are actually listed
  1615.        * in the swizzles
  1616.        */
  1617.       if (!unswizzled[swizzle]) {
  1618.          value = LLVMBuildLoad(builder,
  1619.                                bld->preds[index][swizzle], "");
  1620.  
  1621.          /*
  1622.           * Convert the value to an integer mask.
  1623.           *
  1624.           * TODO: Short-circuit this comparison -- a D3D setp_xx instruction
  1625.           * needlessly causes two comparisons because the intermediate result
  1626.           * is stored as a float vector instead of an integer mask vector.
  1627.           */
  1628.          value = lp_build_compare(bld->bld_base.base.gallivm,
  1629.                                   bld->bld_base.base.type,
  1630.                                   PIPE_FUNC_NOTEQUAL,
  1631.                                   value,
  1632.                                   bld->bld_base.base.zero);
  1633.          if (inst->Predicate.Negate) {
  1634.             value = LLVMBuildNot(builder, value, "");
  1635.          }
  1636.  
  1637.          unswizzled[swizzle] = value;
  1638.       } else {
  1639.          value = unswizzled[swizzle];
  1640.       }
  1641.  
  1642.       pred[chan] = value;
  1643.    }
  1644. }
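
/*
 * Hypothetical helper (not in the original file) isolating the mask
 * conversion performed in emit_fetch_predicate above.  It assumes,
 * matching the call there, that lp_build_compare yields ~0 for true
 * lanes and 0 for false lanes, so a float predicate vector becomes an
 * integer execution mask by comparing against zero.
 */
static LLVMValueRef
float_to_mask_sketch(struct lp_build_tgsi_soa_context *bld,
                     LLVMValueRef value)
{
   /* per lane: mask = (value != 0.0) ? ~0 : 0 */
   return lp_build_compare(bld->bld_base.base.gallivm,
                           bld->bld_base.base.type,
                           PIPE_FUNC_NOTEQUAL,
                           value,
                           bld->bld_base.base.zero);
}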
  1645.  
  1646.  
  1647. /**
  1648.  * Register store.
  1649.  */
  1650. static void
  1651. emit_store_chan(
  1652.    struct lp_build_tgsi_context *bld_base,
  1653.    const struct tgsi_full_instruction *inst,
  1654.    unsigned index,
  1655.    unsigned chan_index,
  1656.    LLVMValueRef pred,
  1657.    LLVMValueRef value)
  1658. {
  1659.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  1660.    struct gallivm_state *gallivm = bld_base->base.gallivm;
  1661.    LLVMBuilderRef builder = gallivm->builder;
  1662.    const struct tgsi_full_dst_register *reg = &inst->Dst[index];
  1663.    struct lp_build_context *float_bld = &bld_base->base;
  1664.    struct lp_build_context *int_bld = &bld_base->int_bld;
  1665.    LLVMValueRef indirect_index = NULL;
  1666.    enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
  1667.  
  1668.    /*
  1669.     * Apply saturation.
  1670.     *
  1671.     * The value is always assumed to be float.
  1672.     */
  1673.    switch( inst->Instruction.Saturate ) {
  1674.    case TGSI_SAT_NONE:
  1675.       break;
  1676.  
  1677.    case TGSI_SAT_ZERO_ONE:
  1678.       assert(dtype == TGSI_TYPE_FLOAT ||
  1679.              dtype == TGSI_TYPE_UNTYPED);
  1680.       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
  1681.       value = lp_build_clamp_zero_one_nanzero(float_bld, value);
  1682.       break;
  1683.  
  1684.    case TGSI_SAT_MINUS_PLUS_ONE:
  1685.       assert(dtype == TGSI_TYPE_FLOAT ||
  1686.              dtype == TGSI_TYPE_UNTYPED);
  1687.       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
  1688.       /* This will give -1.0 for NaN which is probably not what we want. */
  1689.       value = lp_build_max_ext(float_bld, value,
  1690.                                lp_build_const_vec(gallivm, float_bld->type, -1.0),
  1691.                                GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
  1692.       value = lp_build_min(float_bld, value, float_bld->one);
  1693.       break;
  1694.  
  1695.    default:
  1696.       assert(0);
  1697.    }
  1698.  
  1699.    if (reg->Register.Indirect) {
  1700.       indirect_index = get_indirect_index(bld,
  1701.                                           reg->Register.File,
  1702.                                           reg->Register.Index,
  1703.                                           &reg->Indirect);
  1704.    } else {
  1705.       assert(reg->Register.Index <=
  1706.                              bld_base->info->file_max[reg->Register.File]);
  1707.    }
  1708.  
  1709.    if (DEBUG_EXECUTION) {
  1710.       emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
  1711.    }
  1712.  
  1713.    switch( reg->Register.File ) {
  1714.    case TGSI_FILE_OUTPUT:
  1715.       /* Outputs are always stored as floats */
  1716.       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
  1717.  
  1718.       if (reg->Register.Indirect) {
  1719.          LLVMValueRef index_vec;  /* indexes into the output registers */
  1720.          LLVMValueRef outputs_array;
  1721.          LLVMTypeRef fptr_type;
  1722.  
  1723.          index_vec = get_soa_array_offsets(&bld_base->uint_bld,
  1724.                                            indirect_index,
  1725.                                            chan_index,
  1726.                                            TRUE);
  1727.  
  1728.          fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
  1729.          outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");
  1730.  
  1731.          /* Scatter store values into output registers */
  1732.          emit_mask_scatter(bld, outputs_array, index_vec, value,
  1733.                            &bld->exec_mask, pred);
  1734.       }
  1735.       else {
  1736.          LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
  1737.                                                   chan_index);
  1738.          lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr);
  1739.       }
  1740.       break;
  1741.  
  1742.    case TGSI_FILE_TEMPORARY:
  1743.       /* Temporaries are always stored as floats */
  1744.       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
  1745.  
  1746.       if (reg->Register.Indirect) {
  1747.          LLVMValueRef index_vec;  /* indexes into the temp registers */
  1748.          LLVMValueRef temps_array;
  1749.          LLVMTypeRef fptr_type;
  1750.  
  1751.          index_vec = get_soa_array_offsets(&bld_base->uint_bld,
  1752.                                            indirect_index,
  1753.                                            chan_index,
  1754.                                            TRUE);
  1755.  
  1756.          fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
  1757.          temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
  1758.  
  1759.          /* Scatter store values into temp registers */
  1760.          emit_mask_scatter(bld, temps_array, index_vec, value,
  1761.                            &bld->exec_mask, pred);
  1762.       }
  1763.       else {
  1764.          LLVMValueRef temp_ptr;
  1765.          temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
  1766.          lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr);
  1767.       }
  1768.       break;
  1769.  
  1770.    case TGSI_FILE_ADDRESS:
  1771.       assert(dtype == TGSI_TYPE_SIGNED);
  1772.       assert(LLVMTypeOf(value) == int_bld->vec_type);
  1773.       value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
  1774.       lp_exec_mask_store(&bld->exec_mask, int_bld, pred, value,
  1775.                          bld->addr[reg->Register.Index][chan_index]);
  1776.       break;
  1777.  
  1778.    case TGSI_FILE_PREDICATE:
  1779.       assert(LLVMTypeOf(value) == float_bld->vec_type);
  1780.       value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
  1781.       lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value,
  1782.                          bld->preds[reg->Register.Index][chan_index]);
  1783.       break;
  1784.  
  1785.    default:
  1786.       assert( 0 );
  1787.    }
  1788.  
  1789.    (void)dtype;
  1790. }
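
/*
 * Sketch of the TGSI_SAT_MINUS_PLUS_ONE clamp from emit_store_chan,
 * factored into a hypothetical helper.  It assumes the lp_build_max_ext
 * and lp_build_min semantics already used above: clamp to [-1.0, 1.0],
 * with the NaN-return mode picking the non-NaN -1.0 bound for NaN input.
 */
static LLVMValueRef
saturate_minus_plus_one_sketch(struct gallivm_state *gallivm,
                               struct lp_build_context *float_bld,
                               LLVMValueRef value)
{
   /* lower bound: max(value, -1.0), with NaN mapped to -1.0 */
   value = lp_build_max_ext(float_bld, value,
                            lp_build_const_vec(gallivm, float_bld->type, -1.0),
                            GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
   /* upper bound: min(value, 1.0) */
   return lp_build_min(float_bld, value, float_bld->one);
}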
  1791.  
  1792. /*
  1793.  * Called at the beginning of the translation of each TGSI instruction, to
  1794.  * emit some debug code.
  1795.  */
  1796. static void
  1797. emit_debug(
  1798.    struct lp_build_tgsi_context * bld_base,
  1799.    const struct tgsi_full_instruction * inst,
  1800.    const struct tgsi_opcode_info * info)
  1801.  
  1802. {
  1803.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  1804.  
  1805.    if (DEBUG_EXECUTION) {
  1806.       /*
  1807.        * Dump the TGSI instruction.
  1808.        */
  1809.  
  1810.       struct gallivm_state *gallivm = bld_base->base.gallivm;
  1811.       char buf[512];
  1812.       buf[0] = '$';
  1813.       buf[1] = ' ';
  1814.       tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
  1815.       lp_build_printf(gallivm, buf);
  1816.  
  1817.       /* Dump the execution mask.
  1818.        */
  1819.       if (bld->exec_mask.has_mask) {
  1820.          lp_build_print_value(gallivm, "    mask = ", bld->exec_mask.exec_mask);
  1821.       }
  1822.    }
  1823. }
  1824.  
  1825. static void
  1826. emit_store(
  1827.    struct lp_build_tgsi_context * bld_base,
  1828.    const struct tgsi_full_instruction * inst,
  1829.    const struct tgsi_opcode_info * info,
  1830.    LLVMValueRef dst[4])
  1831.  
  1832. {
  1833.    unsigned chan_index;
  1834.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  1835.  
  1836.    if(info->num_dst) {
  1837.       LLVMValueRef pred[TGSI_NUM_CHANNELS];
  1838.  
  1839.       emit_fetch_predicate( bld, inst, pred );
  1840.  
  1841.       TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
  1842.          emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]);
  1843.       }
  1844.    }
  1845. }
  1846.  
  1847. static unsigned
  1848. tgsi_to_pipe_tex_target(unsigned tgsi_target)
  1849. {
  1850.    switch (tgsi_target) {
  1851.    case TGSI_TEXTURE_BUFFER:
  1852.       return PIPE_BUFFER;
  1853.    case TGSI_TEXTURE_1D:
  1854.    case TGSI_TEXTURE_SHADOW1D:
  1855.       return PIPE_TEXTURE_1D;
  1856.    case TGSI_TEXTURE_2D:
  1857.    case TGSI_TEXTURE_SHADOW2D:
  1858.    case TGSI_TEXTURE_2D_MSAA:
  1859.       return PIPE_TEXTURE_2D;
  1860.    case TGSI_TEXTURE_3D:
  1861.       return PIPE_TEXTURE_3D;
  1862.    case TGSI_TEXTURE_CUBE:
  1863.    case TGSI_TEXTURE_SHADOWCUBE:
  1864.       return PIPE_TEXTURE_CUBE;
  1865.    case TGSI_TEXTURE_RECT:
  1866.    case TGSI_TEXTURE_SHADOWRECT:
  1867.       return PIPE_TEXTURE_RECT;
  1868.    case TGSI_TEXTURE_1D_ARRAY:
  1869.    case TGSI_TEXTURE_SHADOW1D_ARRAY:
  1870.       return PIPE_TEXTURE_1D_ARRAY;
  1871.    case TGSI_TEXTURE_2D_ARRAY:
  1872.    case TGSI_TEXTURE_SHADOW2D_ARRAY:
  1873.    case TGSI_TEXTURE_2D_ARRAY_MSAA:
  1874.       return PIPE_TEXTURE_2D_ARRAY;
  1875.    case TGSI_TEXTURE_CUBE_ARRAY:
  1876.    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
  1877.       return PIPE_TEXTURE_CUBE_ARRAY;
  1878.    default:
  1879.       assert(0);
  1880.       return PIPE_BUFFER;
  1881.    }
  1882. }
  1883.  
  1884.  
  1885. static enum lp_sampler_lod_property
  1886. lp_build_lod_property(
  1887.    struct lp_build_tgsi_context *bld_base,
  1888.    const struct tgsi_full_instruction *inst,
  1889.    unsigned src_op)
  1890. {
  1891.    const struct tgsi_full_src_register *reg = &inst->Src[src_op];
  1892.    enum lp_sampler_lod_property lod_property;
  1893.  
  1894.    /*
  1895.     * Not much we can do here. We could try catching inputs declared
  1896.     * with constant interpolation, but it is unclear whether that is
  1897.     * worth it: for TEX opcodes as well as FETCH/LD the lod comes from
  1898.     * the same reg as the coords, so it could only help SAMPLE/TXQ/
  1899.     * SVIEWINFO, just like the constant/immediate recognition below.
  1900.     * It would be more valuable to recognize temps holding broadcast
  1901.     * scalars, but there is no way to do that here. Asking llvm did
  1902.     * not help either (using LLVMIsConstant, even though that isn't
  1903.     * exactly what we'd need); even something as simple as
  1904.     * IMM[0] UINT32 (0,-1,0,0)
  1905.     * MOV TEMP[0] IMM[0].yyyy
  1906.     * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
  1907.     * doesn't work.
  1908.     * This means there is ZERO chance this will ever catch a scalar
  1909.     * lod with traditional tex opcodes or texel fetches, since the
  1910.     * lod comes from the same reg as the coords (except maybe in
  1911.     * test shaders using constant coords).
  1912.     * There is at least hope for sample opcodes as well as size queries.
  1913.     */
  1914.    if (reg->Register.File == TGSI_FILE_CONSTANT ||
  1915.        reg->Register.File == TGSI_FILE_IMMEDIATE) {
  1916.       lod_property = LP_SAMPLER_LOD_SCALAR;
  1917.    }
  1918.    else if (bld_base->info->processor == TGSI_PROCESSOR_FRAGMENT) {
  1919.       if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
  1920.          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
  1921.       }
  1922.       else {
  1923.          lod_property = LP_SAMPLER_LOD_PER_QUAD;
  1924.       }
  1925.    }
  1926.    else {
  1927.       /* never use scalar (per-quad) lod; the results are just too wrong. */
  1928.       lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
  1929.    }
  1930.    return lod_property;
  1931. }
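
/*
 * Non-original helper sketch showing how the texture emitters below
 * fold the op type and the lod property returned above into a sample
 * key, mirroring the shift/or expressions in emit_tex and emit_sample.
 */
static unsigned
pack_sample_key_sketch(enum lp_sampler_op_type sampler_op,
                       enum lp_sampler_lod_property lod_property)
{
   unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
   /* the lod property lives in its own bitfield within the key */
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
   return sample_key;
}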
  1932.  
  1933.  
  1934. /**
  1935.  * High-level instruction translators.
  1936.  */
  1937.  
  1938. static void
  1939. emit_tex( struct lp_build_tgsi_soa_context *bld,
  1940.           const struct tgsi_full_instruction *inst,
  1941.           enum lp_build_tex_modifier modifier,
  1942.           LLVMValueRef *texel,
  1943.           unsigned sampler_reg,
  1944.           enum lp_sampler_op_type sampler_op)
  1945. {
  1946.    unsigned unit = inst->Src[sampler_reg].Register.Index;
  1947.    LLVMValueRef oow = NULL;
  1948.    LLVMValueRef lod = NULL;
  1949.    LLVMValueRef coords[5];
  1950.    LLVMValueRef offsets[3] = { NULL };
  1951.    struct lp_derivatives derivs;
  1952.    struct lp_sampler_params params;
  1953.    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
  1954.    unsigned num_derivs, num_offsets, i;
  1955.    unsigned shadow_coord = 0;
  1956.    unsigned layer_coord = 0;
  1957.    unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
  1958.  
  1959.    memset(&params, 0, sizeof(params));
  1960.  
  1961.    if (!bld->sampler) {
  1962.       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
  1963.       for (i = 0; i < 4; i++) {
  1964.          texel[i] = bld->bld_base.base.undef;
  1965.       }
  1966.       return;
  1967.    }
  1968.  
  1969.    switch (inst->Texture.Texture) {
  1970.    case TGSI_TEXTURE_1D_ARRAY:
  1971.       layer_coord = 1;
  1972.       /* fallthrough */
  1973.    case TGSI_TEXTURE_1D:
  1974.       num_offsets = 1;
  1975.       num_derivs = 1;
  1976.       break;
  1977.    case TGSI_TEXTURE_2D_ARRAY:
  1978.       layer_coord = 2;
  1979.       /* fallthrough */
  1980.    case TGSI_TEXTURE_2D:
  1981.    case TGSI_TEXTURE_RECT:
  1982.       num_offsets = 2;
  1983.       num_derivs = 2;
  1984.       break;
  1985.    case TGSI_TEXTURE_SHADOW1D_ARRAY:
  1986.       layer_coord = 1;
  1987.       /* fallthrough */
  1988.    case TGSI_TEXTURE_SHADOW1D:
  1989.       shadow_coord = 2;
  1990.       num_offsets = 1;
  1991.       num_derivs = 1;
  1992.       break;
  1993.    case TGSI_TEXTURE_SHADOW2D_ARRAY:
  1994.       layer_coord = 2;
  1995.       shadow_coord = 3;
  1996.       num_offsets = 2;
  1997.       num_derivs = 2;
  1998.       break;
  1999.    case TGSI_TEXTURE_SHADOW2D:
  2000.    case TGSI_TEXTURE_SHADOWRECT:
  2001.       shadow_coord = 2;
  2002.       num_offsets = 2;
  2003.       num_derivs = 2;
  2004.       break;
  2005.    case TGSI_TEXTURE_CUBE:
  2006.       num_offsets = 2;
  2007.       num_derivs = 3;
  2008.       break;
  2009.    case TGSI_TEXTURE_3D:
  2010.       num_offsets = 3;
  2011.       num_derivs = 3;
  2012.       break;
  2013.    case TGSI_TEXTURE_SHADOWCUBE:
  2014.       shadow_coord = 3;
  2015.       num_offsets = 2;
  2016.       num_derivs = 3;
  2017.       break;
  2018.    case TGSI_TEXTURE_CUBE_ARRAY:
  2019.       num_offsets = 2;
  2020.       num_derivs = 3;
  2021.       layer_coord = 3;
  2022.       break;
  2023.    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
  2024.       num_offsets = 2;
  2025.       num_derivs = 3;
  2026.       layer_coord = 3;
  2027.       shadow_coord = 4; /* the shadow coord comes from src1, not the coord reg */
  2028.       break;
  2029.    case TGSI_TEXTURE_2D_MSAA:
  2030.    case TGSI_TEXTURE_2D_ARRAY_MSAA:
  2031.    default:
  2032.       assert(0);
  2033.       return;
  2034.    }
  2035.  
  2036.    /* Note lod and especially projected are illegal in a LOT of cases */
  2037.    if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
  2038.        modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
  2039.       if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
  2040.           inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
  2041.          /* note that shadow cube array with bias/explicit lod does not exist */
  2042.          lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
  2043.       }
  2044.       else {
  2045.          lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
  2046.       }
  2047.       if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
  2048.          sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
  2049.       }
  2050.       else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
  2051.          sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
  2052.       }
  2053.       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
  2054.    }
  2055.  
  2056.    if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
  2057.       oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
  2058.       oow = lp_build_rcp(&bld->bld_base.base, oow);
  2059.    }
  2060.  
  2061.    for (i = 0; i < num_derivs; i++) {
  2062.       coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
  2063.       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
  2064.          coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
  2065.    }
  2066.    for (i = num_derivs; i < 5; i++) {
  2067.       coords[i] = bld->bld_base.base.undef;
  2068.    }
  2069.  
  2070.    /* Layer coord always goes into 3rd slot, except for cube map arrays */
  2071.    if (layer_coord) {
  2072.       if (layer_coord == 3) {
  2073.          coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
  2074.       }
  2075.       else {
  2076.          coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
  2077.       }
  2078.       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
  2079.          coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
  2080.    }
  2081.    /* Shadow coord always occupies the 5th slot. */
  2082.    if (shadow_coord) {
  2083.       sample_key |= LP_SAMPLER_SHADOW;
  2084.       if (shadow_coord == 4) {
  2085.          coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
  2086.       }
  2087.       else {
  2088.          coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
  2089.       }
  2090.       if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
  2091.          coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
  2092.    }
  2093.  
  2094.    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
  2095.       unsigned dim;
  2096.       sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
  2097.       for (dim = 0; dim < num_derivs; ++dim) {
  2098.          derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
  2099.          derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
  2100.       }
  2101.       params.derivs = &derivs;
  2102.       /*
  2103.        * could also check all src regs if constant but I doubt such
  2104.        * cases exist in practice.
  2105.        */
  2106.       if (bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT) {
  2107.          if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
  2108.             lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
  2109.          }
  2110.          else {
  2111.             lod_property = LP_SAMPLER_LOD_PER_QUAD;
  2112.          }
  2113.       }
  2114.       else {
  2115.          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
  2116.       }
  2117.    }
  2118.    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
  2119.  
  2120.    /* we don't handle the 4 offset version of tg4 */
  2121.    if (inst->Texture.NumOffsets == 1) {
  2122.       unsigned dim;
  2123.       sample_key |= LP_SAMPLER_OFFSETS;
  2124.       for (dim = 0; dim < num_offsets; dim++) {
  2125.          offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
  2126.       }
  2127.    }
  2128.  
  2129.    params.type = bld->bld_base.base.type;
  2130.    params.sample_key = sample_key;
  2131.    params.texture_index = unit;
  2132.    params.sampler_index = unit;
  2133.    params.context_ptr = bld->context_ptr;
  2134.    params.coords = coords;
  2135.    params.offsets = offsets;
  2136.    params.lod = lod;
  2137.    params.texel = texel;
  2138.  
  2139.    bld->sampler->emit_tex_sample(bld->sampler,
  2140.                                  bld->bld_base.base.gallivm,
  2141.                                  &params);
  2142. }
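
/*
 * Minimal sketch (using only lp_build_rcp and lp_build_mul, both
 * already used in emit_tex above) of the projective texturing math:
 * each coordinate is scaled by 1/w before sampling, i.e.
 * coord' = coord * (1.0 / w).
 */
static LLVMValueRef
project_coord_sketch(struct lp_build_context *bld,
                     LLVMValueRef coord,
                     LLVMValueRef w)
{
   LLVMValueRef oow = lp_build_rcp(bld, w);   /* 1.0 / w */
   return lp_build_mul(bld, coord, oow);
}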
  2143.  
  2144. static void
  2145. emit_sample(struct lp_build_tgsi_soa_context *bld,
  2146.             const struct tgsi_full_instruction *inst,
  2147.             enum lp_build_tex_modifier modifier,
  2148.             boolean compare,
  2149.             LLVMValueRef *texel)
  2150. {
  2151.    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
  2152.    unsigned texture_unit, sampler_unit;
  2153.    LLVMValueRef lod = NULL;
  2154.    LLVMValueRef coords[5];
  2155.    LLVMValueRef offsets[3] = { NULL };
  2156.    struct lp_derivatives derivs;
  2157.    struct lp_sampler_params params;
  2158.    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
  2159.  
  2160.    unsigned num_offsets, num_derivs, i;
  2161.    unsigned layer_coord = 0;
  2162.    unsigned sample_key = LP_SAMPLER_OP_TEXTURE << LP_SAMPLER_OP_TYPE_SHIFT;
  2163.  
  2164.    memset(&params, 0, sizeof(params));
  2165.  
  2166.    if (!bld->sampler) {
  2167.       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
  2168.       for (i = 0; i < 4; i++) {
  2169.          texel[i] = bld->bld_base.base.undef;
  2170.       }
  2171.       return;
  2172.    }
  2173.  
  2174.    /*
  2175.     * Unlike old-style tex opcodes, the texture/sampler indices
  2176.     * always come from src1 and src2 respectively.
  2177.     */
  2178.    texture_unit = inst->Src[1].Register.Index;
  2179.    sampler_unit = inst->Src[2].Register.Index;
  2180.  
  2181.    /*
  2182.     * Note that inst->Texture.Texture contains the number of offsets;
  2183.     * however, the target information is NOT there and comes from the
  2184.     * declared sampler views instead.
  2185.     */
  2186.    switch (bld->sv[texture_unit].Resource) {
  2187.    case TGSI_TEXTURE_1D:
  2188.       num_offsets = 1;
  2189.       num_derivs = 1;
  2190.       break;
  2191.    case TGSI_TEXTURE_1D_ARRAY:
  2192.       layer_coord = 1;
  2193.       num_offsets = 1;
  2194.       num_derivs = 1;
  2195.       break;
  2196.    case TGSI_TEXTURE_2D:
  2197.    case TGSI_TEXTURE_RECT:
  2198.       num_offsets = 2;
  2199.       num_derivs = 2;
  2200.       break;
  2201.    case TGSI_TEXTURE_2D_ARRAY:
  2202.       layer_coord = 2;
  2203.       num_offsets = 2;
  2204.       num_derivs = 2;
  2205.       break;
  2206.    case TGSI_TEXTURE_CUBE:
  2207.       num_offsets = 2;
  2208.       num_derivs = 3;
  2209.       break;
  2210.    case TGSI_TEXTURE_3D:
  2211.       num_offsets = 3;
  2212.       num_derivs = 3;
  2213.       break;
  2214.    case TGSI_TEXTURE_CUBE_ARRAY:
  2215.       layer_coord = 3;
  2216.       num_offsets = 2;
  2217.       num_derivs = 3;
  2218.       break;
  2219.    default:
  2220.       assert(0);
  2221.       return;
  2222.    }
  2223.  
  2224.    if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
  2225.        modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
  2226.       lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
  2227.       if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
  2228.          sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
  2229.       }
  2230.       else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
  2231.          sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
  2232.       }
  2233.       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
  2234.    }
  2235.    else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
  2236.       /* XXX might be better to explicitly pass the level zero information */
  2237.       sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
  2238.       lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
  2239.    }
  2240.  
  2241.    for (i = 0; i < num_derivs; i++) {
  2242.       coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
  2243.    }
  2244.    for (i = num_derivs; i < 5; i++) {
  2245.       coords[i] = bld->bld_base.base.undef;
  2246.    }
  2247.  
  2248.    /* Layer coord always goes into 3rd slot, except for cube map arrays */
  2249.    if (layer_coord) {
  2250.       if (layer_coord == 3)
  2251.          coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
  2252.       else
  2253.          coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
  2254.    }
  2255.    /* Shadow coord always occupies the 5th slot. */
  2256.    if (compare) {
  2257.       sample_key |= LP_SAMPLER_SHADOW;
  2258.       coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
  2259.    }
  2260.  
  2261.    if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
  2262.       unsigned dim;
  2263.       sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
  2264.       for (dim = 0; dim < num_derivs; ++dim) {
  2265.          derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
  2266.          derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
  2267.       }
  2268.       params.derivs = &derivs;
  2269.       /*
  2270.        * could also check all src regs if constant but I doubt such
  2271.        * cases exist in practice.
  2272.        */
  2273.       if (bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT) {
  2274.          if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
  2275.             lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
  2276.          }
  2277.          else {
  2278.             lod_property = LP_SAMPLER_LOD_PER_QUAD;
  2279.          }
  2280.       }
  2281.       else {
  2282.          lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
  2283.       }
  2284.    }
  2285.  
  2286.    /* some advanced gather instructions (txgo) would require 4 offsets */
  2287.    if (inst->Texture.NumOffsets == 1) {
  2288.       unsigned dim;
  2289.       sample_key |= LP_SAMPLER_OFFSETS;
  2290.       for (dim = 0; dim < num_offsets; dim++) {
  2291.          offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
  2292.       }
  2293.    }
  2294.    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
  2295.  
  2296.    params.type = bld->bld_base.base.type;
  2297.    params.sample_key = sample_key;
  2298.    params.texture_index = texture_unit;
  2299.    params.sampler_index = sampler_unit;
  2300.    params.context_ptr = bld->context_ptr;
  2301.    params.coords = coords;
  2302.    params.offsets = offsets;
  2303.    params.lod = lod;
  2304.    params.texel = texel;
  2305.  
  2306.    bld->sampler->emit_tex_sample(bld->sampler,
  2307.                                  bld->bld_base.base.gallivm,
  2308.                                  &params);
  2309.  
  2310.    if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_RED ||
  2311.        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_GREEN ||
  2312.        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_BLUE ||
  2313.        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_ALPHA) {
  2314.       unsigned char swizzles[4];
  2315.       swizzles[0] = inst->Src[1].Register.SwizzleX;
  2316.       swizzles[1] = inst->Src[1].Register.SwizzleY;
  2317.       swizzles[2] = inst->Src[1].Register.SwizzleZ;
  2318.       swizzles[3] = inst->Src[1].Register.SwizzleW;
  2319.  
  2320.       lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
  2321.    }
  2322. }
  2323.  
  2324. static void
  2325. emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
  2326.                    const struct tgsi_full_instruction *inst,
  2327.                    LLVMValueRef *texel,
  2328.                    boolean is_samplei)
  2329. {
  2330.    unsigned unit, target;
  2331.    LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
  2332.    LLVMValueRef explicit_lod = NULL;
  2333.    LLVMValueRef coords[5];
  2334.    LLVMValueRef offsets[3] = { NULL };
  2335.    struct lp_sampler_params params;
  2336.    enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
  2337.    unsigned dims, i;
  2338.    unsigned layer_coord = 0;
  2339.    unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
  2340.  
  2341.    memset(&params, 0, sizeof(params));
  2342.  
  2343.    if (!bld->sampler) {
  2344.       _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
  2345.       for (i = 0; i < 4; i++) {
  2346.          texel[i] = coord_undef;
  2347.       }
  2348.       return;
  2349.    }
  2350.  
  2351.    unit = inst->Src[1].Register.Index;
  2352.  
  2353.    if (is_samplei) {
  2354.       target = bld->sv[unit].Resource;
  2355.    }
  2356.    else {
  2357.       target = inst->Texture.Texture;
  2358.    }
  2359.  
  2360.    switch (target) {
  2361.    case TGSI_TEXTURE_1D:
  2362.    case TGSI_TEXTURE_BUFFER:
  2363.       dims = 1;
  2364.       break;
  2365.    case TGSI_TEXTURE_1D_ARRAY:
  2366.       layer_coord = 1;
  2367.       dims = 1;
  2368.       break;
  2369.    case TGSI_TEXTURE_2D:
  2370.    case TGSI_TEXTURE_RECT:
  2371.    case TGSI_TEXTURE_2D_MSAA:
  2372.       dims = 2;
  2373.       break;
  2374.    case TGSI_TEXTURE_2D_ARRAY:
  2375.    case TGSI_TEXTURE_2D_ARRAY_MSAA:
  2376.       layer_coord = 2;
  2377.       dims = 2;
  2378.       break;
  2379.    case TGSI_TEXTURE_3D:
  2380.       dims = 3;
  2381.       break;
  2382.    default:
  2383.       assert(0);
  2384.       return;
  2385.    }
  2386.  
  2387.    /* we always have an explicit lod, except for buffer and msaa targets */
  2388.    if (target != TGSI_TEXTURE_BUFFER &&
  2389.        target != TGSI_TEXTURE_2D_MSAA &&
  2390.        target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
  2391.       sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
  2392.       explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
  2393.       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
  2394.    }
  2395.    /* XXX: for real msaa support, the w component would be the sample index. */
  2396.  
  2397.    for (i = 0; i < dims; i++) {
  2398.       coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
  2399.    }
  2400.    /* we never use more than 3 coords here, but emit_fetch_texel copies all 5 anyway */
  2401.    for (i = dims; i < 5; i++) {
  2402.       coords[i] = coord_undef;
  2403.    }
  2404.    if (layer_coord)
  2405.       coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
  2406.  
  2407.    if (inst->Texture.NumOffsets == 1) {
  2408.       unsigned dim;
  2409.       sample_key |= LP_SAMPLER_OFFSETS;
  2410.       for (dim = 0; dim < dims; dim++) {
  2411.          offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
  2412.       }
  2413.    }
  2414.    sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
  2415.  
  2416.    params.type = bld->bld_base.base.type;
  2417.    params.sample_key = sample_key;
  2418.    params.texture_index = unit;
  2419.    params.sampler_index = unit;
  2420.    params.context_ptr = bld->context_ptr;
  2421.    params.coords = coords;
  2422.    params.offsets = offsets;
  2423.    params.derivs = NULL;
  2424.    params.lod = explicit_lod;
  2425.    params.texel = texel;
  2426.  
  2427.    bld->sampler->emit_tex_sample(bld->sampler,
  2428.                                  bld->bld_base.base.gallivm,
  2429.                                  &params);
  2430.  
  2431.    if (is_samplei &&
  2432.        (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_RED ||
  2433.         inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_GREEN ||
  2434.         inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_BLUE ||
  2435.         inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_ALPHA)) {
  2436.       unsigned char swizzles[4];
  2437.       swizzles[0] = inst->Src[1].Register.SwizzleX;
  2438.       swizzles[1] = inst->Src[1].Register.SwizzleY;
  2439.       swizzles[2] = inst->Src[1].Register.SwizzleZ;
  2440.       swizzles[3] = inst->Src[1].Register.SwizzleW;
  2441.  
  2442.       lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
  2443.    }
  2444. }
  2445.  
  2446. static void
  2447. emit_size_query( struct lp_build_tgsi_soa_context *bld,
  2448.                  const struct tgsi_full_instruction *inst,
  2449.                  LLVMValueRef *sizes_out,
  2450.                  boolean is_sviewinfo)
  2451. {
  2452.    LLVMValueRef explicit_lod;
  2453.    enum lp_sampler_lod_property lod_property;
  2454.    unsigned has_lod;
  2455.    unsigned i;
  2456.    unsigned unit = inst->Src[1].Register.Index;
  2457.    unsigned target, pipe_target;
  2458.  
  2459.    if (is_sviewinfo) {
  2460.       target = bld->sv[unit].Resource;
  2461.    }
  2462.    else {
  2463.       target = inst->Texture.Texture;
  2464.    }
  2465.    switch (target) {
  2466.    case TGSI_TEXTURE_BUFFER:
  2467.    case TGSI_TEXTURE_RECT:
  2468.    case TGSI_TEXTURE_SHADOWRECT:
  2469.       has_lod = 0;
  2470.       break;
  2471.    default:
  2472.       has_lod = 1;
  2473.       break;
  2474.    }
  2475.  
  2476.    if (!bld->sampler) {
  2477.       _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
  2478.       for (i = 0; i < 4; i++)
  2479.          sizes_out[i] = bld->bld_base.int_bld.undef;
  2480.       return;
  2481.    }
  2482.  
  2483.    if (has_lod) {
  2484.       explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
  2485.       lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
  2486.    }
  2487.    else {
  2488.       explicit_lod = NULL;
  2489.       lod_property = LP_SAMPLER_LOD_SCALAR;
  2490.    }
  2491.  
  2492.  
  2493.    pipe_target = tgsi_to_pipe_tex_target(target);
  2494.  
  2495.    bld->sampler->emit_size_query(bld->sampler,
  2496.                                  bld->bld_base.base.gallivm,
  2497.                                  bld->bld_base.int_bld.type,
  2498.                                  unit, pipe_target,
  2499.                                  bld->context_ptr,
  2500.                                  TRUE,
  2501.                                  lod_property,
  2502.                                  explicit_lod,
  2503.                                  sizes_out);
  2504. }
  2505.  
  2506. static boolean
  2507. near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
  2508.                    int pc)
  2509. {
  2510.    int i;
  2511.  
  2512.    for (i = 0; i < 5; i++) {
  2513.       unsigned opcode;
  2514.  
  2515.       if (pc + i >= bld->bld_base.info->num_instructions)
  2516.          return TRUE;
  2517.  
  2518.       opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
  2519.  
  2520.       if (opcode == TGSI_OPCODE_END)
  2521.          return TRUE;
  2522.  
  2523.       if (opcode == TGSI_OPCODE_TEX ||
  2524.          opcode == TGSI_OPCODE_TXP ||
  2525.          opcode == TGSI_OPCODE_TXD ||
  2526.          opcode == TGSI_OPCODE_TXB ||
  2527.          opcode == TGSI_OPCODE_TXL ||
  2528.          opcode == TGSI_OPCODE_TXF ||
  2529.          opcode == TGSI_OPCODE_TXQ ||
  2530.          opcode == TGSI_OPCODE_TEX2 ||
  2531.          opcode == TGSI_OPCODE_TXB2 ||
  2532.          opcode == TGSI_OPCODE_TXL2 ||
  2533.          opcode == TGSI_OPCODE_SAMPLE ||
  2534.          opcode == TGSI_OPCODE_SAMPLE_B ||
  2535.          opcode == TGSI_OPCODE_SAMPLE_C ||
  2536.          opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
  2537.          opcode == TGSI_OPCODE_SAMPLE_D ||
  2538.          opcode == TGSI_OPCODE_SAMPLE_I ||
  2539.          opcode == TGSI_OPCODE_SAMPLE_L ||
  2540.          opcode == TGSI_OPCODE_SVIEWINFO ||
  2541.          opcode == TGSI_OPCODE_CAL ||
  2542.          opcode == TGSI_OPCODE_CALLNZ ||
  2543.          opcode == TGSI_OPCODE_IF ||
  2544.          opcode == TGSI_OPCODE_UIF ||
  2545.          opcode == TGSI_OPCODE_BGNLOOP ||
  2546.          opcode == TGSI_OPCODE_SWITCH)
  2547.          return FALSE;
  2548.    }
  2549.  
  2550.    return TRUE;
  2551. }
  2552.  
  2553.  
  2554.  
  2555. /**
  2556.  * Kill fragment if any of the src register values are negative.
  2557.  */
  2558. static void
  2559. emit_kill_if(
  2560.    struct lp_build_tgsi_soa_context *bld,
  2561.    const struct tgsi_full_instruction *inst,
  2562.    int pc)
  2563. {
  2564.    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
  2565.    const struct tgsi_full_src_register *reg = &inst->Src[0];
  2566.    LLVMValueRef terms[TGSI_NUM_CHANNELS];
  2567.    LLVMValueRef mask;
  2568.    unsigned chan_index;
  2569.  
  2570.    memset(&terms, 0, sizeof terms);
  2571.  
  2572.    TGSI_FOR_EACH_CHANNEL( chan_index ) {
  2573.       unsigned swizzle;
  2574.  
  2575.       /* Unswizzle channel */
  2576.       swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
  2577.  
  2578.       /* Check if the component has not been already tested. */
  2579.       assert(swizzle < TGSI_NUM_CHANNELS);
  2580.       if( !terms[swizzle] )
  2581.          /* TODO: change the comparison operator instead of setting the sign */
  2582.          terms[swizzle] =  lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
  2583.    }
  2584.  
  2585.    mask = NULL;
  2586.    TGSI_FOR_EACH_CHANNEL( chan_index ) {
  2587.       if(terms[chan_index]) {
  2588.          LLVMValueRef chan_mask;
  2589.  
  2590.          /*
  2591.           * If term < 0 then mask = 0 else mask = ~0.
  2592.           */
  2593.          chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
  2594.  
  2595.          if(mask)
  2596.             mask = LLVMBuildAnd(builder, mask, chan_mask, "");
  2597.          else
  2598.             mask = chan_mask;
  2599.       }
  2600.    }
  2601.  
  2602.    if (bld->exec_mask.has_mask) {
  2603.       LLVMValueRef invmask;
  2604.       invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
  2605.       mask = LLVMBuildOr(builder, mask, invmask, "");
  2606.    }
  2607.  
  2608.    lp_build_mask_update(bld->mask, mask);
  2609.    if (!near_end_of_shader(bld, pc))
  2610.       lp_build_mask_check(bld->mask);
  2611. }
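
/*
 * Non-original sketch of the kill-mask combination used in emit_kill_if
 * and emit_kill above: lanes that are inactive under the current
 * execution mask must survive the kill, so the computed mask is OR'ed
 * with the inverted exec mask before updating the shader mask.
 */
static LLVMValueRef
combine_kill_mask_sketch(struct lp_build_tgsi_soa_context *bld,
                         LLVMValueRef mask)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   if (bld->exec_mask.has_mask) {
      LLVMValueRef invmask =
         LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
      mask = LLVMBuildOr(builder, mask, invmask, "");
   }
   return mask;
}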
  2612.  
  2613.  
  2614. /**
  2615.  * Unconditional fragment kill.
  2616.  * The only predication is the execution mask which will apply if
  2617.  * we're inside a loop or conditional.
  2618.  */
  2619. static void
  2620. emit_kill(struct lp_build_tgsi_soa_context *bld,
  2621.           int pc)
  2622. {
  2623.    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
  2624.    LLVMValueRef mask;
  2625.  
  2626.    /* For those channels which are "alive", disable fragment shader
  2627.     * execution.
  2628.     */
  2629.    if (bld->exec_mask.has_mask) {
  2630.       mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
  2631.    }
  2632.    else {
  2633.       LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
  2634.       mask = zero;
  2635.    }
  2636.  
  2637.    lp_build_mask_update(bld->mask, mask);
  2638.  
  2639.    if (!near_end_of_shader(bld, pc))
  2640.       lp_build_mask_check(bld->mask);
  2641. }
  2642.  
  2643.  
  2644. /**
  2645.  * Emit code which will dump the values of all the registers in the
  2646.  * given register file to stdout.
  2647.  */
  2648. static void
  2649. emit_dump_file(struct lp_build_tgsi_soa_context *bld,
  2650.                unsigned file)
  2651. {
  2652.    const struct tgsi_shader_info *info = bld->bld_base.info;
  2653.    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
  2654.    LLVMBuilderRef builder = gallivm->builder;
  2655.    LLVMValueRef reg_ptr;
  2656.    int index;
  2657.    int max_index = info->file_max[file];
  2658.  
  2659.    /*
  2660.     * Some register files, particularly constants, can be very large,
  2661.     * and dumping everything could make this unusably slow.
  2662.     */
  2663.    max_index = MIN2(max_index, 32);
  2664.  
  2665.    for (index = 0; index <= max_index; index++) {
  2666.       LLVMValueRef res;
  2667.       unsigned mask;
  2668.       int chan;
  2669.  
  2670.       if (index < 8 * sizeof(unsigned) &&
  2671.           (info->file_mask[file] & (1 << index)) == 0) {
  2672.          /* This was not declared. */
  2673.          continue;
  2674.       }
  2675.  
  2676.       if (file == TGSI_FILE_INPUT) {
  2677.          mask = info->input_usage_mask[index];
  2678.       } else {
  2679.          mask = TGSI_WRITEMASK_XYZW;
  2680.       }
  2681.  
  2682.       for (chan = 0; chan < 4; chan++) {
  2683.          if ((mask & (1 << chan)) == 0) {
  2684.             /* This channel is not used. */
  2685.             continue;
  2686.          }
  2687.  
  2688.          if (file == TGSI_FILE_CONSTANT) {
  2689.             struct tgsi_full_src_register reg;
  2690.             memset(&reg, 0, sizeof reg);
  2691.             reg.Register.File = file;
  2692.             reg.Register.Index = index;
  2693.             reg.Register.SwizzleX = 0;
  2694.             reg.Register.SwizzleY = 1;
  2695.             reg.Register.SwizzleZ = 2;
  2696.             reg.Register.SwizzleW = 3;
  2697.  
  2698.             res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, &reg, TGSI_TYPE_FLOAT, chan);
  2699.             if (!res) {
  2700.                continue;
  2701.             }
  2702.          } else if (file == TGSI_FILE_INPUT) {
  2703.             res = bld->inputs[index][chan];
  2704.             if (!res) {
  2705.                continue;
  2706.             }
  2707.          } else if (file == TGSI_FILE_TEMPORARY) {
  2708.             reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
  2709.             assert(reg_ptr);
  2710.             res = LLVMBuildLoad(builder, reg_ptr, "");
  2711.          } else if (file == TGSI_FILE_OUTPUT) {
  2712.             reg_ptr = lp_get_output_ptr(bld, index, chan);
  2713.             assert(reg_ptr);
  2714.             res = LLVMBuildLoad(builder, reg_ptr, "");
  2715.          } else {
  2716.             assert(0);
  2717.             continue;
  2718.          }
  2719.  
  2720.          emit_dump_reg(gallivm, file, index, chan, res);
  2721.       }
  2722.    }
  2723. }
  2724.  
  2725.  
  2726.  
  2727. void
  2728. lp_emit_declaration_soa(
  2729.    struct lp_build_tgsi_context *bld_base,
  2730.    const struct tgsi_full_declaration *decl)
  2731. {
  2732.    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
  2733.    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
  2734.    LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
  2735.    const unsigned first = decl->Range.First;
  2736.    const unsigned last = decl->Range.Last;
  2737.    unsigned idx, i;
  2738.  
  2739.    assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);
  2740.  
  2741.    switch (decl->Declaration.File) {
  2742.    case TGSI_FILE_TEMPORARY:
  2743.       if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
  2744.          assert(last < LP_MAX_INLINED_TEMPS);
  2745.          for (idx = first; idx <= last; ++idx) {
  2746.             for (i = 0; i < TGSI_NUM_CHANNELS; i++)
  2747.                bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
  2748.          }
  2749.       }
  2750.       break;
  2751.  
  2752.    case TGSI_FILE_OUTPUT:
  2753.       if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
  2754.          for (idx = first; idx <= last; ++idx) {
  2755.             for (i = 0; i < TGSI_NUM_CHANNELS; i++)
  2756.                bld->outputs[idx][i] = lp_build_alloca(gallivm,
  2757.                                                       vec_type, "output");
  2758.          }
  2759.       }
  2760.       break;
  2761.  
  2762.    case TGSI_FILE_ADDRESS:
  2763.       /* ADDR registers are the only ones allocated with an integer LLVM
  2764.        * IR type, as they are guaranteed to always hold integer values.
  2765.        * XXX: Not sure whether this exception is worthwhile (or the whole
  2766.        * idea of an ADDR register, for that matter).
  2767.        */
  2768.       assert(last < LP_MAX_TGSI_ADDRS);
  2769.       for (idx = first; idx <= last; ++idx) {
  2770.          assert(idx < LP_MAX_TGSI_ADDRS);
  2771.          for (i = 0; i < TGSI_NUM_CHANNELS; i++)
  2772.             bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
  2773.       }
  2774.       break;
  2775.  
  2776.    case TGSI_FILE_PREDICATE:
  2777.       assert(last < LP_MAX_TGSI_PREDS);
  2778.       for (idx = first; idx <= last; ++idx) {
  2779.          for (i = 0; i < TGSI_NUM_CHANNELS; i++)
  2780.             bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
  2781.                                                  "predicate");
  2782.       }
  2783.       break;
  2784.  
  2785.    case TGSI_FILE_SAMPLER_VIEW:
  2786.       /*
  2787.        * The target stored here MUST match whatever there actually
  2788.        * is in the set sampler views (what about return type?).
  2789.        */
  2790.       assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
  2791.       for (idx = first; idx <= last; ++idx) {
  2792.          bld->sv[idx] = decl->SamplerView;
  2793.       }
  2794.       break;
  2795.  
  2796.    case TGSI_FILE_CONSTANT:
  2797.    {
  2798.       /*
  2799.        * We could trivially fetch the per-buffer pointer when fetching the
  2800.        * constant, relying on llvm to figure out that it's always the same
  2801.        * pointer anyway. However, doing so results in a huge (more than a
  2802.        * factor of 10) slowdown in llvm compilation times for some (but not
  2803.        * all) shaders -- more specifically, the IR optimization spends far
  2804.        * more time in DominatorTree::dominates -- at least with llvm 3.1/3.3.
  2805.        */
  2806.       unsigned idx2D = decl->Dim.Index2D;
  2807.       LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
  2808.       assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
  2809.       bld->consts[idx2D] =
  2810.          lp_build_array_get(gallivm, bld->consts_ptr, index2D);
  2811.       bld->consts_sizes[idx2D] =
  2812.          lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
  2813.    }
  2814.       break;
  2815.  
  2816.    default:
  2817.       /* don't need to declare other vars */
  2818.       break;
  2819.    }
  2820. }
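
        /*
         * Rough sketch of the effect of the above: a declaration like
         *
         *    DCL TEMP[0..1]
         *
         * (without indirect temporary addressing) becomes eight per-channel
         * stack slots in the generated IR, along the lines of
         *
         *    %temp = alloca <8 x float>      ; TEMP[0].x
         *    %temp1 = alloca <8 x float>     ; TEMP[0].y
         *    ...
         *
         * (vector width illustrative). LLVM's mem2reg pass is expected to
         * promote these allocas to SSA values, so the loads/stores through
         * them are not a performance concern.
         */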
  2821.  
  2822.  
  2823. void lp_emit_immediate_soa(
  2824.    struct lp_build_tgsi_context *bld_base,
  2825.    const struct tgsi_full_immediate *imm)
  2826. {
  2827.    struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
  2828.    struct gallivm_state * gallivm = bld_base->base.gallivm;
  2829.    LLVMValueRef imms[4];
  2830.    unsigned i;
  2831.    const uint size = imm->Immediate.NrTokens - 1;
  2832.    assert(size <= 4);
  2833.    switch (imm->Immediate.DataType) {
  2834.    case TGSI_IMM_FLOAT32:
  2835.       for( i = 0; i < size; ++i )
  2836.          imms[i] =
  2837.                lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
  2838.  
  2839.       break;
  2840.    case TGSI_IMM_UINT32:
  2841.       for( i = 0; i < size; ++i ) {
  2842.          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
  2843.          imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
  2844.       }
  2845.  
  2846.       break;
  2847.    case TGSI_IMM_INT32:
  2848.       for( i = 0; i < size; ++i ) {
  2849.          LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
  2850.          imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
  2851.       }
  2852.  
  2853.       break;
  2854.    }
  2855.    for( i = size; i < 4; ++i )
  2856.       imms[i] = bld_base->base.undef;
  2857.  
  2858.    if (bld->use_immediates_array) {
  2859.       unsigned index = bld->num_immediates;
  2860.       struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
  2861.       LLVMBuilderRef builder = gallivm->builder;
  2862.  
  2863.       assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
  2864.       for (i = 0; i < 4; ++i ) {
  2865.          LLVMValueRef lindex = lp_build_const_int32(
  2866.                   bld->bld_base.base.gallivm, index * 4 + i);
  2867.          LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
  2868.                                              bld->imms_array, &lindex, 1, "");
  2869.          LLVMBuildStore(builder, imms[i], imm_ptr);
  2870.       }
  2871.    } else {
  2872.       /* simply copy the immediate values into the next immediates[] slot */
  2876.       assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
  2877.  
  2878.       for(i = 0; i < 4; ++i )
  2879.          bld->immediates[bld->num_immediates][i] = imms[i];
  2880.  
  2881.       if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
  2882.          unsigned index = bld->num_immediates;
  2883.          struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
  2884.          LLVMBuilderRef builder = gallivm->builder;
  2885.          for (i = 0; i < 4; ++i ) {
  2886.             LLVMValueRef lindex = lp_build_const_int32(
  2887.                      bld->bld_base.base.gallivm, index * 4 + i);
  2888.             LLVMValueRef imm_ptr = LLVMBuildGEP(builder,
  2889.                                                 bld->imms_array, &lindex, 1, "");
  2890.             LLVMBuildStore(builder,
  2891.                            bld->immediates[index][i],
  2892.                            imm_ptr);
  2893.          }
  2894.       }
  2895.    }
  2896.  
  2897.    bld->num_immediates++;
  2898. }
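
        /*
         * For example (values illustrative), an immediate such as
         *
         *    IMM[0] FLT32 { 1.0, 0.5, 0.0, 0.0 }
         *
         * becomes four splatted constant vectors <1.0, 1.0, ...>,
         * <0.5, 0.5, ...>, etc., which are kept in bld->immediates[0]
         * and/or, when immediates must be indexable indirectly, stored
         * to the imms_array alloca at flat indices 0..3.
         */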
  2899.  
  2900. static void
  2901. ddx_emit(
  2902.    const struct lp_build_tgsi_action * action,
  2903.    struct lp_build_tgsi_context * bld_base,
  2904.    struct lp_build_emit_data * emit_data)
  2905. {
  2906.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2907.  
  2908.    emit_fetch_deriv(bld, emit_data->args[0], NULL,
  2909.                     &emit_data->output[emit_data->chan], NULL);
  2910. }
  2911.  
  2912. static void
  2913. ddy_emit(
  2914.    const struct lp_build_tgsi_action * action,
  2915.    struct lp_build_tgsi_context * bld_base,
  2916.    struct lp_build_emit_data * emit_data)
  2917. {
  2918.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2919.  
  2920.    emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
  2921.                     &emit_data->output[emit_data->chan]);
  2922. }
  2923.  
  2924. static void
  2925. kill_emit(
  2926.    const struct lp_build_tgsi_action * action,
  2927.    struct lp_build_tgsi_context * bld_base,
  2928.    struct lp_build_emit_data * emit_data)
  2929. {
  2930.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2931.  
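           /* bld_base->pc has already been advanced past this instruction,
            * so pc - 1 is the index of the KILL itself, letting emit_kill
            * decide whether an early mask check is still worthwhile this
            * close to the end of the shader. */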
  2932.    emit_kill(bld, bld_base->pc - 1);
  2933. }
  2934.  
  2935. static void
  2936. kill_if_emit(
  2937.    const struct lp_build_tgsi_action * action,
  2938.    struct lp_build_tgsi_context * bld_base,
  2939.    struct lp_build_emit_data * emit_data)
  2940. {
  2941.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2942.  
  2943.    emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
  2944. }
  2945.  
  2946. static void
  2947. tex_emit(
  2948.    const struct lp_build_tgsi_action * action,
  2949.    struct lp_build_tgsi_context * bld_base,
  2950.    struct lp_build_emit_data * emit_data)
  2951. {
  2952.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2953.  
  2954.    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
  2955.             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
  2956. }
  2957.  
  2958. static void
  2959. tex2_emit(
  2960.    const struct lp_build_tgsi_action * action,
  2961.    struct lp_build_tgsi_context * bld_base,
  2962.    struct lp_build_emit_data * emit_data)
  2963. {
  2964.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2965.  
  2966.    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
  2967.             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
  2968. }
  2969.  
  2970. static void
  2971. txb_emit(
  2972.    const struct lp_build_tgsi_action * action,
  2973.    struct lp_build_tgsi_context * bld_base,
  2974.    struct lp_build_emit_data * emit_data)
  2975. {
  2976.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2977.  
  2978.    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
  2979.             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
  2980. }
  2981.  
  2982. static void
  2983. txb2_emit(
  2984.    const struct lp_build_tgsi_action * action,
  2985.    struct lp_build_tgsi_context * bld_base,
  2986.    struct lp_build_emit_data * emit_data)
  2987. {
  2988.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  2989.  
  2990.    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
  2991.             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
  2992. }
  2993.  
  2994. static void
  2995. txd_emit(
  2996.    const struct lp_build_tgsi_action * action,
  2997.    struct lp_build_tgsi_context * bld_base,
  2998.    struct lp_build_emit_data * emit_data)
  2999. {
  3000.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3001.  
  3002.    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
  3003.             emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
  3004. }
  3005.  
  3006. static void
  3007. txl_emit(
  3008.    const struct lp_build_tgsi_action * action,
  3009.    struct lp_build_tgsi_context * bld_base,
  3010.    struct lp_build_emit_data * emit_data)
  3011. {
  3012.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3013.  
  3014.    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
  3015.             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
  3016. }
  3017.  
  3018. static void
  3019. txl2_emit(
  3020.    const struct lp_build_tgsi_action * action,
  3021.    struct lp_build_tgsi_context * bld_base,
  3022.    struct lp_build_emit_data * emit_data)
  3023. {
  3024.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3025.  
  3026.    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
  3027.             emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
  3028. }
  3029.  
  3030. static void
  3031. txp_emit(
  3032.    const struct lp_build_tgsi_action * action,
  3033.    struct lp_build_tgsi_context * bld_base,
  3034.    struct lp_build_emit_data * emit_data)
  3035. {
  3036.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3037.  
  3038.    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
  3039.             emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
  3040. }
  3041.  
  3042. static void
  3043. tg4_emit(
  3044.    const struct lp_build_tgsi_action * action,
  3045.    struct lp_build_tgsi_context * bld_base,
  3046.    struct lp_build_emit_data * emit_data)
  3047. {
  3048.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3049.  
  3050.    emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
  3051.             emit_data->output, 2, LP_SAMPLER_OP_GATHER);
  3052. }
  3053.  
  3054. static void
  3055. txq_emit(
  3056.    const struct lp_build_tgsi_action * action,
  3057.    struct lp_build_tgsi_context * bld_base,
  3058.    struct lp_build_emit_data * emit_data)
  3059. {
  3060.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3061.  
  3062.    emit_size_query(bld, emit_data->inst, emit_data->output, FALSE);
  3063. }
  3064.  
  3065. static void
  3066. txf_emit(
  3067.    const struct lp_build_tgsi_action * action,
  3068.    struct lp_build_tgsi_context * bld_base,
  3069.    struct lp_build_emit_data * emit_data)
  3070. {
  3071.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3072.  
  3073.    emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE);
  3074. }
  3075.  
  3076. static void
  3077. sample_i_emit(
  3078.    const struct lp_build_tgsi_action * action,
  3079.    struct lp_build_tgsi_context * bld_base,
  3080.    struct lp_build_emit_data * emit_data)
  3081. {
  3082.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3083.  
  3084.    emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE);
  3085. }
  3086.  
  3087. static void
  3088. sample_emit(
  3089.    const struct lp_build_tgsi_action * action,
  3090.    struct lp_build_tgsi_context * bld_base,
  3091.    struct lp_build_emit_data * emit_data)
  3092. {
  3093.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3094.  
  3095.    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
  3096.                FALSE, emit_data->output);
  3097. }
  3098.  
  3099. static void
  3100. sample_b_emit(
  3101.    const struct lp_build_tgsi_action * action,
  3102.    struct lp_build_tgsi_context * bld_base,
  3103.    struct lp_build_emit_data * emit_data)
  3104. {
  3105.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3106.  
  3107.    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
  3108.                FALSE, emit_data->output);
  3109. }
  3110.  
  3111. static void
  3112. sample_c_emit(
  3113.    const struct lp_build_tgsi_action * action,
  3114.    struct lp_build_tgsi_context * bld_base,
  3115.    struct lp_build_emit_data * emit_data)
  3116. {
  3117.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3118.  
  3119.    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
  3120.                TRUE, emit_data->output);
  3121. }
  3122.  
  3123. static void
  3124. sample_c_lz_emit(
  3125.    const struct lp_build_tgsi_action * action,
  3126.    struct lp_build_tgsi_context * bld_base,
  3127.    struct lp_build_emit_data * emit_data)
  3128. {
  3129.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3130.  
  3131.    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
  3132.                TRUE, emit_data->output);
  3133. }
  3134.  
  3135. static void
  3136. sample_d_emit(
  3137.    const struct lp_build_tgsi_action * action,
  3138.    struct lp_build_tgsi_context * bld_base,
  3139.    struct lp_build_emit_data * emit_data)
  3140. {
  3141.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3142.  
  3143.    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
  3144.                FALSE, emit_data->output);
  3145. }
  3146.  
  3147. static void
  3148. sample_l_emit(
  3149.    const struct lp_build_tgsi_action * action,
  3150.    struct lp_build_tgsi_context * bld_base,
  3151.    struct lp_build_emit_data * emit_data)
  3152. {
  3153.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3154.  
  3155.    emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
  3156.                FALSE, emit_data->output);
  3157. }
  3158.  
  3159. static void
  3160. sviewinfo_emit(
  3161.    const struct lp_build_tgsi_action * action,
  3162.    struct lp_build_tgsi_context * bld_base,
  3163.    struct lp_build_emit_data * emit_data)
  3164. {
  3165.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3166.  
  3167.    emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
  3168. }
  3169.  
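        /*
         * Compute the overall per-lane execution mask: the shader's
         * lp_build_mask value (lanes already killed or never active)
         * AND'ed with the current control-flow execution mask, when
         * one is active.
         */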
  3170. static LLVMValueRef
  3171. mask_vec(struct lp_build_tgsi_context *bld_base)
  3172. {
  3173.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3174.    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
  3175.    struct lp_exec_mask *exec_mask = &bld->exec_mask;
  3176.  
  3177.    if (!exec_mask->has_mask) {
  3178.       return lp_build_mask_value(bld->mask);
  3179.    }
  3180.    return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
  3181.                        exec_mask->exec_mask, "");
  3182. }
  3183.  
  3184. static void
  3185. increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
  3186.                           LLVMValueRef ptr,
  3187.                           LLVMValueRef mask)
  3188. {
  3189.    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
  3190.    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
  3191.  
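           /* Mask lanes are all ones (== -1 as a signed integer) or all
            * zeros, so subtracting the mask increments exactly the active
            * lanes by one. */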
  3192.    current_vec = LLVMBuildSub(builder, current_vec, mask, "");
  3193.  
  3194.    LLVMBuildStore(builder, current_vec, ptr);
  3195. }
  3196.  
  3197. static void
  3198. clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
  3199.                              LLVMValueRef ptr,
  3200.                              LLVMValueRef mask)
  3201. {
  3202.    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
  3203.    LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
  3204.  
  3205.    current_vec = lp_build_select(&bld_base->uint_bld,
  3206.                                  mask,
  3207.                                  bld_base->uint_bld.zero,
  3208.                                  current_vec);
  3209.  
  3210.    LLVMBuildStore(builder, current_vec, ptr);
  3211. }
  3212.  
  3213. static LLVMValueRef
  3214. clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
  3215.                                   LLVMValueRef current_mask_vec,
  3216.                                   LLVMValueRef total_emitted_vertices_vec)
  3217. {
  3218.    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
  3219.    struct lp_build_context *int_bld = &bld->bld_base.int_bld;
  3220.    LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
  3221.                                         total_emitted_vertices_vec,
  3222.                                         bld->max_output_vertices_vec);
  3223.  
  3224.    return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
  3225. }
  3226.  
  3227. static void
  3228. emit_vertex(
  3229.    const struct lp_build_tgsi_action * action,
  3230.    struct lp_build_tgsi_context * bld_base,
  3231.    struct lp_build_emit_data * emit_data)
  3232. {
  3233.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3234.    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
  3235.  
  3236.    if (bld->gs_iface->emit_vertex) {
  3237.       LLVMValueRef mask = mask_vec(bld_base);
  3238.       LLVMValueRef total_emitted_vertices_vec =
  3239.          LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
  3240.       mask = clamp_mask_to_max_output_vertices(bld, mask,
  3241.                                                total_emitted_vertices_vec);
  3242.       gather_outputs(bld);
  3243.       bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
  3244.                                  bld->outputs,
  3245.                                  total_emitted_vertices_vec);
  3246.       increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
  3247.                                 mask);
  3248.       increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
  3249.                                 mask);
  3250. #if DUMP_GS_EMITS
  3251.       lp_build_print_value(bld->bld_base.base.gallivm,
  3252.                            " +++ emit vertex masked ones = ",
  3253.                            mask);
  3254.       lp_build_print_value(bld->bld_base.base.gallivm,
  3255.                            " +++ emit vertex emitted = ",
  3256.                            total_emitted_vertices_vec);
  3257. #endif
  3258.    }
  3259. }
  3260.  
  3261.  
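        /*
         * Flush any pending primitive on the lanes selected by 'mask':
         * notify the gs interface of the per-lane vertex/primitive counts,
         * bump the per-lane primitive counters, and reset the counts of
         * vertices belonging to the now-finished primitive.
         */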
  3262. static void
  3263. end_primitive_masked(struct lp_build_tgsi_context * bld_base,
  3264.                      LLVMValueRef mask)
  3265. {
  3266.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3267.    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
  3268.  
  3269.    if (bld->gs_iface->end_primitive) {
  3270.       struct lp_build_context *uint_bld = &bld_base->uint_bld;
  3271.       LLVMValueRef emitted_vertices_vec =
  3272.          LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, "");
  3273.       LLVMValueRef emitted_prims_vec =
  3274.          LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
  3275.  
  3276.       LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
  3277.                                                emitted_vertices_vec,
  3278.                                                uint_bld->zero);
  3279.       /* We need to combine the current execution mask with the mask
  3280.          telling us which, if any, execution slots actually have
  3281.          unemitted primitives; this way we make sure that end_primitive
  3282.          executes only on the paths that have unflushed vertices. */
  3283.       mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
  3284.  
  3285.       bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base,
  3286.                                    emitted_vertices_vec,
  3287.                                    emitted_prims_vec);
  3288.  
  3289. #if DUMP_GS_EMITS
  3290.       lp_build_print_value(bld->bld_base.base.gallivm,
  3291.                            " +++ end prim masked ones = ",
  3292.                            mask);
  3293.       lp_build_print_value(bld->bld_base.base.gallivm,
  3294.                            " +++ end prim emitted verts1 = ",
  3295.                            emitted_vertices_vec);
  3296.       lp_build_print_value(bld->bld_base.base.gallivm,
  3297.                            " +++ end prim emitted prims1 = ",
  3298.                            LLVMBuildLoad(builder,
  3299.                                          bld->emitted_prims_vec_ptr, ""));
  3300. #endif
  3301.       increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
  3302.                                 mask);
  3303.       clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
  3304.                                    mask);
  3305. #if DUMP_GS_EMITS
  3306.       lp_build_print_value(bld->bld_base.base.gallivm,
  3307.                            " +++ end prim emitted verts2 = ",
  3308.                            LLVMBuildLoad(builder,
  3309.                                          bld->emitted_vertices_vec_ptr, ""));
  3310. #endif
  3311.    }
  3312.  
  3313. }
  3314.  
  3315. static void
  3316. end_primitive(
  3317.    const struct lp_build_tgsi_action * action,
  3318.    struct lp_build_tgsi_context * bld_base,
  3319.    struct lp_build_emit_data * emit_data)
  3320. {
  3321.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3322.  
  3323.    if (bld->gs_iface->end_primitive) {
  3324.       LLVMValueRef mask = mask_vec(bld_base);
  3325.       end_primitive_masked(bld_base, mask);
  3326.    }
  3327. }
  3328.  
  3329. static void
  3330. cal_emit(
  3331.    const struct lp_build_tgsi_action * action,
  3332.    struct lp_build_tgsi_context * bld_base,
  3333.    struct lp_build_emit_data * emit_data)
  3334. {
  3335.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3336.  
  3337.    lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
  3338.                      &bld_base->pc);
  3339. }
  3340.  
  3341. static void
  3342. ret_emit(
  3343.    const struct lp_build_tgsi_action * action,
  3344.    struct lp_build_tgsi_context * bld_base,
  3345.    struct lp_build_emit_data * emit_data)
  3346. {
  3347.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3348.  
  3349.    lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
  3350. }
  3351.  
  3352. static void
  3353. brk_emit(
  3354.    const struct lp_build_tgsi_action * action,
  3355.    struct lp_build_tgsi_context * bld_base,
  3356.    struct lp_build_emit_data * emit_data)
  3357. {
  3358.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3359.  
  3360.    lp_exec_break(&bld->exec_mask, bld_base);
  3361. }
  3362.  
  3363. static void
  3364. breakc_emit(
  3365.    const struct lp_build_tgsi_action * action,
  3366.    struct lp_build_tgsi_context * bld_base,
  3367.    struct lp_build_emit_data * emit_data)
  3368. {
  3369.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3370.    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
  3371.    struct lp_build_context *uint_bld = &bld_base->uint_bld;
  3372.    LLVMValueRef unsigned_cond =
  3373.       LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, "");
  3374.    LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
  3375.                                     unsigned_cond,
  3376.                                     uint_bld->zero);
  3377.  
  3378.    lp_exec_break_condition(&bld->exec_mask, cond);
  3379. }
  3380.  
  3381. static void
  3382. if_emit(
  3383.    const struct lp_build_tgsi_action * action,
  3384.    struct lp_build_tgsi_context * bld_base,
  3385.    struct lp_build_emit_data * emit_data)
  3386. {
  3387.    LLVMValueRef tmp;
  3388.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3389.  
  3390.    tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
  3391.                       emit_data->args[0], bld->bld_base.base.zero);
  3392.    lp_exec_mask_cond_push(&bld->exec_mask, tmp);
  3393. }
  3394.  
  3395. static void
  3396. uif_emit(
  3397.    const struct lp_build_tgsi_action * action,
  3398.    struct lp_build_tgsi_context * bld_base,
  3399.    struct lp_build_emit_data * emit_data)
  3400. {
  3401.    LLVMValueRef tmp;
  3402.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3403.    struct lp_build_context *uint_bld = &bld_base->uint_bld;
  3404.  
  3405.    tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
  3406.                       emit_data->args[0], uint_bld->zero);
  3407.    lp_exec_mask_cond_push(&bld->exec_mask, tmp);
  3408. }
  3409.  
  3410. static void
  3411. case_emit(
  3412.    const struct lp_build_tgsi_action * action,
  3413.    struct lp_build_tgsi_context * bld_base,
  3414.    struct lp_build_emit_data * emit_data)
  3415. {
  3416.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3417.  
  3418.    lp_exec_case(&bld->exec_mask, emit_data->args[0]);
  3419. }
  3420.  
  3421. static void
  3422. default_emit(
  3423.    const struct lp_build_tgsi_action * action,
  3424.    struct lp_build_tgsi_context * bld_base,
  3425.    struct lp_build_emit_data * emit_data)
  3426. {
  3427.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3428.  
  3429.    lp_exec_default(&bld->exec_mask, bld_base);
  3430. }
  3431.  
  3432. static void
  3433. switch_emit(
  3434.    const struct lp_build_tgsi_action * action,
  3435.    struct lp_build_tgsi_context * bld_base,
  3436.    struct lp_build_emit_data * emit_data)
  3437. {
  3438.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3439.  
  3440.    lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
  3441. }
  3442.  
  3443. static void
  3444. endswitch_emit(
  3445.    const struct lp_build_tgsi_action * action,
  3446.    struct lp_build_tgsi_context * bld_base,
  3447.    struct lp_build_emit_data * emit_data)
  3448. {
  3449.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3450.  
  3451.    lp_exec_endswitch(&bld->exec_mask, bld_base);
  3452. }
  3453.  
  3454. static void
  3455. bgnloop_emit(
  3456.    const struct lp_build_tgsi_action * action,
  3457.    struct lp_build_tgsi_context * bld_base,
  3458.    struct lp_build_emit_data * emit_data)
  3459. {
  3460.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3461.  
  3462.    lp_exec_bgnloop(&bld->exec_mask);
  3463. }
  3464.  
  3465. static void
  3466. bgnsub_emit(
  3467.    const struct lp_build_tgsi_action * action,
  3468.    struct lp_build_tgsi_context * bld_base,
  3469.    struct lp_build_emit_data * emit_data)
  3470. {
  3471.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3472.  
  3473.    lp_exec_mask_bgnsub(&bld->exec_mask);
  3474. }
  3475.  
  3476. static void
  3477. else_emit(
  3478.    const struct lp_build_tgsi_action * action,
  3479.    struct lp_build_tgsi_context * bld_base,
  3480.    struct lp_build_emit_data * emit_data)
  3481. {
  3482.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3483.  
  3484.    lp_exec_mask_cond_invert(&bld->exec_mask);
  3485. }
  3486.  
  3487. static void
  3488. endif_emit(
  3489.    const struct lp_build_tgsi_action * action,
  3490.    struct lp_build_tgsi_context * bld_base,
  3491.    struct lp_build_emit_data * emit_data)
  3492. {
  3493.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3494.  
  3495.    lp_exec_mask_cond_pop(&bld->exec_mask);
  3496. }
  3497.  
  3498. static void
  3499. endloop_emit(
  3500.    const struct lp_build_tgsi_action * action,
  3501.    struct lp_build_tgsi_context * bld_base,
  3502.    struct lp_build_emit_data * emit_data)
  3503. {
  3504.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3505.  
  3506.    lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
  3507. }
  3508.  
  3509. static void
  3510. endsub_emit(
  3511.    const struct lp_build_tgsi_action * action,
  3512.    struct lp_build_tgsi_context * bld_base,
  3513.    struct lp_build_emit_data * emit_data)
  3514. {
  3515.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3516.  
  3517.    lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
  3518. }
  3519.  
  3520. static void
  3521. cont_emit(
  3522.    const struct lp_build_tgsi_action * action,
  3523.    struct lp_build_tgsi_context * bld_base,
  3524.    struct lp_build_emit_data * emit_data)
  3525. {
  3526.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3527.  
  3528.    lp_exec_continue(&bld->exec_mask);
  3529. }
  3530.  
  3531. static void emit_prologue(struct lp_build_tgsi_context * bld_base)
  3532. {
  3533.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3534.    struct gallivm_state * gallivm = bld_base->base.gallivm;
  3535.  
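           /* Note on the array sizes below: file_max[] holds the highest
            * declared register index, i.e. file_max + 1 registers of
            * TGSI_NUM_CHANNELS (4) channels each, hence
            * (file_max + 1) * 4 == file_max * 4 + 4 slots. */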
  3536.    if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
  3537.       LLVMValueRef array_size =
  3538.          lp_build_const_int32(gallivm,
  3539.                          bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
  3540.       bld->temps_array = lp_build_array_alloca(gallivm,
  3541.                                               bld_base->base.vec_type, array_size,
  3542.                                               "temp_array");
  3543.    }
  3544.  
  3545.    if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
  3546.       LLVMValueRef array_size =
  3547.          lp_build_const_int32(gallivm,
  3548.                             bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
  3549.       bld->outputs_array = lp_build_array_alloca(gallivm,
  3550.                                                 bld_base->base.vec_type, array_size,
  3551.                                                 "output_array");
  3552.    }
  3553.  
  3554.    if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
  3555.       LLVMValueRef array_size =
  3556.          lp_build_const_int32(gallivm,
  3557.                          bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
  3558.       bld->imms_array = lp_build_array_alloca(gallivm,
  3559.                                               bld_base->base.vec_type, array_size,
  3560.                                               "imms_array");
  3561.    }
  3562.  
  3563.    /* If we have indirect addressing in inputs, we need to copy them into
  3564.     * our alloca array so that they can be indexed with dynamic offsets */
  3565.    if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
  3566.       unsigned index, chan;
  3567.       LLVMTypeRef vec_type = bld_base->base.vec_type;
  3568.       LLVMValueRef array_size = lp_build_const_int32(gallivm,
  3569.             bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
  3570.       bld->inputs_array = lp_build_array_alloca(gallivm,
  3571.                                                vec_type, array_size,
  3572.                                                "input_array");
  3573.  
  3574.       assert(bld_base->info->num_inputs
  3575.                         <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
  3576.  
  3577.       for (index = 0; index < bld_base->info->num_inputs; ++index) {
  3578.          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
  3579.             LLVMValueRef lindex =
  3580.                lp_build_const_int32(gallivm, index * 4 + chan);
  3581.             LLVMValueRef input_ptr =
  3582.                LLVMBuildGEP(gallivm->builder, bld->inputs_array,
  3583.                             &lindex, 1, "");
  3584.             LLVMValueRef value = bld->inputs[index][chan];
  3585.             if (value)
  3586.                LLVMBuildStore(gallivm->builder, value, input_ptr);
  3587.          }
  3588.       }
  3589.    }
  3590.  
  3591.    if (bld->gs_iface) {
  3592.       struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
  3593.       bld->emitted_prims_vec_ptr =
  3594.          lp_build_alloca(gallivm,
  3595.                          uint_bld->vec_type,
  3596.                          "emitted_prims_ptr");
  3597.       bld->emitted_vertices_vec_ptr =
  3598.          lp_build_alloca(gallivm,
  3599.                          uint_bld->vec_type,
  3600.                          "emitted_vertices_ptr");
  3601.       bld->total_emitted_vertices_vec_ptr =
  3602.          lp_build_alloca(gallivm,
  3603.                          uint_bld->vec_type,
  3604.                          "total_emitted_vertices_ptr");
  3605.  
  3606.       LLVMBuildStore(gallivm->builder, uint_bld->zero,
  3607.                      bld->emitted_prims_vec_ptr);
  3608.       LLVMBuildStore(gallivm->builder, uint_bld->zero,
  3609.                      bld->emitted_vertices_vec_ptr);
  3610.       LLVMBuildStore(gallivm->builder, uint_bld->zero,
  3611.                      bld->total_emitted_vertices_vec_ptr);
  3612.    }
  3613.  
  3614.    if (DEBUG_EXECUTION) {
  3615.       lp_build_printf(gallivm, "\n");
  3616.       emit_dump_file(bld, TGSI_FILE_CONSTANT);
  3617.       if (!bld->gs_iface)
  3618.          emit_dump_file(bld, TGSI_FILE_INPUT);
  3619.    }
  3620. }
  3621.  
  3622. static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
  3623. {
  3624.    struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
  3625.    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
  3626.  
  3627.    if (DEBUG_EXECUTION) {
  3628.       /* for debugging */
  3629.       if (0) {
  3630.          emit_dump_file(bld, TGSI_FILE_TEMPORARY);
  3631.       }
  3632.       emit_dump_file(bld, TGSI_FILE_OUTPUT);
  3633.       lp_build_printf(bld_base->base.gallivm, "\n");
  3634.    }
  3635.  
  3636.    /* If we have indirect addressing in outputs, we need to copy our alloca
  3637.     * array to the output slots specified by the caller */
  3638.    if (bld->gs_iface) {
  3639.       LLVMValueRef total_emitted_vertices_vec;
  3640.       LLVMValueRef emitted_prims_vec;
  3641.       /* implicit end_primitive, needed in case there are any unflushed
  3642.          vertices in the cache. Note that we must not call end_primitive
  3643.          here, since the exec_mask is not valid at this point. */
  3644.       end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));
  3645.  
  3646.       total_emitted_vertices_vec =
  3647.          LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
  3648.       emitted_prims_vec =
  3649.          LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");
  3650.  
  3651.       bld->gs_iface->gs_epilogue(bld->gs_iface,
  3652.                                  &bld->bld_base,
  3653.                                  total_emitted_vertices_vec,
  3654.                                  emitted_prims_vec);
  3655.    } else {
  3656.       gather_outputs(bld);
  3657.    }
  3658. }
  3659.  
  3660. void
  3661. lp_build_tgsi_soa(struct gallivm_state *gallivm,
  3662.                   const struct tgsi_token *tokens,
  3663.                   struct lp_type type,
  3664.                   struct lp_build_mask_context *mask,
  3665.                   LLVMValueRef consts_ptr,
  3666.                   LLVMValueRef const_sizes_ptr,
  3667.                   const struct lp_bld_tgsi_system_values *system_values,
  3668.                   const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
  3669.                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
  3670.                   LLVMValueRef context_ptr,
  3671.                   struct lp_build_sampler_soa *sampler,
  3672.                   const struct tgsi_shader_info *info,
  3673.                   const struct lp_build_tgsi_gs_iface *gs_iface)
  3674. {
  3675.    struct lp_build_tgsi_soa_context bld;
  3676.  
  3677.    struct lp_type res_type;
  3678.  
  3679.    assert(type.length <= LP_MAX_VECTOR_LENGTH);
  3680.    memset(&res_type, 0, sizeof res_type);
  3681.    res_type.width = type.width;
  3682.    res_type.length = type.length;
  3683.    res_type.sign = 1;
  3684.  
  3685.    /* Setup build context */
  3686.    memset(&bld, 0, sizeof bld);
  3687.    lp_build_context_init(&bld.bld_base.base, gallivm, type);
  3688.    lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
  3689.    lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
  3690.    lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
  3691.    bld.mask = mask;
  3692.    bld.inputs = inputs;
  3693.    bld.outputs = outputs;
  3694.    bld.consts_ptr = consts_ptr;
  3695.    bld.const_sizes_ptr = const_sizes_ptr;
  3696.    bld.sampler = sampler;
  3697.    bld.bld_base.info = info;
  3698.    bld.indirect_files = info->indirect_files;
  3699.    bld.context_ptr = context_ptr;
  3700.  
  3701.    /*
  3702.     * If the number of temporaries is rather large then we just
  3703.     * allocate them as an array right from the start and treat
  3704.     * them like indirect temporaries.
  3705.     */
  3706.    if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
  3707.       bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
  3708.    }
  3709.    /*
  3710.     * For performance reasons immediates are always backed by a static
  3711.     * array, but if there are too many of them we have to fall back to
  3712.     * a dynamically allocated array.
  3713.     */
  3714.    bld.use_immediates_array =
  3715.          (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
  3716.    if (bld.use_immediates_array) {
  3717.       bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
  3718.    }
  3719.  
  3720.  
  3721.    bld.bld_base.soa = TRUE;
  3722.    bld.bld_base.emit_debug = emit_debug;
  3723.    bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
  3724.    bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
  3725.    bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
  3726.    bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
  3727.    bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;
  3728.    bld.bld_base.emit_store = emit_store;
  3729.  
  3730.    bld.bld_base.emit_declaration = lp_emit_declaration_soa;
  3731.    bld.bld_base.emit_immediate = lp_emit_immediate_soa;
  3732.  
  3733.    bld.bld_base.emit_prologue = emit_prologue;
  3734.    bld.bld_base.emit_epilogue = emit_epilogue;
  3735.  
  3736.    /* Set opcode actions */
  3737.    lp_set_default_actions_cpu(&bld.bld_base);
  3738.  
  3739.    bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
  3740.    bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
  3741.    bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
  3742.    bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit;
  3743.    bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
  3744.    bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
  3745.    bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
  3746.    bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
  3747.    bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
  3748.    bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
  3749.    bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
  3750.    bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
  3751.    bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
  3752.    bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
  3753.    bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
  3754.    bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
  3755.    bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
  3756.    bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
  3757.    bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
  3758.    bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
  3759.    bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
  3760.    bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
  3761.    bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
  3762.    bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
  3763.    bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
  3764.    bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
  3765.    bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
  3766.    bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
  3767.    bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
  3768.    bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
  3769.    bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
  3770.    bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
  3771.    /* DX10 sampling ops */
  3772.    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
  3773.    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
  3774.    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
  3775.    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
  3776.    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
  3777.    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
  3778.    bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
  3779.    bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
  3780.  
  3781.    if (gs_iface) {
  3782.       /* There's no default value for this because it should always
  3783.        * be set, but apps using ext_geometry_shader4 quite often
  3784.        * forgot to set it, so we fall back to MAX_VERTEX_VARYING from
  3785.        * that spec. We could debug_assert if it's not set, but that
  3786.        * would be a lot uglier. */
  3787.       uint max_output_vertices;
  3788.  
  3789.       /* inputs are always indirect with gs */
  3790.       bld.indirect_files |= (1 << TGSI_FILE_INPUT);
  3791.       bld.gs_iface = gs_iface;
  3792.       bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
  3793.       bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
  3794.       bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
  3795.  
  3796.       max_output_vertices =
  3797.             info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
  3798.       if (!max_output_vertices)
  3799.          max_output_vertices = 32;
  3800.  
  3801.       bld.max_output_vertices_vec =
  3802.          lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
  3803.                                 max_output_vertices);
  3804.    }
  3805.  
  3806.    lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
  3807.  
  3808.    bld.system_values = *system_values;
  3809.  
  3810.    lp_build_tgsi_llvm(&bld.bld_base, tokens);
  3811.  
  3812.    if (0) {
  3813.       LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
  3814.       LLVMValueRef function = LLVMGetBasicBlockParent(block);
  3815.       debug_printf("11111111111111111111111111111 \n");
  3816.       tgsi_dump(tokens, 0);
  3817.       lp_debug_dump_value(function);
  3818.       debug_printf("2222222222222222222222222222 \n");
  3819.    }
  3820.  
  3821.    if (0) {
  3822.       LLVMModuleRef module = LLVMGetGlobalParent(
  3823.          LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
  3824.       LLVMDumpModule(module);
  3825.  
  3826.    }
  3827.    lp_exec_mask_fini(&bld.exec_mask);
  3828. }
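
        /*
         * Minimal usage sketch (simplified; the names and setup are
         * illustrative of what a caller such as the llvmpipe shader setup
         * code would pass in, not copied from it):
         *
         *    lp_build_tgsi_soa(gallivm, tokens, type, mask,
         *                      consts_ptr, const_sizes_ptr, &system_values,
         *                      inputs, outputs, context_ptr,
         *                      sampler, &shader_info, NULL);
         *
         * Here 'type' describes the SoA vector layout (e.g. 8 x float32)
         * and the NULL gs_iface selects the non-geometry-shader path. On
         * return the generated IR has been appended at the current builder
         * position and 'outputs' has been filled with per-channel result
         * slots for the caller to read back.
         */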
  3829.