Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2010 VMware, Inc.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. #include "draw_llvm.h"
  29.  
  30. #include "draw_context.h"
  31. #include "draw_vs.h"
  32. #include "draw_gs.h"
  33.  
  34. #include "gallivm/lp_bld_arit.h"
  35. #include "gallivm/lp_bld_arit_overflow.h"
  36. #include "gallivm/lp_bld_logic.h"
  37. #include "gallivm/lp_bld_const.h"
  38. #include "gallivm/lp_bld_swizzle.h"
  39. #include "gallivm/lp_bld_struct.h"
  40. #include "gallivm/lp_bld_type.h"
  41. #include "gallivm/lp_bld_flow.h"
  42. #include "gallivm/lp_bld_debug.h"
  43. #include "gallivm/lp_bld_tgsi.h"
  44. #include "gallivm/lp_bld_printf.h"
  45. #include "gallivm/lp_bld_intr.h"
  46. #include "gallivm/lp_bld_init.h"
  47. #include "gallivm/lp_bld_type.h"
  48. #include "gallivm/lp_bld_pack.h"
  49. #include "gallivm/lp_bld_format.h"
  50.  
  51. #include "tgsi/tgsi_exec.h"
  52. #include "tgsi/tgsi_dump.h"
  53.  
  54. #include "util/u_math.h"
  55. #include "util/u_pointer.h"
  56. #include "util/u_string.h"
  57. #include "util/u_simple_list.h"
  58.  
  59.  
  60. #define DEBUG_STORE 0
  61.  
  62.  
  63. static void
  64. draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var,
  65.                    boolean elts);
  66.  
  67.  
  68. struct draw_gs_llvm_iface {
  69.    struct lp_build_tgsi_gs_iface base;
  70.  
  71.    struct draw_gs_llvm_variant *variant;
  72.    LLVMValueRef input;
  73. };
  74.  
  75. static INLINE const struct draw_gs_llvm_iface *
  76. draw_gs_llvm_iface(const struct lp_build_tgsi_gs_iface *iface)
  77. {
  78.    return (const struct draw_gs_llvm_iface *)iface;
  79. }
  80.  
  81. /**
  82.  * Create LLVM type for draw_vertex_buffer.
  83.  */
  84. static LLVMTypeRef
  85. create_jit_dvbuffer_type(struct gallivm_state *gallivm,
  86.                          const char *struct_name)
  87. {
  88.    LLVMTargetDataRef target = gallivm->target;
  89.    LLVMTypeRef dvbuffer_type;
  90.    LLVMTypeRef elem_types[DRAW_JIT_DVBUFFER_NUM_FIELDS];
  91.    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
  92.  
  93.    elem_types[DRAW_JIT_DVBUFFER_MAP] =
  94.       LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0);
  95.    elem_types[DRAW_JIT_DVBUFFER_SIZE] = int32_type;
  96.  
  97.    dvbuffer_type = LLVMStructTypeInContext(gallivm->context, elem_types,
  98.                                            Elements(elem_types), 0);
  99.  
  100. #if HAVE_LLVM < 0x0300
  101.    LLVMAddTypeName(gallivm->module, struct_name, dvbuffer_type);
  102.  
  103.    /* Make sure the target's struct layout cache doesn't return
  104.     * stale/invalid data.
  105.     */
  106.    LLVMInvalidateStructLayout(gallivm->target, dvbuffer_type);
  107. #endif
  108.  
  109.    LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, map,
  110.                           target, dvbuffer_type,
  111.                           DRAW_JIT_DVBUFFER_MAP);
  112.    LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, size,
  113.                           target, dvbuffer_type,
  114.                           DRAW_JIT_DVBUFFER_SIZE);
  115.  
  116.    return dvbuffer_type;
  117. }
  118.  
  119. /**
  120.  * Create LLVM type for struct draw_jit_texture
  121.  */
  122. static LLVMTypeRef
  123. create_jit_texture_type(struct gallivm_state *gallivm, const char *struct_name)
  124. {
  125.    LLVMTargetDataRef target = gallivm->target;
  126.    LLVMTypeRef texture_type;
  127.    LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS];
  128.    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
  129.  
  130.    elem_types[DRAW_JIT_TEXTURE_WIDTH]  =
  131.    elem_types[DRAW_JIT_TEXTURE_HEIGHT] =
  132.    elem_types[DRAW_JIT_TEXTURE_DEPTH] =
  133.    elem_types[DRAW_JIT_TEXTURE_FIRST_LEVEL] =
  134.    elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = int32_type;
  135.    elem_types[DRAW_JIT_TEXTURE_BASE] =
  136.       LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
  137.    elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
  138.    elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
  139.    elem_types[DRAW_JIT_TEXTURE_MIP_OFFSETS] =
  140.       LLVMArrayType(int32_type, PIPE_MAX_TEXTURE_LEVELS);
  141.  
  142.    texture_type = LLVMStructTypeInContext(gallivm->context, elem_types,
  143.                                           Elements(elem_types), 0);
  144.  
  145. #if HAVE_LLVM < 0x0300
  146.    LLVMAddTypeName(gallivm->module, struct_name, texture_type);
  147.  
  148.    /* Make sure the target's struct layout cache doesn't return
  149.     * stale/invalid data.
  150.     */
  151.    LLVMInvalidateStructLayout(gallivm->target, texture_type);
  152. #endif
  153.  
  154.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
  155.                           target, texture_type,
  156.                           DRAW_JIT_TEXTURE_WIDTH);
  157.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
  158.                           target, texture_type,
  159.                           DRAW_JIT_TEXTURE_HEIGHT);
  160.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth,
  161.                           target, texture_type,
  162.                           DRAW_JIT_TEXTURE_DEPTH);
  163.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, first_level,
  164.                           target, texture_type,
  165.                           DRAW_JIT_TEXTURE_FIRST_LEVEL);
  166.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level,
  167.                           target, texture_type,
  168.                           DRAW_JIT_TEXTURE_LAST_LEVEL);
  169.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, base,
  170.                           target, texture_type,
  171.                           DRAW_JIT_TEXTURE_BASE);
  172.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride,
  173.                           target, texture_type,
  174.                           DRAW_JIT_TEXTURE_ROW_STRIDE);
  175.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride,
  176.                           target, texture_type,
  177.                           DRAW_JIT_TEXTURE_IMG_STRIDE);
  178.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, mip_offsets,
  179.                           target, texture_type,
  180.                           DRAW_JIT_TEXTURE_MIP_OFFSETS);
  181.  
  182.    LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, target, texture_type);
  183.  
  184.    return texture_type;
  185. }
  186.  
  187.  
  188. /**
  189.  * Create LLVM type for struct draw_jit_sampler
  190.  */
  191. static LLVMTypeRef
  192. create_jit_sampler_type(struct gallivm_state *gallivm, const char *struct_name)
  193. {
  194.    LLVMTargetDataRef target = gallivm->target;
  195.    LLVMTypeRef sampler_type;
  196.    LLVMTypeRef elem_types[DRAW_JIT_SAMPLER_NUM_FIELDS];
  197.  
  198.    elem_types[DRAW_JIT_SAMPLER_MIN_LOD] =
  199.    elem_types[DRAW_JIT_SAMPLER_MAX_LOD] =
  200.    elem_types[DRAW_JIT_SAMPLER_LOD_BIAS] = LLVMFloatTypeInContext(gallivm->context);
  201.    elem_types[DRAW_JIT_SAMPLER_BORDER_COLOR] =
  202.       LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
  203.  
  204.    sampler_type = LLVMStructTypeInContext(gallivm->context, elem_types,
  205.                                           Elements(elem_types), 0);
  206.  
  207. #if HAVE_LLVM < 0x0300
  208.    LLVMAddTypeName(gallivm->module, struct_name, sampler_type);
  209.  
  210.    /* Make sure the target's struct layout cache doesn't return
  211.     * stale/invalid data.
  212.     */
  213.    LLVMInvalidateStructLayout(gallivm->target, sampler_type);
  214. #endif
  215.  
  216.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, min_lod,
  217.                           target, sampler_type,
  218.                           DRAW_JIT_SAMPLER_MIN_LOD);
  219.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, max_lod,
  220.                           target, sampler_type,
  221.                           DRAW_JIT_SAMPLER_MAX_LOD);
  222.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, lod_bias,
  223.                           target, sampler_type,
  224.                           DRAW_JIT_SAMPLER_LOD_BIAS);
  225.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, border_color,
  226.                           target, sampler_type,
  227.                           DRAW_JIT_SAMPLER_BORDER_COLOR);
  228.  
  229.    LP_CHECK_STRUCT_SIZE(struct draw_jit_sampler, target, sampler_type);
  230.  
  231.    return sampler_type;
  232. }
  233.  
  234.  
  235. /**
  236.  * Create LLVM type for struct draw_jit_context
  237.  */
  238. static LLVMTypeRef
  239. create_jit_context_type(struct gallivm_state *gallivm,
  240.                         LLVMTypeRef texture_type, LLVMTypeRef sampler_type,
  241.                         const char *struct_name)
  242. {
  243.    LLVMTargetDataRef target = gallivm->target;
  244.    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
  245.    LLVMTypeRef elem_types[DRAW_JIT_CTX_NUM_FIELDS];
  246.    LLVMTypeRef context_type;
  247.  
  248.    elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* vs_constants */
  249.                                  LP_MAX_TGSI_CONST_BUFFERS);
  250.    elem_types[1] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
  251.                                                  DRAW_TOTAL_CLIP_PLANES), 0);
  252.    elem_types[2] = LLVMPointerType(float_type, 0); /* viewport */
  253.    elem_types[3] = LLVMArrayType(texture_type,
  254.                                  PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
  255.    elem_types[4] = LLVMArrayType(sampler_type,
  256.                                  PIPE_MAX_SAMPLERS); /* samplers */
  257.    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
  258.                                           Elements(elem_types), 0);
  259. #if HAVE_LLVM < 0x0300
  260.    LLVMAddTypeName(gallivm->module, struct_name, context_type);
  261.  
  262.    LLVMInvalidateStructLayout(gallivm->target, context_type);
  263. #endif
  264.  
  265.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
  266.                           target, context_type, DRAW_JIT_CTX_CONSTANTS);
  267.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes,
  268.                           target, context_type, DRAW_JIT_CTX_PLANES);
  269.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, viewport,
  270.                           target, context_type, DRAW_JIT_CTX_VIEWPORT);
  271.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
  272.                           target, context_type,
  273.                           DRAW_JIT_CTX_TEXTURES);
  274.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, samplers,
  275.                           target, context_type,
  276.                           DRAW_JIT_CTX_SAMPLERS);
  277.    LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
  278.                         target, context_type);
  279.  
  280.    return context_type;
  281. }
  282.  
  283.  
  284. /**
  285.  * Create LLVM type for struct draw_gs_jit_context
  286.  */
  287. static LLVMTypeRef
  288. create_gs_jit_context_type(struct gallivm_state *gallivm,
  289.                            unsigned vector_length,
  290.                            LLVMTypeRef texture_type, LLVMTypeRef sampler_type,
  291.                            const char *struct_name)
  292. {
  293.    LLVMTargetDataRef target = gallivm->target;
  294.    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
  295.    LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
  296.    LLVMTypeRef elem_types[DRAW_GS_JIT_CTX_NUM_FIELDS];
  297.    LLVMTypeRef context_type;
  298.  
  299.    elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* constants */
  300.                                  LP_MAX_TGSI_CONST_BUFFERS);
  301.    elem_types[1] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
  302.                                                  DRAW_TOTAL_CLIP_PLANES), 0);
  303.    elem_types[2] = LLVMPointerType(float_type, 0); /* viewport */
  304.  
  305.    elem_types[3] = LLVMArrayType(texture_type,
  306.                                  PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
  307.    elem_types[4] = LLVMArrayType(sampler_type,
  308.                                  PIPE_MAX_SAMPLERS); /* samplers */
  309.    
  310.    elem_types[5] = LLVMPointerType(LLVMPointerType(int_type, 0), 0);
  311.    elem_types[6] = LLVMPointerType(LLVMVectorType(int_type,
  312.                                                   vector_length), 0);
  313.    elem_types[7] = LLVMPointerType(LLVMVectorType(int_type,
  314.                                                   vector_length), 0);
  315.  
  316.    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
  317.                                           Elements(elem_types), 0);
  318. #if HAVE_LLVM < 0x0300
  319.    LLVMAddTypeName(gallivm->module, struct_name, context_type);
  320.  
  321.    LLVMInvalidateStructLayout(gallivm->target, context_type);
  322. #endif
  323.  
  324.    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, constants,
  325.                           target, context_type, DRAW_GS_JIT_CTX_CONSTANTS);
  326.    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, planes,
  327.                           target, context_type, DRAW_GS_JIT_CTX_PLANES);
  328.    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, viewport,
  329.                           target, context_type, DRAW_GS_JIT_CTX_VIEWPORT);
  330.    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, textures,
  331.                           target, context_type,
  332.                           DRAW_GS_JIT_CTX_TEXTURES);
  333.    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, samplers,
  334.                           target, context_type,
  335.                           DRAW_GS_JIT_CTX_SAMPLERS);
  336.    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, prim_lengths,
  337.                           target, context_type,
  338.                           DRAW_GS_JIT_CTX_PRIM_LENGTHS);
  339.    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_vertices,
  340.                           target, context_type,
  341.                           DRAW_GS_JIT_CTX_EMITTED_VERTICES);
  342.    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_prims,
  343.                           target, context_type,
  344.                           DRAW_GS_JIT_CTX_EMITTED_PRIMS);
  345.    LP_CHECK_STRUCT_SIZE(struct draw_gs_jit_context,
  346.                         target, context_type);
  347.  
  348.    return context_type;
  349. }
  350.  
  351.  
  352. static LLVMTypeRef
  353. create_gs_jit_input_type(struct gallivm_state *gallivm)
  354. {
  355.    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
  356.    LLVMTypeRef input_array;
  357.  
  358.    input_array = LLVMVectorType(float_type, TGSI_NUM_CHANNELS); /* num primitives */
  359.    input_array = LLVMArrayType(input_array, TGSI_NUM_CHANNELS); /* num channels */
  360.    input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
  361.    input_array = LLVMPointerType(input_array, 0); /* num vertices per prim */
  362.  
  363.    return input_array;
  364. }
  365.  
  366. /**
  367.  * Create LLVM type for struct pipe_vertex_buffer
  368.  */
  369. static LLVMTypeRef
  370. create_jit_vertex_buffer_type(struct gallivm_state *gallivm,
  371.                               const char *struct_name)
  372. {
  373.    LLVMTargetDataRef target = gallivm->target;
  374.    LLVMTypeRef elem_types[4];
  375.    LLVMTypeRef vb_type;
  376.  
  377.    elem_types[0] =
  378.    elem_types[1] = LLVMInt32TypeInContext(gallivm->context);
  379.    elem_types[2] =
  380.    elem_types[3] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
  381.  
  382.    vb_type = LLVMStructTypeInContext(gallivm->context, elem_types,
  383.                                      Elements(elem_types), 0);
  384. #if HAVE_LLVM < 0x0300
  385.    LLVMAddTypeName(gallivm->module, struct_name, vb_type);
  386.  
  387.    LLVMInvalidateStructLayout(gallivm->target, vb_type);
  388. #endif
  389.  
  390.    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
  391.                           target, vb_type, 0);
  392.    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
  393.                           target, vb_type, 1);
  394.  
  395.    LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type);
  396.  
  397.    return vb_type;
  398. }
  399.  
  400.  
  401. /**
  402.  * Create LLVM type for struct vertex_header;
  403.  */
  404. static LLVMTypeRef
  405. create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems)
  406. {
  407.    LLVMTargetDataRef target = gallivm->target;
  408.    LLVMTypeRef elem_types[4];
  409.    LLVMTypeRef vertex_header;
  410.    char struct_name[24];
  411.  
  412.    util_snprintf(struct_name, 23, "vertex_header%d", data_elems);
  413.  
  414.    elem_types[DRAW_JIT_VERTEX_VERTEX_ID]  = LLVMIntTypeInContext(gallivm->context, 32);
  415.    elem_types[DRAW_JIT_VERTEX_CLIP]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
  416.    elem_types[DRAW_JIT_VERTEX_PRE_CLIP_POS]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
  417.    elem_types[DRAW_JIT_VERTEX_DATA]  = LLVMArrayType(elem_types[1], data_elems);
  418.  
  419.    vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types,
  420.                                            Elements(elem_types), 0);
  421. #if HAVE_LLVM < 0x0300
  422.    LLVMAddTypeName(gallivm->module, struct_name, vertex_header);
  423.  
  424.    LLVMInvalidateStructLayout(gallivm->target, vertex_header);
  425. #endif
  426.  
  427.    /* these are bit-fields and we can't take address of them
  428.       LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
  429.       target, vertex_header,
  430.       DRAW_JIT_VERTEX_CLIPMASK);
  431.       LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
  432.       target, vertex_header,
  433.       DRAW_JIT_VERTEX_EDGEFLAG);
  434.       LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
  435.       target, vertex_header,
  436.       DRAW_JIT_VERTEX_PAD);
  437.       LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
  438.       target, vertex_header,
  439.       DRAW_JIT_VERTEX_VERTEX_ID);
  440.    */
  441.    LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip,
  442.                           target, vertex_header,
  443.                           DRAW_JIT_VERTEX_CLIP);
  444.    LP_CHECK_MEMBER_OFFSET(struct vertex_header, pre_clip_pos,
  445.                           target, vertex_header,
  446.                           DRAW_JIT_VERTEX_PRE_CLIP_POS);
  447.    LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
  448.                           target, vertex_header,
  449.                           DRAW_JIT_VERTEX_DATA);
  450.  
  451.    assert(LLVMABISizeOfType(target, vertex_header) ==
  452.           offsetof(struct vertex_header, data[data_elems]));
  453.  
  454.    return vertex_header;
  455. }
  456.  
  457.  
  458. /**
  459.  * Create LLVM types for various structures.
  460.  */
  461. static void
  462. create_jit_types(struct draw_llvm_variant *variant)
  463. {
  464.    struct gallivm_state *gallivm = variant->gallivm;
  465.    LLVMTypeRef texture_type, sampler_type, context_type, buffer_type,
  466.       vb_type;
  467.  
  468.    texture_type = create_jit_texture_type(gallivm, "texture");
  469.    sampler_type = create_jit_sampler_type(gallivm, "sampler");
  470.  
  471.    context_type = create_jit_context_type(gallivm, texture_type, sampler_type,
  472.                                           "draw_jit_context");
  473.    variant->context_ptr_type = LLVMPointerType(context_type, 0);
  474.  
  475.    buffer_type = create_jit_dvbuffer_type(gallivm, "draw_vertex_buffer");
  476.    variant->buffer_ptr_type = LLVMPointerType(buffer_type, 0);
  477.    
  478.    vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer");
  479.    variant->vb_ptr_type = LLVMPointerType(vb_type, 0);
  480. }
  481.  
  482.  
  483. static LLVMTypeRef
  484. get_context_ptr_type(struct draw_llvm_variant *variant)
  485. {
  486.    if (!variant->context_ptr_type)
  487.       create_jit_types(variant);
  488.    return variant->context_ptr_type;
  489. }
  490.  
  491.  
  492. static LLVMTypeRef
  493. get_buffer_ptr_type(struct draw_llvm_variant *variant)
  494. {
  495.    if (!variant->buffer_ptr_type)
  496.       create_jit_types(variant);
  497.    return variant->buffer_ptr_type;
  498. }
  499.  
  500.  
  501. static LLVMTypeRef
  502. get_vb_ptr_type(struct draw_llvm_variant *variant)
  503. {
  504.    if (!variant->vb_ptr_type)
  505.       create_jit_types(variant);
  506.    return variant->vb_ptr_type;
  507. }
  508.  
  509. static LLVMTypeRef
  510. get_vertex_header_ptr_type(struct draw_llvm_variant *variant)
  511. {
  512.    if (!variant->vertex_header_ptr_type)
  513.       create_jit_types(variant);
  514.    return variant->vertex_header_ptr_type;
  515. }
  516.  
  517.  
  518. /**
  519.  * Create per-context LLVM info.
  520.  */
  521. struct draw_llvm *
  522. draw_llvm_create(struct draw_context *draw)
  523. {
  524.    struct draw_llvm *llvm;
  525.  
  526.    llvm = CALLOC_STRUCT( draw_llvm );
  527.    if (!llvm)
  528.       return NULL;
  529.  
  530.    lp_build_init();
  531.  
  532.    llvm->draw = draw;
  533.  
  534.    llvm->nr_variants = 0;
  535.    make_empty_list(&llvm->vs_variants_list);
  536.  
  537.    llvm->nr_gs_variants = 0;
  538.    make_empty_list(&llvm->gs_variants_list);
  539.  
  540.    return llvm;
  541. }
  542.  
  543.  
  /**
   * Release the per-context LLVM state.
   *
   * Only the draw_llvm struct itself is freed here.
   */
  void
  draw_llvm_destroy(struct draw_llvm *llvm)
  {
     /* XXX should other data owned by draw_llvm be freed as well? */
     FREE(llvm);
  }
  553.  
  554.  
  555. /**
  556.  * Create LLVM-generated code for a vertex shader.
  557.  */
  558. struct draw_llvm_variant *
  559. draw_llvm_create_variant(struct draw_llvm *llvm,
  560.                          unsigned num_inputs,
  561.                          const struct draw_llvm_variant_key *key)
  562. {
  563.    struct draw_llvm_variant *variant;
  564.    struct llvm_vertex_shader *shader =
  565.       llvm_vertex_shader(llvm->draw->vs.vertex_shader);
  566.    LLVMTypeRef vertex_header;
  567.  
  568.    variant = MALLOC(sizeof *variant +
  569.                     shader->variant_key_size -
  570.                     sizeof variant->key);
  571.    if (variant == NULL)
  572.       return NULL;
  573.  
  574.    variant->llvm = llvm;
  575.  
  576.    variant->gallivm = gallivm_create();
  577.  
  578.    create_jit_types(variant);
  579.  
  580.    memcpy(&variant->key, key, shader->variant_key_size);
  581.  
  582.    vertex_header = create_jit_vertex_header(variant->gallivm, num_inputs);
  583.  
  584.    variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
  585.  
  586.    draw_llvm_generate(llvm, variant, FALSE);  /* linear */
  587.    draw_llvm_generate(llvm, variant, TRUE);   /* elts */
  588.  
  589.    gallivm_compile_module(variant->gallivm);
  590.  
  591.    variant->jit_func = (draw_jit_vert_func)
  592.          gallivm_jit_function(variant->gallivm, variant->function);
  593.  
  594.    variant->jit_func_elts = (draw_jit_vert_func_elts)
  595.          gallivm_jit_function(variant->gallivm, variant->function_elts);
  596.  
  597.    variant->shader = shader;
  598.    variant->list_item_global.base = variant;
  599.    variant->list_item_local.base = variant;
  600.    /*variant->no = */shader->variants_created++;
  601.    variant->list_item_global.base = variant;
  602.  
  603.    return variant;
  604. }
  605.  
  606.  
  607. static void
  608. generate_vs(struct draw_llvm_variant *variant,
  609.             LLVMBuilderRef builder,
  610.             struct lp_type vs_type,
  611.             LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
  612.             const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
  613.             const struct lp_bld_tgsi_system_values *system_values,
  614.             LLVMValueRef context_ptr,
  615.             struct lp_build_sampler_soa *draw_sampler,
  616.             boolean clamp_vertex_color)
  617. {
  618.    struct draw_llvm *llvm = variant->llvm;
  619.    const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
  620.    LLVMValueRef consts_ptr = draw_jit_context_vs_constants(variant->gallivm, context_ptr);
  621.    struct lp_build_sampler_soa *sampler = 0;
  622.  
  623.    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
  624.       tgsi_dump(tokens, 0);
  625.       draw_llvm_dump_variant_key(&variant->key);
  626.    }
  627.  
  628.    if (llvm->draw->num_sampler_views && llvm->draw->num_samplers)
  629.       sampler = draw_sampler;
  630.  
  631.    lp_build_tgsi_soa(variant->gallivm,
  632.                      tokens,
  633.                      vs_type,
  634.                      NULL /*struct lp_build_mask_context *mask*/,
  635.                      consts_ptr,
  636.                      system_values,
  637.                      inputs,
  638.                      outputs,
  639.                      sampler,
  640.                      &llvm->draw->vs.vertex_shader->info,
  641.                      NULL);
  642.  
  643.    {
  644.       LLVMValueRef out;
  645.       unsigned chan, attrib;
  646.       struct lp_build_context bld;
  647.       struct tgsi_shader_info* info = &llvm->draw->vs.vertex_shader->info;
  648.       lp_build_context_init(&bld, variant->gallivm, vs_type);
  649.  
  650.       for (attrib = 0; attrib < info->num_outputs; ++attrib) {
  651.          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
  652.             if (outputs[attrib][chan]) {
  653.                switch (info->output_semantic_name[attrib]) {
  654.                case TGSI_SEMANTIC_COLOR:
  655.                case TGSI_SEMANTIC_BCOLOR:
  656.                   if (clamp_vertex_color) {
  657.                      out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
  658.                      out = lp_build_clamp(&bld, out, bld.zero, bld.one);
  659.                      LLVMBuildStore(builder, out, outputs[attrib][chan]);
  660.                   }
  661.                   break;
  662.                case TGSI_SEMANTIC_FOG:
  663.                   if (chan == 1 || chan == 2)
  664.                      LLVMBuildStore(builder, bld.zero, outputs[attrib][chan]);
  665.                   else if (chan == 3)
  666.                      LLVMBuildStore(builder, bld.one, outputs[attrib][chan]);
  667.                   break;
  668.                }
  669.             }
  670.          }
  671.       }
  672.    }
  673. }
  674.  
  675. static void
  676. generate_fetch(struct gallivm_state *gallivm,
  677.                struct draw_context *draw,
  678.                LLVMValueRef vbuffers_ptr,
  679.                LLVMValueRef *res,
  680.                struct pipe_vertex_element *velem,
  681.                LLVMValueRef vbuf,
  682.                LLVMValueRef index,
  683.                LLVMValueRef instance_id)
  684. {
  685.    const struct util_format_description *format_desc =
  686.       util_format_description(velem->src_format);
  687.    LLVMValueRef zero = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));
  688.    LLVMBuilderRef builder = gallivm->builder;
  689.    LLVMValueRef indices =
  690.       LLVMConstInt(LLVMInt64TypeInContext(gallivm->context),
  691.                    velem->vertex_buffer_index, 0);
  692.    LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
  693.                                            &indices, 1, "");
  694.    LLVMValueRef vb_stride = draw_jit_vbuffer_stride(gallivm, vbuf);
  695.    LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vbuf);
  696.    LLVMValueRef map_ptr = draw_jit_dvbuffer_map(gallivm, vbuffer_ptr);
  697.    LLVMValueRef buffer_size = draw_jit_dvbuffer_size(gallivm, vbuffer_ptr);
  698.    LLVMValueRef stride;
  699.    LLVMValueRef buffer_overflowed;
  700.    LLVMValueRef needed_buffer_size;
  701.    LLVMValueRef temp_ptr =
  702.       lp_build_alloca(gallivm,
  703.                       lp_build_vec_type(gallivm, lp_float32_vec4_type()), "");
  704.    LLVMValueRef ofbit = NULL;
  705.    struct lp_build_if_state if_ctx;
  706.  
  707.    if (velem->instance_divisor) {
  708.       /* Index is equal to the start instance plus the number of current
  709.        * instance divided by the divisor. In this case we compute it as:
  710.        * index = start_instance + ((instance_id - start_instance) / divisor)
  711.        */
  712.       LLVMValueRef current_instance;
  713.       index = lp_build_const_int32(gallivm, draw->start_instance);
  714.       current_instance = LLVMBuildSub(builder, instance_id, index, "");
  715.       current_instance = LLVMBuildUDiv(builder, current_instance,
  716.                                        lp_build_const_int32(gallivm, velem->instance_divisor),
  717.                                        "instance_divisor");
  718.       index = LLVMBuildAdd(builder, index, current_instance, "instance");
  719.    }
  720.  
  721.    stride = lp_build_umul_overflow(gallivm, vb_stride, index, &ofbit);
  722.    stride = lp_build_uadd_overflow(gallivm, stride, vb_buffer_offset, &ofbit);
  723.    stride = lp_build_uadd_overflow(
  724.       gallivm, stride,
  725.       lp_build_const_int32(gallivm, velem->src_offset), &ofbit);
  726.    needed_buffer_size = lp_build_uadd_overflow(
  727.       gallivm, stride,
  728.       lp_build_const_int32(gallivm,
  729.                            util_format_get_blocksize(velem->src_format)),
  730.       &ofbit);
  731.  
  732.    buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGT,
  733.                                      needed_buffer_size, buffer_size,
  734.                                      "buffer_overflowed");
  735.    buffer_overflowed = LLVMBuildOr(builder, buffer_overflowed, ofbit, "");
  736. #if 0
  737.    lp_build_printf(gallivm, "vbuf index = %u, vb_stride is %u\n",
  738.                    index, vb_stride);
  739.    lp_build_printf(gallivm, "   vb_buffer_offset = %u, src_offset is %u\n",
  740.                    vb_buffer_offset,
  741.                    lp_build_const_int32(gallivm, velem->src_offset));
  742.    lp_build_print_value(gallivm, "   blocksize = ",
  743.                         lp_build_const_int32(
  744.                            gallivm,
  745.                            util_format_get_blocksize(velem->src_format)));
  746.    lp_build_printf(gallivm, "   instance_id = %u\n", instance_id);
  747.    lp_build_printf(gallivm, "   stride = %u\n", stride);
  748.    lp_build_printf(gallivm, "   buffer size = %u\n", buffer_size);
  749.    lp_build_printf(gallivm, "   needed_buffer_size = %u\n", needed_buffer_size);
  750.    lp_build_print_value(gallivm, "   buffer overflowed = ", buffer_overflowed);
  751. #endif
  752.  
  753.    lp_build_if(&if_ctx, gallivm, buffer_overflowed);
  754.    {
  755.       LLVMValueRef val =
  756.          lp_build_const_vec(gallivm, lp_float32_vec4_type(), 0);
  757.       LLVMBuildStore(builder, val, temp_ptr);
  758.    }
  759.    lp_build_else(&if_ctx);
  760.    {
  761.       LLVMValueRef val;
  762.       map_ptr = LLVMBuildGEP(builder, map_ptr, &stride, 1, "");
  763.  
  764.       val = lp_build_fetch_rgba_aos(gallivm,
  765.                                     format_desc,
  766.                                     lp_float32_vec4_type(),
  767.                                     map_ptr,
  768.                                     zero, zero, zero);
  769.       LLVMBuildStore(builder, val, temp_ptr);
  770.    }
  771.    lp_build_endif(&if_ctx);
  772.  
  773.    *res = LLVMBuildLoad(builder, temp_ptr, "aos");
  774. }
  775.  
  776. static void
  777. convert_to_soa(struct gallivm_state *gallivm,
  778.                LLVMValueRef (*src_aos)[LP_MAX_VECTOR_WIDTH / 32],
  779.                LLVMValueRef (*dst_soa)[TGSI_NUM_CHANNELS],
  780.                unsigned num_attribs, const struct lp_type soa_type)
  781. {
  782.    unsigned i, j, k;
  783.    struct lp_type aos_channel_type = soa_type;
  784.  
  785.    debug_assert(TGSI_NUM_CHANNELS == 4);
  786.    debug_assert((soa_type.length % TGSI_NUM_CHANNELS) == 0);
  787.  
  788.    aos_channel_type.length >>= 1;
  789.  
  790.    for (i = 0; i < num_attribs; ++i) {
  791.       LLVMValueRef aos_channels[TGSI_NUM_CHANNELS];
  792.       unsigned pixels_per_channel = soa_type.length / TGSI_NUM_CHANNELS;
  793.  
  794.       for (j = 0; j < TGSI_NUM_CHANNELS; ++j) {
  795.          LLVMValueRef channel[LP_MAX_VECTOR_LENGTH] = { 0 };
  796.  
  797.          assert(pixels_per_channel <= LP_MAX_VECTOR_LENGTH);
  798.  
  799.          for (k = 0; k < pixels_per_channel; ++k) {
  800.             channel[k] = src_aos[i][j + TGSI_NUM_CHANNELS * k];
  801.          }
  802.  
  803.          aos_channels[j] = lp_build_concat(gallivm, channel, aos_channel_type, pixels_per_channel);
  804.       }
  805.  
  806.       lp_build_transpose_aos(gallivm, soa_type, aos_channels, dst_soa[i]);
  807.    }
  808. }
  809.  
  810.  
  811. static void
  812. store_aos(struct gallivm_state *gallivm,
  813.           LLVMValueRef io_ptr,
  814.           LLVMValueRef index,
  815.           LLVMValueRef value)
  816. {
  817.    LLVMTypeRef data_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, lp_float32_vec4_type()), 0);
  818.    LLVMBuilderRef builder = gallivm->builder;
  819.    LLVMValueRef data_ptr = draw_jit_header_data(gallivm, io_ptr);
  820.    LLVMValueRef indices[3];
  821.  
  822.    indices[0] = lp_build_const_int32(gallivm, 0);
  823.    indices[1] = index;
  824.    indices[2] = lp_build_const_int32(gallivm, 0);
  825.  
  826.    data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, "");
  827.    data_ptr = LLVMBuildPointerCast(builder, data_ptr, data_ptr_type, "");
  828.  
  829. #if DEBUG_STORE
  830.    lp_build_printf(gallivm, "    ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
  831. #endif
  832.  
  833.    /* Unaligned store due to the vertex header */
  834.    lp_set_store_alignment(LLVMBuildStore(builder, value, data_ptr), sizeof(float));
  835. }
  836.  
  837. /**
  838.  * Adjust the mask to architecture endianess. The mask will the store in struct:
  839.  *
  840.  * struct vertex_header {
  841.  *    unsigned clipmask:DRAW_TOTAL_CLIP_PLANES;
  842.  *    unsigned edgeflag:1;
  843.  *    unsigned have_clipdist:1;
  844.  *    unsigned vertex_id:16;
  845.  *    [...]
  846.  * }
  847.  *
  848.  * On little-endian machine nothing needs to done, however on bit-endian machine
  849.  * the mask's fields need to be adjusted with the algorithm:
  850.  *
  851.  * uint32_t reverse (uint32_t x)
  852.  * {
  853.  *   return (x >> 16) |              // vertex_id
  854.  *          ((x & 0x3fff) << 18) |   // clipmask
  855.  *          ((x & 0x4000) << 3) |    // have_clipdist
  856.  *          ((x & 0x8000) << 1);     // edgeflag
  857.  * }
  858.  */
  859. static LLVMValueRef
  860. adjust_mask(struct gallivm_state *gallivm,
  861.             LLVMValueRef mask)
  862. {
  863. #ifdef PIPE_ARCH_BIG_ENDIAN
  864.    LLVMBuilderRef builder = gallivm->builder;
  865.    LLVMValueRef vertex_id;
  866.    LLVMValueRef clipmask;
  867.    LLVMValueRef have_clipdist;
  868.    LLVMValueRef edgeflag;
  869.  
  870.    vertex_id = LLVMBuildLShr(builder, mask, lp_build_const_int32(gallivm, 16), "");
  871.    clipmask  = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x3fff), "");
  872.    clipmask  = LLVMBuildShl(builder, clipmask, lp_build_const_int32(gallivm, 18), "");
  873.    have_clipdist = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x4000), "");
  874.    have_clipdist = LLVMBuildShl(builder, have_clipdist, lp_build_const_int32(gallivm, 3), "");
  875.    edgeflag = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x8000), "");
  876.    edgeflag = LLVMBuildShl(builder, edgeflag, lp_build_const_int32(gallivm, 1), "");
  877.  
  878.    mask = LLVMBuildOr(builder, vertex_id, clipmask, "");
  879.    mask = LLVMBuildOr(builder, mask, have_clipdist, "");
  880.    mask = LLVMBuildOr(builder, mask, edgeflag, "");
  881. #endif
  882.    return mask;
  883. }
  884.  
  885. static void
  886. store_aos_array(struct gallivm_state *gallivm,
  887.                 struct lp_type soa_type,
  888.                 LLVMValueRef io_ptr,
  889.                 LLVMValueRef *indices,
  890.                 LLVMValueRef* aos,
  891.                 int attrib,
  892.                 int num_outputs,
  893.                 LLVMValueRef clipmask,
  894.                 boolean have_clipdist)
  895. {
  896.    LLVMBuilderRef builder = gallivm->builder;
  897.    LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib);
  898.    LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
  899.    LLVMValueRef linear_inds[LP_MAX_VECTOR_WIDTH / 32];
  900.    LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
  901.    int vector_length = soa_type.length;
  902.    int i;
  903.  
  904.    debug_assert(TGSI_NUM_CHANNELS == 4);
  905.  
  906.    for (i = 0; i < vector_length; i++) {
  907.       linear_inds[i] = lp_build_const_int32(gallivm, i);
  908.       if (indices) {
  909.          inds[i] = indices[i];
  910.       } else {
  911.          inds[i] = linear_inds[i];
  912.       }
  913.       io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, "");
  914.    }
  915.  
  916.    if (attrib == 0) {
  917.       /* store vertex header for each of the n vertices */
  918.       LLVMValueRef val, cliptmp;
  919.       int vertex_id_pad_edgeflag;
  920.  
  921.       /* If this assertion fails, it means we need to update the bit twidding
  922.        * code here.  See struct vertex_header in draw_private.h.
  923.        */
  924.       assert(DRAW_TOTAL_CLIP_PLANES==14);
  925.       /* initialize vertex id:16 = 0xffff, have_clipdist:1 = 0, edgeflag:1 = 1 */
  926.       vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES);
  927.       if (have_clipdist)
  928.          vertex_id_pad_edgeflag |= 1 << (DRAW_TOTAL_CLIP_PLANES+1);
  929.       val = lp_build_const_int_vec(gallivm, lp_int_type(soa_type), vertex_id_pad_edgeflag);
  930.       /* OR with the clipmask */
  931.       cliptmp = LLVMBuildOr(builder, val, clipmask, "");
  932.       for (i = 0; i < vector_length; i++) {
  933.          LLVMValueRef id_ptr = draw_jit_header_id(gallivm, io_ptrs[i]);
  934.          val = LLVMBuildExtractElement(builder, cliptmp, linear_inds[i], "");
  935.          val = adjust_mask(gallivm, val);
  936.          LLVMBuildStore(builder, val, id_ptr);
  937. #if DEBUG_STORE
  938.          lp_build_printf(gallivm, "io = %p, index %d, clipmask = %x\n",
  939.                          io_ptrs[i], inds[i], val);
  940. #endif
  941.       }
  942.    }
  943.  
  944.    /* store for each of the n vertices */
  945.    for (i = 0; i < vector_length; i++) {
  946.       store_aos(gallivm, io_ptrs[i], attr_index, aos[i]);
  947.    }
  948. }
  949.  
  950.  
  951. static void
  952. convert_to_aos(struct gallivm_state *gallivm,
  953.                LLVMValueRef io,
  954.                LLVMValueRef *indices,
  955.                LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
  956.                LLVMValueRef clipmask,
  957.                int num_outputs,
  958.                struct lp_type soa_type,
  959.                boolean have_clipdist)
  960. {
  961.    LLVMBuilderRef builder = gallivm->builder;
  962.    unsigned chan, attrib, i;
  963.  
  964. #if DEBUG_STORE
  965.    lp_build_printf(gallivm, "   # storing begin\n");
  966. #endif
  967.    for (attrib = 0; attrib < num_outputs; ++attrib) {
  968.       LLVMValueRef soa[TGSI_NUM_CHANNELS];
  969.       LLVMValueRef aos[LP_MAX_VECTOR_WIDTH / 32];
  970.       for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
  971.          if (outputs[attrib][chan]) {
  972.             LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
  973.             lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
  974. #if DEBUG_STORE
  975.             lp_build_printf(gallivm, "output %d : %d ",
  976.                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
  977.                                          attrib, 0),
  978.                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
  979.                                          chan, 0));
  980.             lp_build_print_value(gallivm, "val = ", out);
  981. #endif
  982.             soa[chan] = out;
  983.          }
  984.          else {
  985.             soa[chan] = 0;
  986.          }
  987.       }
  988.  
  989.  
  990.       if (soa_type.length == TGSI_NUM_CHANNELS) {
  991.          lp_build_transpose_aos(gallivm, soa_type, soa, aos);
  992.       } else {
  993.          lp_build_transpose_aos(gallivm, soa_type, soa, soa);
  994.  
  995.          for (i = 0; i < soa_type.length; ++i) {
  996.             aos[i] = lp_build_extract_range(gallivm,
  997.                                             soa[i % TGSI_NUM_CHANNELS],
  998.                                             (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
  999.                                             TGSI_NUM_CHANNELS);
  1000.          }
  1001.       }
  1002.  
  1003.       store_aos_array(gallivm,
  1004.                       soa_type,
  1005.                       io, indices,
  1006.                       aos,
  1007.                       attrib,
  1008.                       num_outputs,
  1009.                       clipmask, have_clipdist);
  1010.    }
  1011. #if DEBUG_STORE
  1012.    lp_build_printf(gallivm, "   # storing end\n");
  1013. #endif
  1014. }
  1015.  
  1016.  
  1017. /**
  1018.  * Stores original vertex positions in clip coordinates
  1019.  */
  1020. static void
  1021. store_clip(struct gallivm_state *gallivm,
  1022.            const struct lp_type vs_type,
  1023.            LLVMValueRef io_ptr,
  1024.            LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
  1025.            boolean pre_clip_pos, int idx)
  1026. {
  1027.    LLVMBuilderRef builder = gallivm->builder;
  1028.    LLVMValueRef soa[4];
  1029.    LLVMValueRef aos[LP_MAX_VECTOR_LENGTH];
  1030.    LLVMValueRef indices[2];
  1031.    LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
  1032.    LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
  1033.    LLVMValueRef clip_ptrs[LP_MAX_VECTOR_WIDTH / 32];
  1034.    int i, j;
  1035.  
  1036.    indices[0] =
  1037.    indices[1] = lp_build_const_int32(gallivm, 0);
  1038.  
  1039.    for (i = 0; i < vs_type.length; i++) {
  1040.       inds[i] = lp_build_const_int32(gallivm, i);
  1041.       io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, "");
  1042.    }
  1043.  
  1044.    soa[0] = LLVMBuildLoad(builder, outputs[idx][0], ""); /*x0 x1 .. xn*/
  1045.    soa[1] = LLVMBuildLoad(builder, outputs[idx][1], ""); /*y0 y1 .. yn*/
  1046.    soa[2] = LLVMBuildLoad(builder, outputs[idx][2], ""); /*z0 z1 .. zn*/
  1047.    soa[3] = LLVMBuildLoad(builder, outputs[idx][3], ""); /*w0 w1 .. wn*/
  1048.  
  1049.    if (!pre_clip_pos) {
  1050.       for (i = 0; i < vs_type.length; i++) {
  1051.          clip_ptrs[i] = draw_jit_header_clip(gallivm, io_ptrs[i]);
  1052.       }
  1053.    } else {
  1054.       for (i = 0; i < vs_type.length; i++) {
  1055.          clip_ptrs[i] = draw_jit_header_pre_clip_pos(gallivm, io_ptrs[i]);
  1056.       }
  1057.    }
  1058.  
  1059.    lp_build_transpose_aos(gallivm, vs_type, soa, soa);
  1060.    for (i = 0; i < vs_type.length; ++i) {
  1061.       aos[i] = lp_build_extract_range(gallivm,
  1062.                                       soa[i % TGSI_NUM_CHANNELS],
  1063.                                       (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
  1064.                                       TGSI_NUM_CHANNELS);
  1065.    }
  1066.  
  1067.    for (j = 0; j < vs_type.length; j++) {
  1068.       LLVMTypeRef  clip_ptr_type = LLVMPointerType(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), 0);
  1069.       LLVMValueRef clip_ptr;
  1070.  
  1071.       clip_ptr = LLVMBuildGEP(builder, clip_ptrs[j], indices, 2, "clipo");
  1072.       clip_ptr = LLVMBuildPointerCast(builder, clip_ptr, clip_ptr_type, "");
  1073.  
  1074.       /* Unaligned store */
  1075.       lp_set_store_alignment(LLVMBuildStore(builder, aos[j], clip_ptr), sizeof(float));
  1076.    }
  1077. }
  1078.  
  1079.  
  1080. /**
  1081.  * Transforms the outputs for viewport mapping
  1082.  */
  1083. static void
  1084. generate_viewport(struct draw_llvm_variant *variant,
  1085.                   LLVMBuilderRef builder,
  1086.                   struct lp_type vs_type,
  1087.                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
  1088.                   LLVMValueRef context_ptr)
  1089. {
  1090.    int i;
  1091.    struct gallivm_state *gallivm = variant->gallivm;
  1092.    struct lp_type f32_type = vs_type;
  1093.    const unsigned pos = draw_current_shader_position_output(variant->llvm->draw);
  1094.    LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
  1095.    LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[pos][3], ""); /*w0 w1 .. wn*/
  1096.    LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0);       /*1.0 1.0 1.0 1.0*/
  1097.    LLVMValueRef vp_ptr = draw_jit_context_viewport(gallivm, context_ptr);
  1098.  
  1099.    /* for 1/w convention*/
  1100.    out3 = LLVMBuildFDiv(builder, const1, out3, "");
  1101.    LLVMBuildStore(builder, out3, outputs[pos][3]);
  1102.  
  1103.    /* Viewport Mapping */
  1104.    for (i=0; i<3; i++) {
  1105.       LLVMValueRef out = LLVMBuildLoad(builder, outputs[pos][i], ""); /*x0 x1 .. xn*/
  1106.       LLVMValueRef scale;
  1107.       LLVMValueRef trans;
  1108.       LLVMValueRef scale_i;
  1109.       LLVMValueRef trans_i;
  1110.       LLVMValueRef index;
  1111.  
  1112.       index = lp_build_const_int32(gallivm, i);
  1113.       scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
  1114.  
  1115.       index = lp_build_const_int32(gallivm, i+4);
  1116.       trans_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
  1117.  
  1118.       scale = lp_build_broadcast(gallivm, vs_type_llvm,
  1119.                                  LLVMBuildLoad(builder, scale_i, "scale"));
  1120.       trans = lp_build_broadcast(gallivm, vs_type_llvm,
  1121.                                  LLVMBuildLoad(builder, trans_i, "trans"));
  1122.  
  1123.       /* divide by w */
  1124.       out = LLVMBuildFMul(builder, out, out3, "");
  1125.       /* mult by scale */
  1126.       out = LLVMBuildFMul(builder, out, scale, "");
  1127.       /* add translation */
  1128.       out = LLVMBuildFAdd(builder, out, trans, "");
  1129.  
  1130.       /* store transformed outputs */
  1131.       LLVMBuildStore(builder, out, outputs[pos][i]);
  1132.    }
  1133.  
  1134. }
  1135.  
  1136.  
  1137. /**
  1138.  * Returns clipmask as nxi32 bitmask for the n vertices
  1139.  */
  1140. static LLVMValueRef
  1141. generate_clipmask(struct draw_llvm *llvm,
  1142.                   struct gallivm_state *gallivm,
  1143.                   struct lp_type vs_type,
  1144.                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
  1145.                   boolean clip_xy,
  1146.                   boolean clip_z,
  1147.                   boolean clip_user,
  1148.                   boolean clip_halfz,
  1149.                   unsigned ucp_enable,
  1150.                   LLVMValueRef context_ptr,
  1151.                   boolean *have_clipdist)
  1152. {
  1153.    LLVMBuilderRef builder = gallivm->builder;
  1154.    LLVMValueRef mask; /* stores the <nxi32> clipmasks */
  1155.    LLVMValueRef test, temp;
  1156.    LLVMValueRef zero, shift;
  1157.    LLVMValueRef pos_x, pos_y, pos_z, pos_w;
  1158.    LLVMValueRef cv_x, cv_y, cv_z, cv_w;
  1159.    LLVMValueRef plane1, planes, plane_ptr, sum;
  1160.    struct lp_type f32_type = vs_type;
  1161.    struct lp_type i32_type = lp_int_type(vs_type);
  1162.    const unsigned pos = draw_current_shader_position_output(llvm->draw);
  1163.    const unsigned cv = draw_current_shader_clipvertex_output(llvm->draw);
  1164.    int num_written_clipdistance = llvm->draw->vs.vertex_shader->info.num_written_clipdistance;
  1165.    bool have_cd = false;
  1166.    unsigned cd[2];
  1167.  
  1168.    cd[0] = draw_current_shader_clipdistance_output(llvm->draw, 0);
  1169.    cd[1] = draw_current_shader_clipdistance_output(llvm->draw, 1);
  1170.  
  1171.    if (cd[0] != pos || cd[1] != pos)
  1172.       have_cd = true;
  1173.  
  1174.    if (num_written_clipdistance && !clip_user) {
  1175.       clip_user = true;
  1176.       ucp_enable = (1 << num_written_clipdistance) - 1;
  1177.    }
  1178.  
  1179.    mask = lp_build_const_int_vec(gallivm, i32_type, 0);
  1180.    temp = lp_build_const_int_vec(gallivm, i32_type, 0);
  1181.    zero = lp_build_const_vec(gallivm, f32_type, 0);         /* 0.0f 0.0f 0.0f 0.0f */
  1182.    shift = lp_build_const_int_vec(gallivm, i32_type, 1);    /* 1 1 1 1 */
  1183.  
  1184.    /*
  1185.     * load clipvertex and position from correct locations.
  1186.     * if they are the same just load them once.
  1187.     */
  1188.    pos_x = LLVMBuildLoad(builder, outputs[pos][0], ""); /*x0 x1 .. xn */
  1189.    pos_y = LLVMBuildLoad(builder, outputs[pos][1], ""); /*y0 y1 .. yn */
  1190.    pos_z = LLVMBuildLoad(builder, outputs[pos][2], ""); /*z0 z1 .. zn */
  1191.    pos_w = LLVMBuildLoad(builder, outputs[pos][3], ""); /*w0 w1 .. wn */
  1192.  
  1193.    if (clip_user && cv != pos) {
  1194.       cv_x = LLVMBuildLoad(builder, outputs[cv][0], ""); /*x0 x1 .. xn */
  1195.       cv_y = LLVMBuildLoad(builder, outputs[cv][1], ""); /*y0 y1 .. yn */
  1196.       cv_z = LLVMBuildLoad(builder, outputs[cv][2], ""); /*z0 z1 .. zn */
  1197.       cv_w = LLVMBuildLoad(builder, outputs[cv][3], ""); /*w0 w1 .. wn */
  1198.    } else {
  1199.       cv_x = pos_x;
  1200.       cv_y = pos_y;
  1201.       cv_z = pos_z;
  1202.       cv_w = pos_w;
  1203.    }
  1204.  
  1205.    /* Cliptest, for hardwired planes */
  1206.    if (clip_xy) {
  1207.       /* plane 1 */
  1208.       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
  1209.       temp = shift;
  1210.       test = LLVMBuildAnd(builder, test, temp, "");
  1211.       mask = test;
  1212.  
  1213.       /* plane 2 */
  1214.       test = LLVMBuildFAdd(builder, pos_x, pos_w, "");
  1215.       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
  1216.       temp = LLVMBuildShl(builder, temp, shift, "");
  1217.       test = LLVMBuildAnd(builder, test, temp, "");
  1218.       mask = LLVMBuildOr(builder, mask, test, "");
  1219.  
  1220.       /* plane 3 */
  1221.       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);
  1222.       temp = LLVMBuildShl(builder, temp, shift, "");
  1223.       test = LLVMBuildAnd(builder, test, temp, "");
  1224.       mask = LLVMBuildOr(builder, mask, test, "");
  1225.  
  1226.       /* plane 4 */
  1227.       test = LLVMBuildFAdd(builder, pos_y, pos_w, "");
  1228.       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
  1229.       temp = LLVMBuildShl(builder, temp, shift, "");
  1230.       test = LLVMBuildAnd(builder, test, temp, "");
  1231.       mask = LLVMBuildOr(builder, mask, test, "");
  1232.    }
  1233.  
  1234.    if (clip_z) {
  1235.       temp = lp_build_const_int_vec(gallivm, i32_type, 16);
  1236.       if (clip_halfz) {
  1237.          /* plane 5 */
  1238.          test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
  1239.          test = LLVMBuildAnd(builder, test, temp, "");
  1240.          mask = LLVMBuildOr(builder, mask, test, "");
  1241.       }
  1242.       else {
  1243.          /* plane 5 */
  1244.          test = LLVMBuildFAdd(builder, pos_z, pos_w, "");
  1245.          test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
  1246.          test = LLVMBuildAnd(builder, test, temp, "");
  1247.          mask = LLVMBuildOr(builder, mask, test, "");
  1248.       }
  1249.       /* plane 6 */
  1250.       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);
  1251.       temp = LLVMBuildShl(builder, temp, shift, "");
  1252.       test = LLVMBuildAnd(builder, test, temp, "");
  1253.       mask = LLVMBuildOr(builder, mask, test, "");
  1254.    }
  1255.  
  1256.    if (clip_user) {
  1257.       LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_ptr);
  1258.       LLVMValueRef indices[3];
  1259.  
  1260.       /* userclip planes */
  1261.       while (ucp_enable) {
  1262.          unsigned plane_idx = ffs(ucp_enable)-1;
  1263.          ucp_enable &= ~(1 << plane_idx);
  1264.          plane_idx += 6;
  1265.  
  1266.          if (have_cd && num_written_clipdistance) {
  1267.             LLVMValueRef clipdist;
  1268.             int i;
  1269.             i = plane_idx - 6;
  1270.  
  1271.             *have_clipdist = TRUE;
  1272.             if (i < 4) {
  1273.                clipdist = LLVMBuildLoad(builder, outputs[cd[0]][i], "");
  1274.             } else {
  1275.                clipdist = LLVMBuildLoad(builder, outputs[cd[1]][i-4], "");
  1276.             }
  1277.             test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, clipdist);
  1278.             temp = lp_build_const_int_vec(gallivm, i32_type, 1 << plane_idx);
  1279.             test = LLVMBuildAnd(builder, test, temp, "");
  1280.             mask = LLVMBuildOr(builder, mask, test, "");
  1281.          } else {
  1282.             LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
  1283.             indices[0] = lp_build_const_int32(gallivm, 0);
  1284.             indices[1] = lp_build_const_int32(gallivm, plane_idx);
  1285.  
  1286.             indices[2] = lp_build_const_int32(gallivm, 0);
  1287.             plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
  1288.             plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x");
  1289.             planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
  1290.             sum = LLVMBuildFMul(builder, planes, cv_x, "");
  1291.  
  1292.             indices[2] = lp_build_const_int32(gallivm, 1);
  1293.             plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
  1294.             plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y");
  1295.             planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
  1296.             test = LLVMBuildFMul(builder, planes, cv_y, "");
  1297.             sum = LLVMBuildFAdd(builder, sum, test, "");
  1298.  
  1299.             indices[2] = lp_build_const_int32(gallivm, 2);
  1300.             plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
  1301.             plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z");
  1302.             planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
  1303.             test = LLVMBuildFMul(builder, planes, cv_z, "");
  1304.             sum = LLVMBuildFAdd(builder, sum, test, "");
  1305.  
  1306.             indices[2] = lp_build_const_int32(gallivm, 3);
  1307.             plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
  1308.             plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w");
  1309.             planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
  1310.             test = LLVMBuildFMul(builder, planes, cv_w, "");
  1311.             sum = LLVMBuildFAdd(builder, sum, test, "");
  1312.  
  1313.             test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum);
  1314.             temp = lp_build_const_int_vec(gallivm, i32_type, 1 << plane_idx);
  1315.             test = LLVMBuildAnd(builder, test, temp, "");
  1316.             mask = LLVMBuildOr(builder, mask, test, "");
  1317.          }
  1318.       }
  1319.    }
  1320.    return mask;
  1321. }
  1322.  
  1323.  
  1324. /**
  1325.  * Returns boolean if any clipping has occurred
  1326.  * Used zero/non-zero i32 value to represent boolean
  1327.  */
  1328. static LLVMValueRef
  1329. clipmask_booli32(struct gallivm_state *gallivm,
  1330.                  const struct lp_type vs_type,
  1331.                  LLVMValueRef clipmask_bool_ptr)
  1332. {
  1333.    LLVMBuilderRef builder = gallivm->builder;
  1334.    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
  1335.    LLVMValueRef clipmask_bool = LLVMBuildLoad(builder, clipmask_bool_ptr, "");
  1336.    LLVMValueRef ret = LLVMConstNull(int32_type);
  1337.    LLVMValueRef temp;
  1338.    int i;
  1339.  
  1340.    /*
  1341.     * Can do this with log2(vector length) pack instructions and one extract
  1342.     * (as we don't actually need a or) with sse2 which would be way better.
  1343.     */
  1344.    for (i=0; i < vs_type.length; i++) {
  1345.       temp = LLVMBuildExtractElement(builder, clipmask_bool,
  1346.                                      lp_build_const_int32(gallivm, i) , "");
  1347.       ret = LLVMBuildOr(builder, ret, temp, "");
  1348.    }
  1349.    return ret;
  1350. }
  1351.  
  1352. static LLVMValueRef
  1353. draw_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface,
  1354.                          struct lp_build_tgsi_context * bld_base,
  1355.                          boolean is_indirect,
  1356.                          LLVMValueRef vertex_index,
  1357.                          LLVMValueRef attrib_index,
  1358.                          LLVMValueRef swizzle_index)
  1359. {
  1360.    const struct draw_gs_llvm_iface *gs = draw_gs_llvm_iface(gs_iface);
  1361.    struct gallivm_state *gallivm = bld_base->base.gallivm;
  1362.    LLVMBuilderRef builder = gallivm->builder;
  1363.    LLVMValueRef indices[3];
  1364.    LLVMValueRef res;
  1365.    struct lp_type type = bld_base->base.type;
  1366.  
  1367.    if (is_indirect) {
  1368.       int i;
  1369.       res = bld_base->base.zero;
  1370.       for (i = 0; i < type.length; ++i) {
  1371.          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
  1372.          LLVMValueRef vert_chan_index = LLVMBuildExtractElement(builder,
  1373.                                                                 vertex_index, idx, "");
  1374.          LLVMValueRef channel_vec, value;
  1375.          indices[0] = vert_chan_index;
  1376.          indices[1] = attrib_index;
  1377.          indices[2] = swizzle_index;
  1378.          
  1379.          channel_vec = LLVMBuildGEP(builder, gs->input, indices, 3, "");
  1380.          channel_vec = LLVMBuildLoad(builder, channel_vec, "");
  1381.          value = LLVMBuildExtractElement(builder, channel_vec, idx, "");
  1382.  
  1383.          res = LLVMBuildInsertElement(builder, res, value, idx, "");
  1384.       }
  1385.    } else {
  1386.       indices[0] = vertex_index;
  1387.       indices[1] = attrib_index;
  1388.       indices[2] = swizzle_index;
  1389.  
  1390.       res = LLVMBuildGEP(builder, gs->input, indices, 3, "");
  1391.       res = LLVMBuildLoad(builder, res, "");
  1392.    }
  1393.  
  1394.    return res;
  1395. }
  1396.  
  1397. static void
  1398. draw_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base,
  1399.                          struct lp_build_tgsi_context * bld_base,
  1400.                          LLVMValueRef (*outputs)[4],
  1401.                          LLVMValueRef emitted_vertices_vec)
  1402. {
  1403.    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
  1404.    struct draw_gs_llvm_variant *variant = gs_iface->variant;
  1405.    struct gallivm_state *gallivm = variant->gallivm;
  1406.    LLVMBuilderRef builder = gallivm->builder;
  1407.    struct lp_type gs_type = bld_base->base.type;
  1408.    LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
  1409.                                                   lp_int_type(gs_type), 0);
  1410.    LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
  1411.    LLVMValueRef next_prim_offset =
  1412.       lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary);
  1413.    LLVMValueRef io = variant->io_ptr;
  1414.    unsigned i;
  1415.    const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
  1416.  
  1417.    for (i = 0; i < gs_type.length; ++i) {
  1418.       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
  1419.       LLVMValueRef currently_emitted =
  1420.          LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, "");
  1421.       indices[i] = LLVMBuildMul(builder, ind, next_prim_offset, "");
  1422.       indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, "");
  1423.    }
  1424.  
  1425.    convert_to_aos(gallivm, io, indices,
  1426.                   outputs, clipmask,
  1427.                   gs_info->num_outputs, gs_type,
  1428.                   FALSE);
  1429. }
  1430.  
  1431. static void
  1432. draw_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface *gs_base,
  1433.                            struct lp_build_tgsi_context * bld_base,
  1434.                            LLVMValueRef verts_per_prim_vec,
  1435.                            LLVMValueRef emitted_prims_vec)
  1436. {
  1437.    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
  1438.    struct draw_gs_llvm_variant *variant = gs_iface->variant;
  1439.    struct gallivm_state *gallivm = variant->gallivm;
  1440.    LLVMBuilderRef builder = gallivm->builder;
  1441.    LLVMValueRef prim_lengts_ptr =
  1442.       draw_gs_jit_prim_lengths(variant->gallivm, variant->context_ptr);
  1443.    unsigned i;
  1444.  
  1445.    for (i = 0; i < bld_base->base.type.length; ++i) {
  1446.       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
  1447.       LLVMValueRef prims_emitted =
  1448.          LLVMBuildExtractElement(builder, emitted_prims_vec, ind, "");
  1449.       LLVMValueRef store_ptr;
  1450.       LLVMValueRef num_vertices =
  1451.          LLVMBuildExtractElement(builder, verts_per_prim_vec, ind, "");
  1452.  
  1453.       store_ptr = LLVMBuildGEP(builder, prim_lengts_ptr, &prims_emitted, 1, "");
  1454.       store_ptr = LLVMBuildLoad(builder, store_ptr, "");
  1455.       store_ptr = LLVMBuildGEP(builder, store_ptr, &ind, 1, "");
  1456.       LLVMBuildStore(builder, num_vertices, store_ptr);
  1457.    }
  1458. }
  1459.  
  1460. static void
  1461. draw_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base,
  1462.                       struct lp_build_tgsi_context * bld_base,
  1463.                       LLVMValueRef total_emitted_vertices_vec,
  1464.                       LLVMValueRef emitted_prims_vec)
  1465. {
  1466.    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
  1467.    struct draw_gs_llvm_variant *variant = gs_iface->variant;
  1468.    struct gallivm_state *gallivm = variant->gallivm;
  1469.    LLVMBuilderRef builder = gallivm->builder;
  1470.    LLVMValueRef emitted_verts_ptr =
  1471.       draw_gs_jit_emitted_vertices(gallivm, variant->context_ptr);
  1472.    LLVMValueRef emitted_prims_ptr =
  1473.       draw_gs_jit_emitted_prims(gallivm, variant->context_ptr);
  1474.    LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
  1475.    
  1476.    emitted_verts_ptr = LLVMBuildGEP(builder, emitted_verts_ptr, &zero, 0, "");
  1477.    emitted_prims_ptr = LLVMBuildGEP(builder, emitted_prims_ptr, &zero, 0, "");
  1478.  
  1479.    LLVMBuildStore(builder, total_emitted_vertices_vec, emitted_verts_ptr);
  1480.    LLVMBuildStore(builder, emitted_prims_vec, emitted_prims_ptr);
  1481. }
  1482.  
  1483. static void
  1484. draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
  1485.                    boolean elts)
  1486. {
  1487.    struct gallivm_state *gallivm = variant->gallivm;
  1488.    LLVMContextRef context = gallivm->context;
  1489.    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
  1490.    LLVMTypeRef arg_types[9];
  1491.    unsigned num_arg_types =
  1492.       elts ? Elements(arg_types) : Elements(arg_types) - 1;
  1493.    LLVMTypeRef func_type;
  1494.    LLVMValueRef context_ptr;
  1495.    LLVMBasicBlockRef block;
  1496.    LLVMBuilderRef builder;
  1497.    struct lp_type vs_type;
  1498.    LLVMValueRef end, start;
  1499.    LLVMValueRef count, fetch_elts, fetch_elt_max, fetch_count;
  1500.    LLVMValueRef stride, step, io_itr;
  1501.    LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
  1502.    LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
  1503.    LLVMValueRef one = lp_build_const_int32(gallivm, 1);
  1504.    struct draw_context *draw = llvm->draw;
  1505.    const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
  1506.    unsigned i, j;
  1507.    struct lp_build_context bld;
  1508.    struct lp_build_loop_state lp_loop;
  1509.    const int vector_length = lp_native_vector_width / 32;
  1510.    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
  1511.    LLVMValueRef fetch_max;
  1512.    struct lp_build_sampler_soa *sampler = 0;
  1513.    LLVMValueRef ret, clipmask_bool_ptr;
  1514.    const struct draw_geometry_shader *gs = draw->gs.geometry_shader;
  1515.    struct draw_llvm_variant_key *key = &variant->key;
  1516.    /* If geometry shader is present we need to skip both the viewport
  1517.     * transformation and clipping otherwise the inputs to the geometry
  1518.     * shader will be incorrect.
  1519.     */
  1520.    const boolean bypass_viewport = gs || key->bypass_viewport;
  1521.    const boolean enable_cliptest = !gs && (key->clip_xy ||
  1522.                                            key->clip_z  ||
  1523.                                            key->clip_user);
  1524.    LLVMValueRef variant_func;
  1525.    const unsigned pos = draw_current_shader_position_output(llvm->draw);
  1526.    const unsigned cv = draw_current_shader_clipvertex_output(llvm->draw);
  1527.    boolean have_clipdist = FALSE;
  1528.    struct lp_bld_tgsi_system_values system_values;
  1529.  
  1530.    memset(&system_values, 0, sizeof(system_values));
  1531.  
  1532.    i = 0;
  1533.    arg_types[i++] = get_context_ptr_type(variant);       /* context */
  1534.    arg_types[i++] = get_vertex_header_ptr_type(variant); /* vertex_header */
  1535.    arg_types[i++] = get_buffer_ptr_type(variant);        /* vbuffers */
  1536.    if (elts) {
  1537.       arg_types[i++] = LLVMPointerType(int32_type, 0);/* fetch_elts  */
  1538.       arg_types[i++] = int32_type;                  /* fetch_elt_max */
  1539.    } else
  1540.       arg_types[i++] = int32_type;                  /* start */
  1541.    arg_types[i++] = int32_type;                     /* fetch_count / count */
  1542.    arg_types[i++] = int32_type;                     /* stride */
  1543.    arg_types[i++] = get_vb_ptr_type(variant);       /* pipe_vertex_buffer's */
  1544.    arg_types[i++] = int32_type;                     /* instance_id */
  1545.  
  1546.    func_type = LLVMFunctionType(int32_type, arg_types, num_arg_types, 0);
  1547.  
  1548.    variant_func = LLVMAddFunction(gallivm->module,
  1549.                                   elts ? "draw_llvm_shader_elts" : "draw_llvm_shader",
  1550.                                   func_type);
  1551.  
  1552.    if (elts)
  1553.       variant->function_elts = variant_func;
  1554.    else
  1555.       variant->function = variant_func;
  1556.  
  1557.    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
  1558.    for (i = 0; i < num_arg_types; ++i)
  1559.       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
  1560.          LLVMAddAttribute(LLVMGetParam(variant_func, i),
  1561.                           LLVMNoAliasAttribute);
  1562.  
  1563.    context_ptr               = LLVMGetParam(variant_func, 0);
  1564.    io_ptr                    = LLVMGetParam(variant_func, 1);
  1565.    vbuffers_ptr              = LLVMGetParam(variant_func, 2);
  1566.    stride                    = LLVMGetParam(variant_func, 5 + (elts ? 1 : 0));
  1567.    vb_ptr                    = LLVMGetParam(variant_func, 6 + (elts ? 1 : 0));
  1568.    system_values.instance_id = LLVMGetParam(variant_func, 7 + (elts ? 1 : 0));
  1569.  
  1570.    lp_build_name(context_ptr, "context");
  1571.    lp_build_name(io_ptr, "io");
  1572.    lp_build_name(vbuffers_ptr, "vbuffers");
  1573.    lp_build_name(stride, "stride");
  1574.    lp_build_name(vb_ptr, "vb");
  1575.    lp_build_name(system_values.instance_id, "instance_id");
  1576.  
  1577.    if (elts) {
  1578.       fetch_elts    = LLVMGetParam(variant_func, 3);
  1579.       fetch_elt_max = LLVMGetParam(variant_func, 4);
  1580.       fetch_count   = LLVMGetParam(variant_func, 5);
  1581.       lp_build_name(fetch_elts, "fetch_elts");
  1582.       lp_build_name(fetch_elt_max, "fetch_elt_max");
  1583.       lp_build_name(fetch_count, "fetch_count");
  1584.       start = count = NULL;
  1585.    }
  1586.    else {
  1587.       start        = LLVMGetParam(variant_func, 3);
  1588.       count        = LLVMGetParam(variant_func, 4);
  1589.       lp_build_name(start, "start");
  1590.       lp_build_name(count, "count");
  1591.       fetch_elts = fetch_count = NULL;
  1592.    }
  1593.  
  1594.    /*
  1595.     * Function body
  1596.     */
  1597.  
  1598.    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
  1599.    builder = gallivm->builder;
  1600.    LLVMPositionBuilderAtEnd(builder, block);
  1601.  
  1602.    lp_build_context_init(&bld, gallivm, lp_type_int(32));
  1603.  
  1604.    memset(&vs_type, 0, sizeof vs_type);
  1605.    vs_type.floating = TRUE; /* floating point values */
  1606.    vs_type.sign = TRUE;     /* values are signed */
  1607.    vs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
  1608.    vs_type.width = 32;      /* 32-bit float */
  1609.    vs_type.length = vector_length;
  1610.  
  1611.    /* hold temporary "bool" clipmask */
  1612.    clipmask_bool_ptr = lp_build_alloca(gallivm, lp_build_int_vec_type(gallivm, vs_type), "");
  1613.    LLVMBuildStore(builder, lp_build_zero(gallivm, lp_int_type(vs_type)), clipmask_bool_ptr);
  1614.  
  1615.    /* code generated texture sampling */
  1616.    sampler = draw_llvm_sampler_soa_create(
  1617.       draw_llvm_variant_key_samplers(key),
  1618.       context_ptr);
  1619.  
  1620.    if (elts) {
  1621.       start = zero;
  1622.       end = fetch_count;
  1623.       count = fetch_count;
  1624.    }
  1625.    else {
  1626.       end = lp_build_add(&bld, start, count);
  1627.    }
  1628.  
  1629.    step = lp_build_const_int32(gallivm, vector_length);
  1630.  
  1631.    fetch_max = LLVMBuildSub(builder, end, one, "fetch_max");
  1632.  
  1633.    lp_build_loop_begin(&lp_loop, gallivm, zero);
  1634.    {
  1635.       LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
  1636.       LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][LP_MAX_VECTOR_WIDTH / 32] = { { 0 } };
  1637.       LLVMValueRef io;
  1638.       LLVMValueRef clipmask;   /* holds the clipmask value */
  1639.       const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS];
  1640.  
  1641.       io_itr = lp_loop.counter;
  1642.  
  1643.       io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
  1644. #if DEBUG_STORE
  1645.       lp_build_printf(gallivm, " --- io %d = %p, loop counter %d\n",
  1646.                       io_itr, io, lp_loop.counter);
  1647. #endif
  1648.       system_values.vertex_id = lp_build_zero(gallivm, lp_type_uint_vec(32, 32*vector_length));
  1649.       for (i = 0; i < vector_length; ++i) {
  1650.          LLVMValueRef true_index =
  1651.             LLVMBuildAdd(builder,
  1652.                          lp_loop.counter,
  1653.                          lp_build_const_int32(gallivm, i), "");
  1654.          true_index = LLVMBuildAdd(builder, start, true_index, "");
  1655.  
  1656.          /* make sure we're not out of bounds which can happen
  1657.           * if fetch_count % 4 != 0, because on the last iteration
  1658.           * a few of the 4 vertex fetches will be out of bounds */
  1659.          true_index = lp_build_min(&bld, true_index, fetch_max);
  1660.  
  1661.          system_values.vertex_id = LLVMBuildInsertElement(
  1662.             gallivm->builder,
  1663.             system_values.vertex_id, true_index,
  1664.             lp_build_const_int32(gallivm, i), "");
  1665.  
  1666.          if (elts) {
  1667.             LLVMValueRef fetch_ptr;
  1668.             LLVMValueRef index_overflowed;
  1669.             LLVMValueRef index_ptr =
  1670.                lp_build_alloca(
  1671.                   gallivm,
  1672.                   lp_build_vec_type(gallivm, lp_type_int(32)), "");
  1673.             struct lp_build_if_state if_ctx;
  1674.             index_overflowed = LLVMBuildICmp(builder, LLVMIntUGT,
  1675.                                              true_index, fetch_elt_max,
  1676.                                              "index_overflowed");
  1677.            
  1678.             lp_build_if(&if_ctx, gallivm, index_overflowed);
  1679.             {
  1680.                /* Generate maximum possible index so that
  1681.                 * generate_fetch can treat it just like
  1682.                 * any other overflow and return zeros.
  1683.                 * We don't have to worry about the restart
  1684.                 * primitive index because it has already been
  1685.                 * handled
  1686.                 */
  1687.                LLVMValueRef val =
  1688.                   lp_build_const_int32(gallivm, 0xffffffff);
  1689.                LLVMBuildStore(builder, val, index_ptr);
  1690.             }
  1691.             lp_build_else(&if_ctx);
  1692.             {
  1693.                LLVMValueRef val;
  1694.                fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
  1695.                                         &true_index, 1, "");
  1696.                val = LLVMBuildLoad(builder, fetch_ptr, "");
  1697.                LLVMBuildStore(builder, val, index_ptr);
  1698.             }
  1699.             lp_build_endif(&if_ctx);
  1700.             true_index = LLVMBuildLoad(builder, index_ptr, "true_index");
  1701.          }
  1702.  
  1703.          for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
  1704.             struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
  1705.             LLVMValueRef vb_index =
  1706.                lp_build_const_int32(gallivm, velem->vertex_buffer_index);
  1707.             LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, "");
  1708.             generate_fetch(gallivm, draw, vbuffers_ptr,
  1709.                            &aos_attribs[j][i], velem, vb, true_index,
  1710.                            system_values.instance_id);
  1711.          }
  1712.       }
  1713.       convert_to_soa(gallivm, aos_attribs, inputs,
  1714.                      draw->pt.nr_vertex_elements, vs_type);
  1715.  
  1716.       ptr_aos = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) inputs;
  1717.       generate_vs(variant,
  1718.                   builder,
  1719.                   vs_type,
  1720.                   outputs,
  1721.                   ptr_aos,
  1722.                   &system_values,
  1723.                   context_ptr,
  1724.                   sampler,
  1725.                   key->clamp_vertex_color);
  1726.  
  1727.       if (pos != -1 && cv != -1) {
  1728.          /* store original positions in clip before further manipulation */
  1729.          store_clip(gallivm, vs_type, io, outputs, 0, cv);
  1730.          store_clip(gallivm, vs_type, io, outputs, 1, pos);
  1731.  
  1732.          /* do cliptest */
  1733.          if (enable_cliptest) {
  1734.             LLVMValueRef temp = LLVMBuildLoad(builder, clipmask_bool_ptr, "");
  1735.             /* allocate clipmask, assign it integer type */
  1736.             clipmask = generate_clipmask(llvm,
  1737.                                          gallivm,
  1738.                                          vs_type,
  1739.                                          outputs,
  1740.                                          key->clip_xy,
  1741.                                          key->clip_z,
  1742.                                          key->clip_user,
  1743.                                          key->clip_halfz,
  1744.                                          key->ucp_enable,
  1745.                                          context_ptr, &have_clipdist);
  1746.             temp = LLVMBuildOr(builder, clipmask, temp, "");
  1747.             /* store temporary clipping boolean value */
  1748.             LLVMBuildStore(builder, temp, clipmask_bool_ptr);
  1749.          }
  1750.          else {
  1751.             clipmask = lp_build_const_int_vec(gallivm, lp_int_type(vs_type), 0);
  1752.          }
  1753.  
  1754.          /* do viewport mapping */
  1755.          if (!bypass_viewport) {
  1756.             generate_viewport(variant, builder, vs_type, outputs, context_ptr);
  1757.          }
  1758.       }
  1759.       else {
  1760.          clipmask = lp_build_const_int_vec(gallivm, lp_int_type(vs_type), 0);
  1761.       }
  1762.  
  1763.       /* store clipmask in vertex header,
  1764.        * original positions in clip
  1765.        * and transformed positions in data
  1766.        */
  1767.       convert_to_aos(gallivm, io, NULL, outputs, clipmask,
  1768.                      vs_info->num_outputs, vs_type,
  1769.                      have_clipdist);
  1770.    }
  1771.    lp_build_loop_end_cond(&lp_loop, count, step, LLVMIntUGE);
  1772.  
  1773.    sampler->destroy(sampler);
  1774.  
  1775.    /* return clipping boolean value for function */
  1776.    ret = clipmask_booli32(gallivm, vs_type, clipmask_bool_ptr);
  1777.  
  1778.    LLVMBuildRet(builder, ret);
  1779.  
  1780.    gallivm_verify_function(gallivm, variant_func);
  1781. }
  1782.  
  1783.  
  1784. struct draw_llvm_variant_key *
  1785. draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
  1786. {
  1787.    unsigned i;
  1788.    struct draw_llvm_variant_key *key;
  1789.    struct draw_sampler_static_state *draw_sampler;
  1790.  
  1791.    key = (struct draw_llvm_variant_key *)store;
  1792.  
  1793.    key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color; /**/
  1794.  
  1795.    /* Presumably all variants of the shader should have the same
  1796.     * number of vertex elements - ie the number of shader inputs.
  1797.     * NOTE: we NEED to store the needed number of needed inputs
  1798.     * here, not the number of provided elements to match keysize
  1799.     * (and the offset of sampler state in the key).
  1800.     */
  1801.    key->nr_vertex_elements = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_INPUT] + 1;
  1802.    assert(key->nr_vertex_elements <= llvm->draw->pt.nr_vertex_elements);
  1803.  
  1804.    /* will have to rig this up properly later */
  1805.    key->clip_xy = llvm->draw->clip_xy;
  1806.    key->clip_z = llvm->draw->clip_z;
  1807.    key->clip_user = llvm->draw->clip_user;
  1808.    key->bypass_viewport = llvm->draw->identity_viewport;
  1809.    key->clip_halfz = llvm->draw->rasterizer->clip_halfz;
  1810.    key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
  1811.    key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
  1812.    key->has_gs = llvm->draw->gs.geometry_shader != NULL;
  1813.    key->pad1 = 0;
  1814.  
  1815.    /* All variants of this shader will have the same value for
  1816.     * nr_samplers.  Not yet trying to compact away holes in the
  1817.     * sampler array.
  1818.     */
  1819.    key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
  1820.    if (llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
  1821.       key->nr_sampler_views =
  1822.          llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
  1823.    }
  1824.    else {
  1825.       key->nr_sampler_views = key->nr_samplers;
  1826.    }
  1827.  
  1828.    draw_sampler = draw_llvm_variant_key_samplers(key);
  1829.  
  1830.    memcpy(key->vertex_element,
  1831.           llvm->draw->pt.vertex_element,
  1832.           sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
  1833.  
  1834.    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
  1835.  
  1836.    for (i = 0 ; i < key->nr_samplers; i++) {
  1837.       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
  1838.                                       llvm->draw->samplers[PIPE_SHADER_VERTEX][i]);
  1839.    }
  1840.    for (i = 0 ; i < key->nr_sampler_views; i++) {
  1841.       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
  1842.                                       llvm->draw->sampler_views[PIPE_SHADER_VERTEX][i]);
  1843.    }
  1844.  
  1845.    return key;
  1846. }
  1847.  
  1848.  
  1849. void
  1850. draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key)
  1851. {
  1852.    unsigned i;
  1853.    struct draw_sampler_static_state *sampler = draw_llvm_variant_key_samplers(key);
  1854.  
  1855.    debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
  1856.    debug_printf("clip_xy = %u\n", key->clip_xy);
  1857.    debug_printf("clip_z = %u\n", key->clip_z);
  1858.    debug_printf("clip_user = %u\n", key->clip_user);
  1859.    debug_printf("bypass_viewport = %u\n", key->bypass_viewport);
  1860.    debug_printf("clip_halfz = %u\n", key->clip_halfz);
  1861.    debug_printf("need_edgeflags = %u\n", key->need_edgeflags);
  1862.    debug_printf("has_gs = %u\n", key->has_gs);
  1863.    debug_printf("ucp_enable = %u\n", key->ucp_enable);
  1864.  
  1865.    for (i = 0 ; i < key->nr_vertex_elements; i++) {
  1866.       debug_printf("vertex_element[%i].src_offset = %u\n", i, key->vertex_element[i].src_offset);
  1867.       debug_printf("vertex_element[%i].instance_divisor = %u\n", i, key->vertex_element[i].instance_divisor);
  1868.       debug_printf("vertex_element[%i].vertex_buffer_index = %u\n", i, key->vertex_element[i].vertex_buffer_index);
  1869.       debug_printf("vertex_element[%i].src_format = %s\n", i, util_format_name(key->vertex_element[i].src_format));
  1870.    }
  1871.  
  1872.    for (i = 0 ; i < key->nr_sampler_views; i++) {
  1873.       debug_printf("sampler[%i].src_format = %s\n", i, util_format_name(sampler[i].texture_state.format));
  1874.    }
  1875. }
  1876.  
  1877.  
  1878. void
  1879. draw_llvm_set_mapped_texture(struct draw_context *draw,
  1880.                              unsigned shader_stage,
  1881.                              unsigned sview_idx,
  1882.                              uint32_t width, uint32_t height, uint32_t depth,
  1883.                              uint32_t first_level, uint32_t last_level,
  1884.                              const void *base_ptr,
  1885.                              uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
  1886.                              uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
  1887.                              uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS])
  1888. {
  1889.    unsigned j;
  1890.    struct draw_jit_texture *jit_tex;
  1891.  
  1892.    assert(shader_stage == PIPE_SHADER_VERTEX ||
  1893.           shader_stage == PIPE_SHADER_GEOMETRY);
  1894.  
  1895.    if (shader_stage == PIPE_SHADER_VERTEX) {
  1896.       assert(sview_idx < Elements(draw->llvm->jit_context.textures));
  1897.  
  1898.       jit_tex = &draw->llvm->jit_context.textures[sview_idx];
  1899.    } else if (shader_stage == PIPE_SHADER_GEOMETRY) {
  1900.       assert(sview_idx < Elements(draw->llvm->gs_jit_context.textures));
  1901.  
  1902.       jit_tex = &draw->llvm->gs_jit_context.textures[sview_idx];
  1903.    } else {
  1904.       assert(0);
  1905.       return;
  1906.    }
  1907.  
  1908.    jit_tex->width = width;
  1909.    jit_tex->height = height;
  1910.    jit_tex->depth = depth;
  1911.    jit_tex->first_level = first_level;
  1912.    jit_tex->last_level = last_level;
  1913.    jit_tex->base = base_ptr;
  1914.  
  1915.    for (j = first_level; j <= last_level; j++) {
  1916.       jit_tex->mip_offsets[j] = mip_offsets[j];
  1917.       jit_tex->row_stride[j] = row_stride[j];
  1918.       jit_tex->img_stride[j] = img_stride[j];
  1919.    }
  1920. }
  1921.  
  1922.  
  1923. void
  1924. draw_llvm_set_sampler_state(struct draw_context *draw,
  1925.                             unsigned shader_type)
  1926. {
  1927.    unsigned i;
  1928.  
  1929.    if (shader_type == PIPE_SHADER_VERTEX) {
  1930.       for (i = 0; i < draw->num_samplers[PIPE_SHADER_VERTEX]; i++) {
  1931.          struct draw_jit_sampler *jit_sam = &draw->llvm->jit_context.samplers[i];
  1932.  
  1933.          if (draw->samplers[i]) {
  1934.             const struct pipe_sampler_state *s
  1935.                = draw->samplers[PIPE_SHADER_VERTEX][i];
  1936.             jit_sam->min_lod = s->min_lod;
  1937.             jit_sam->max_lod = s->max_lod;
  1938.             jit_sam->lod_bias = s->lod_bias;
  1939.             COPY_4V(jit_sam->border_color, s->border_color.f);
  1940.          }
  1941.       }
  1942.    } else if (shader_type == PIPE_SHADER_GEOMETRY) {
  1943.       for (i = 0; i < draw->num_samplers[PIPE_SHADER_GEOMETRY]; i++) {
  1944.          struct draw_jit_sampler *jit_sam = &draw->llvm->gs_jit_context.samplers[i];
  1945.  
  1946.          if (draw->samplers[i]) {
  1947.             const struct pipe_sampler_state *s
  1948.                = draw->samplers[PIPE_SHADER_GEOMETRY][i];
  1949.             jit_sam->min_lod = s->min_lod;
  1950.             jit_sam->max_lod = s->max_lod;
  1951.             jit_sam->lod_bias = s->lod_bias;
  1952.             COPY_4V(jit_sam->border_color, s->border_color.f);
  1953.          }
  1954.       }
  1955.    }
  1956. }
  1957.  
  1958.  
  1959. void
  1960. draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
  1961. {
  1962.    struct draw_llvm *llvm = variant->llvm;
  1963.  
  1964.    if (variant->function_elts) {
  1965.       gallivm_free_function(variant->gallivm,
  1966.                             variant->function_elts, variant->jit_func_elts);
  1967.    }
  1968.  
  1969.    if (variant->function) {
  1970.       gallivm_free_function(variant->gallivm,
  1971.                             variant->function, variant->jit_func);
  1972.    }
  1973.  
  1974.    gallivm_destroy(variant->gallivm);
  1975.  
  1976.    remove_from_list(&variant->list_item_local);
  1977.    variant->shader->variants_cached--;
  1978.    remove_from_list(&variant->list_item_global);
  1979.    llvm->nr_variants--;
  1980.    FREE(variant);
  1981. }
  1982.  
  1983.  
  1984. /**
  1985.  * Create LLVM types for various structures.
  1986.  */
  1987. static void
  1988. create_gs_jit_types(struct draw_gs_llvm_variant *var)
  1989. {
  1990.    struct gallivm_state *gallivm = var->gallivm;
  1991.    LLVMTypeRef texture_type, sampler_type, context_type;
  1992.  
  1993.    texture_type = create_jit_texture_type(gallivm, "texture");
  1994.    sampler_type = create_jit_sampler_type(gallivm, "sampler");
  1995.  
  1996.    context_type = create_gs_jit_context_type(gallivm,
  1997.                                              var->shader->base.vector_length,
  1998.                                              texture_type, sampler_type,
  1999.                                              "draw_gs_jit_context");
  2000.    var->context_ptr_type = LLVMPointerType(context_type, 0);
  2001.  
  2002.    var->input_array_type = create_gs_jit_input_type(gallivm);
  2003. }
  2004.  
  2005. static LLVMTypeRef
  2006. get_gs_context_ptr_type(struct draw_gs_llvm_variant *variant)
  2007. {
  2008.    if (!variant->context_ptr_type)
  2009.       create_gs_jit_types(variant);
  2010.    return variant->context_ptr_type;
  2011. }
  2012.  
  2013. static LLVMValueRef
  2014. generate_mask_value(struct draw_gs_llvm_variant *variant,
  2015.                     struct lp_type gs_type)
  2016. {
  2017.    struct gallivm_state *gallivm = variant->gallivm;
  2018.    LLVMBuilderRef builder = gallivm->builder;
  2019.    LLVMValueRef bits[16];
  2020.    struct lp_type  mask_type = lp_int_type(gs_type);
  2021.    struct lp_type mask_elem_type = lp_elem_type(mask_type);
  2022.    LLVMValueRef mask_val = lp_build_const_vec(gallivm,
  2023.                                               mask_type,
  2024.                                               0);
  2025.    unsigned i;
  2026.  
  2027.    assert(gs_type.length <= Elements(bits));
  2028.  
  2029.    for (i = gs_type.length; i >= 1; --i) {
  2030.       int idx = i - 1;
  2031.       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
  2032.       bits[idx] = lp_build_compare(gallivm,
  2033.                                    mask_elem_type, PIPE_FUNC_GEQUAL,
  2034.                                    variant->num_prims, ind);
  2035.    }
  2036.    for (i = 0; i < gs_type.length; ++i) {
  2037.       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
  2038.       mask_val = LLVMBuildInsertElement(builder, mask_val, bits[i], ind, "");
  2039.    }
  2040.    mask_val = lp_build_compare(gallivm,
  2041.                                mask_type, PIPE_FUNC_NOTEQUAL,
  2042.                                mask_val,
  2043.                                lp_build_const_int_vec(gallivm, mask_type, 0));
  2044.  
  2045.    return mask_val;
  2046. }
  2047.  
  2048. static void
  2049. draw_gs_llvm_generate(struct draw_llvm *llvm,
  2050.                       struct draw_gs_llvm_variant *variant)
  2051. {
  2052.    struct gallivm_state *gallivm = variant->gallivm;
  2053.    LLVMContextRef context = gallivm->context;
  2054.    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
  2055.    LLVMTypeRef arg_types[6];
  2056.    LLVMTypeRef func_type;
  2057.    LLVMValueRef variant_func;
  2058.    LLVMValueRef context_ptr;
  2059.    LLVMValueRef prim_id_ptr;
  2060.    LLVMBasicBlockRef block;
  2061.    LLVMBuilderRef builder;
  2062.    LLVMValueRef io_ptr, input_array, num_prims, mask_val;
  2063.    struct lp_build_sampler_soa *sampler = 0;
  2064.    struct lp_build_context bld;
  2065.    struct lp_bld_tgsi_system_values system_values;
  2066.    struct lp_type gs_type;
  2067.    unsigned i;
  2068.    struct draw_gs_llvm_iface gs_iface;
  2069.    const struct tgsi_token *tokens = variant->shader->base.state.tokens;
  2070.    LLVMValueRef consts_ptr;
  2071.    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
  2072.    struct lp_build_mask_context mask;
  2073.    const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
  2074.    unsigned vector_length = variant->shader->base.vector_length;
  2075.  
  2076.    memset(&system_values, 0, sizeof(system_values));
  2077.  
  2078.    assert(variant->vertex_header_ptr_type);
  2079.  
  2080.    arg_types[0] = get_gs_context_ptr_type(variant);    /* context */
  2081.    arg_types[1] = variant->input_array_type;           /* input */
  2082.    arg_types[2] = variant->vertex_header_ptr_type;     /* vertex_header */
  2083.    arg_types[3] = int32_type;                          /* num_prims */
  2084.    arg_types[4] = int32_type;                          /* instance_id */
  2085.    arg_types[5] = LLVMPointerType(
  2086.       LLVMVectorType(int32_type, vector_length), 0);   /* prim_id_ptr */
  2087.  
  2088.    func_type = LLVMFunctionType(int32_type, arg_types, Elements(arg_types), 0);
  2089.  
  2090.    variant_func = LLVMAddFunction(gallivm->module, "draw_geometry_shader",
  2091.                                   func_type);
  2092.    variant->function = variant_func;
  2093.  
  2094.    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
  2095.  
  2096.    for (i = 0; i < Elements(arg_types); ++i)
  2097.       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
  2098.          LLVMAddAttribute(LLVMGetParam(variant_func, i),
  2099.                           LLVMNoAliasAttribute);
  2100.  
  2101.    context_ptr               = LLVMGetParam(variant_func, 0);
  2102.    input_array               = LLVMGetParam(variant_func, 1);
  2103.    io_ptr                    = LLVMGetParam(variant_func, 2);
  2104.    num_prims                 = LLVMGetParam(variant_func, 3);
  2105.    system_values.instance_id = LLVMGetParam(variant_func, 4);
  2106.    prim_id_ptr               = LLVMGetParam(variant_func, 5);
  2107.  
  2108.    lp_build_name(context_ptr, "context");
  2109.    lp_build_name(input_array, "input");
  2110.    lp_build_name(io_ptr, "io");
  2111.    lp_build_name(num_prims, "num_prims");
  2112.    lp_build_name(system_values.instance_id, "instance_id");
  2113.    lp_build_name(prim_id_ptr, "prim_id_ptr");
  2114.  
  2115.    variant->context_ptr = context_ptr;
  2116.    variant->io_ptr = io_ptr;
  2117.    variant->num_prims = num_prims;
  2118.  
  2119.    gs_iface.base.fetch_input = draw_gs_llvm_fetch_input;
  2120.    gs_iface.base.emit_vertex = draw_gs_llvm_emit_vertex;
  2121.    gs_iface.base.end_primitive = draw_gs_llvm_end_primitive;
  2122.    gs_iface.base.gs_epilogue = draw_gs_llvm_epilogue;
  2123.    gs_iface.input = input_array;
  2124.    gs_iface.variant = variant;
  2125.  
  2126.    /*
  2127.     * Function body
  2128.     */
  2129.  
  2130.    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
  2131.    builder = gallivm->builder;
  2132.    LLVMPositionBuilderAtEnd(builder, block);
  2133.  
  2134.    lp_build_context_init(&bld, gallivm, lp_type_int(32));
  2135.  
  2136.    memset(&gs_type, 0, sizeof gs_type);
  2137.    gs_type.floating = TRUE; /* floating point values */
  2138.    gs_type.sign = TRUE;     /* values are signed */
  2139.    gs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
  2140.    gs_type.width = 32;      /* 32-bit float */
  2141.    gs_type.length = vector_length;
  2142.  
  2143.    consts_ptr = draw_gs_jit_context_constants(variant->gallivm, context_ptr);
  2144.  
  2145.    /* code generated texture sampling */
  2146.    sampler = draw_llvm_sampler_soa_create(variant->key.samplers,
  2147.                                           context_ptr);
  2148.  
  2149.    mask_val = generate_mask_value(variant, gs_type);
  2150.    lp_build_mask_begin(&mask, gallivm, gs_type, mask_val);
  2151.  
  2152.    if (gs_info->uses_primid) {
  2153.       system_values.prim_id = LLVMBuildLoad(builder, prim_id_ptr, "prim_id");;
  2154.    }
  2155.  
  2156.    lp_build_tgsi_soa(variant->gallivm,
  2157.                      tokens,
  2158.                      gs_type,
  2159.                      &mask,
  2160.                      consts_ptr,
  2161.                      &system_values,
  2162.                      NULL,
  2163.                      outputs,
  2164.                      sampler,
  2165.                      &llvm->draw->gs.geometry_shader->info,
  2166.                      (const struct lp_build_tgsi_gs_iface *)&gs_iface);
  2167.  
  2168.    sampler->destroy(sampler);
  2169.  
  2170.    lp_build_mask_end(&mask);
  2171.  
  2172.    LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
  2173.  
  2174.    gallivm_verify_function(gallivm, variant_func);
  2175. }
  2176.  
  2177.  
  2178. struct draw_gs_llvm_variant *
  2179. draw_gs_llvm_create_variant(struct draw_llvm *llvm,
  2180.                             unsigned num_outputs,
  2181.                             const struct draw_gs_llvm_variant_key *key)
  2182. {
  2183.    struct draw_gs_llvm_variant *variant;
  2184.    struct llvm_geometry_shader *shader =
  2185.       llvm_geometry_shader(llvm->draw->gs.geometry_shader);
  2186.    LLVMTypeRef vertex_header;
  2187.  
  2188.    variant = MALLOC(sizeof *variant +
  2189.                     shader->variant_key_size -
  2190.                     sizeof variant->key);
  2191.    if (variant == NULL)
  2192.       return NULL;
  2193.  
  2194.    variant->llvm = llvm;
  2195.    variant->shader = shader;
  2196.  
  2197.    variant->gallivm = gallivm_create();
  2198.  
  2199.    create_gs_jit_types(variant);
  2200.  
  2201.    memcpy(&variant->key, key, shader->variant_key_size);
  2202.  
  2203.    vertex_header = create_jit_vertex_header(variant->gallivm, num_outputs);
  2204.  
  2205.    variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
  2206.  
  2207.    draw_gs_llvm_generate(llvm, variant);
  2208.  
  2209.    gallivm_compile_module(variant->gallivm);
  2210.  
  2211.    variant->jit_func = (draw_gs_jit_func)
  2212.          gallivm_jit_function(variant->gallivm, variant->function);
  2213.  
  2214.    variant->list_item_global.base = variant;
  2215.    variant->list_item_local.base = variant;
  2216.    /*variant->no = */shader->variants_created++;
  2217.    variant->list_item_global.base = variant;
  2218.  
  2219.    return variant;
  2220. }
  2221.  
  2222. void
  2223. draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant)
  2224. {
  2225.    struct draw_llvm *llvm = variant->llvm;
  2226.  
  2227.    if (variant->function) {
  2228.       gallivm_free_function(variant->gallivm,
  2229.                             variant->function, variant->jit_func);
  2230.    }
  2231.  
  2232.    gallivm_destroy(variant->gallivm);
  2233.  
  2234.    remove_from_list(&variant->list_item_local);
  2235.    variant->shader->variants_cached--;
  2236.    remove_from_list(&variant->list_item_global);
  2237.    llvm->nr_gs_variants--;
  2238.    FREE(variant);
  2239. }
  2240.  
  2241. struct draw_gs_llvm_variant_key *
  2242. draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
  2243. {
  2244.    unsigned i;
  2245.    struct draw_gs_llvm_variant_key *key;
  2246.    struct draw_sampler_static_state *draw_sampler;
  2247.  
  2248.    key = (struct draw_gs_llvm_variant_key *)store;
  2249.  
  2250.    /* All variants of this shader will have the same value for
  2251.     * nr_samplers.  Not yet trying to compact away holes in the
  2252.     * sampler array.
  2253.     */
  2254.    key->nr_samplers = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
  2255.    if (llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
  2256.       key->nr_sampler_views =
  2257.          llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
  2258.    }
  2259.    else {
  2260.       key->nr_sampler_views = key->nr_samplers;
  2261.    }
  2262.  
  2263.    draw_sampler = key->samplers;
  2264.  
  2265.    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
  2266.  
  2267.    for (i = 0 ; i < key->nr_samplers; i++) {
  2268.       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
  2269.                                       llvm->draw->samplers[PIPE_SHADER_GEOMETRY][i]);
  2270.    }
  2271.    for (i = 0 ; i < key->nr_sampler_views; i++) {
  2272.       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
  2273.                                       llvm->draw->sampler_views[PIPE_SHADER_GEOMETRY][i]);
  2274.    }
  2275.  
  2276.    return key;
  2277. }
  2278.  
  2279. void
  2280. draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key)
  2281. {
  2282.    unsigned i;
  2283.    struct draw_sampler_static_state *sampler = key->samplers;
  2284.  
  2285.    for (i = 0 ; i < key->nr_sampler_views; i++) {
  2286.       debug_printf("sampler[%i].src_format = %s\n", i,
  2287.                    util_format_name(sampler[i].texture_state.format));
  2288.    }
  2289. }
  2290.