Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2010 VMware, Inc.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. #include "draw_llvm.h"
  29.  
  30. #include "draw_context.h"
  31. #include "draw_vs.h"
  32. #include "draw_gs.h"
  33.  
  34. #include "gallivm/lp_bld_arit.h"
  35. #include "gallivm/lp_bld_arit_overflow.h"
  36. #include "gallivm/lp_bld_logic.h"
  37. #include "gallivm/lp_bld_const.h"
  38. #include "gallivm/lp_bld_swizzle.h"
  39. #include "gallivm/lp_bld_struct.h"
  40. #include "gallivm/lp_bld_type.h"
  41. #include "gallivm/lp_bld_flow.h"
  42. #include "gallivm/lp_bld_debug.h"
  43. #include "gallivm/lp_bld_tgsi.h"
  44. #include "gallivm/lp_bld_printf.h"
  45. #include "gallivm/lp_bld_intr.h"
  46. #include "gallivm/lp_bld_init.h"
  47. #include "gallivm/lp_bld_type.h"
  48. #include "gallivm/lp_bld_pack.h"
  49. #include "gallivm/lp_bld_format.h"
  50.  
  51. #include "tgsi/tgsi_exec.h"
  52. #include "tgsi/tgsi_dump.h"
  53.  
  54. #include "util/u_math.h"
  55. #include "util/u_pointer.h"
  56. #include "util/u_string.h"
  57. #include "util/simple_list.h"
  58.  
  59.  
  60. #define DEBUG_STORE 0
  61.  
  62.  
  63. static void
  64. draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var,
  65.                    boolean elts);
  66.  
  67.  
  68. struct draw_gs_llvm_iface {
  69.    struct lp_build_tgsi_gs_iface base;
  70.  
  71.    struct draw_gs_llvm_variant *variant;
  72.    LLVMValueRef input;
  73. };
  74.  
  75. static INLINE const struct draw_gs_llvm_iface *
  76. draw_gs_llvm_iface(const struct lp_build_tgsi_gs_iface *iface)
  77. {
  78.    return (const struct draw_gs_llvm_iface *)iface;
  79. }
  80.  
  81. /**
  82.  * Create LLVM type for draw_vertex_buffer.
  83.  */
  84. static LLVMTypeRef
  85. create_jit_dvbuffer_type(struct gallivm_state *gallivm,
  86.                          const char *struct_name)
  87. {
  88.    LLVMTargetDataRef target = gallivm->target;
  89.    LLVMTypeRef dvbuffer_type;
  90.    LLVMTypeRef elem_types[DRAW_JIT_DVBUFFER_NUM_FIELDS];
  91.    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
  92.  
  93.    elem_types[DRAW_JIT_DVBUFFER_MAP] =
  94.       LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0);
  95.    elem_types[DRAW_JIT_DVBUFFER_SIZE] = int32_type;
  96.  
  97.    dvbuffer_type = LLVMStructTypeInContext(gallivm->context, elem_types,
  98.                                            Elements(elem_types), 0);
  99.  
  100.    LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, map,
  101.                           target, dvbuffer_type,
  102.                           DRAW_JIT_DVBUFFER_MAP);
  103.    LP_CHECK_MEMBER_OFFSET(struct draw_vertex_buffer, size,
  104.                           target, dvbuffer_type,
  105.                           DRAW_JIT_DVBUFFER_SIZE);
  106.  
  107.    return dvbuffer_type;
  108. }
  109.  
  110. /**
  111.  * Create LLVM type for struct draw_jit_texture
  112.  */
  113. static LLVMTypeRef
  114. create_jit_texture_type(struct gallivm_state *gallivm, const char *struct_name)
  115. {
  116.    LLVMTargetDataRef target = gallivm->target;
  117.    LLVMTypeRef texture_type;
  118.    LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS];
  119.    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
  120.  
  121.    elem_types[DRAW_JIT_TEXTURE_WIDTH]  =
  122.    elem_types[DRAW_JIT_TEXTURE_HEIGHT] =
  123.    elem_types[DRAW_JIT_TEXTURE_DEPTH] =
  124.    elem_types[DRAW_JIT_TEXTURE_FIRST_LEVEL] =
  125.    elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = int32_type;
  126.    elem_types[DRAW_JIT_TEXTURE_BASE] =
  127.       LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
  128.    elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] =
  129.    elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] =
  130.    elem_types[DRAW_JIT_TEXTURE_MIP_OFFSETS] =
  131.       LLVMArrayType(int32_type, PIPE_MAX_TEXTURE_LEVELS);
  132.  
  133.    texture_type = LLVMStructTypeInContext(gallivm->context, elem_types,
  134.                                           Elements(elem_types), 0);
  135.  
  136.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width,
  137.                           target, texture_type,
  138.                           DRAW_JIT_TEXTURE_WIDTH);
  139.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height,
  140.                           target, texture_type,
  141.                           DRAW_JIT_TEXTURE_HEIGHT);
  142.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth,
  143.                           target, texture_type,
  144.                           DRAW_JIT_TEXTURE_DEPTH);
  145.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, first_level,
  146.                           target, texture_type,
  147.                           DRAW_JIT_TEXTURE_FIRST_LEVEL);
  148.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level,
  149.                           target, texture_type,
  150.                           DRAW_JIT_TEXTURE_LAST_LEVEL);
  151.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, base,
  152.                           target, texture_type,
  153.                           DRAW_JIT_TEXTURE_BASE);
  154.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride,
  155.                           target, texture_type,
  156.                           DRAW_JIT_TEXTURE_ROW_STRIDE);
  157.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride,
  158.                           target, texture_type,
  159.                           DRAW_JIT_TEXTURE_IMG_STRIDE);
  160.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, mip_offsets,
  161.                           target, texture_type,
  162.                           DRAW_JIT_TEXTURE_MIP_OFFSETS);
  163.  
  164.    LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, target, texture_type);
  165.  
  166.    return texture_type;
  167. }
  168.  
  169.  
  170. /**
  171.  * Create LLVM type for struct draw_jit_sampler
  172.  */
  173. static LLVMTypeRef
  174. create_jit_sampler_type(struct gallivm_state *gallivm, const char *struct_name)
  175. {
  176.    LLVMTargetDataRef target = gallivm->target;
  177.    LLVMTypeRef sampler_type;
  178.    LLVMTypeRef elem_types[DRAW_JIT_SAMPLER_NUM_FIELDS];
  179.  
  180.    elem_types[DRAW_JIT_SAMPLER_MIN_LOD] =
  181.    elem_types[DRAW_JIT_SAMPLER_MAX_LOD] =
  182.    elem_types[DRAW_JIT_SAMPLER_LOD_BIAS] = LLVMFloatTypeInContext(gallivm->context);
  183.    elem_types[DRAW_JIT_SAMPLER_BORDER_COLOR] =
  184.       LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
  185.  
  186.    sampler_type = LLVMStructTypeInContext(gallivm->context, elem_types,
  187.                                           Elements(elem_types), 0);
  188.  
  189.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, min_lod,
  190.                           target, sampler_type,
  191.                           DRAW_JIT_SAMPLER_MIN_LOD);
  192.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, max_lod,
  193.                           target, sampler_type,
  194.                           DRAW_JIT_SAMPLER_MAX_LOD);
  195.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, lod_bias,
  196.                           target, sampler_type,
  197.                           DRAW_JIT_SAMPLER_LOD_BIAS);
  198.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, border_color,
  199.                           target, sampler_type,
  200.                           DRAW_JIT_SAMPLER_BORDER_COLOR);
  201.  
  202.    LP_CHECK_STRUCT_SIZE(struct draw_jit_sampler, target, sampler_type);
  203.  
  204.    return sampler_type;
  205. }
  206.  
  207.  
  208. /**
  209.  * Create LLVM type for struct draw_jit_context
  210.  */
  211. static LLVMTypeRef
  212. create_jit_context_type(struct gallivm_state *gallivm,
  213.                         LLVMTypeRef texture_type, LLVMTypeRef sampler_type,
  214.                         const char *struct_name)
  215. {
  216.    LLVMTargetDataRef target = gallivm->target;
  217.    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
  218.    LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
  219.    LLVMTypeRef elem_types[DRAW_JIT_CTX_NUM_FIELDS];
  220.    LLVMTypeRef context_type;
  221.  
  222.    elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* vs_constants */
  223.                                  LP_MAX_TGSI_CONST_BUFFERS);
  224.    elem_types[1] = LLVMArrayType(int_type, /* num_vs_constants */
  225.                                  LP_MAX_TGSI_CONST_BUFFERS);
  226.    elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
  227.                                                  DRAW_TOTAL_CLIP_PLANES), 0);
  228.    elem_types[3] = LLVMPointerType(float_type, 0); /* viewports */
  229.    elem_types[4] = LLVMArrayType(texture_type,
  230.                                  PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
  231.    elem_types[5] = LLVMArrayType(sampler_type,
  232.                                  PIPE_MAX_SAMPLERS); /* samplers */
  233.    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
  234.                                           Elements(elem_types), 0);
  235.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
  236.                           target, context_type, DRAW_JIT_CTX_CONSTANTS);
  237.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, num_vs_constants,
  238.                           target, context_type, DRAW_JIT_CTX_NUM_CONSTANTS);
  239.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes,
  240.                           target, context_type, DRAW_JIT_CTX_PLANES);
  241.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, viewports,
  242.                           target, context_type, DRAW_JIT_CTX_VIEWPORT);
  243.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures,
  244.                           target, context_type,
  245.                           DRAW_JIT_CTX_TEXTURES);
  246.    LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, samplers,
  247.                           target, context_type,
  248.                           DRAW_JIT_CTX_SAMPLERS);
  249.    LP_CHECK_STRUCT_SIZE(struct draw_jit_context,
  250.                         target, context_type);
  251.  
  252.    return context_type;
  253. }
  254.  
  255.  
  256. /**
  257.  * Create LLVM type for struct draw_gs_jit_context
  258.  */
  259. static LLVMTypeRef
  260. create_gs_jit_context_type(struct gallivm_state *gallivm,
  261.                            unsigned vector_length,
  262.                            LLVMTypeRef texture_type, LLVMTypeRef sampler_type,
  263.                            const char *struct_name)
  264. {
  265.    LLVMTargetDataRef target = gallivm->target;
  266.    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
  267.    LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
  268.    LLVMTypeRef elem_types[DRAW_GS_JIT_CTX_NUM_FIELDS];
  269.    LLVMTypeRef context_type;
  270.  
  271.    elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* constants */
  272.                                  LP_MAX_TGSI_CONST_BUFFERS);
  273.    elem_types[1] = LLVMArrayType(int_type, /* num_constants */
  274.                                  LP_MAX_TGSI_CONST_BUFFERS);
  275.    elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
  276.                                                  DRAW_TOTAL_CLIP_PLANES), 0);
  277.    elem_types[3] = LLVMPointerType(float_type, 0); /* viewports */
  278.  
  279.    elem_types[4] = LLVMArrayType(texture_type,
  280.                                  PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
  281.    elem_types[5] = LLVMArrayType(sampler_type,
  282.                                  PIPE_MAX_SAMPLERS); /* samplers */
  283.    
  284.    elem_types[6] = LLVMPointerType(LLVMPointerType(int_type, 0), 0);
  285.    elem_types[7] = LLVMPointerType(LLVMVectorType(int_type,
  286.                                                   vector_length), 0);
  287.    elem_types[8] = LLVMPointerType(LLVMVectorType(int_type,
  288.                                                   vector_length), 0);
  289.  
  290.    context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
  291.                                           Elements(elem_types), 0);
  292.  
  293.    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, constants,
  294.                           target, context_type, DRAW_GS_JIT_CTX_CONSTANTS);
  295.    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, num_constants,
  296.                           target, context_type, DRAW_GS_JIT_CTX_NUM_CONSTANTS);
  297.    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, planes,
  298.                           target, context_type, DRAW_GS_JIT_CTX_PLANES);
  299.    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, viewports,
  300.                           target, context_type, DRAW_GS_JIT_CTX_VIEWPORT);
  301.    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, textures,
  302.                           target, context_type,
  303.                           DRAW_GS_JIT_CTX_TEXTURES);
  304.    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, samplers,
  305.                           target, context_type,
  306.                           DRAW_GS_JIT_CTX_SAMPLERS);
  307.    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, prim_lengths,
  308.                           target, context_type,
  309.                           DRAW_GS_JIT_CTX_PRIM_LENGTHS);
  310.    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_vertices,
  311.                           target, context_type,
  312.                           DRAW_GS_JIT_CTX_EMITTED_VERTICES);
  313.    LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_prims,
  314.                           target, context_type,
  315.                           DRAW_GS_JIT_CTX_EMITTED_PRIMS);
  316.    LP_CHECK_STRUCT_SIZE(struct draw_gs_jit_context,
  317.                         target, context_type);
  318.  
  319.    return context_type;
  320. }
  321.  
  322.  
  323. static LLVMTypeRef
  324. create_gs_jit_input_type(struct gallivm_state *gallivm)
  325. {
  326.    LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
  327.    LLVMTypeRef input_array;
  328.  
  329.    input_array = LLVMVectorType(float_type, TGSI_NUM_CHANNELS); /* num primitives */
  330.    input_array = LLVMArrayType(input_array, TGSI_NUM_CHANNELS); /* num channels */
  331.    input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */
  332.    input_array = LLVMPointerType(input_array, 0); /* num vertices per prim */
  333.  
  334.    return input_array;
  335. }
  336.  
  337. /**
  338.  * Create LLVM type for struct pipe_vertex_buffer
  339.  */
  340. static LLVMTypeRef
  341. create_jit_vertex_buffer_type(struct gallivm_state *gallivm,
  342.                               const char *struct_name)
  343. {
  344.    LLVMTargetDataRef target = gallivm->target;
  345.    LLVMTypeRef elem_types[4];
  346.    LLVMTypeRef vb_type;
  347.  
  348.    elem_types[0] =
  349.    elem_types[1] = LLVMInt32TypeInContext(gallivm->context);
  350.    elem_types[2] =
  351.    elem_types[3] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
  352.  
  353.    vb_type = LLVMStructTypeInContext(gallivm->context, elem_types,
  354.                                      Elements(elem_types), 0);
  355.  
  356.    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride,
  357.                           target, vb_type, 0);
  358.    LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset,
  359.                           target, vb_type, 1);
  360.  
  361.    LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type);
  362.  
  363.    return vb_type;
  364. }
  365.  
  366.  
  367. /**
  368.  * Create LLVM type for struct vertex_header;
  369.  */
  370. static LLVMTypeRef
  371. create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems)
  372. {
  373.    LLVMTargetDataRef target = gallivm->target;
  374.    LLVMTypeRef elem_types[4];
  375.    LLVMTypeRef vertex_header;
  376.    char struct_name[24];
  377.  
  378.    util_snprintf(struct_name, 23, "vertex_header%d", data_elems);
  379.  
  380.    elem_types[DRAW_JIT_VERTEX_VERTEX_ID]  = LLVMIntTypeInContext(gallivm->context, 32);
  381.    elem_types[DRAW_JIT_VERTEX_CLIP]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
  382.    elem_types[DRAW_JIT_VERTEX_PRE_CLIP_POS]  = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
  383.    elem_types[DRAW_JIT_VERTEX_DATA]  = LLVMArrayType(elem_types[1], data_elems);
  384.  
  385.    vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types,
  386.                                            Elements(elem_types), 0);
  387.  
  388.    /* these are bit-fields and we can't take address of them
  389.       LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask,
  390.       target, vertex_header,
  391.       DRAW_JIT_VERTEX_CLIPMASK);
  392.       LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag,
  393.       target, vertex_header,
  394.       DRAW_JIT_VERTEX_EDGEFLAG);
  395.       LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad,
  396.       target, vertex_header,
  397.       DRAW_JIT_VERTEX_PAD);
  398.       LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id,
  399.       target, vertex_header,
  400.       DRAW_JIT_VERTEX_VERTEX_ID);
  401.    */
  402.    LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip,
  403.                           target, vertex_header,
  404.                           DRAW_JIT_VERTEX_CLIP);
  405.    LP_CHECK_MEMBER_OFFSET(struct vertex_header, pre_clip_pos,
  406.                           target, vertex_header,
  407.                           DRAW_JIT_VERTEX_PRE_CLIP_POS);
  408.    LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
  409.                           target, vertex_header,
  410.                           DRAW_JIT_VERTEX_DATA);
  411.  
  412.    assert(LLVMABISizeOfType(target, vertex_header) ==
  413.           offsetof(struct vertex_header, data[data_elems]));
  414.  
  415.    return vertex_header;
  416. }
  417.  
  418.  
  419. /**
  420.  * Create LLVM types for various structures.
  421.  */
  422. static void
  423. create_jit_types(struct draw_llvm_variant *variant)
  424. {
  425.    struct gallivm_state *gallivm = variant->gallivm;
  426.    LLVMTypeRef texture_type, sampler_type, context_type, buffer_type,
  427.       vb_type;
  428.  
  429.    texture_type = create_jit_texture_type(gallivm, "texture");
  430.    sampler_type = create_jit_sampler_type(gallivm, "sampler");
  431.  
  432.    context_type = create_jit_context_type(gallivm, texture_type, sampler_type,
  433.                                           "draw_jit_context");
  434.    variant->context_ptr_type = LLVMPointerType(context_type, 0);
  435.  
  436.    buffer_type = create_jit_dvbuffer_type(gallivm, "draw_vertex_buffer");
  437.    variant->buffer_ptr_type = LLVMPointerType(buffer_type, 0);
  438.    
  439.    vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer");
  440.    variant->vb_ptr_type = LLVMPointerType(vb_type, 0);
  441. }
  442.  
  443.  
  444. static LLVMTypeRef
  445. get_context_ptr_type(struct draw_llvm_variant *variant)
  446. {
  447.    if (!variant->context_ptr_type)
  448.       create_jit_types(variant);
  449.    return variant->context_ptr_type;
  450. }
  451.  
  452.  
  453. static LLVMTypeRef
  454. get_buffer_ptr_type(struct draw_llvm_variant *variant)
  455. {
  456.    if (!variant->buffer_ptr_type)
  457.       create_jit_types(variant);
  458.    return variant->buffer_ptr_type;
  459. }
  460.  
  461.  
  462. static LLVMTypeRef
  463. get_vb_ptr_type(struct draw_llvm_variant *variant)
  464. {
  465.    if (!variant->vb_ptr_type)
  466.       create_jit_types(variant);
  467.    return variant->vb_ptr_type;
  468. }
  469.  
  470. static LLVMTypeRef
  471. get_vertex_header_ptr_type(struct draw_llvm_variant *variant)
  472. {
  473.    if (!variant->vertex_header_ptr_type)
  474.       create_jit_types(variant);
  475.    return variant->vertex_header_ptr_type;
  476. }
  477.  
  478.  
  479. /**
  480.  * Create per-context LLVM info.
  481.  */
  482. struct draw_llvm *
  483. draw_llvm_create(struct draw_context *draw, LLVMContextRef context)
  484. {
  485.    struct draw_llvm *llvm;
  486.  
  487.    if (!lp_build_init())
  488.       return NULL;
  489.  
  490.    llvm = CALLOC_STRUCT( draw_llvm );
  491.    if (!llvm)
  492.       return NULL;
  493.  
  494.    llvm->draw = draw;
  495.  
  496.    llvm->context = context;
  497.    if (!llvm->context) {
  498.       llvm->context = LLVMContextCreate();
  499.       llvm->context_owned = true;
  500.    }
  501.    if (!llvm->context)
  502.       goto fail;
  503.  
  504.    llvm->nr_variants = 0;
  505.    make_empty_list(&llvm->vs_variants_list);
  506.  
  507.    llvm->nr_gs_variants = 0;
  508.    make_empty_list(&llvm->gs_variants_list);
  509.  
  510.    return llvm;
  511.  
  512. fail:
  513.    draw_llvm_destroy(llvm);
  514.    return NULL;
  515. }
  516.  
  517.  
  518. /**
  519.  * Free per-context LLVM info.
  520.  */
  521. void
  522. draw_llvm_destroy(struct draw_llvm *llvm)
  523. {
  524.    if (llvm->context_owned)
  525.       LLVMContextDispose(llvm->context);
  526.    llvm->context = NULL;
  527.  
  528.    /* XXX free other draw_llvm data? */
  529.    FREE(llvm);
  530. }
  531.  
  532.  
  533. /**
  534.  * Create LLVM-generated code for a vertex shader.
  535.  */
  536. struct draw_llvm_variant *
  537. draw_llvm_create_variant(struct draw_llvm *llvm,
  538.                          unsigned num_inputs,
  539.                          const struct draw_llvm_variant_key *key)
  540. {
  541.    struct draw_llvm_variant *variant;
  542.    struct llvm_vertex_shader *shader =
  543.       llvm_vertex_shader(llvm->draw->vs.vertex_shader);
  544.    LLVMTypeRef vertex_header;
  545.    char module_name[64];
  546.  
  547.    variant = MALLOC(sizeof *variant +
  548.                     shader->variant_key_size -
  549.                     sizeof variant->key);
  550.    if (variant == NULL)
  551.       return NULL;
  552.  
  553.    variant->llvm = llvm;
  554.    variant->shader = shader;
  555.  
  556.    util_snprintf(module_name, sizeof(module_name), "draw_llvm_vs_variant%u",
  557.                  variant->shader->variants_cached);
  558.  
  559.    variant->gallivm = gallivm_create(module_name, llvm->context);
  560.  
  561.    create_jit_types(variant);
  562.  
  563.    memcpy(&variant->key, key, shader->variant_key_size);
  564.  
  565.    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
  566.       tgsi_dump(llvm->draw->vs.vertex_shader->state.tokens, 0);
  567.       draw_llvm_dump_variant_key(&variant->key);
  568.    }
  569.  
  570.    vertex_header = create_jit_vertex_header(variant->gallivm, num_inputs);
  571.  
  572.    variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
  573.  
  574.    draw_llvm_generate(llvm, variant, FALSE);  /* linear */
  575.    draw_llvm_generate(llvm, variant, TRUE);   /* elts */
  576.  
  577.    gallivm_compile_module(variant->gallivm);
  578.  
  579.    variant->jit_func = (draw_jit_vert_func)
  580.          gallivm_jit_function(variant->gallivm, variant->function);
  581.  
  582.    variant->jit_func_elts = (draw_jit_vert_func_elts)
  583.          gallivm_jit_function(variant->gallivm, variant->function_elts);
  584.  
  585.    gallivm_free_ir(variant->gallivm);
  586.  
  587.    variant->list_item_global.base = variant;
  588.    variant->list_item_local.base = variant;
  589.    /*variant->no = */shader->variants_created++;
  590.    variant->list_item_global.base = variant;
  591.  
  592.    return variant;
  593. }
  594.  
  595.  
  596. static void
  597. generate_vs(struct draw_llvm_variant *variant,
  598.             LLVMBuilderRef builder,
  599.             struct lp_type vs_type,
  600.             LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
  601.             const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
  602.             const struct lp_bld_tgsi_system_values *system_values,
  603.             LLVMValueRef context_ptr,
  604.             struct lp_build_sampler_soa *draw_sampler,
  605.             boolean clamp_vertex_color)
  606. {
  607.    struct draw_llvm *llvm = variant->llvm;
  608.    const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens;
  609.    LLVMValueRef consts_ptr =
  610.       draw_jit_context_vs_constants(variant->gallivm, context_ptr);
  611.    LLVMValueRef num_consts_ptr =
  612.       draw_jit_context_num_vs_constants(variant->gallivm, context_ptr);
  613.  
  614.    lp_build_tgsi_soa(variant->gallivm,
  615.                      tokens,
  616.                      vs_type,
  617.                      NULL /*struct lp_build_mask_context *mask*/,
  618.                      consts_ptr,
  619.                      num_consts_ptr,
  620.                      system_values,
  621.                      inputs,
  622.                      outputs,
  623.                      context_ptr,
  624.                      draw_sampler,
  625.                      &llvm->draw->vs.vertex_shader->info,
  626.                      NULL);
  627.  
  628.    {
  629.       LLVMValueRef out;
  630.       unsigned chan, attrib;
  631.       struct lp_build_context bld;
  632.       struct tgsi_shader_info* info = &llvm->draw->vs.vertex_shader->info;
  633.       lp_build_context_init(&bld, variant->gallivm, vs_type);
  634.  
  635.       for (attrib = 0; attrib < info->num_outputs; ++attrib) {
  636.          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
  637.             if (outputs[attrib][chan]) {
  638.                switch (info->output_semantic_name[attrib]) {
  639.                case TGSI_SEMANTIC_COLOR:
  640.                case TGSI_SEMANTIC_BCOLOR:
  641.                   if (clamp_vertex_color) {
  642.                      out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
  643.                      out = lp_build_clamp(&bld, out, bld.zero, bld.one);
  644.                      LLVMBuildStore(builder, out, outputs[attrib][chan]);
  645.                   }
  646.                   break;
  647.                }
  648.             }
  649.          }
  650.       }
  651.    }
  652. }
  653.  
  654. static void
  655. generate_fetch(struct gallivm_state *gallivm,
  656.                struct draw_context *draw,
  657.                LLVMValueRef vbuffers_ptr,
  658.                LLVMValueRef *res,
  659.                struct pipe_vertex_element *velem,
  660.                LLVMValueRef vbuf,
  661.                LLVMValueRef index,
  662.                LLVMValueRef instance_id,
  663.                LLVMValueRef start_instance)
  664. {
  665.    const struct util_format_description *format_desc =
  666.       util_format_description(velem->src_format);
  667.    LLVMValueRef zero = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context));
  668.    LLVMBuilderRef builder = gallivm->builder;
  669.    LLVMValueRef indices =
  670.       LLVMConstInt(LLVMInt64TypeInContext(gallivm->context),
  671.                    velem->vertex_buffer_index, 0);
  672.    LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
  673.                                            &indices, 1, "");
  674.    LLVMValueRef vb_stride = draw_jit_vbuffer_stride(gallivm, vbuf);
  675.    LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vbuf);
  676.    LLVMValueRef map_ptr = draw_jit_dvbuffer_map(gallivm, vbuffer_ptr);
  677.    LLVMValueRef buffer_size = draw_jit_dvbuffer_size(gallivm, vbuffer_ptr);
  678.    LLVMValueRef stride;
  679.    LLVMValueRef buffer_overflowed;
  680.    LLVMValueRef needed_buffer_size;
  681.    LLVMValueRef temp_ptr =
  682.       lp_build_alloca(gallivm,
  683.                       lp_build_vec_type(gallivm, lp_float32_vec4_type()), "");
  684.    LLVMValueRef ofbit = NULL;
  685.    struct lp_build_if_state if_ctx;
  686.  
  687.    if (velem->instance_divisor) {
  688.       /* Index is equal to the start instance plus the number of current
  689.        * instance divided by the divisor. In this case we compute it as:
  690.        * index = start_instance + (instance_id  / divisor)
  691.        */
  692.       LLVMValueRef current_instance;
  693.       current_instance = LLVMBuildUDiv(builder, instance_id,
  694.                                        lp_build_const_int32(gallivm, velem->instance_divisor),
  695.                                        "instance_divisor");
  696.       index = lp_build_uadd_overflow(gallivm, start_instance,
  697.                                      current_instance, &ofbit);
  698.    }
  699.  
  700.    stride = lp_build_umul_overflow(gallivm, vb_stride, index, &ofbit);
  701.    stride = lp_build_uadd_overflow(gallivm, stride, vb_buffer_offset, &ofbit);
  702.    stride = lp_build_uadd_overflow(
  703.       gallivm, stride,
  704.       lp_build_const_int32(gallivm, velem->src_offset), &ofbit);
  705.    needed_buffer_size = lp_build_uadd_overflow(
  706.       gallivm, stride,
  707.       lp_build_const_int32(gallivm,
  708.                            util_format_get_blocksize(velem->src_format)),
  709.       &ofbit);
  710.  
  711.    buffer_overflowed = LLVMBuildICmp(builder, LLVMIntUGT,
  712.                                      needed_buffer_size, buffer_size,
  713.                                      "buffer_overflowed");
  714.    buffer_overflowed = LLVMBuildOr(builder, buffer_overflowed, ofbit, "");
  715. #if 0
  716.    lp_build_printf(gallivm, "vbuf index = %u, vb_stride is %u\n",
  717.                    index, vb_stride);
  718.    lp_build_printf(gallivm, "   vb_buffer_offset = %u, src_offset is %u\n",
  719.                    vb_buffer_offset,
  720.                    lp_build_const_int32(gallivm, velem->src_offset));
  721.    lp_build_print_value(gallivm, "   blocksize = ",
  722.                         lp_build_const_int32(
  723.                            gallivm,
  724.                            util_format_get_blocksize(velem->src_format)));
  725.    lp_build_printf(gallivm, "   instance_id = %u\n", instance_id);
  726.    lp_build_printf(gallivm, "   stride = %u\n", stride);
  727.    lp_build_printf(gallivm, "   buffer size = %u\n", buffer_size);
  728.    lp_build_printf(gallivm, "   needed_buffer_size = %u\n", needed_buffer_size);
  729.    lp_build_print_value(gallivm, "   buffer overflowed = ", buffer_overflowed);
  730. #endif
  731.  
  732.    lp_build_if(&if_ctx, gallivm, buffer_overflowed);
  733.    {
  734.       LLVMValueRef val =
  735.          lp_build_const_vec(gallivm, lp_float32_vec4_type(), 0);
  736.       LLVMBuildStore(builder, val, temp_ptr);
  737.    }
  738.    lp_build_else(&if_ctx);
  739.    {
  740.       LLVMValueRef val;
  741.       map_ptr = LLVMBuildGEP(builder, map_ptr, &stride, 1, "");
  742.  
  743.       val = lp_build_fetch_rgba_aos(gallivm,
  744.                                     format_desc,
  745.                                     lp_float32_vec4_type(),
  746.                                     FALSE,
  747.                                     map_ptr,
  748.                                     zero, zero, zero);
  749.       LLVMBuildStore(builder, val, temp_ptr);
  750.    }
  751.    lp_build_endif(&if_ctx);
  752.  
  753.    *res = LLVMBuildLoad(builder, temp_ptr, "aos");
  754. }
  755.  
  756. static void
  757. convert_to_soa(struct gallivm_state *gallivm,
  758.                LLVMValueRef (*src_aos)[LP_MAX_VECTOR_WIDTH / 32],
  759.                LLVMValueRef (*dst_soa)[TGSI_NUM_CHANNELS],
  760.                unsigned num_attribs, const struct lp_type soa_type)
  761. {
  762.    unsigned i, j, k;
  763.    struct lp_type aos_channel_type = soa_type;
  764.  
  765.    debug_assert(TGSI_NUM_CHANNELS == 4);
  766.    debug_assert((soa_type.length % TGSI_NUM_CHANNELS) == 0);
  767.  
  768.    aos_channel_type.length >>= 1;
  769.  
  770.    for (i = 0; i < num_attribs; ++i) {
  771.       LLVMValueRef aos_channels[TGSI_NUM_CHANNELS];
  772.       unsigned pixels_per_channel = soa_type.length / TGSI_NUM_CHANNELS;
  773.  
  774.       for (j = 0; j < TGSI_NUM_CHANNELS; ++j) {
  775.          LLVMValueRef channel[LP_MAX_VECTOR_LENGTH] = { 0 };
  776.  
  777.          assert(pixels_per_channel <= LP_MAX_VECTOR_LENGTH);
  778.  
  779.          for (k = 0; k < pixels_per_channel; ++k) {
  780.             channel[k] = src_aos[i][j + TGSI_NUM_CHANNELS * k];
  781.          }
  782.  
  783.          aos_channels[j] = lp_build_concat(gallivm, channel, aos_channel_type, pixels_per_channel);
  784.       }
  785.  
  786.       lp_build_transpose_aos(gallivm, soa_type, aos_channels, dst_soa[i]);
  787.    }
  788. }
  789.  
  790.  
  791. static void
  792. store_aos(struct gallivm_state *gallivm,
  793.           LLVMValueRef io_ptr,
  794.           LLVMValueRef index,
  795.           LLVMValueRef value)
  796. {
  797.    LLVMTypeRef data_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, lp_float32_vec4_type()), 0);
  798.    LLVMBuilderRef builder = gallivm->builder;
  799.    LLVMValueRef data_ptr = draw_jit_header_data(gallivm, io_ptr);
  800.    LLVMValueRef indices[3];
  801.  
  802.    indices[0] = lp_build_const_int32(gallivm, 0);
  803.    indices[1] = index;
  804.    indices[2] = lp_build_const_int32(gallivm, 0);
  805.  
  806.    data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, "");
  807.    data_ptr = LLVMBuildPointerCast(builder, data_ptr, data_ptr_type, "");
  808.  
  809. #if DEBUG_STORE
  810.    lp_build_printf(gallivm, "    ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr);
  811. #endif
  812.  
  813.    /* Unaligned store due to the vertex header */
  814.    lp_set_store_alignment(LLVMBuildStore(builder, value, data_ptr), sizeof(float));
  815. }
  816.  
  817. /**
  818.  * Adjust the mask to architecture endianess. The mask will the store in struct:
  819.  *
  820.  * struct vertex_header {
  821.  *    unsigned clipmask:DRAW_TOTAL_CLIP_PLANES;
  822.  *    unsigned edgeflag:1;
  823.  *    unsigned have_clipdist:1;
  824.  *    unsigned vertex_id:16;
  825.  *    [...]
  826.  * }
  827.  *
  828.  * On little-endian machine nothing needs to done, however on bit-endian machine
  829.  * the mask's fields need to be adjusted with the algorithm:
  830.  *
  831.  * uint32_t reverse (uint32_t x)
  832.  * {
  833.  *   return (x >> 16) |              // vertex_id
  834.  *          ((x & 0x3fff) << 18) |   // clipmask
  835.  *          ((x & 0x4000) << 3) |    // have_clipdist
  836.  *          ((x & 0x8000) << 1);     // edgeflag
  837.  * }
  838.  */
  839. static LLVMValueRef
  840. adjust_mask(struct gallivm_state *gallivm,
  841.             LLVMValueRef mask)
  842. {
  843. #ifdef PIPE_ARCH_BIG_ENDIAN
  844.    LLVMBuilderRef builder = gallivm->builder;
  845.    LLVMValueRef vertex_id;
  846.    LLVMValueRef clipmask;
  847.    LLVMValueRef have_clipdist;
  848.    LLVMValueRef edgeflag;
  849.  
  850.    vertex_id = LLVMBuildLShr(builder, mask, lp_build_const_int32(gallivm, 16), "");
  851.    clipmask  = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x3fff), "");
  852.    clipmask  = LLVMBuildShl(builder, clipmask, lp_build_const_int32(gallivm, 18), "");
  853.    have_clipdist = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x4000), "");
  854.    have_clipdist = LLVMBuildShl(builder, have_clipdist, lp_build_const_int32(gallivm, 3), "");
  855.    edgeflag = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x8000), "");
  856.    edgeflag = LLVMBuildShl(builder, edgeflag, lp_build_const_int32(gallivm, 1), "");
  857.  
  858.    mask = LLVMBuildOr(builder, vertex_id, clipmask, "");
  859.    mask = LLVMBuildOr(builder, mask, have_clipdist, "");
  860.    mask = LLVMBuildOr(builder, mask, edgeflag, "");
  861. #endif
  862.    return mask;
  863. }
  864.  
  865. static void
  866. store_aos_array(struct gallivm_state *gallivm,
  867.                 struct lp_type soa_type,
  868.                 LLVMValueRef io_ptr,
  869.                 LLVMValueRef *indices,
  870.                 LLVMValueRef* aos,
  871.                 int attrib,
  872.                 int num_outputs,
  873.                 LLVMValueRef clipmask,
  874.                 boolean have_clipdist)
  875. {
  876.    LLVMBuilderRef builder = gallivm->builder;
  877.    LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib);
  878.    LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
  879.    LLVMValueRef linear_inds[LP_MAX_VECTOR_WIDTH / 32];
  880.    LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
  881.    int vector_length = soa_type.length;
  882.    int i;
  883.  
  884.    debug_assert(TGSI_NUM_CHANNELS == 4);
  885.  
  886.    for (i = 0; i < vector_length; i++) {
  887.       linear_inds[i] = lp_build_const_int32(gallivm, i);
  888.       if (indices) {
  889.          inds[i] = indices[i];
  890.       } else {
  891.          inds[i] = linear_inds[i];
  892.       }
  893.       io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, "");
  894.    }
  895.  
  896.    if (attrib == 0) {
  897.       /* store vertex header for each of the n vertices */
  898.       LLVMValueRef val, cliptmp;
  899.       int vertex_id_pad_edgeflag;
  900.  
  901.       /* If this assertion fails, it means we need to update the bit twidding
  902.        * code here.  See struct vertex_header in draw_private.h.
  903.        */
  904.       assert(DRAW_TOTAL_CLIP_PLANES==14);
  905.       /* initialize vertex id:16 = 0xffff, have_clipdist:1 = 0, edgeflag:1 = 1 */
  906.       vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES);
  907.       if (have_clipdist)
  908.          vertex_id_pad_edgeflag |= 1 << (DRAW_TOTAL_CLIP_PLANES+1);
  909.       val = lp_build_const_int_vec(gallivm, lp_int_type(soa_type), vertex_id_pad_edgeflag);
  910.       /* OR with the clipmask */
  911.       cliptmp = LLVMBuildOr(builder, val, clipmask, "");
  912.       for (i = 0; i < vector_length; i++) {
  913.          LLVMValueRef id_ptr = draw_jit_header_id(gallivm, io_ptrs[i]);
  914.          val = LLVMBuildExtractElement(builder, cliptmp, linear_inds[i], "");
  915.          val = adjust_mask(gallivm, val);
  916. #if DEBUG_STORE
  917.          lp_build_printf(gallivm, "io = %p, index %d, clipmask = %x\n",
  918.                          io_ptrs[i], inds[i], val);
  919. #endif
  920.          LLVMBuildStore(builder, val, id_ptr);
  921.       }
  922.    }
  923.  
  924.    /* store for each of the n vertices */
  925.    for (i = 0; i < vector_length; i++) {
  926.       store_aos(gallivm, io_ptrs[i], attr_index, aos[i]);
  927.    }
  928. }
  929.  
  930.  
  931. static void
  932. convert_to_aos(struct gallivm_state *gallivm,
  933.                LLVMValueRef io,
  934.                LLVMValueRef *indices,
  935.                LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
  936.                LLVMValueRef clipmask,
  937.                int num_outputs,
  938.                struct lp_type soa_type,
  939.                boolean have_clipdist)
  940. {
  941.    LLVMBuilderRef builder = gallivm->builder;
  942.    unsigned chan, attrib, i;
  943.  
  944. #if DEBUG_STORE
  945.    lp_build_printf(gallivm, "   # storing begin\n");
  946. #endif
  947.    for (attrib = 0; attrib < num_outputs; ++attrib) {
  948.       LLVMValueRef soa[TGSI_NUM_CHANNELS];
  949.       LLVMValueRef aos[LP_MAX_VECTOR_WIDTH / 32];
  950.       for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
  951.          if (outputs[attrib][chan]) {
  952.             LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], "");
  953.             lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]);
  954. #if DEBUG_STORE
  955.             lp_build_printf(gallivm, "output %d : %d ",
  956.                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
  957.                                          attrib, 0),
  958.                             LLVMConstInt(LLVMInt32TypeInContext(gallivm->context),
  959.                                          chan, 0));
  960.             lp_build_print_value(gallivm, "val = ", out);
  961.             {
  962.                LLVMValueRef iv =
  963.                   LLVMBuildBitCast(builder, out, lp_build_int_vec_type(gallivm, soa_type), "");
  964.                
  965.                lp_build_print_value(gallivm, "  ival = ", iv);
  966.             }
  967. #endif
  968.             soa[chan] = out;
  969.          }
  970.          else {
  971.             soa[chan] = 0;
  972.          }
  973.       }
  974.  
  975.  
  976.       if (soa_type.length == TGSI_NUM_CHANNELS) {
  977.          lp_build_transpose_aos(gallivm, soa_type, soa, aos);
  978.       } else {
  979.          lp_build_transpose_aos(gallivm, soa_type, soa, soa);
  980.  
  981.          for (i = 0; i < soa_type.length; ++i) {
  982.             aos[i] = lp_build_extract_range(gallivm,
  983.                                             soa[i % TGSI_NUM_CHANNELS],
  984.                                             (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
  985.                                             TGSI_NUM_CHANNELS);
  986.          }
  987.       }
  988.  
  989.       store_aos_array(gallivm,
  990.                       soa_type,
  991.                       io, indices,
  992.                       aos,
  993.                       attrib,
  994.                       num_outputs,
  995.                       clipmask, have_clipdist);
  996.    }
  997. #if DEBUG_STORE
  998.    lp_build_printf(gallivm, "   # storing end\n");
  999. #endif
  1000. }
  1001.  
  1002.  
  1003. /**
  1004.  * Stores original vertex positions in clip coordinates
  1005.  */
  1006. static void
  1007. store_clip(struct gallivm_state *gallivm,
  1008.            const struct lp_type vs_type,
  1009.            LLVMValueRef io_ptr,
  1010.            LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
  1011.            boolean pre_clip_pos, int idx)
  1012. {
  1013.    LLVMBuilderRef builder = gallivm->builder;
  1014.    LLVMValueRef soa[4];
  1015.    LLVMValueRef aos[LP_MAX_VECTOR_LENGTH];
  1016.    LLVMValueRef indices[2];
  1017.    LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
  1018.    LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
  1019.    LLVMValueRef clip_ptrs[LP_MAX_VECTOR_WIDTH / 32];
  1020.    LLVMTypeRef clip_ptr_type =
  1021.       LLVMPointerType(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context),
  1022.                                      4), 0);
  1023.    int i, j;
  1024.  
  1025.    indices[0] =
  1026.    indices[1] = lp_build_const_int32(gallivm, 0);
  1027.  
  1028.    for (i = 0; i < vs_type.length; i++) {
  1029.       inds[i] = lp_build_const_int32(gallivm, i);
  1030.       io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, "");
  1031.    }
  1032.  
  1033.    soa[0] = LLVMBuildLoad(builder, outputs[idx][0], ""); /*x0 x1 .. xn*/
  1034.    soa[1] = LLVMBuildLoad(builder, outputs[idx][1], ""); /*y0 y1 .. yn*/
  1035.    soa[2] = LLVMBuildLoad(builder, outputs[idx][2], ""); /*z0 z1 .. zn*/
  1036.    soa[3] = LLVMBuildLoad(builder, outputs[idx][3], ""); /*w0 w1 .. wn*/
  1037.  
  1038.    if (!pre_clip_pos) {
  1039.       for (i = 0; i < vs_type.length; i++) {
  1040.          clip_ptrs[i] = draw_jit_header_clip(gallivm, io_ptrs[i]);
  1041.       }
  1042.    } else {
  1043.       for (i = 0; i < vs_type.length; i++) {
  1044.          clip_ptrs[i] = draw_jit_header_pre_clip_pos(gallivm, io_ptrs[i]);
  1045.       }
  1046.    }
  1047.  
  1048.    lp_build_transpose_aos(gallivm, vs_type, soa, soa);
  1049.    for (i = 0; i < vs_type.length; ++i) {
  1050.       aos[i] = lp_build_extract_range(gallivm,
  1051.                                       soa[i % TGSI_NUM_CHANNELS],
  1052.                                       (i / TGSI_NUM_CHANNELS) * TGSI_NUM_CHANNELS,
  1053.                                       TGSI_NUM_CHANNELS);
  1054.    }
  1055.  
  1056.    for (j = 0; j < vs_type.length; j++) {
  1057.       LLVMValueRef clip_ptr;
  1058.  
  1059.       clip_ptr = LLVMBuildGEP(builder, clip_ptrs[j], indices, 2, "clipo");
  1060.       clip_ptr = LLVMBuildPointerCast(builder, clip_ptr, clip_ptr_type, "");
  1061.  
  1062.       /* Unaligned store */
  1063.       lp_set_store_alignment(LLVMBuildStore(builder, aos[j], clip_ptr), sizeof(float));
  1064.    }
  1065. }
  1066.  
  1067.  
  1068. /**
  1069.  * Transforms the outputs for viewport mapping
  1070.  */
  1071. static void
  1072. generate_viewport(struct draw_llvm_variant *variant,
  1073.                   LLVMBuilderRef builder,
  1074.                   struct lp_type vs_type,
  1075.                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
  1076.                   LLVMValueRef context_ptr)
  1077. {
  1078.    int i;
  1079.    struct gallivm_state *gallivm = variant->gallivm;
  1080.    struct lp_type f32_type = vs_type;
  1081.    const unsigned pos = variant->llvm->draw->vs.position_output;
  1082.    LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
  1083.    LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[pos][3], ""); /*w0 w1 .. wn*/
  1084.    LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0);       /*1.0 1.0 1.0 1.0*/
  1085.    LLVMValueRef vp_ptr = draw_jit_context_viewports(gallivm, context_ptr);
  1086.  
  1087.    /* We treat pipe_viewport_state as a float array */
  1088.    const int scale_index_offset = offsetof(struct pipe_viewport_state, scale) / sizeof(float);
  1089.    const int trans_index_offset = offsetof(struct pipe_viewport_state, translate) / sizeof(float);
  1090.  
  1091.    /* for 1/w convention*/
  1092.    out3 = LLVMBuildFDiv(builder, const1, out3, "");
  1093.    LLVMBuildStore(builder, out3, outputs[pos][3]);
  1094.  
  1095.    /* Viewport Mapping */
  1096.    for (i=0; i<3; i++) {
  1097.       LLVMValueRef out = LLVMBuildLoad(builder, outputs[pos][i], ""); /*x0 x1 .. xn*/
  1098.       LLVMValueRef scale;
  1099.       LLVMValueRef trans;
  1100.       LLVMValueRef scale_i;
  1101.       LLVMValueRef trans_i;
  1102.       LLVMValueRef index;
  1103.  
  1104.       index = lp_build_const_int32(gallivm, i + scale_index_offset);
  1105.       scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
  1106.  
  1107.       index = lp_build_const_int32(gallivm, i + trans_index_offset);
  1108.       trans_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
  1109.  
  1110.       scale = lp_build_broadcast(gallivm, vs_type_llvm,
  1111.                                  LLVMBuildLoad(builder, scale_i, "scale"));
  1112.       trans = lp_build_broadcast(gallivm, vs_type_llvm,
  1113.                                  LLVMBuildLoad(builder, trans_i, "trans"));
  1114.  
  1115.       /* divide by w */
  1116.       out = LLVMBuildFMul(builder, out, out3, "");
  1117.       /* mult by scale */
  1118.       out = LLVMBuildFMul(builder, out, scale, "");
  1119.       /* add translation */
  1120.       out = LLVMBuildFAdd(builder, out, trans, "");
  1121.  
  1122.       /* store transformed outputs */
  1123.       LLVMBuildStore(builder, out, outputs[pos][i]);
  1124.    }
  1125.  
  1126. }
  1127.  
  1128.  
  1129. /**
  1130.  * Returns clipmask as nxi32 bitmask for the n vertices
  1131.  */
  1132. static LLVMValueRef
  1133. generate_clipmask(struct draw_llvm *llvm,
  1134.                   struct gallivm_state *gallivm,
  1135.                   struct lp_type vs_type,
  1136.                   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
  1137.                   boolean clip_xy,
  1138.                   boolean clip_z,
  1139.                   boolean clip_user,
  1140.                   boolean clip_halfz,
  1141.                   unsigned ucp_enable,
  1142.                   LLVMValueRef context_ptr,
  1143.                   boolean *have_clipdist)
  1144. {
  1145.    LLVMBuilderRef builder = gallivm->builder;
  1146.    LLVMValueRef mask; /* stores the <nxi32> clipmasks */
  1147.    LLVMValueRef test, temp;
  1148.    LLVMValueRef zero, shift;
  1149.    LLVMValueRef pos_x, pos_y, pos_z, pos_w;
  1150.    LLVMValueRef cv_x, cv_y, cv_z, cv_w;
  1151.    LLVMValueRef plane1, planes, plane_ptr, sum;
  1152.    struct lp_type f32_type = vs_type;
  1153.    struct lp_type i32_type = lp_int_type(vs_type);
  1154.    const unsigned pos = llvm->draw->vs.position_output;
  1155.    const unsigned cv = llvm->draw->vs.clipvertex_output;
  1156.    int num_written_clipdistance = llvm->draw->vs.vertex_shader->info.num_written_clipdistance;
  1157.    bool have_cd = false;
  1158.    unsigned cd[2];
  1159.  
  1160.    cd[0] = llvm->draw->vs.clipdistance_output[0];
  1161.    cd[1] = llvm->draw->vs.clipdistance_output[1];
  1162.  
  1163.    if (cd[0] != pos || cd[1] != pos)
  1164.       have_cd = true;
  1165.  
  1166.    if (num_written_clipdistance && !clip_user) {
  1167.       clip_user = true;
  1168.       ucp_enable = (1 << num_written_clipdistance) - 1;
  1169.    }
  1170.  
  1171.    mask = lp_build_const_int_vec(gallivm, i32_type, 0);
  1172.    temp = lp_build_const_int_vec(gallivm, i32_type, 0);
  1173.    zero = lp_build_const_vec(gallivm, f32_type, 0);         /* 0.0f 0.0f 0.0f 0.0f */
  1174.    shift = lp_build_const_int_vec(gallivm, i32_type, 1);    /* 1 1 1 1 */
  1175.  
  1176.    /*
  1177.     * load clipvertex and position from correct locations.
  1178.     * if they are the same just load them once.
  1179.     */
  1180.    pos_x = LLVMBuildLoad(builder, outputs[pos][0], ""); /*x0 x1 .. xn */
  1181.    pos_y = LLVMBuildLoad(builder, outputs[pos][1], ""); /*y0 y1 .. yn */
  1182.    pos_z = LLVMBuildLoad(builder, outputs[pos][2], ""); /*z0 z1 .. zn */
  1183.    pos_w = LLVMBuildLoad(builder, outputs[pos][3], ""); /*w0 w1 .. wn */
  1184.  
  1185.    if (clip_user && cv != pos) {
  1186.       cv_x = LLVMBuildLoad(builder, outputs[cv][0], ""); /*x0 x1 .. xn */
  1187.       cv_y = LLVMBuildLoad(builder, outputs[cv][1], ""); /*y0 y1 .. yn */
  1188.       cv_z = LLVMBuildLoad(builder, outputs[cv][2], ""); /*z0 z1 .. zn */
  1189.       cv_w = LLVMBuildLoad(builder, outputs[cv][3], ""); /*w0 w1 .. wn */
  1190.    } else {
  1191.       cv_x = pos_x;
  1192.       cv_y = pos_y;
  1193.       cv_z = pos_z;
  1194.       cv_w = pos_w;
  1195.    }
  1196.  
  1197.    /* Cliptest, for hardwired planes */
  1198.    if (clip_xy) {
  1199.       /* plane 1 */
  1200.       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
  1201.       temp = shift;
  1202.       test = LLVMBuildAnd(builder, test, temp, "");
  1203.       mask = test;
  1204.  
  1205.       /* plane 2 */
  1206.       test = LLVMBuildFAdd(builder, pos_x, pos_w, "");
  1207.       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
  1208.       temp = LLVMBuildShl(builder, temp, shift, "");
  1209.       test = LLVMBuildAnd(builder, test, temp, "");
  1210.       mask = LLVMBuildOr(builder, mask, test, "");
  1211.  
  1212.       /* plane 3 */
  1213.       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);
  1214.       temp = LLVMBuildShl(builder, temp, shift, "");
  1215.       test = LLVMBuildAnd(builder, test, temp, "");
  1216.       mask = LLVMBuildOr(builder, mask, test, "");
  1217.  
  1218.       /* plane 4 */
  1219.       test = LLVMBuildFAdd(builder, pos_y, pos_w, "");
  1220.       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
  1221.       temp = LLVMBuildShl(builder, temp, shift, "");
  1222.       test = LLVMBuildAnd(builder, test, temp, "");
  1223.       mask = LLVMBuildOr(builder, mask, test, "");
  1224.    }
  1225.  
  1226.    if (clip_z) {
  1227.       temp = lp_build_const_int_vec(gallivm, i32_type, 16);
  1228.       if (clip_halfz) {
  1229.          /* plane 5 */
  1230.          test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
  1231.          test = LLVMBuildAnd(builder, test, temp, "");
  1232.          mask = LLVMBuildOr(builder, mask, test, "");
  1233.       }
  1234.       else {
  1235.          /* plane 5 */
  1236.          test = LLVMBuildFAdd(builder, pos_z, pos_w, "");
  1237.          test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
  1238.          test = LLVMBuildAnd(builder, test, temp, "");
  1239.          mask = LLVMBuildOr(builder, mask, test, "");
  1240.       }
  1241.       /* plane 6 */
  1242.       test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);
  1243.       temp = LLVMBuildShl(builder, temp, shift, "");
  1244.       test = LLVMBuildAnd(builder, test, temp, "");
  1245.       mask = LLVMBuildOr(builder, mask, test, "");
  1246.    }
  1247.  
  1248.    if (clip_user) {
  1249.       LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_ptr);
  1250.       LLVMValueRef indices[3];
  1251.       LLVMValueRef is_nan_or_inf;
  1252.  
  1253.       /* userclip planes */
  1254.       while (ucp_enable) {
  1255.          unsigned plane_idx = ffs(ucp_enable)-1;
  1256.          ucp_enable &= ~(1 << plane_idx);
  1257.          plane_idx += 6;
  1258.  
  1259.          if (have_cd && num_written_clipdistance) {
  1260.             LLVMValueRef clipdist;
  1261.             int i;
  1262.             i = plane_idx - 6;
  1263.  
  1264.             *have_clipdist = TRUE;
  1265.             if (i < 4) {
  1266.                clipdist = LLVMBuildLoad(builder, outputs[cd[0]][i], "");
  1267.             } else {
  1268.                clipdist = LLVMBuildLoad(builder, outputs[cd[1]][i-4], "");
  1269.             }
  1270.             test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, clipdist);
  1271.             is_nan_or_inf = lp_build_is_inf_or_nan(gallivm, vs_type, clipdist);
  1272.             test = LLVMBuildOr(builder, test, is_nan_or_inf, "");
  1273.             temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);
  1274.             test = LLVMBuildAnd(builder, test, temp, "");
  1275.             mask = LLVMBuildOr(builder, mask, test, "");
  1276.          } else {
  1277.             LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
  1278.             indices[0] = lp_build_const_int32(gallivm, 0);
  1279.             indices[1] = lp_build_const_int32(gallivm, plane_idx);
  1280.  
  1281.             indices[2] = lp_build_const_int32(gallivm, 0);
  1282.             plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
  1283.             plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x");
  1284.             planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
  1285.             sum = LLVMBuildFMul(builder, planes, cv_x, "");
  1286.  
  1287.             indices[2] = lp_build_const_int32(gallivm, 1);
  1288.             plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
  1289.             plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y");
  1290.             planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
  1291.             test = LLVMBuildFMul(builder, planes, cv_y, "");
  1292.             sum = LLVMBuildFAdd(builder, sum, test, "");
  1293.  
  1294.             indices[2] = lp_build_const_int32(gallivm, 2);
  1295.             plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
  1296.             plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z");
  1297.             planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
  1298.             test = LLVMBuildFMul(builder, planes, cv_z, "");
  1299.             sum = LLVMBuildFAdd(builder, sum, test, "");
  1300.  
  1301.             indices[2] = lp_build_const_int32(gallivm, 3);
  1302.             plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, "");
  1303.             plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w");
  1304.             planes = lp_build_broadcast(gallivm, vs_type_llvm, plane1);
  1305.             test = LLVMBuildFMul(builder, planes, cv_w, "");
  1306.             sum = LLVMBuildFAdd(builder, sum, test, "");
  1307.  
  1308.             test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum);
  1309.             temp = lp_build_const_int_vec(gallivm, i32_type, 1LL << plane_idx);
  1310.             test = LLVMBuildAnd(builder, test, temp, "");
  1311.             mask = LLVMBuildOr(builder, mask, test, "");
  1312.          }
  1313.       }
  1314.    }
  1315.    return mask;
  1316. }
  1317.  
  1318.  
  1319. /**
  1320.  * Returns boolean if any clipping has occurred
  1321.  * Used zero/non-zero i32 value to represent boolean
  1322.  */
  1323. static LLVMValueRef
  1324. clipmask_booli32(struct gallivm_state *gallivm,
  1325.                  const struct lp_type vs_type,
  1326.                  LLVMValueRef clipmask_bool_ptr)
  1327. {
  1328.    LLVMBuilderRef builder = gallivm->builder;
  1329.    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
  1330.    LLVMValueRef clipmask_bool = LLVMBuildLoad(builder, clipmask_bool_ptr, "");
  1331.    LLVMValueRef ret = LLVMConstNull(int32_type);
  1332.    LLVMValueRef temp;
  1333.    int i;
  1334.  
  1335.    /*
  1336.     * Can do this with log2(vector length) pack instructions and one extract
  1337.     * (as we don't actually need a or) with sse2 which would be way better.
  1338.     */
  1339.    for (i=0; i < vs_type.length; i++) {
  1340.       temp = LLVMBuildExtractElement(builder, clipmask_bool,
  1341.                                      lp_build_const_int32(gallivm, i) , "");
  1342.       ret = LLVMBuildOr(builder, ret, temp, "");
  1343.    }
  1344.    return ret;
  1345. }
  1346.  
  1347. static LLVMValueRef
  1348. draw_gs_llvm_fetch_input(const struct lp_build_tgsi_gs_iface *gs_iface,
  1349.                          struct lp_build_tgsi_context * bld_base,
  1350.                          boolean is_vindex_indirect,
  1351.                          LLVMValueRef vertex_index,
  1352.                          boolean is_aindex_indirect,
  1353.                          LLVMValueRef attrib_index,
  1354.                          LLVMValueRef swizzle_index)
  1355. {
  1356.    const struct draw_gs_llvm_iface *gs = draw_gs_llvm_iface(gs_iface);
  1357.    struct gallivm_state *gallivm = bld_base->base.gallivm;
  1358.    LLVMBuilderRef builder = gallivm->builder;
  1359.    LLVMValueRef indices[3];
  1360.    LLVMValueRef res;
  1361.    struct lp_type type = bld_base->base.type;
  1362.  
  1363.    if (is_vindex_indirect || is_aindex_indirect) {
  1364.       int i;
  1365.       res = bld_base->base.zero;
  1366.       for (i = 0; i < type.length; ++i) {
  1367.          LLVMValueRef idx = lp_build_const_int32(gallivm, i);
  1368.          LLVMValueRef vert_chan_index = vertex_index;
  1369.          LLVMValueRef attr_chan_index = attrib_index;
  1370.          LLVMValueRef channel_vec, value;
  1371.  
  1372.          if (is_vindex_indirect) {
  1373.             vert_chan_index = LLVMBuildExtractElement(builder,
  1374.                                                       vertex_index, idx, "");
  1375.          }
  1376.          if (is_aindex_indirect) {
  1377.             attr_chan_index = LLVMBuildExtractElement(builder,
  1378.                                                       attrib_index, idx, "");
  1379.          }
  1380.  
  1381.          indices[0] = vert_chan_index;
  1382.          indices[1] = attr_chan_index;
  1383.          indices[2] = swizzle_index;
  1384.  
  1385.          channel_vec = LLVMBuildGEP(builder, gs->input, indices, 3, "");
  1386.          channel_vec = LLVMBuildLoad(builder, channel_vec, "");
  1387.          value = LLVMBuildExtractElement(builder, channel_vec, idx, "");
  1388.  
  1389.          res = LLVMBuildInsertElement(builder, res, value, idx, "");
  1390.       }
  1391.    } else {
  1392.       indices[0] = vertex_index;
  1393.       indices[1] = attrib_index;
  1394.       indices[2] = swizzle_index;
  1395.  
  1396.       res = LLVMBuildGEP(builder, gs->input, indices, 3, "");
  1397.       res = LLVMBuildLoad(builder, res, "");
  1398.    }
  1399.  
  1400.    return res;
  1401. }
  1402.  
  1403. static void
  1404. draw_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base,
  1405.                          struct lp_build_tgsi_context * bld_base,
  1406.                          LLVMValueRef (*outputs)[4],
  1407.                          LLVMValueRef emitted_vertices_vec)
  1408. {
  1409.    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
  1410.    struct draw_gs_llvm_variant *variant = gs_iface->variant;
  1411.    struct gallivm_state *gallivm = variant->gallivm;
  1412.    LLVMBuilderRef builder = gallivm->builder;
  1413.    struct lp_type gs_type = bld_base->base.type;
  1414.    LLVMValueRef clipmask = lp_build_const_int_vec(gallivm,
  1415.                                                   lp_int_type(gs_type), 0);
  1416.    LLVMValueRef indices[LP_MAX_VECTOR_LENGTH];
  1417.    LLVMValueRef next_prim_offset =
  1418.       lp_build_const_int32(gallivm, variant->shader->base.primitive_boundary);
  1419.    LLVMValueRef io = variant->io_ptr;
  1420.    unsigned i;
  1421.    const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
  1422.  
  1423.    for (i = 0; i < gs_type.length; ++i) {
  1424.       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
  1425.       LLVMValueRef currently_emitted =
  1426.          LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, "");
  1427.       indices[i] = LLVMBuildMul(builder, ind, next_prim_offset, "");
  1428.       indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, "");
  1429.    }
  1430.  
  1431.    convert_to_aos(gallivm, io, indices,
  1432.                   outputs, clipmask,
  1433.                   gs_info->num_outputs, gs_type,
  1434.                   FALSE);
  1435. }
  1436.  
  1437. static void
  1438. draw_gs_llvm_end_primitive(const struct lp_build_tgsi_gs_iface *gs_base,
  1439.                            struct lp_build_tgsi_context * bld_base,
  1440.                            LLVMValueRef verts_per_prim_vec,
  1441.                            LLVMValueRef emitted_prims_vec)
  1442. {
  1443.    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
  1444.    struct draw_gs_llvm_variant *variant = gs_iface->variant;
  1445.    struct gallivm_state *gallivm = variant->gallivm;
  1446.    LLVMBuilderRef builder = gallivm->builder;
  1447.    LLVMValueRef prim_lengts_ptr =
  1448.       draw_gs_jit_prim_lengths(variant->gallivm, variant->context_ptr);
  1449.    unsigned i;
  1450.  
  1451.    for (i = 0; i < bld_base->base.type.length; ++i) {
  1452.       LLVMValueRef ind = lp_build_const_int32(gallivm, i);
  1453.       LLVMValueRef prims_emitted =
  1454.          LLVMBuildExtractElement(builder, emitted_prims_vec, ind, "");
  1455.       LLVMValueRef store_ptr;
  1456.       LLVMValueRef num_vertices =
  1457.          LLVMBuildExtractElement(builder, verts_per_prim_vec, ind, "");
  1458.  
  1459.       store_ptr = LLVMBuildGEP(builder, prim_lengts_ptr, &prims_emitted, 1, "");
  1460.       store_ptr = LLVMBuildLoad(builder, store_ptr, "");
  1461.       store_ptr = LLVMBuildGEP(builder, store_ptr, &ind, 1, "");
  1462.       LLVMBuildStore(builder, num_vertices, store_ptr);
  1463.    }
  1464. }
  1465.  
  1466. static void
  1467. draw_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base,
  1468.                       struct lp_build_tgsi_context * bld_base,
  1469.                       LLVMValueRef total_emitted_vertices_vec,
  1470.                       LLVMValueRef emitted_prims_vec)
  1471. {
  1472.    const struct draw_gs_llvm_iface *gs_iface = draw_gs_llvm_iface(gs_base);
  1473.    struct draw_gs_llvm_variant *variant = gs_iface->variant;
  1474.    struct gallivm_state *gallivm = variant->gallivm;
  1475.    LLVMBuilderRef builder = gallivm->builder;
  1476.    LLVMValueRef emitted_verts_ptr =
  1477.       draw_gs_jit_emitted_vertices(gallivm, variant->context_ptr);
  1478.    LLVMValueRef emitted_prims_ptr =
  1479.       draw_gs_jit_emitted_prims(gallivm, variant->context_ptr);
  1480.    LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
  1481.    
  1482.    emitted_verts_ptr = LLVMBuildGEP(builder, emitted_verts_ptr, &zero, 0, "");
  1483.    emitted_prims_ptr = LLVMBuildGEP(builder, emitted_prims_ptr, &zero, 0, "");
  1484.  
  1485.    LLVMBuildStore(builder, total_emitted_vertices_vec, emitted_verts_ptr);
  1486.    LLVMBuildStore(builder, emitted_prims_vec, emitted_prims_ptr);
  1487. }
  1488.  
/**
 * Emit the LLVM IR of one vertex shader variant entry point.
 *
 * The generated function is named "draw_llvm_vs_variant<N>_elts" or
 * "draw_llvm_vs_variant<N>_linear", returns an i32 and is recorded in
 * variant->function_elts or variant->function respectively.
 *
 * Arguments of the generated function, in order:
 *    context, vertex_header (io), vbuffers,
 *    elts:   fetch_elts, fetch_elt_max, fetch_count
 *    linear: start, count
 *    stride, vb, instance_id, vertex_id_offset, start_instance
 *
 * The body is a loop that handles vector_length vertices per iteration:
 * attributes are fetched with generate_fetch(), converted with
 * convert_to_soa(), run through generate_vs(), optionally clip tested
 * and viewport transformed, and finally written back with
 * convert_to_aos().  The value returned by the generated function is
 * what clipmask_booli32() produces from the OR of the clipmasks of all
 * iterations.
 *
 * \param llvm     draw LLVM state (gives access to the draw context)
 * \param variant  variant being built; supplies the gallivm state and key
 * \param elts     TRUE to build the indexed (elts) entry point, FALSE for
 *                 the linear one
 */
static void
draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
                   boolean elts)
{
   struct gallivm_state *gallivm = variant->gallivm;
   LLVMContextRef context = gallivm->context;
   LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
   LLVMTypeRef arg_types[11];
   /* the linear entry point takes one argument less than the elts one */
   unsigned num_arg_types =
      elts ? Elements(arg_types) : Elements(arg_types) - 1;
   LLVMTypeRef func_type;
   LLVMValueRef context_ptr;
   LLVMBasicBlockRef block;
   LLVMBuilderRef builder;
   char func_name[64];
   struct lp_type vs_type;
   LLVMValueRef end, start;
   LLVMValueRef count, fetch_elts, fetch_elt_max, fetch_count;
   LLVMValueRef vertex_id_offset, start_instance;
   LLVMValueRef stride, step, io_itr;
   LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
   LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
   LLVMValueRef one = lp_build_const_int32(gallivm, 1);
   struct draw_context *draw = llvm->draw;
   const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info;
   unsigned i, j;
   struct lp_build_context bld;
   struct lp_build_loop_state lp_loop;
   /* number of 32-bit lanes, i.e. vertices processed per loop iteration */
   const int vector_length = lp_native_vector_width / 32;
   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
   LLVMValueRef fetch_max;
   struct lp_build_sampler_soa *sampler = 0;
   LLVMValueRef ret, clipmask_bool_ptr;
   struct draw_llvm_variant_key *key = &variant->key;
   /* If a geometry shader is present we need to skip both the viewport
    * transformation and clipping, otherwise the inputs to the geometry
    * shader will be incorrect.
    * The code can't handle the viewport transform when the vs writes the
    * viewport index either (though this would be fixable here, but
    * couldn't just broadcast the values).
    */
   const boolean bypass_viewport = key->has_gs || key->bypass_viewport ||
                                   llvm->draw->vs.vertex_shader->info.writes_viewport_index;
   const boolean enable_cliptest = !key->has_gs && (key->clip_xy ||
                                                    key->clip_z  ||
                                                    key->clip_user);
   LLVMValueRef variant_func;
   const unsigned pos = llvm->draw->vs.position_output;
   const unsigned cv = llvm->draw->vs.clipvertex_output;
   boolean have_clipdist = FALSE;
   struct lp_bld_tgsi_system_values system_values;

   memset(&system_values, 0, sizeof(system_values));

   util_snprintf(func_name, sizeof(func_name), "draw_llvm_vs_variant%u_%s",
                 variant->shader->variants_cached, elts ? "elts" : "linear");

   /* Build the argument type list; only the fetch arguments differ
    * between the two entry points.
    */
   i = 0;
   arg_types[i++] = get_context_ptr_type(variant);       /* context */
   arg_types[i++] = get_vertex_header_ptr_type(variant); /* vertex_header */
   arg_types[i++] = get_buffer_ptr_type(variant);        /* vbuffers */
   if (elts) {
      arg_types[i++] = LLVMPointerType(int32_type, 0);/* fetch_elts  */
      arg_types[i++] = int32_type;                  /* fetch_elt_max */
   } else
      arg_types[i++] = int32_type;                  /* start */
   arg_types[i++] = int32_type;                     /* fetch_count / count */
   arg_types[i++] = int32_type;                     /* stride */
   arg_types[i++] = get_vb_ptr_type(variant);       /* pipe_vertex_buffer's */
   arg_types[i++] = int32_type;                     /* instance_id */
   arg_types[i++] = int32_type;                     /* vertex_id_offset */
   arg_types[i++] = int32_type;                     /* start_instance */

   func_type = LLVMFunctionType(int32_type, arg_types, num_arg_types, 0);

   variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);

   if (elts)
      variant->function_elts = variant_func;
   else
      variant->function = variant_func;

   LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
   /* every pointer argument is marked noalias */
   for (i = 0; i < num_arg_types; ++i)
      if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
         LLVMAddAttribute(LLVMGetParam(variant_func, i),
                          LLVMNoAliasAttribute);

   /* Arguments following the fetch arguments sit one position later in
    * the elts entry point because of its extra argument.
    */
   context_ptr               = LLVMGetParam(variant_func, 0);
   io_ptr                    = LLVMGetParam(variant_func, 1);
   vbuffers_ptr              = LLVMGetParam(variant_func, 2);
   stride                    = LLVMGetParam(variant_func, 5 + (elts ? 1 : 0));
   vb_ptr                    = LLVMGetParam(variant_func, 6 + (elts ? 1 : 0));
   system_values.instance_id = LLVMGetParam(variant_func, 7 + (elts ? 1 : 0));
   vertex_id_offset          = LLVMGetParam(variant_func, 8 + (elts ? 1 : 0));
   start_instance            = LLVMGetParam(variant_func, 9 + (elts ? 1 : 0));

   lp_build_name(context_ptr, "context");
   lp_build_name(io_ptr, "io");
   lp_build_name(vbuffers_ptr, "vbuffers");
   lp_build_name(stride, "stride");
   lp_build_name(vb_ptr, "vb");
   lp_build_name(system_values.instance_id, "instance_id");
   lp_build_name(vertex_id_offset, "vertex_id_offset");
   lp_build_name(start_instance, "start_instance");

   if (elts) {
      fetch_elts    = LLVMGetParam(variant_func, 3);
      fetch_elt_max = LLVMGetParam(variant_func, 4);
      fetch_count   = LLVMGetParam(variant_func, 5);
      lp_build_name(fetch_elts, "fetch_elts");
      lp_build_name(fetch_elt_max, "fetch_elt_max");
      lp_build_name(fetch_count, "fetch_count");
      start = count = NULL;
   }
   else {
      start        = LLVMGetParam(variant_func, 3);
      count        = LLVMGetParam(variant_func, 4);
      lp_build_name(start, "start");
      lp_build_name(count, "count");
      fetch_elts = fetch_count = NULL;
   }

   /*
    * Function body
    */

   block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
   builder = gallivm->builder;
   LLVMPositionBuilderAtEnd(builder, block);

   /* scalar 32-bit integer context, used for the index arithmetic below */
   lp_build_context_init(&bld, gallivm, lp_type_int(32));

   memset(&vs_type, 0, sizeof vs_type);
   vs_type.floating = TRUE; /* floating point values */
   vs_type.sign = TRUE;     /* values are signed */
   vs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
   vs_type.width = 32;      /* 32-bit float */
   vs_type.length = vector_length;

   /* hold temporary "bool" clipmask */
   clipmask_bool_ptr = lp_build_alloca(gallivm, lp_build_int_vec_type(gallivm, vs_type), "");
   LLVMBuildStore(builder, lp_build_zero(gallivm, lp_int_type(vs_type)), clipmask_bool_ptr);

   /* code generated texture sampling */
   sampler = draw_llvm_sampler_soa_create(draw_llvm_variant_key_samplers(key));

   /* In the elts path the loop walks positions [0, fetch_count) of the
    * element array; in the linear path it walks [start, start + count).
    */
   if (elts) {
      start = zero;
      end = fetch_count;
      count = fetch_count;
   }
   else {
      end = lp_build_add(&bld, start, count);
   }

   step = lp_build_const_int32(gallivm, vector_length);

   /* highest index that may be fetched; used to clamp the last iteration */
   fetch_max = LLVMBuildSub(builder, end, one, "fetch_max");

   lp_build_loop_begin(&lp_loop, gallivm, zero);
   {
      LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS];
      LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][LP_MAX_VECTOR_WIDTH / 32] = { { 0 } };
      LLVMValueRef io;
      LLVMValueRef clipmask;   /* holds the clipmask value */
      LLVMValueRef true_index_array = lp_build_zero(gallivm,
                                                    lp_type_uint_vec(32, 32*vector_length));
      const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS];

      io_itr = lp_loop.counter;

      /* vertex header of the first vertex handled by this iteration */
      io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, "");
#if DEBUG_STORE
      lp_build_printf(gallivm, " --- io %d = %p, loop counter %d\n",
                      io_itr, io, lp_loop.counter);
#endif
      for (i = 0; i < vector_length; ++i) {
         LLVMValueRef vert_index =
            LLVMBuildAdd(builder,
                         lp_loop.counter,
                         lp_build_const_int32(gallivm, i), "");
         LLVMValueRef true_index =
            LLVMBuildAdd(builder, start, vert_index, "");

         /* make sure we're not out of bounds, which can happen if the
          * number of vertices is not a multiple of vector_length,
          * because on the last iteration some of the vector_length
          * vertex fetches will be out of bounds */
         true_index = lp_build_min(&bld, true_index, fetch_max);

         if (elts) {
            LLVMValueRef fetch_ptr;
            LLVMValueRef index_overflowed;
            LLVMValueRef index_ptr =
               lp_build_alloca(
                  gallivm,
                  lp_build_vec_type(gallivm, lp_type_int(32)), "");
            struct lp_build_if_state if_ctx;
            index_overflowed = LLVMBuildICmp(builder, LLVMIntUGT,
                                             true_index, fetch_elt_max,
                                             "index_overflowed");

            lp_build_if(&if_ctx, gallivm, index_overflowed);
            {
               /* Generate maximum possible index so that
                * generate_fetch can treat it just like
                * any other overflow and return zeros.
                * We don't have to worry about the restart
                * primitive index because it has already been
                * handled
                */
               LLVMValueRef val =
                  lp_build_const_int32(gallivm, 0xffffffff);
               LLVMBuildStore(builder, val, index_ptr);
            }
            lp_build_else(&if_ctx);
            {
               /* in range: read the real vertex index from the
                * element array
                */
               LLVMValueRef val;
               fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
                                        &true_index, 1, "");
               val = LLVMBuildLoad(builder, fetch_ptr, "");
               LLVMBuildStore(builder, val, index_ptr);
            }
            lp_build_endif(&if_ctx);
            true_index = LLVMBuildLoad(builder, index_ptr, "true_index");
         }
         /* remember the index of lane i; it becomes the vertex id below */
         true_index_array = LLVMBuildInsertElement(
            gallivm->builder, true_index_array, true_index,
            lp_build_const_int32(gallivm, i), "");

         /* fetch every vertex element of this vertex (AoS layout) */
         for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
            struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
            LLVMValueRef vb_index =
               lp_build_const_int32(gallivm, velem->vertex_buffer_index);
            LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, "");
            generate_fetch(gallivm, draw, vbuffers_ptr,
                           &aos_attribs[j][i], velem, vb, true_index,
                           system_values.instance_id, start_instance);
         }
      }
      convert_to_soa(gallivm, aos_attribs, inputs,
                     draw->pt.nr_vertex_elements, vs_type);

      /* In the paths with elts vertex id has to be unaffected by the
       * index bias and because indices inside our elements array have
       * already had index bias applied we need to subtract it here to
       * get back to the original index.
       * in the linear paths vertex id has to be unaffected by the
       * original start index and because we abuse the 'start' variable
       * to either represent the actual start index or the index at which
       * the primitive was split (we split rendering into chunks of at
       * most 4095-vertices) we need to back out the original start
       * index out of our vertex id here.
       */
      system_values.basevertex = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm,
                                                       lp_type_uint_vec(32, 32*vector_length)),
                                                    vertex_id_offset);
      system_values.vertex_id = true_index_array;
      system_values.vertex_id_nobase = LLVMBuildSub(builder, true_index_array,
                                                      system_values.basevertex, "");

      ptr_aos = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) inputs;
      generate_vs(variant,
                  builder,
                  vs_type,
                  outputs,
                  ptr_aos,
                  &system_values,
                  context_ptr,
                  sampler,
                  key->clamp_vertex_color);

      /* pos and cv are unsigned, so -1 is converted to UINT_MAX for these
       * comparisons; presumably that value marks "no such output" --
       * TODO confirm against where position_output is set.
       */
      if (pos != -1 && cv != -1) {
         /* store original positions in clip before further manipulation */
         store_clip(gallivm, vs_type, io, outputs, FALSE, key->clip_user ? cv : pos);
         store_clip(gallivm, vs_type, io, outputs, TRUE, pos);

         /* do cliptest */
         if (enable_cliptest) {
            LLVMValueRef temp = LLVMBuildLoad(builder, clipmask_bool_ptr, "");
            /* allocate clipmask, assign it integer type */
            clipmask = generate_clipmask(llvm,
                                         gallivm,
                                         vs_type,
                                         outputs,
                                         key->clip_xy,
                                         key->clip_z,
                                         key->clip_user,
                                         key->clip_halfz,
                                         key->ucp_enable,
                                         context_ptr, &have_clipdist);
            /* accumulate this iteration's mask into the running one */
            temp = LLVMBuildOr(builder, clipmask, temp, "");
            /* store temporary clipping boolean value */
            LLVMBuildStore(builder, temp, clipmask_bool_ptr);
         }
         else {
            clipmask = lp_build_const_int_vec(gallivm, lp_int_type(vs_type), 0);
         }

         /* do viewport mapping */
         if (!bypass_viewport) {
            generate_viewport(variant, builder, vs_type, outputs, context_ptr);
         }
      }
      else {
         clipmask = lp_build_const_int_vec(gallivm, lp_int_type(vs_type), 0);
      }

      /* store clipmask in vertex header,
       * original positions in clip
       * and transformed positions in data
       */
      convert_to_aos(gallivm, io, NULL, outputs, clipmask,
                     vs_info->num_outputs, vs_type,
                     have_clipdist);
   }
   /* Advance the counter by step (vector_length); presumably the loop is
    * left once the advanced counter is >= count (unsigned compare) --
    * see lp_build_loop_end_cond().
    */
   lp_build_loop_end_cond(&lp_loop, count, step, LLVMIntUGE);

   sampler->destroy(sampler);

   /* return clipping boolean value for function */
   ret = clipmask_booli32(gallivm, vs_type, clipmask_bool_ptr);

   LLVMBuildRet(builder, ret);

   gallivm_verify_function(gallivm, variant_func);
}
  1816.  
  1817.  
  1818. struct draw_llvm_variant_key *
  1819. draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
  1820. {
  1821.    unsigned i;
  1822.    struct draw_llvm_variant_key *key;
  1823.    struct draw_sampler_static_state *draw_sampler;
  1824.  
  1825.    key = (struct draw_llvm_variant_key *)store;
  1826.  
  1827.    key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color; /**/
  1828.  
  1829.    /* Presumably all variants of the shader should have the same
  1830.     * number of vertex elements - ie the number of shader inputs.
  1831.     * NOTE: we NEED to store the needed number of needed inputs
  1832.     * here, not the number of provided elements to match keysize
  1833.     * (and the offset of sampler state in the key).
  1834.     */
  1835.    key->nr_vertex_elements = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_INPUT] + 1;
  1836.    assert(key->nr_vertex_elements <= llvm->draw->pt.nr_vertex_elements);
  1837.  
  1838.    /* will have to rig this up properly later */
  1839.    key->clip_xy = llvm->draw->clip_xy;
  1840.    key->clip_z = llvm->draw->clip_z;
  1841.    key->clip_user = llvm->draw->clip_user;
  1842.    key->bypass_viewport = llvm->draw->bypass_viewport;
  1843.    key->clip_halfz = llvm->draw->rasterizer->clip_halfz;
  1844.    key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
  1845.    key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
  1846.    key->has_gs = llvm->draw->gs.geometry_shader != NULL;
  1847.    key->num_outputs = draw_total_vs_outputs(llvm->draw);
  1848.    key->pad1 = 0;
  1849.  
  1850.    /* All variants of this shader will have the same value for
  1851.     * nr_samplers.  Not yet trying to compact away holes in the
  1852.     * sampler array.
  1853.     */
  1854.    key->nr_samplers = llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
  1855.    if (llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
  1856.       key->nr_sampler_views =
  1857.          llvm->draw->vs.vertex_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
  1858.    }
  1859.    else {
  1860.       key->nr_sampler_views = key->nr_samplers;
  1861.    }
  1862.  
  1863.    draw_sampler = draw_llvm_variant_key_samplers(key);
  1864.  
  1865.    memcpy(key->vertex_element,
  1866.           llvm->draw->pt.vertex_element,
  1867.           sizeof(struct pipe_vertex_element) * key->nr_vertex_elements);
  1868.  
  1869.    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
  1870.  
  1871.    for (i = 0 ; i < key->nr_samplers; i++) {
  1872.       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
  1873.                                       llvm->draw->samplers[PIPE_SHADER_VERTEX][i]);
  1874.    }
  1875.    for (i = 0 ; i < key->nr_sampler_views; i++) {
  1876.       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
  1877.                                       llvm->draw->sampler_views[PIPE_SHADER_VERTEX][i]);
  1878.    }
  1879.  
  1880.    return key;
  1881. }
  1882.  
  1883.  
  1884. void
  1885. draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key)
  1886. {
  1887.    unsigned i;
  1888.    struct draw_sampler_static_state *sampler = draw_llvm_variant_key_samplers(key);
  1889.  
  1890.    debug_printf("clamp_vertex_color = %u\n", key->clamp_vertex_color);
  1891.    debug_printf("clip_xy = %u\n", key->clip_xy);
  1892.    debug_printf("clip_z = %u\n", key->clip_z);
  1893.    debug_printf("clip_user = %u\n", key->clip_user);
  1894.    debug_printf("bypass_viewport = %u\n", key->bypass_viewport);
  1895.    debug_printf("clip_halfz = %u\n", key->clip_halfz);
  1896.    debug_printf("need_edgeflags = %u\n", key->need_edgeflags);
  1897.    debug_printf("has_gs = %u\n", key->has_gs);
  1898.    debug_printf("ucp_enable = %u\n", key->ucp_enable);
  1899.  
  1900.    for (i = 0 ; i < key->nr_vertex_elements; i++) {
  1901.       debug_printf("vertex_element[%i].src_offset = %u\n", i, key->vertex_element[i].src_offset);
  1902.       debug_printf("vertex_element[%i].instance_divisor = %u\n", i, key->vertex_element[i].instance_divisor);
  1903.       debug_printf("vertex_element[%i].vertex_buffer_index = %u\n", i, key->vertex_element[i].vertex_buffer_index);
  1904.       debug_printf("vertex_element[%i].src_format = %s\n", i, util_format_name(key->vertex_element[i].src_format));
  1905.    }
  1906.  
  1907.    for (i = 0 ; i < key->nr_sampler_views; i++) {
  1908.       debug_printf("sampler[%i].src_format = %s\n", i, util_format_name(sampler[i].texture_state.format));
  1909.    }
  1910. }
  1911.  
  1912.  
  1913. void
  1914. draw_llvm_set_mapped_texture(struct draw_context *draw,
  1915.                              unsigned shader_stage,
  1916.                              unsigned sview_idx,
  1917.                              uint32_t width, uint32_t height, uint32_t depth,
  1918.                              uint32_t first_level, uint32_t last_level,
  1919.                              const void *base_ptr,
  1920.                              uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS],
  1921.                              uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
  1922.                              uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS])
  1923. {
  1924.    unsigned j;
  1925.    struct draw_jit_texture *jit_tex;
  1926.  
  1927.    assert(shader_stage == PIPE_SHADER_VERTEX ||
  1928.           shader_stage == PIPE_SHADER_GEOMETRY);
  1929.  
  1930.    if (shader_stage == PIPE_SHADER_VERTEX) {
  1931.       assert(sview_idx < Elements(draw->llvm->jit_context.textures));
  1932.  
  1933.       jit_tex = &draw->llvm->jit_context.textures[sview_idx];
  1934.    } else if (shader_stage == PIPE_SHADER_GEOMETRY) {
  1935.       assert(sview_idx < Elements(draw->llvm->gs_jit_context.textures));
  1936.  
  1937.       jit_tex = &draw->llvm->gs_jit_context.textures[sview_idx];
  1938.    } else {
  1939.       assert(0);
  1940.       return;
  1941.    }
  1942.  
  1943.    jit_tex->width = width;
  1944.    jit_tex->height = height;
  1945.    jit_tex->depth = depth;
  1946.    jit_tex->first_level = first_level;
  1947.    jit_tex->last_level = last_level;
  1948.    jit_tex->base = base_ptr;
  1949.  
  1950.    for (j = first_level; j <= last_level; j++) {
  1951.       jit_tex->mip_offsets[j] = mip_offsets[j];
  1952.       jit_tex->row_stride[j] = row_stride[j];
  1953.       jit_tex->img_stride[j] = img_stride[j];
  1954.    }
  1955. }
  1956.  
  1957.  
  1958. void
  1959. draw_llvm_set_sampler_state(struct draw_context *draw,
  1960.                             unsigned shader_type)
  1961. {
  1962.    unsigned i;
  1963.  
  1964.    if (shader_type == PIPE_SHADER_VERTEX) {
  1965.       for (i = 0; i < draw->num_samplers[PIPE_SHADER_VERTEX]; i++) {
  1966.          struct draw_jit_sampler *jit_sam = &draw->llvm->jit_context.samplers[i];
  1967.  
  1968.          if (draw->samplers[i]) {
  1969.             const struct pipe_sampler_state *s
  1970.                = draw->samplers[PIPE_SHADER_VERTEX][i];
  1971.             jit_sam->min_lod = s->min_lod;
  1972.             jit_sam->max_lod = s->max_lod;
  1973.             jit_sam->lod_bias = s->lod_bias;
  1974.             COPY_4V(jit_sam->border_color, s->border_color.f);
  1975.          }
  1976.       }
  1977.    } else if (shader_type == PIPE_SHADER_GEOMETRY) {
  1978.       for (i = 0; i < draw->num_samplers[PIPE_SHADER_GEOMETRY]; i++) {
  1979.          struct draw_jit_sampler *jit_sam = &draw->llvm->gs_jit_context.samplers[i];
  1980.  
  1981.          if (draw->samplers[i]) {
  1982.             const struct pipe_sampler_state *s
  1983.                = draw->samplers[PIPE_SHADER_GEOMETRY][i];
  1984.             jit_sam->min_lod = s->min_lod;
  1985.             jit_sam->max_lod = s->max_lod;
  1986.             jit_sam->lod_bias = s->lod_bias;
  1987.             COPY_4V(jit_sam->border_color, s->border_color.f);
  1988.          }
  1989.       }
  1990.    }
  1991. }
  1992.  
  1993.  
  1994. void
  1995. draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
  1996. {
  1997.    struct draw_llvm *llvm = variant->llvm;
  1998.  
  1999.    gallivm_destroy(variant->gallivm);
  2000.  
  2001.    remove_from_list(&variant->list_item_local);
  2002.    variant->shader->variants_cached--;
  2003.    remove_from_list(&variant->list_item_global);
  2004.    llvm->nr_variants--;
  2005.    FREE(variant);
  2006. }
  2007.  
  2008.  
  2009. /**
  2010.  * Create LLVM types for various structures.
  2011.  */
  2012. static void
  2013. create_gs_jit_types(struct draw_gs_llvm_variant *var)
  2014. {
  2015.    struct gallivm_state *gallivm = var->gallivm;
  2016.    LLVMTypeRef texture_type, sampler_type, context_type;
  2017.  
  2018.    texture_type = create_jit_texture_type(gallivm, "texture");
  2019.    sampler_type = create_jit_sampler_type(gallivm, "sampler");
  2020.  
  2021.    context_type = create_gs_jit_context_type(gallivm,
  2022.                                              var->shader->base.vector_length,
  2023.                                              texture_type, sampler_type,
  2024.                                              "draw_gs_jit_context");
  2025.    var->context_ptr_type = LLVMPointerType(context_type, 0);
  2026.  
  2027.    var->input_array_type = create_gs_jit_input_type(gallivm);
  2028. }
  2029.  
  2030. static LLVMTypeRef
  2031. get_gs_context_ptr_type(struct draw_gs_llvm_variant *variant)
  2032. {
  2033.    if (!variant->context_ptr_type)
  2034.       create_gs_jit_types(variant);
  2035.    return variant->context_ptr_type;
  2036. }
  2037.  
  2038. static LLVMValueRef
  2039. generate_mask_value(struct draw_gs_llvm_variant *variant,
  2040.                     struct lp_type gs_type)
  2041. {
  2042.    struct gallivm_state *gallivm = variant->gallivm;
  2043.    LLVMBuilderRef builder = gallivm->builder;
  2044.    struct lp_type mask_type = lp_int_type(gs_type);
  2045.    LLVMValueRef num_prims;
  2046.    LLVMValueRef mask_val = lp_build_const_vec(gallivm, mask_type, 0);
  2047.    unsigned i;
  2048.  
  2049.    num_prims = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, mask_type),
  2050.                                   variant->num_prims);
  2051.    for (i = 0; i < gs_type.length; i++) {
  2052.       LLVMValueRef idx = lp_build_const_int32(gallivm, i);
  2053.       mask_val = LLVMBuildInsertElement(builder, mask_val, idx, idx, "");
  2054.    }
  2055.    mask_val = lp_build_compare(gallivm, mask_type,
  2056.                                PIPE_FUNC_GREATER, num_prims, mask_val);
  2057.  
  2058.    return mask_val;
  2059. }
  2060.  
  2061. static void
  2062. draw_gs_llvm_generate(struct draw_llvm *llvm,
  2063.                       struct draw_gs_llvm_variant *variant)
  2064. {
  2065.    struct gallivm_state *gallivm = variant->gallivm;
  2066.    LLVMContextRef context = gallivm->context;
  2067.    LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
  2068.    LLVMTypeRef arg_types[6];
  2069.    LLVMTypeRef func_type;
  2070.    LLVMValueRef variant_func;
  2071.    LLVMValueRef context_ptr;
  2072.    LLVMValueRef prim_id_ptr;
  2073.    LLVMBasicBlockRef block;
  2074.    LLVMBuilderRef builder;
  2075.    LLVMValueRef io_ptr, input_array, num_prims, mask_val;
  2076.    struct lp_build_sampler_soa *sampler = 0;
  2077.    struct lp_build_context bld;
  2078.    struct lp_bld_tgsi_system_values system_values;
  2079.    char func_name[64];
  2080.    struct lp_type gs_type;
  2081.    unsigned i;
  2082.    struct draw_gs_llvm_iface gs_iface;
  2083.    const struct tgsi_token *tokens = variant->shader->base.state.tokens;
  2084.    LLVMValueRef consts_ptr, num_consts_ptr;
  2085.    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
  2086.    struct lp_build_mask_context mask;
  2087.    const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
  2088.    unsigned vector_length = variant->shader->base.vector_length;
  2089.  
  2090.    memset(&system_values, 0, sizeof(system_values));
  2091.  
  2092.    util_snprintf(func_name, sizeof(func_name), "draw_llvm_gs_variant%u",
  2093.                  variant->shader->variants_cached);
  2094.  
  2095.    assert(variant->vertex_header_ptr_type);
  2096.  
  2097.    arg_types[0] = get_gs_context_ptr_type(variant);    /* context */
  2098.    arg_types[1] = variant->input_array_type;           /* input */
  2099.    arg_types[2] = variant->vertex_header_ptr_type;     /* vertex_header */
  2100.    arg_types[3] = int32_type;                          /* num_prims */
  2101.    arg_types[4] = int32_type;                          /* instance_id */
  2102.    arg_types[5] = LLVMPointerType(
  2103.       LLVMVectorType(int32_type, vector_length), 0);   /* prim_id_ptr */
  2104.  
  2105.    func_type = LLVMFunctionType(int32_type, arg_types, Elements(arg_types), 0);
  2106.  
  2107.    variant_func = LLVMAddFunction(gallivm->module, func_name, func_type);
  2108.  
  2109.    variant->function = variant_func;
  2110.  
  2111.    LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
  2112.  
  2113.    for (i = 0; i < Elements(arg_types); ++i)
  2114.       if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
  2115.          LLVMAddAttribute(LLVMGetParam(variant_func, i),
  2116.                           LLVMNoAliasAttribute);
  2117.  
  2118.    context_ptr               = LLVMGetParam(variant_func, 0);
  2119.    input_array               = LLVMGetParam(variant_func, 1);
  2120.    io_ptr                    = LLVMGetParam(variant_func, 2);
  2121.    num_prims                 = LLVMGetParam(variant_func, 3);
  2122.    system_values.instance_id = LLVMGetParam(variant_func, 4);
  2123.    prim_id_ptr               = LLVMGetParam(variant_func, 5);
  2124.  
  2125.    lp_build_name(context_ptr, "context");
  2126.    lp_build_name(input_array, "input");
  2127.    lp_build_name(io_ptr, "io");
  2128.    lp_build_name(num_prims, "num_prims");
  2129.    lp_build_name(system_values.instance_id, "instance_id");
  2130.    lp_build_name(prim_id_ptr, "prim_id_ptr");
  2131.  
  2132.    variant->context_ptr = context_ptr;
  2133.    variant->io_ptr = io_ptr;
  2134.    variant->num_prims = num_prims;
  2135.  
  2136.    gs_iface.base.fetch_input = draw_gs_llvm_fetch_input;
  2137.    gs_iface.base.emit_vertex = draw_gs_llvm_emit_vertex;
  2138.    gs_iface.base.end_primitive = draw_gs_llvm_end_primitive;
  2139.    gs_iface.base.gs_epilogue = draw_gs_llvm_epilogue;
  2140.    gs_iface.input = input_array;
  2141.    gs_iface.variant = variant;
  2142.  
  2143.    /*
  2144.     * Function body
  2145.     */
  2146.  
  2147.    block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
  2148.    builder = gallivm->builder;
  2149.    LLVMPositionBuilderAtEnd(builder, block);
  2150.  
  2151.    lp_build_context_init(&bld, gallivm, lp_type_int(32));
  2152.  
  2153.    memset(&gs_type, 0, sizeof gs_type);
  2154.    gs_type.floating = TRUE; /* floating point values */
  2155.    gs_type.sign = TRUE;     /* values are signed */
  2156.    gs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
  2157.    gs_type.width = 32;      /* 32-bit float */
  2158.    gs_type.length = vector_length;
  2159.  
  2160.    consts_ptr = draw_gs_jit_context_constants(variant->gallivm, context_ptr);
  2161.    num_consts_ptr =
  2162.       draw_gs_jit_context_num_constants(variant->gallivm, context_ptr);
  2163.  
  2164.    /* code generated texture sampling */
  2165.    sampler = draw_llvm_sampler_soa_create(variant->key.samplers);
  2166.  
  2167.    mask_val = generate_mask_value(variant, gs_type);
  2168.    lp_build_mask_begin(&mask, gallivm, gs_type, mask_val);
  2169.  
  2170.    if (gs_info->uses_primid) {
  2171.       system_values.prim_id = LLVMBuildLoad(builder, prim_id_ptr, "prim_id");
  2172.    }
  2173.  
  2174.    if (gallivm_debug & (GALLIVM_DEBUG_TGSI | GALLIVM_DEBUG_IR)) {
  2175.       tgsi_dump(tokens, 0);
  2176.       draw_gs_llvm_dump_variant_key(&variant->key);
  2177.    }
  2178.  
  2179.    lp_build_tgsi_soa(variant->gallivm,
  2180.                      tokens,
  2181.                      gs_type,
  2182.                      &mask,
  2183.                      consts_ptr,
  2184.                      num_consts_ptr,
  2185.                      &system_values,
  2186.                      NULL,
  2187.                      outputs,
  2188.                      context_ptr,
  2189.                      sampler,
  2190.                      &llvm->draw->gs.geometry_shader->info,
  2191.                      (const struct lp_build_tgsi_gs_iface *)&gs_iface);
  2192.  
  2193.    sampler->destroy(sampler);
  2194.  
  2195.    lp_build_mask_end(&mask);
  2196.  
  2197.    LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
  2198.  
  2199.    gallivm_verify_function(gallivm, variant_func);
  2200. }
  2201.  
  2202.  
  2203. struct draw_gs_llvm_variant *
  2204. draw_gs_llvm_create_variant(struct draw_llvm *llvm,
  2205.                             unsigned num_outputs,
  2206.                             const struct draw_gs_llvm_variant_key *key)
  2207. {
  2208.    struct draw_gs_llvm_variant *variant;
  2209.    struct llvm_geometry_shader *shader =
  2210.       llvm_geometry_shader(llvm->draw->gs.geometry_shader);
  2211.    LLVMTypeRef vertex_header;
  2212.    char module_name[64];
  2213.  
  2214.    variant = MALLOC(sizeof *variant +
  2215.                     shader->variant_key_size -
  2216.                     sizeof variant->key);
  2217.    if (variant == NULL)
  2218.       return NULL;
  2219.  
  2220.    variant->llvm = llvm;
  2221.    variant->shader = shader;
  2222.  
  2223.    util_snprintf(module_name, sizeof(module_name), "draw_llvm_gs_variant%u",
  2224.                  variant->shader->variants_cached);
  2225.  
  2226.    variant->gallivm = gallivm_create(module_name, llvm->context);
  2227.  
  2228.    create_gs_jit_types(variant);
  2229.  
  2230.    memcpy(&variant->key, key, shader->variant_key_size);
  2231.  
  2232.    vertex_header = create_jit_vertex_header(variant->gallivm, num_outputs);
  2233.  
  2234.    variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
  2235.  
  2236.    draw_gs_llvm_generate(llvm, variant);
  2237.  
  2238.    gallivm_compile_module(variant->gallivm);
  2239.  
  2240.    variant->jit_func = (draw_gs_jit_func)
  2241.          gallivm_jit_function(variant->gallivm, variant->function);
  2242.  
  2243.    gallivm_free_ir(variant->gallivm);
  2244.  
  2245.    variant->list_item_global.base = variant;
  2246.    variant->list_item_local.base = variant;
  2247.    /*variant->no = */shader->variants_created++;
  2248.    variant->list_item_global.base = variant;
  2249.  
  2250.    return variant;
  2251. }
  2252.  
  2253. void
  2254. draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant)
  2255. {
  2256.    struct draw_llvm *llvm = variant->llvm;
  2257.  
  2258.    gallivm_destroy(variant->gallivm);
  2259.  
  2260.    remove_from_list(&variant->list_item_local);
  2261.    variant->shader->variants_cached--;
  2262.    remove_from_list(&variant->list_item_global);
  2263.    llvm->nr_gs_variants--;
  2264.    FREE(variant);
  2265. }
  2266.  
  2267. struct draw_gs_llvm_variant_key *
  2268. draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
  2269. {
  2270.    unsigned i;
  2271.    struct draw_gs_llvm_variant_key *key;
  2272.    struct draw_sampler_static_state *draw_sampler;
  2273.  
  2274.    key = (struct draw_gs_llvm_variant_key *)store;
  2275.  
  2276.    key->num_outputs = draw_total_gs_outputs(llvm->draw);
  2277.  
  2278.    /* All variants of this shader will have the same value for
  2279.     * nr_samplers.  Not yet trying to compact away holes in the
  2280.     * sampler array.
  2281.     */
  2282.    key->nr_samplers = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER] + 1;
  2283.    if (llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
  2284.       key->nr_sampler_views =
  2285.          llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
  2286.    }
  2287.    else {
  2288.       key->nr_sampler_views = key->nr_samplers;
  2289.    }
  2290.  
  2291.    draw_sampler = key->samplers;
  2292.  
  2293.    memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
  2294.  
  2295.    for (i = 0 ; i < key->nr_samplers; i++) {
  2296.       lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
  2297.                                       llvm->draw->samplers[PIPE_SHADER_GEOMETRY][i]);
  2298.    }
  2299.    for (i = 0 ; i < key->nr_sampler_views; i++) {
  2300.       lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
  2301.                                       llvm->draw->sampler_views[PIPE_SHADER_GEOMETRY][i]);
  2302.    }
  2303.  
  2304.    return key;
  2305. }
  2306.  
  2307. void
  2308. draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key)
  2309. {
  2310.    unsigned i;
  2311.    struct draw_sampler_static_state *sampler = key->samplers;
  2312.  
  2313.    for (i = 0 ; i < key->nr_sampler_views; i++) {
  2314.       debug_printf("sampler[%i].src_format = %s\n", i,
  2315.                    util_format_name(sampler[i].texture_state.format));
  2316.    }
  2317. }
  2318.