Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2009 VMware, Inc.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. #include "draw_gs.h"
  29.  
  30. #include "draw_private.h"
  31. #include "draw_context.h"
  32. #ifdef HAVE_LLVM
  33. #include "draw_llvm.h"
  34. #endif
  35.  
  36. #include "tgsi/tgsi_parse.h"
  37. #include "tgsi/tgsi_exec.h"
  38.  
  39. #include "pipe/p_shader_tokens.h"
  40.  
  41. #include "util/u_math.h"
  42. #include "util/u_memory.h"
  43. #include "util/u_prim.h"
  44.  
  45. /* FIXME: move this definition out of this file.
   *  Element count that draw_gs_init() multiplies by
   *  sizeof(struct tgsi_exec_vector) when allocating and zeroing the TGSI
   *  machine's Primitives array. */
  46. #define MAX_PRIMITIVES 64
  47.  
  48. static INLINE int
  49. draw_gs_get_input_index(int semantic, int index,
  50.                         const struct tgsi_shader_info *input_info)
  51. {
  52.    int i;
  53.    const ubyte *input_semantic_names = input_info->output_semantic_name;
  54.    const ubyte *input_semantic_indices = input_info->output_semantic_index;
  55.    for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
  56.       if (input_semantic_names[i] == semantic &&
  57.           input_semantic_indices[i] == index)
  58.          return i;
  59.    }
  60.    return -1;
  61. }
  62.  
  63. /**
  64.  * We execute geometry shaders in the SOA mode, so ideally we want to
  65.  * flush when the number of currently fetched primitives is equal to
  66.  * the number of elements in the SOA vector. This ensures that the
  67.  * throughput is optimized for the given vector instruction set.
  68.  */
  69. static INLINE boolean
  70. draw_gs_should_flush(struct draw_geometry_shader *shader)
  71. {
  72.    return (shader->fetched_prim_count == shader->vector_length);
  73. }
  74.  
  75. /*#define DEBUG_OUTPUTS 1*/
  76. static void
  77. tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader,
  78.                       unsigned num_primitives,
  79.                       float (**p_output)[4])
  80. {
  81.    struct tgsi_exec_machine *machine = shader->machine;
  82.    unsigned prim_idx, j, slot;
  83.    unsigned current_idx = 0;
  84.    float (*output)[4];
  85.  
  86.    output = *p_output;
  87.  
  88.    /* Unswizzle all output results.
  89.     */
  90.  
  91.    for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) {
  92.       unsigned num_verts_per_prim = machine->Primitives[prim_idx];
  93.       shader->primitive_lengths[prim_idx + shader->emitted_primitives] =
  94.          machine->Primitives[prim_idx];
  95.       shader->emitted_vertices += num_verts_per_prim;
  96.       for (j = 0; j < num_verts_per_prim; j++, current_idx++) {
  97.          int idx = current_idx * shader->info.num_outputs;
  98. #ifdef DEBUG_OUTPUTS
  99.          debug_printf("%d) Output vert:\n", idx / shader->info.num_outputs);
  100. #endif
  101.          for (slot = 0; slot < shader->info.num_outputs; slot++) {
  102.             output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[0];
  103.             output[slot][1] = machine->Outputs[idx + slot].xyzw[1].f[0];
  104.             output[slot][2] = machine->Outputs[idx + slot].xyzw[2].f[0];
  105.             output[slot][3] = machine->Outputs[idx + slot].xyzw[3].f[0];
  106. #ifdef DEBUG_OUTPUTS
  107.             debug_printf("\t%d: %f %f %f %f\n", slot,
  108.                          output[slot][0],
  109.                          output[slot][1],
  110.                          output[slot][2],
  111.                          output[slot][3]);
  112. #endif
  113.          }
  114.          output = (float (*)[4])((char *)output + shader->vertex_size);
  115.       }
  116.    }
  117.    *p_output = output;
  118.    shader->emitted_primitives += num_primitives;
  119. }
  120.  
  121. /*#define DEBUG_INPUTS 1*/
  122. static void tgsi_fetch_gs_input(struct draw_geometry_shader *shader,
  123.                                 unsigned *indices,
  124.                                 unsigned num_vertices,
  125.                                 unsigned prim_idx)
  126. {
  127.    struct tgsi_exec_machine *machine = shader->machine;
  128.    unsigned slot, i;
  129.    int vs_slot;
  130.    unsigned input_vertex_stride = shader->input_vertex_stride;
  131.    const float (*input_ptr)[4];
  132.  
  133.    input_ptr = shader->input;
  134.  
  135.    for (i = 0; i < num_vertices; ++i) {
  136.       const float (*input)[4];
  137. #if DEBUG_INPUTS
  138.       debug_printf("%d) vertex index = %d (prim idx = %d)\n",
  139.                    i, indices[i], prim_idx);
  140. #endif
  141.       input = (const float (*)[4])(
  142.          (const char *)input_ptr + (indices[i] * input_vertex_stride));
  143.       for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) {
  144.          unsigned idx = i * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot;
  145.          if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) {
  146.             machine->Inputs[idx].xyzw[0].u[prim_idx] = shader->in_prim_idx;
  147.             machine->Inputs[idx].xyzw[1].u[prim_idx] = shader->in_prim_idx;
  148.             machine->Inputs[idx].xyzw[2].u[prim_idx] = shader->in_prim_idx;
  149.             machine->Inputs[idx].xyzw[3].u[prim_idx] = shader->in_prim_idx;
  150.          } else {
  151.             vs_slot = draw_gs_get_input_index(
  152.                shader->info.input_semantic_name[slot],
  153.                shader->info.input_semantic_index[slot],
  154.                shader->input_info);
  155.             if (vs_slot < 0) {
  156.                debug_printf("VS/GS signature mismatch!\n");
  157.                machine->Inputs[idx].xyzw[0].f[prim_idx] = 0;
  158.                machine->Inputs[idx].xyzw[1].f[prim_idx] = 0;
  159.                machine->Inputs[idx].xyzw[2].f[prim_idx] = 0;
  160.                machine->Inputs[idx].xyzw[3].f[prim_idx] = 0;
  161.             } else {
  162. #if DEBUG_INPUTS
  163.                debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n",
  164.                             slot, vs_slot, idx);
  165.                assert(!util_is_inf_or_nan(input[vs_slot][0]));
  166.                assert(!util_is_inf_or_nan(input[vs_slot][1]));
  167.                assert(!util_is_inf_or_nan(input[vs_slot][2]));
  168.                assert(!util_is_inf_or_nan(input[vs_slot][3]));
  169. #endif
  170.                machine->Inputs[idx].xyzw[0].f[prim_idx] = input[vs_slot][0];
  171.                machine->Inputs[idx].xyzw[1].f[prim_idx] = input[vs_slot][1];
  172.                machine->Inputs[idx].xyzw[2].f[prim_idx] = input[vs_slot][2];
  173.                machine->Inputs[idx].xyzw[3].f[prim_idx] = input[vs_slot][3];
  174. #if DEBUG_INPUTS
  175.                debug_printf("\t\t%f %f %f %f\n",
  176.                             machine->Inputs[idx].xyzw[0].f[prim_idx],
  177.                             machine->Inputs[idx].xyzw[1].f[prim_idx],
  178.                             machine->Inputs[idx].xyzw[2].f[prim_idx],
  179.                             machine->Inputs[idx].xyzw[3].f[prim_idx]);
  180. #endif
  181.                ++vs_slot;
  182.             }
  183.          }
  184.       }
  185.    }
  186. }
  187.  
  188. static void tgsi_gs_prepare(struct draw_geometry_shader *shader,
  189.                             const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
  190.                             const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS])
  191. {
  192.    struct tgsi_exec_machine *machine = shader->machine;
  193.  
  194.    tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS,
  195.                                   constants, constants_size);
  196. }
  197.  
  198. static unsigned tgsi_gs_run(struct draw_geometry_shader *shader,
  199.                             unsigned input_primitives)
  200. {
  201.    struct tgsi_exec_machine *machine = shader->machine;
  202.  
  203.    tgsi_set_exec_mask(machine,
  204.                       1,
  205.                       input_primitives > 1,
  206.                       input_primitives > 2,
  207.                       input_primitives > 3);
  208.  
  209.    /* run interpreter */
  210.    tgsi_exec_machine_run(machine);
  211.  
  212.    return
  213.       machine->Temps[TGSI_EXEC_TEMP_PRIMITIVE_I].xyzw[TGSI_EXEC_TEMP_PRIMITIVE_C].u[0];
  214. }
  215.  
  216. #ifdef HAVE_LLVM
  217.  
  218. static void
  219. llvm_fetch_gs_input(struct draw_geometry_shader *shader,
  220.                     unsigned *indices,
  221.                     unsigned num_vertices,
  222.                     unsigned prim_idx)
  223. {
  224.    unsigned slot, i;
  225.    int vs_slot;
  226.    unsigned input_vertex_stride = shader->input_vertex_stride;
  227.    const float (*input_ptr)[4];
  228.    float (*input_data)[6][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS][TGSI_NUM_CHANNELS] = &shader->gs_input->data;
  229.  
  230.    shader->llvm_prim_ids[shader->fetched_prim_count] = shader->in_prim_idx;
  231.  
  232.    input_ptr = shader->input;
  233.  
  234.    for (i = 0; i < num_vertices; ++i) {
  235.       const float (*input)[4];
  236. #if DEBUG_INPUTS
  237.       debug_printf("%d) vertex index = %d (prim idx = %d)\n",
  238.                    i, indices[i], prim_idx);
  239. #endif
  240.       input = (const float (*)[4])(
  241.          (const char *)input_ptr + (indices[i] * input_vertex_stride));
  242.       for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) {
  243.          if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) {
  244.             /* skip. we handle system values through gallivm */
  245.             /* NOTE: If we hit this case here it's an ordinary input not a sv,
  246.              * even though it probably should be a sv.
  247.              * Not sure how to set it up as regular input however if that even,
  248.              * would make sense so hack around this later in gallivm.
  249.              */
  250.          } else {
  251.             vs_slot = draw_gs_get_input_index(
  252.                shader->info.input_semantic_name[slot],
  253.                shader->info.input_semantic_index[slot],
  254.                shader->input_info);
  255.             if (vs_slot < 0) {
  256.                debug_printf("VS/GS signature mismatch!\n");
  257.                (*input_data)[i][slot][0][prim_idx] = 0;
  258.                (*input_data)[i][slot][1][prim_idx] = 0;
  259.                (*input_data)[i][slot][2][prim_idx] = 0;
  260.                (*input_data)[i][slot][3][prim_idx] = 0;
  261.             } else {
  262. #if DEBUG_INPUTS
  263.                debug_printf("\tSlot = %d, vs_slot = %d, i = %d:\n",
  264.                             slot, vs_slot, i);
  265.                assert(!util_is_inf_or_nan(input[vs_slot][0]));
  266.                assert(!util_is_inf_or_nan(input[vs_slot][1]));
  267.                assert(!util_is_inf_or_nan(input[vs_slot][2]));
  268.                assert(!util_is_inf_or_nan(input[vs_slot][3]));
  269. #endif
  270.                (*input_data)[i][slot][0][prim_idx] = input[vs_slot][0];
  271.                (*input_data)[i][slot][1][prim_idx] = input[vs_slot][1];
  272.                (*input_data)[i][slot][2][prim_idx] = input[vs_slot][2];
  273.                (*input_data)[i][slot][3][prim_idx] = input[vs_slot][3];
  274. #if DEBUG_INPUTS
  275.                debug_printf("\t\t%f %f %f %f\n",
  276.                             (*input_data)[i][slot][0][prim_idx],
  277.                             (*input_data)[i][slot][1][prim_idx],
  278.                             (*input_data)[i][slot][2][prim_idx],
  279.                             (*input_data)[i][slot][3][prim_idx]);
  280. #endif
  281.                ++vs_slot;
  282.             }
  283.          }
  284.       }
  285.    }
  286. }
  287.  
  288. static void
  289. llvm_fetch_gs_outputs(struct draw_geometry_shader *shader,
  290.                       unsigned num_primitives,
  291.                       float (**p_output)[4])
  292. {
  293.    int total_verts = 0;
  294.    int vertex_count = 0;
  295.    int total_prims = 0;
  296.    int max_prims_per_invocation = 0;
  297.    char *output_ptr = (char*)shader->gs_output;
  298.    int i, j, prim_idx;
  299.    unsigned next_prim_boundary = shader->primitive_boundary;
  300.  
  301.    for (i = 0; i < shader->vector_length; ++i) {
  302.       int prims = shader->llvm_emitted_primitives[i];
  303.       total_prims += prims;
  304.       max_prims_per_invocation = MAX2(max_prims_per_invocation, prims);
  305.    }
  306.    for (i = 0; i < shader->vector_length; ++i) {
  307.       total_verts += shader->llvm_emitted_vertices[i];
  308.    }
  309.  
  310.    output_ptr += shader->emitted_vertices * shader->vertex_size;
  311.    for (i = 0; i < shader->vector_length - 1; ++i) {
  312.       int current_verts = shader->llvm_emitted_vertices[i];
  313.       int next_verts = shader->llvm_emitted_vertices[i + 1];
  314. #if 0
  315.       int j;
  316.       for (j = 0; j < current_verts; ++j) {
  317.          struct vertex_header *vh = (struct vertex_header *)
  318.             (output_ptr + shader->vertex_size * (i * next_prim_boundary + j));
  319.          debug_printf("--- %d) [%f, %f, %f, %f]\n", j + vertex_count,
  320.                       vh->data[0][0], vh->data[0][1], vh->data[0][2], vh->data[0][3]);
  321.          
  322.       }
  323. #endif
  324.       debug_assert(current_verts <= shader->max_output_vertices);
  325.       debug_assert(next_verts <= shader->max_output_vertices);
  326.       if (next_verts) {
  327.          memmove(output_ptr + (vertex_count + current_verts) * shader->vertex_size,
  328.                  output_ptr + ((i + 1) * next_prim_boundary) * shader->vertex_size,
  329.                  shader->vertex_size * next_verts);
  330.       }
  331.       vertex_count += current_verts;
  332.    }
  333.  
  334. #if 0
  335.    {
  336.       int i;
  337.       for (i = 0; i < total_verts; ++i) {
  338.          struct vertex_header *vh = (struct vertex_header *)(output_ptr + shader->vertex_size * i);
  339.          debug_printf("%d) Vertex:\n", i);
  340.          for (j = 0; j < shader->info.num_outputs; ++j) {
  341.             unsigned *udata = (unsigned*)vh->data[j];
  342.             debug_printf("    %d) [%f, %f, %f, %f] [%d, %d, %d, %d]\n", j,
  343.                          vh->data[j][0], vh->data[j][1], vh->data[j][2], vh->data[j][3],
  344.                          udata[0], udata[1], udata[2], udata[3]);
  345.          }
  346.          
  347.       }
  348.    }
  349. #endif
  350.  
  351.    prim_idx = 0;
  352.    for (i = 0; i < shader->vector_length; ++i) {
  353.       int num_prims = shader->llvm_emitted_primitives[i];
  354.       for (j = 0; j < num_prims; ++j) {
  355.          int prim_length =
  356.             shader->llvm_prim_lengths[j][i];
  357.          shader->primitive_lengths[shader->emitted_primitives + prim_idx] =
  358.             prim_length;
  359.          ++prim_idx;
  360.       }
  361.    }
  362.  
  363.    shader->emitted_primitives += total_prims;
  364.    shader->emitted_vertices += total_verts;
  365. }
  366.  
  367. static void
  368. llvm_gs_prepare(struct draw_geometry_shader *shader,
  369.                 const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
  370.                 const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS])
  371. {
  372. }
  373.  
  374. static unsigned
  375. llvm_gs_run(struct draw_geometry_shader *shader,
  376.             unsigned input_primitives)
  377. {
  378.    unsigned ret;
  379.    char *input = (char*)shader->gs_output;
  380.  
  381.    input += (shader->emitted_vertices * shader->vertex_size);
  382.  
  383.    ret = shader->current_variant->jit_func(
  384.       shader->jit_context, shader->gs_input->data,
  385.       (struct vertex_header*)input,
  386.       input_primitives,
  387.       shader->draw->instance_id,
  388.       shader->llvm_prim_ids);
  389.  
  390.    return ret;
  391. }
  392.  
  393. #endif
  394.  
  395. static void gs_flush(struct draw_geometry_shader *shader)
  396. {
  397.    unsigned out_prim_count;
  398.  
  399.    unsigned input_primitives = shader->fetched_prim_count;
  400.  
  401.    if (shader->draw->collect_statistics) {
  402.       shader->draw->statistics.gs_invocations += input_primitives;
  403.    }
  404.  
  405.    debug_assert(input_primitives > 0 &&
  406.                 input_primitives <= 4);
  407.  
  408.    out_prim_count = shader->run(shader, input_primitives);
  409.    shader->fetch_outputs(shader, out_prim_count,
  410.                          &shader->tmp_output);
  411.  
  412. #if 0
  413.    debug_printf("PRIM emitted prims = %d (verts=%d), cur prim count = %d\n",
  414.                 shader->emitted_primitives, shader->emitted_vertices,
  415.                 out_prim_count);
  416. #endif
  417.  
  418.    shader->fetched_prim_count = 0;
  419. }
  420.  
  421. static void gs_point(struct draw_geometry_shader *shader,
  422.                      int idx)
  423. {
  424.    unsigned indices[1];
  425.  
  426.    indices[0] = idx;
  427.  
  428.    shader->fetch_inputs(shader, indices, 1,
  429.                         shader->fetched_prim_count);
  430.    ++shader->in_prim_idx;
  431.    ++shader->fetched_prim_count;
  432.  
  433.    if (draw_gs_should_flush(shader))
  434.       gs_flush(shader);
  435. }
  436.  
  437. static void gs_line(struct draw_geometry_shader *shader,
  438.                     int i0, int i1)
  439. {
  440.    unsigned indices[2];
  441.  
  442.    indices[0] = i0;
  443.    indices[1] = i1;
  444.  
  445.    shader->fetch_inputs(shader, indices, 2,
  446.                         shader->fetched_prim_count);
  447.    ++shader->in_prim_idx;
  448.    ++shader->fetched_prim_count;
  449.    
  450.    if (draw_gs_should_flush(shader))  
  451.       gs_flush(shader);
  452. }
  453.  
  454. static void gs_line_adj(struct draw_geometry_shader *shader,
  455.                         int i0, int i1, int i2, int i3)
  456. {
  457.    unsigned indices[4];
  458.  
  459.    indices[0] = i0;
  460.    indices[1] = i1;
  461.    indices[2] = i2;
  462.    indices[3] = i3;
  463.  
  464.    shader->fetch_inputs(shader, indices, 4,
  465.                         shader->fetched_prim_count);
  466.    ++shader->in_prim_idx;
  467.    ++shader->fetched_prim_count;
  468.  
  469.    if (draw_gs_should_flush(shader))
  470.       gs_flush(shader);
  471. }
  472.  
  473. static void gs_tri(struct draw_geometry_shader *shader,
  474.                    int i0, int i1, int i2)
  475. {
  476.    unsigned indices[3];
  477.  
  478.    indices[0] = i0;
  479.    indices[1] = i1;
  480.    indices[2] = i2;
  481.  
  482.    shader->fetch_inputs(shader, indices, 3,
  483.                         shader->fetched_prim_count);
  484.    ++shader->in_prim_idx;
  485.    ++shader->fetched_prim_count;
  486.  
  487.    if (draw_gs_should_flush(shader))
  488.       gs_flush(shader);
  489. }
  490.  
  491. static void gs_tri_adj(struct draw_geometry_shader *shader,
  492.                        int i0, int i1, int i2,
  493.                        int i3, int i4, int i5)
  494. {
  495.    unsigned indices[6];
  496.  
  497.    indices[0] = i0;
  498.    indices[1] = i1;
  499.    indices[2] = i2;
  500.    indices[3] = i3;
  501.    indices[4] = i4;
  502.    indices[5] = i5;
  503.  
  504.    shader->fetch_inputs(shader, indices, 6,
  505.                         shader->fetched_prim_count);
  506.    ++shader->in_prim_idx;
  507.    ++shader->fetched_prim_count;
  508.  
  509.    if (draw_gs_should_flush(shader))
  510.       gs_flush(shader);
  511. }
  512.  
  /* Include draw_gs_tmp.h with FUNC set to gs_run and an identity GET_ELT.
   * draw_geometry_shader_run() calls gs_run() for linear (non-indexed)
   * input.  Presumably the header expands to the definition of FUNC and
   * undefines these macros afterwards -- confirm in draw_gs_tmp.h. */
  513. #define FUNC         gs_run
  514. #define GET_ELT(idx) (idx)
  515. #include "draw_gs_tmp.h"
  516.  
  517.  
  /* Include draw_gs_tmp.h again with FUNC set to gs_run_elts.  LOCAL_VARS
   * declares elts from input_prims->elts and GET_ELT reads the vertex
   * index through it.  draw_geometry_shader_run() calls gs_run_elts()
   * when the input is not linear. */
  518. #define FUNC         gs_run_elts
  519. #define LOCAL_VARS   const ushort *elts = input_prims->elts;
  520. #define GET_ELT(idx) (elts[idx])
  521. #include "draw_gs_tmp.h"
  522.  
  523.  
  524. /**
  525.  * Execute geometry shader.
  526.  */
  527. int draw_geometry_shader_run(struct draw_geometry_shader *shader,
  528.                              const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
  529.                              const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS],
  530.                              const struct draw_vertex_info *input_verts,
  531.                              const struct draw_prim_info *input_prim,
  532.                              const struct tgsi_shader_info *input_info,
  533.                              struct draw_vertex_info *output_verts,
  534.                              struct draw_prim_info *output_prims )
  535. {
  536.    const float (*input)[4] = (const float (*)[4])input_verts->verts->data;
  537.    unsigned input_stride = input_verts->vertex_size;
  538.    unsigned num_outputs = draw_total_gs_outputs(shader->draw);
  539.    unsigned vertex_size = sizeof(struct vertex_header) + num_outputs * 4 * sizeof(float);
  540.    unsigned num_input_verts = input_prim->linear ?
  541.       input_verts->count :
  542.       input_prim->count;
  543.    unsigned num_in_primitives =
  544.       align(
  545.          MAX2(u_decomposed_prims_for_vertices(input_prim->prim,
  546.                                               num_input_verts),
  547.               u_decomposed_prims_for_vertices(shader->input_primitive,
  548.                                               num_input_verts)),
  549.          shader->vector_length);
  550.    unsigned max_out_prims =
  551.       u_decomposed_prims_for_vertices(shader->output_primitive,
  552.                                       shader->max_output_vertices)
  553.       * num_in_primitives;
  554.    /* we allocate exactly one extra vertex per primitive to allow the GS to emit
  555.     * overflown vertices into some area where they won't harm anyone */
  556.    unsigned total_verts_per_buffer = shader->primitive_boundary *
  557.       num_in_primitives;
  558.  
  559.    //Assume at least one primitive
  560.    max_out_prims = MAX2(max_out_prims, 1);
  561.  
  562.  
  563.    output_verts->vertex_size = vertex_size;
  564.    output_verts->stride = output_verts->vertex_size;
  565.    output_verts->verts =
  566.       (struct vertex_header *)MALLOC(output_verts->vertex_size *
  567.                                      total_verts_per_buffer);
  568.    debug_assert(output_verts->verts);
  569.  
  570. #if 0
  571.    debug_printf("%s count = %d (in prims # = %d)\n",
  572.                 __FUNCTION__, num_input_verts, num_in_primitives);
  573.    debug_printf("\tlinear = %d, prim_info->count = %d\n",
  574.                 input_prim->linear, input_prim->count);
  575.    debug_printf("\tprim pipe = %s, shader in = %s, shader out = %s\n"
  576.                 u_prim_name(input_prim->prim),
  577.                 u_prim_name(shader->input_primitive),
  578.                 u_prim_name(shader->output_primitive));
  579.    debug_printf("\tmaxv  = %d, maxp = %d, primitive_boundary = %d, "
  580.                 "vertex_size = %d, tverts = %d\n",
  581.                 shader->max_output_vertices, max_out_prims,
  582.                 shader->primitive_boundary, output_verts->vertex_size,
  583.                 total_verts_per_buffer);
  584. #endif
  585.  
  586.    shader->emitted_vertices = 0;
  587.    shader->emitted_primitives = 0;
  588.    shader->vertex_size = vertex_size;
  589.    shader->tmp_output = (float (*)[4])output_verts->verts->data;
  590.    shader->fetched_prim_count = 0;
  591.    shader->input_vertex_stride = input_stride;
  592.    shader->input = input;
  593.    shader->input_info = input_info;
  594.    FREE(shader->primitive_lengths);
  595.    shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned));
  596.  
  597.  
  598. #ifdef HAVE_LLVM
  599.    if (shader->draw->llvm) {
  600.       shader->gs_output = output_verts->verts;
  601.       if (max_out_prims > shader->max_out_prims) {
  602.          unsigned i;
  603.          if (shader->llvm_prim_lengths) {
  604.             for (i = 0; i < shader->max_out_prims; ++i) {
  605.                align_free(shader->llvm_prim_lengths[i]);
  606.             }
  607.             FREE(shader->llvm_prim_lengths);
  608.          }
  609.  
  610.          shader->llvm_prim_lengths = MALLOC(max_out_prims * sizeof(unsigned*));
  611.          for (i = 0; i < max_out_prims; ++i) {
  612.             int vector_size = shader->vector_length * sizeof(unsigned);
  613.             shader->llvm_prim_lengths[i] =
  614.                align_malloc(vector_size, vector_size);
  615.          }
  616.  
  617.          shader->max_out_prims = max_out_prims;
  618.       }
  619.       shader->jit_context->prim_lengths = shader->llvm_prim_lengths;
  620.       shader->jit_context->emitted_vertices = shader->llvm_emitted_vertices;
  621.       shader->jit_context->emitted_prims = shader->llvm_emitted_primitives;
  622.    }
  623. #endif
  624.  
  625.    shader->prepare(shader, constants, constants_size);
  626.  
  627.    if (input_prim->linear)
  628.       gs_run(shader, input_prim, input_verts,
  629.              output_prims, output_verts);
  630.    else
  631.       gs_run_elts(shader, input_prim, input_verts,
  632.                   output_prims, output_verts);
  633.  
  634.    /* Flush the remaining primitives. Will happen if
  635.     * num_input_primitives % 4 != 0
  636.     */
  637.    if (shader->fetched_prim_count > 0) {
  638.       gs_flush(shader);
  639.    }
  640.  
  641.    debug_assert(shader->fetched_prim_count == 0);
  642.  
  643.    /* Update prim_info:
  644.     */
  645.    output_prims->linear = TRUE;
  646.    output_prims->elts = NULL;
  647.    output_prims->start = 0;
  648.    output_prims->count = shader->emitted_vertices;
  649.    output_prims->prim = shader->output_primitive;
  650.    output_prims->flags = 0x0;
  651.    output_prims->primitive_lengths = shader->primitive_lengths;
  652.    output_prims->primitive_count = shader->emitted_primitives;
  653.    output_verts->count = shader->emitted_vertices;
  654.  
  655.    if (shader->draw->collect_statistics) {
  656.       unsigned i;
  657.       for (i = 0; i < shader->emitted_primitives; ++i) {
  658.          shader->draw->statistics.gs_primitives +=
  659.             u_decomposed_prims_for_vertices(shader->output_primitive,
  660.                                             shader->primitive_lengths[i]);
  661.       }
  662.    }
  663.  
  664. #if 0
  665.    debug_printf("GS finished, prims = %d, verts = %d\n",
  666.                 output_prims->primitive_count,
  667.                 output_verts->count);
  668. #endif
  669.  
  670.    return shader->emitted_vertices;
  671. }
  672.  
  673. void draw_geometry_shader_prepare(struct draw_geometry_shader *shader,
  674.                                   struct draw_context *draw)
  675. {
  676.    boolean use_llvm = draw->llvm != NULL;
  677.    if (!use_llvm && shader && shader->machine->Tokens != shader->state.tokens) {
  678.       tgsi_exec_machine_bind_shader(shader->machine,
  679.                                     shader->state.tokens,
  680.                                     draw->gs.tgsi.sampler);
  681.    }
  682. }
  683.  
  684.  
  685. boolean
  686. draw_gs_init( struct draw_context *draw )
  687. {
  688.    if (!draw->llvm) {
  689.       draw->gs.tgsi.machine = tgsi_exec_machine_create();
  690.       if (!draw->gs.tgsi.machine)
  691.          return FALSE;
  692.  
  693.       draw->gs.tgsi.machine->Primitives = align_malloc(
  694.          MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16);
  695.       if (!draw->gs.tgsi.machine->Primitives)
  696.          return FALSE;
  697.       memset(draw->gs.tgsi.machine->Primitives, 0,
  698.              MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector));
  699.    }
  700.  
  701.    return TRUE;
  702. }
  703.  
  704. void draw_gs_destroy( struct draw_context *draw )
  705. {
  706.    if (draw->gs.tgsi.machine) {
  707.       align_free(draw->gs.tgsi.machine->Primitives);
  708.       tgsi_exec_machine_destroy(draw->gs.tgsi.machine);
  709.    }
  710. }
  711.  
  712. struct draw_geometry_shader *
  713. draw_create_geometry_shader(struct draw_context *draw,
  714.                             const struct pipe_shader_state *state)
  715. {
  716. #ifdef HAVE_LLVM
  717.    boolean use_llvm = draw->llvm != NULL;
  718.    struct llvm_geometry_shader *llvm_gs = NULL;
  719. #endif
  720.    struct draw_geometry_shader *gs;
  721.    unsigned i;
  722.  
  723. #ifdef HAVE_LLVM
  724.    if (use_llvm) {
  725.       llvm_gs = CALLOC_STRUCT(llvm_geometry_shader);
  726.  
  727.       if (llvm_gs == NULL)
  728.          return NULL;
  729.  
  730.       gs = &llvm_gs->base;
  731.  
  732.       make_empty_list(&llvm_gs->variants);
  733.    } else
  734. #endif
  735.    {
  736.       gs = CALLOC_STRUCT(draw_geometry_shader);
  737.    }
  738.  
  739.    if (!gs)
  740.       return NULL;
  741.  
  742.    gs->draw = draw;
  743.    gs->state = *state;
  744.    gs->state.tokens = tgsi_dup_tokens(state->tokens);
  745.    if (!gs->state.tokens) {
  746.       FREE(gs);
  747.       return NULL;
  748.    }
  749.  
  750.    tgsi_scan_shader(state->tokens, &gs->info);
  751.  
  752.    /* setup the defaults */
  753.    gs->max_out_prims = 0;
  754.  
  755. #ifdef HAVE_LLVM
  756.    if (use_llvm) {
  757.       /* TODO: change the input array to handle the following
  758.          vector length, instead of the currently hardcoded
  759.          TGSI_NUM_CHANNELS
  760.       gs->vector_length = lp_native_vector_width / 32;*/
  761.       gs->vector_length = TGSI_NUM_CHANNELS;
  762.    } else
  763. #endif
  764.    {
  765.       gs->vector_length = 1;
  766.    }
  767.  
  768.    gs->input_primitive =
  769.          gs->info.properties[TGSI_PROPERTY_GS_INPUT_PRIM];
  770.    gs->output_primitive =
  771.          gs->info.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];
  772.    gs->max_output_vertices =
  773.          gs->info.properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
  774.    if (!gs->max_output_vertices)
  775.       gs->max_output_vertices = 32;
  776.  
  777.    /* Primitive boundary is bigger than max_output_vertices by one, because
  778.     * the specification says that the geometry shader should exit if the
  779.     * number of emitted vertices is bigger or equal to max_output_vertices and
  780.     * we can't do that because we're running in the SoA mode, which means that
  781.     * our storing routines will keep getting called on channels that have
  782.     * overflown.
  783.     * So we need some scratch area where we can keep writing the overflown
  784.     * vertices without overwriting anything important or crashing.
  785.     */
  786.    gs->primitive_boundary = gs->max_output_vertices + 1;
  787.  
  788.    gs->position_output = -1;
  789.    for (i = 0; i < gs->info.num_outputs; i++) {
  790.       if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
  791.           gs->info.output_semantic_index[i] == 0)
  792.          gs->position_output = i;
  793.       if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX)
  794.          gs->viewport_index_output = i;
  795.       if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) {
  796.          debug_assert(gs->info.output_semantic_index[i] <
  797.                       PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT);
  798.          gs->clipdistance_output[gs->info.output_semantic_index[i]] = i;
  799.       }
  800.       if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_CULLDIST) {
  801.          debug_assert(gs->info.output_semantic_index[i] <
  802.                       PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT);
  803.          gs->culldistance_output[gs->info.output_semantic_index[i]] = i;
  804.       }
  805.    }
  806.  
  807.    gs->machine = draw->gs.tgsi.machine;
  808.  
  809. #ifdef HAVE_LLVM
  810.    if (use_llvm) {
  811.       int vector_size = gs->vector_length * sizeof(float);
  812.       gs->gs_input = align_malloc(sizeof(struct draw_gs_inputs), 16);
  813.       memset(gs->gs_input, 0, sizeof(struct draw_gs_inputs));
  814.       gs->llvm_prim_lengths = 0;
  815.  
  816.       gs->llvm_emitted_primitives = align_malloc(vector_size, vector_size);
  817.       gs->llvm_emitted_vertices = align_malloc(vector_size, vector_size);
  818.       gs->llvm_prim_ids = align_malloc(vector_size, vector_size);
  819.  
  820.       gs->fetch_outputs = llvm_fetch_gs_outputs;
  821.       gs->fetch_inputs = llvm_fetch_gs_input;
  822.       gs->prepare = llvm_gs_prepare;
  823.       gs->run = llvm_gs_run;
  824.  
  825.       gs->jit_context = &draw->llvm->gs_jit_context;
  826.  
  827.  
  828.       llvm_gs->variant_key_size =
  829.          draw_gs_llvm_variant_key_size(
  830.             MAX2(gs->info.file_max[TGSI_FILE_SAMPLER]+1,
  831.                  gs->info.file_max[TGSI_FILE_SAMPLER_VIEW]+1));
  832.    } else
  833. #endif
  834.    {
  835.       gs->fetch_outputs = tgsi_fetch_gs_outputs;
  836.       gs->fetch_inputs = tgsi_fetch_gs_input;
  837.       gs->prepare = tgsi_gs_prepare;
  838.       gs->run = tgsi_gs_run;
  839.    }
  840.  
  841.    return gs;
  842. }
  843.  
  844. void draw_bind_geometry_shader(struct draw_context *draw,
  845.                                struct draw_geometry_shader *dgs)
  846. {
  847.    draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
  848.  
  849.    if (dgs) {
  850.       draw->gs.geometry_shader = dgs;
  851.       draw->gs.num_gs_outputs = dgs->info.num_outputs;
  852.       draw->gs.position_output = dgs->position_output;
  853.       draw_geometry_shader_prepare(dgs, draw);
  854.    }
  855.    else {
  856.       draw->gs.geometry_shader = NULL;
  857.       draw->gs.num_gs_outputs = 0;
  858.    }
  859. }
  860.  
  861. void draw_delete_geometry_shader(struct draw_context *draw,
  862.                                  struct draw_geometry_shader *dgs)
  863. {
  864.    if (!dgs) {
  865.       return;
  866.    }
  867. #ifdef HAVE_LLVM
  868.    if (draw->llvm) {
  869.       struct llvm_geometry_shader *shader = llvm_geometry_shader(dgs);
  870.       struct draw_gs_llvm_variant_list_item *li;
  871.  
  872.       li = first_elem(&shader->variants);
  873.       while(!at_end(&shader->variants, li)) {
  874.          struct draw_gs_llvm_variant_list_item *next = next_elem(li);
  875.          draw_gs_llvm_destroy_variant(li->base);
  876.          li = next;
  877.       }
  878.  
  879.       assert(shader->variants_cached == 0);
  880.  
  881.       if (dgs->llvm_prim_lengths) {
  882.          unsigned i;
  883.          for (i = 0; i < dgs->max_out_prims; ++i) {
  884.             align_free(dgs->llvm_prim_lengths[i]);
  885.          }
  886.          FREE(dgs->llvm_prim_lengths);
  887.       }
  888.       align_free(dgs->llvm_emitted_primitives);
  889.       align_free(dgs->llvm_emitted_vertices);
  890.       align_free(dgs->llvm_prim_ids);
  891.  
  892.       align_free(dgs->gs_input);
  893.    }
  894. #endif
  895.  
  896.    FREE(dgs->primitive_lengths);
  897.    FREE((void*) dgs->state.tokens);
  898.    FREE(dgs);
  899. }
  900.  
  901.  
  #ifdef HAVE_LLVM
  /**
   * Record \p variant as the current variant of \p shader.
   * Only compiled when HAVE_LLVM is defined.
   *
   * \param shader   geometry shader whose current_variant field is written
   * \param variant  variant pointer to store; it is stored as given
   */
  void
  draw_gs_set_current_variant(struct draw_geometry_shader *shader,
                              struct draw_gs_llvm_variant *variant)
  {
     shader->current_variant = variant;
  }
  #endif /* HAVE_LLVM */
  909.  
  910. /*
  911.  * Called at the very begin of the draw call with a new instance
  912.  * Used to reset state that should persist between primitive restart.
  913.  */
  914. void
  915. draw_geometry_shader_new_instance(struct draw_geometry_shader *gs)
  916. {
  917.    if (!gs)
  918.       return;
  919.  
  920.    gs->in_prim_idx = 0;
  921. }
  922.