Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2009 VMware, Inc.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. #include "draw_gs.h"
  29.  
  30. #include "draw_private.h"
  31. #include "draw_context.h"
  32. #ifdef HAVE_LLVM
  33. #include "draw_llvm.h"
  34. #endif
  35.  
  36. #include "tgsi/tgsi_parse.h"
  37. #include "tgsi/tgsi_exec.h"
  38.  
  39. #include "pipe/p_shader_tokens.h"
  40.  
  41. #include "util/u_math.h"
  42. #include "util/u_memory.h"
  43. #include "util/u_prim.h"
  44.  
  45. /* fixme: move it from here */
  46. #define MAX_PRIMITIVES 64
  47.  
  48. static INLINE int
  49. draw_gs_get_input_index(int semantic, int index,
  50.                         const struct tgsi_shader_info *input_info)
  51. {
  52.    int i;
  53.    const ubyte *input_semantic_names = input_info->output_semantic_name;
  54.    const ubyte *input_semantic_indices = input_info->output_semantic_index;
  55.    for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) {
  56.       if (input_semantic_names[i] == semantic &&
  57.           input_semantic_indices[i] == index)
  58.          return i;
  59.    }
  60.    return -1;
  61. }
  62.  
  63. /**
  64.  * We execute geometry shaders in the SOA mode, so ideally we want to
  65.  * flush when the number of currently fetched primitives is equal to
  66.  * the number of elements in the SOA vector. This ensures that the
  67.  * throughput is optimized for the given vector instrunction set.
  68.  */
  69. static INLINE boolean
  70. draw_gs_should_flush(struct draw_geometry_shader *shader)
  71. {
  72.    return (shader->fetched_prim_count == shader->vector_length);
  73. }
  74.  
  75. /*#define DEBUG_OUTPUTS 1*/
  76. static void
  77. tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader,
  78.                       unsigned num_primitives,
  79.                       float (**p_output)[4])
  80. {
  81.    struct tgsi_exec_machine *machine = shader->machine;
  82.    unsigned prim_idx, j, slot;
  83.    unsigned current_idx = 0;
  84.    float (*output)[4];
  85.  
  86.    output = *p_output;
  87.  
  88.    /* Unswizzle all output results.
  89.     */
  90.  
  91.    for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) {
  92.       unsigned num_verts_per_prim = machine->Primitives[prim_idx];
  93.       shader->primitive_lengths[prim_idx +   shader->emitted_primitives] =
  94.          machine->Primitives[prim_idx];
  95.       shader->emitted_vertices += num_verts_per_prim;
  96.       for (j = 0; j < num_verts_per_prim; j++, current_idx++) {
  97.          int idx = current_idx * shader->info.num_outputs;
  98. #ifdef DEBUG_OUTPUTS
  99.          debug_printf("%d) Output vert:\n", idx / shader->info.num_outputs);
  100. #endif
  101.          for (slot = 0; slot < shader->info.num_outputs; slot++) {
  102.             output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[0];
  103.             output[slot][1] = machine->Outputs[idx + slot].xyzw[1].f[0];
  104.             output[slot][2] = machine->Outputs[idx + slot].xyzw[2].f[0];
  105.             output[slot][3] = machine->Outputs[idx + slot].xyzw[3].f[0];
  106. #ifdef DEBUG_OUTPUTS
  107.             debug_printf("\t%d: %f %f %f %f\n", slot,
  108.                          output[slot][0],
  109.                          output[slot][1],
  110.                          output[slot][2],
  111.                          output[slot][3]);
  112. #endif
  113.             debug_assert(!util_is_inf_or_nan(output[slot][0]));
  114.          }
  115.          output = (float (*)[4])((char *)output + shader->vertex_size);
  116.       }
  117.    }
  118.    *p_output = output;
  119.    shader->emitted_primitives += num_primitives;
  120. }
  121.  
  122. /*#define DEBUG_INPUTS 1*/
  123. static void tgsi_fetch_gs_input(struct draw_geometry_shader *shader,
  124.                                 unsigned *indices,
  125.                                 unsigned num_vertices,
  126.                                 unsigned prim_idx)
  127. {
  128.    struct tgsi_exec_machine *machine = shader->machine;
  129.    unsigned slot, vs_slot, i;
  130.    unsigned input_vertex_stride = shader->input_vertex_stride;
  131.    const float (*input_ptr)[4];
  132.  
  133.    input_ptr = shader->input;
  134.  
  135.    for (i = 0; i < num_vertices; ++i) {
  136.       const float (*input)[4];
  137. #if DEBUG_INPUTS
  138.       debug_printf("%d) vertex index = %d (prim idx = %d)\n",
  139.                    i, indices[i], prim_idx);
  140. #endif
  141.       input = (const float (*)[4])(
  142.          (const char *)input_ptr + (indices[i] * input_vertex_stride));
  143.       for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) {
  144.          unsigned idx = i * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot;
  145.          if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) {
  146.             machine->Inputs[idx].xyzw[0].f[prim_idx] =
  147.                (float)shader->in_prim_idx;
  148.             machine->Inputs[idx].xyzw[1].f[prim_idx] =
  149.                (float)shader->in_prim_idx;
  150.             machine->Inputs[idx].xyzw[2].f[prim_idx] =
  151.                (float)shader->in_prim_idx;
  152.             machine->Inputs[idx].xyzw[3].f[prim_idx] =
  153.                (float)shader->in_prim_idx;
  154.          } else {
  155.             vs_slot = draw_gs_get_input_index(
  156.                shader->info.input_semantic_name[slot],
  157.                shader->info.input_semantic_index[slot],
  158.                shader->input_info);
  159.             if (vs_slot < 0) {
  160.                debug_printf("VS/GS signature mismatch!\n");
  161.                machine->Inputs[idx].xyzw[0].f[prim_idx] = 0;
  162.                machine->Inputs[idx].xyzw[1].f[prim_idx] = 0;
  163.                machine->Inputs[idx].xyzw[2].f[prim_idx] = 0;
  164.                machine->Inputs[idx].xyzw[3].f[prim_idx] = 0;
  165.             } else {
  166. #if DEBUG_INPUTS
  167.                debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n",
  168.                             slot, vs_slot, idx);
  169.                assert(!util_is_inf_or_nan(input[vs_slot][0]));
  170.                assert(!util_is_inf_or_nan(input[vs_slot][1]));
  171.                assert(!util_is_inf_or_nan(input[vs_slot][2]));
  172.                assert(!util_is_inf_or_nan(input[vs_slot][3]));
  173. #endif
  174.                machine->Inputs[idx].xyzw[0].f[prim_idx] = input[vs_slot][0];
  175.                machine->Inputs[idx].xyzw[1].f[prim_idx] = input[vs_slot][1];
  176.                machine->Inputs[idx].xyzw[2].f[prim_idx] = input[vs_slot][2];
  177.                machine->Inputs[idx].xyzw[3].f[prim_idx] = input[vs_slot][3];
  178. #if DEBUG_INPUTS
  179.                debug_printf("\t\t%f %f %f %f\n",
  180.                             machine->Inputs[idx].xyzw[0].f[prim_idx],
  181.                             machine->Inputs[idx].xyzw[1].f[prim_idx],
  182.                             machine->Inputs[idx].xyzw[2].f[prim_idx],
  183.                             machine->Inputs[idx].xyzw[3].f[prim_idx]);
  184. #endif
  185.                ++vs_slot;
  186.             }
  187.          }
  188.       }
  189.    }
  190. }
  191.  
  192. static void tgsi_gs_prepare(struct draw_geometry_shader *shader,
  193.                             const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
  194.                             const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS])
  195. {
  196.    struct tgsi_exec_machine *machine = shader->machine;
  197.  
  198.    tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS,
  199.                                   constants, constants_size);
  200. }
  201.  
  202. static unsigned tgsi_gs_run(struct draw_geometry_shader *shader,
  203.                             unsigned input_primitives)
  204. {
  205.    struct tgsi_exec_machine *machine = shader->machine;
  206.  
  207.    tgsi_set_exec_mask(machine,
  208.                       1,
  209.                       input_primitives > 1,
  210.                       input_primitives > 2,
  211.                       input_primitives > 3);
  212.  
  213.    /* run interpreter */
  214.    tgsi_exec_machine_run(machine);
  215.  
  216.    return
  217.       machine->Temps[TGSI_EXEC_TEMP_PRIMITIVE_I].xyzw[TGSI_EXEC_TEMP_PRIMITIVE_C].u[0];
  218. }
  219.  
  220. #ifdef HAVE_LLVM
  221.  
  222. static void
  223. llvm_fetch_gs_input(struct draw_geometry_shader *shader,
  224.                     unsigned *indices,
  225.                     unsigned num_vertices,
  226.                     unsigned prim_idx)
  227. {
  228.    unsigned slot, vs_slot, i;
  229.    unsigned input_vertex_stride = shader->input_vertex_stride;
  230.    const float (*input_ptr)[4];
  231.    float (*input_data)[6][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS][TGSI_NUM_CHANNELS] = &shader->gs_input->data;
  232.  
  233.    shader->llvm_prim_ids[shader->fetched_prim_count] =
  234.       shader->in_prim_idx;
  235.  
  236.    input_ptr = shader->input;
  237.  
  238.    for (i = 0; i < num_vertices; ++i) {
  239.       const float (*input)[4];
  240. #if DEBUG_INPUTS
  241.       debug_printf("%d) vertex index = %d (prim idx = %d)\n",
  242.                    i, indices[i], prim_idx);
  243. #endif
  244.       input = (const float (*)[4])(
  245.          (const char *)input_ptr + (indices[i] * input_vertex_stride));
  246.       for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) {
  247.          if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) {
  248.             /* skip. we handle system values through gallivm */
  249.          } else {
  250.             vs_slot = draw_gs_get_input_index(
  251.                shader->info.input_semantic_name[slot],
  252.                shader->info.input_semantic_index[slot],
  253.                shader->input_info);
  254.             if (vs_slot < 0) {
  255.                debug_printf("VS/GS signature mismatch!\n");
  256.                (*input_data)[i][slot][0][prim_idx] = 0;
  257.                (*input_data)[i][slot][1][prim_idx] = 0;
  258.                (*input_data)[i][slot][2][prim_idx] = 0;
  259.                (*input_data)[i][slot][3][prim_idx] = 0;
  260.             } else {
  261. #if DEBUG_INPUTS
  262.                debug_printf("\tSlot = %d, vs_slot = %d, i = %d:\n",
  263.                             slot, vs_slot, i);
  264.                assert(!util_is_inf_or_nan(input[vs_slot][0]));
  265.                assert(!util_is_inf_or_nan(input[vs_slot][1]));
  266.                assert(!util_is_inf_or_nan(input[vs_slot][2]));
  267.                assert(!util_is_inf_or_nan(input[vs_slot][3]));
  268. #endif
  269.                (*input_data)[i][slot][0][prim_idx] = input[vs_slot][0];
  270.                (*input_data)[i][slot][1][prim_idx] = input[vs_slot][1];
  271.                (*input_data)[i][slot][2][prim_idx] = input[vs_slot][2];
  272.                (*input_data)[i][slot][3][prim_idx] = input[vs_slot][3];
  273. #if DEBUG_INPUTS
  274.                debug_printf("\t\t%f %f %f %f\n",
  275.                             (*input_data)[i][slot][0][prim_idx],
  276.                             (*input_data)[i][slot][1][prim_idx],
  277.                             (*input_data)[i][slot][2][prim_idx],
  278.                             (*input_data)[i][slot][3][prim_idx]);
  279. #endif
  280.                ++vs_slot;
  281.             }
  282.          }
  283.       }
  284.    }
  285. }
  286.  
  287. static void
  288. llvm_fetch_gs_outputs(struct draw_geometry_shader *shader,
  289.                       unsigned num_primitives,
  290.                       float (**p_output)[4])
  291. {
  292.    int total_verts = 0;
  293.    int vertex_count = 0;
  294.    int total_prims = 0;
  295.    int max_prims_per_invocation = 0;
  296.    char *output_ptr = (char*)shader->gs_output;
  297.    int i, j, prim_idx;
  298.    unsigned next_prim_boundary = shader->primitive_boundary;
  299.  
  300.    for (i = 0; i < shader->vector_length; ++i) {
  301.       int prims = shader->llvm_emitted_primitives[i];
  302.       total_prims += prims;
  303.       max_prims_per_invocation = MAX2(max_prims_per_invocation, prims);
  304.    }
  305.    for (i = 0; i < shader->vector_length; ++i) {
  306.       total_verts += shader->llvm_emitted_vertices[i];
  307.    }
  308.  
  309.    output_ptr += shader->emitted_vertices * shader->vertex_size;
  310.    for (i = 0; i < shader->vector_length - 1; ++i) {
  311.       int current_verts = shader->llvm_emitted_vertices[i];
  312.       int next_verts = shader->llvm_emitted_vertices[i + 1];
  313. #if 0
  314.       int j;
  315.       for (j = 0; j < current_verts; ++j) {
  316.          struct vertex_header *vh = (struct vertex_header *)
  317.             (output_ptr + shader->vertex_size * (i * next_prim_boundary + j));
  318.          debug_printf("--- %d) [%f, %f, %f, %f]\n", j + vertex_count,
  319.                       vh->data[0][0], vh->data[0][1], vh->data[0][2], vh->data[0][3]);
  320.          
  321.       }
  322. #endif
  323.       debug_assert(current_verts <= shader->max_output_vertices);
  324.       debug_assert(next_verts <= shader->max_output_vertices);
  325.       if (next_verts) {
  326.          memmove(output_ptr + (vertex_count + current_verts) * shader->vertex_size,
  327.                  output_ptr + ((i + 1) * next_prim_boundary) * shader->vertex_size,
  328.                  shader->vertex_size * next_verts);
  329.       }
  330.       vertex_count += current_verts;
  331.    }
  332.  
  333. #if 0
  334.    {
  335.       int i;
  336.       for (i = 0; i < total_verts; ++i) {
  337.          struct vertex_header *vh = (struct vertex_header *)(output_ptr + shader->vertex_size * i);
  338.          debug_printf("%d) Vertex:\n", i);
  339.          for (j = 0; j < shader->info.num_outputs; ++j) {
  340.             unsigned *udata = (unsigned*)vh->data[j];
  341.             debug_printf("    %d) [%f, %f, %f, %f] [%d, %d, %d, %d]\n", j,
  342.                          vh->data[j][0], vh->data[j][1], vh->data[j][2], vh->data[j][3],
  343.                          udata[0], udata[1], udata[2], udata[3]);
  344.          }
  345.          
  346.       }
  347.    }
  348. #endif
  349.  
  350.    prim_idx = 0;
  351.    for (i = 0; i < shader->vector_length; ++i) {
  352.       int num_prims = shader->llvm_emitted_primitives[i];
  353.       for (j = 0; j < num_prims; ++j) {
  354.          int prim_length =
  355.             shader->llvm_prim_lengths[j][i];
  356.          shader->primitive_lengths[shader->emitted_primitives + prim_idx] =
  357.             prim_length;
  358.          ++prim_idx;
  359.       }
  360.    }
  361.  
  362.    shader->emitted_primitives += total_prims;
  363.    shader->emitted_vertices += total_verts;
  364. }
  365.  
  366. static void
  367. llvm_gs_prepare(struct draw_geometry_shader *shader,
  368.                 const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
  369.                 const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS])
  370. {
  371. }
  372.  
  373. static unsigned
  374. llvm_gs_run(struct draw_geometry_shader *shader,
  375.             unsigned input_primitives)
  376. {
  377.    unsigned ret;
  378.    char *input = (char*)shader->gs_output;
  379.  
  380.    input += (shader->emitted_vertices * shader->vertex_size);
  381.  
  382.    ret = shader->current_variant->jit_func(
  383.       shader->jit_context, shader->gs_input->data,
  384.       (struct vertex_header*)input,
  385.       input_primitives,
  386.       shader->draw->instance_id,
  387.       shader->llvm_prim_ids);
  388.  
  389.    return ret;
  390. }
  391.  
  392. #endif
  393.  
  394. static void gs_flush(struct draw_geometry_shader *shader)
  395. {
  396.    unsigned out_prim_count;
  397.  
  398.    unsigned input_primitives = shader->fetched_prim_count;
  399.  
  400.    if (shader->draw->collect_statistics) {
  401.       shader->draw->statistics.gs_invocations += input_primitives;
  402.    }
  403.  
  404.    debug_assert(input_primitives > 0 &&
  405.                 input_primitives <= 4);
  406.  
  407.    out_prim_count = shader->run(shader, input_primitives);
  408.    shader->fetch_outputs(shader, out_prim_count,
  409.                          &shader->tmp_output);
  410.  
  411. #if 0
  412.    debug_printf("PRIM emitted prims = %d (verts=%d), cur prim count = %d\n",
  413.                 shader->emitted_primitives, shader->emitted_vertices,
  414.                 out_prim_count);
  415. #endif
  416.  
  417.    shader->fetched_prim_count = 0;
  418. }
  419.  
  420. static void gs_point(struct draw_geometry_shader *shader,
  421.                      int idx)
  422. {
  423.    unsigned indices[1];
  424.  
  425.    indices[0] = idx;
  426.  
  427.    shader->fetch_inputs(shader, indices, 1,
  428.                         shader->fetched_prim_count);
  429.    ++shader->in_prim_idx;
  430.    ++shader->fetched_prim_count;
  431.  
  432.    if (draw_gs_should_flush(shader))
  433.       gs_flush(shader);
  434. }
  435.  
  436. static void gs_line(struct draw_geometry_shader *shader,
  437.                     int i0, int i1)
  438. {
  439.    unsigned indices[2];
  440.  
  441.    indices[0] = i0;
  442.    indices[1] = i1;
  443.  
  444.    shader->fetch_inputs(shader, indices, 2,
  445.                         shader->fetched_prim_count);
  446.    ++shader->in_prim_idx;
  447.    ++shader->fetched_prim_count;
  448.    
  449.    if (draw_gs_should_flush(shader))  
  450.       gs_flush(shader);
  451. }
  452.  
  453. static void gs_line_adj(struct draw_geometry_shader *shader,
  454.                         int i0, int i1, int i2, int i3)
  455. {
  456.    unsigned indices[4];
  457.  
  458.    indices[0] = i0;
  459.    indices[1] = i1;
  460.    indices[2] = i2;
  461.    indices[3] = i3;
  462.  
  463.    shader->fetch_inputs(shader, indices, 4,
  464.                         shader->fetched_prim_count);
  465.    ++shader->in_prim_idx;
  466.    ++shader->fetched_prim_count;
  467.  
  468.    if (draw_gs_should_flush(shader))
  469.       gs_flush(shader);
  470. }
  471.  
  472. static void gs_tri(struct draw_geometry_shader *shader,
  473.                    int i0, int i1, int i2)
  474. {
  475.    unsigned indices[3];
  476.  
  477.    indices[0] = i0;
  478.    indices[1] = i1;
  479.    indices[2] = i2;
  480.  
  481.    shader->fetch_inputs(shader, indices, 3,
  482.                         shader->fetched_prim_count);
  483.    ++shader->in_prim_idx;
  484.    ++shader->fetched_prim_count;
  485.  
  486.    if (draw_gs_should_flush(shader))
  487.       gs_flush(shader);
  488. }
  489.  
  490. static void gs_tri_adj(struct draw_geometry_shader *shader,
  491.                        int i0, int i1, int i2,
  492.                        int i3, int i4, int i5)
  493. {
  494.    unsigned indices[6];
  495.  
  496.    indices[0] = i0;
  497.    indices[1] = i1;
  498.    indices[2] = i2;
  499.    indices[3] = i3;
  500.    indices[4] = i4;
  501.    indices[5] = i5;
  502.  
  503.    shader->fetch_inputs(shader, indices, 6,
  504.                         shader->fetched_prim_count);
  505.    ++shader->in_prim_idx;
  506.    ++shader->fetched_prim_count;
  507.  
  508.    if (draw_gs_should_flush(shader))
  509.       gs_flush(shader);
  510. }
  511.  
  512. #define FUNC         gs_run
  513. #define GET_ELT(idx) (idx)
  514. #include "draw_gs_tmp.h"
  515.  
  516.  
  517. #define FUNC         gs_run_elts
  518. #define LOCAL_VARS   const ushort *elts = input_prims->elts;
  519. #define GET_ELT(idx) (elts[idx])
  520. #include "draw_gs_tmp.h"
  521.  
  522.  
  523. /**
  524.  * Execute geometry shader.
  525.  */
  526. int draw_geometry_shader_run(struct draw_geometry_shader *shader,
  527.                              const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
  528.                              const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS],
  529.                              const struct draw_vertex_info *input_verts,
  530.                              const struct draw_prim_info *input_prim,
  531.                              const struct tgsi_shader_info *input_info,
  532.                              struct draw_vertex_info *output_verts,
  533.                              struct draw_prim_info *output_prims )
  534. {
  535.    const float (*input)[4] = (const float (*)[4])input_verts->verts->data;
  536.    unsigned input_stride = input_verts->vertex_size;
  537.    unsigned num_outputs = shader->info.num_outputs;
  538.    unsigned vertex_size = sizeof(struct vertex_header) + num_outputs * 4 * sizeof(float);
  539.    unsigned num_input_verts = input_prim->linear ?
  540.       input_verts->count :
  541.       input_prim->count;
  542.    unsigned num_in_primitives =
  543.       align(
  544.          MAX2(u_decomposed_prims_for_vertices(input_prim->prim,
  545.                                               num_input_verts),
  546.               u_decomposed_prims_for_vertices(shader->input_primitive,
  547.                                               num_input_verts)),
  548.          shader->vector_length);
  549.    unsigned max_out_prims =
  550.       u_decomposed_prims_for_vertices(shader->output_primitive,
  551.                                       shader->max_output_vertices)
  552.       * num_in_primitives;
  553.  
  554.    //Assume at least one primitive
  555.    max_out_prims = MAX2(max_out_prims, 1);
  556.  
  557.  
  558.    output_verts->vertex_size = vertex_size;
  559.    output_verts->stride = output_verts->vertex_size;
  560.    /* we allocate exactly one extra vertex per primitive to allow the GS to emit
  561.     * overflown vertices into some area where they won't harm anyone */
  562.    output_verts->verts =
  563.       (struct vertex_header *)MALLOC(output_verts->vertex_size *
  564.                                      max_out_prims *
  565.                                      shader->primitive_boundary);
  566.  
  567. #if 0
  568.    debug_printf("%s count = %d (in prims # = %d)\n",
  569.                 __FUNCTION__, num_input_verts, num_in_primitives);
  570.    debug_printf("\tlinear = %d, prim_info->count = %d\n",
  571.                 input_prim->linear, input_prim->count);
  572.    debug_printf("\tprim pipe = %s, shader in = %s, shader out = %s, max out = %d\n",
  573.                 u_prim_name(input_prim->prim),
  574.                 u_prim_name(shader->input_primitive),
  575.                 u_prim_name(shader->output_primitive),
  576.                 shader->max_output_vertices);
  577. #endif
  578.  
  579.    shader->emitted_vertices = 0;
  580.    shader->emitted_primitives = 0;
  581.    shader->vertex_size = vertex_size;
  582.    shader->tmp_output = (float (*)[4])output_verts->verts->data;
  583.    shader->fetched_prim_count = 0;
  584.    shader->input_vertex_stride = input_stride;
  585.    shader->input = input;
  586.    shader->input_info = input_info;
  587.    FREE(shader->primitive_lengths);
  588.    shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned));
  589.  
  590.  
  591. #ifdef HAVE_LLVM
  592.    if (draw_get_option_use_llvm()) {
  593.       shader->gs_output = output_verts->verts;
  594.       if (max_out_prims > shader->max_out_prims) {
  595.          unsigned i;
  596.          if (shader->llvm_prim_lengths) {
  597.             for (i = 0; i < shader->max_out_prims; ++i) {
  598.                align_free(shader->llvm_prim_lengths[i]);
  599.             }
  600.             FREE(shader->llvm_prim_lengths);
  601.          }
  602.  
  603.          shader->llvm_prim_lengths = MALLOC(max_out_prims * sizeof(unsigned*));
  604.          for (i = 0; i < max_out_prims; ++i) {
  605.             int vector_size = shader->vector_length * sizeof(unsigned);
  606.             shader->llvm_prim_lengths[i] =
  607.                align_malloc(vector_size, vector_size);
  608.          }
  609.  
  610.          shader->max_out_prims = max_out_prims;
  611.       }
  612.       shader->jit_context->prim_lengths = shader->llvm_prim_lengths;
  613.       shader->jit_context->emitted_vertices = shader->llvm_emitted_vertices;
  614.       shader->jit_context->emitted_prims = shader->llvm_emitted_primitives;
  615.    }
  616. #endif
  617.  
  618.    shader->prepare(shader, constants, constants_size);
  619.  
  620.    if (input_prim->linear)
  621.       gs_run(shader, input_prim, input_verts,
  622.              output_prims, output_verts);
  623.    else
  624.       gs_run_elts(shader, input_prim, input_verts,
  625.                   output_prims, output_verts);
  626.  
  627.    /* Flush the remaining primitives. Will happen if
  628.     * num_input_primitives % 4 != 0
  629.     */
  630.    if (shader->fetched_prim_count > 0) {
  631.       gs_flush(shader);
  632.    }
  633.  
  634.    debug_assert(shader->fetched_prim_count == 0);
  635.  
  636.    /* Update prim_info:
  637.     */
  638.    output_prims->linear = TRUE;
  639.    output_prims->elts = NULL;
  640.    output_prims->start = 0;
  641.    output_prims->count = shader->emitted_vertices;
  642.    output_prims->prim = shader->output_primitive;
  643.    output_prims->flags = 0x0;
  644.    output_prims->primitive_lengths = shader->primitive_lengths;
  645.    output_prims->primitive_count = shader->emitted_primitives;
  646.    output_verts->count = shader->emitted_vertices;
  647.  
  648.    if (shader->draw->collect_statistics) {
  649.       unsigned i;
  650.       for (i = 0; i < shader->emitted_primitives; ++i) {
  651.          shader->draw->statistics.gs_primitives +=
  652.             u_decomposed_prims_for_vertices(shader->output_primitive,
  653.                                             shader->primitive_lengths[i]);
  654.       }
  655.    }
  656.  
  657. #if 0
  658.    debug_printf("GS finished, prims = %d, verts = %d\n",
  659.                 output_prims->primitive_count,
  660.                 output_verts->count);
  661. #endif
  662.  
  663.    return shader->emitted_vertices;
  664. }
  665.  
  666. void draw_geometry_shader_prepare(struct draw_geometry_shader *shader,
  667.                                   struct draw_context *draw)
  668. {
  669. #ifdef HAVE_LLVM
  670.    boolean use_llvm = draw_get_option_use_llvm();
  671. #else
  672.    boolean use_llvm = FALSE;
  673. #endif
  674.    if (!use_llvm && shader && shader->machine->Tokens != shader->state.tokens) {
  675.       tgsi_exec_machine_bind_shader(shader->machine,
  676.                                     shader->state.tokens,
  677.                                     draw->gs.tgsi.sampler);
  678.    }
  679. }
  680.  
  681.  
  682. boolean
  683. draw_gs_init( struct draw_context *draw )
  684. {
  685.    draw->gs.tgsi.machine = tgsi_exec_machine_create();
  686.    if (!draw->gs.tgsi.machine)
  687.       return FALSE;
  688.  
  689.    draw->gs.tgsi.machine->Primitives = align_malloc(
  690.       MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16);
  691.    if (!draw->gs.tgsi.machine->Primitives)
  692.       return FALSE;
  693.    memset(draw->gs.tgsi.machine->Primitives, 0,
  694.           MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector));
  695.  
  696.    return TRUE;
  697. }
  698.  
  699. void draw_gs_destroy( struct draw_context *draw )
  700. {
  701.    if (draw->gs.tgsi.machine) {
  702.       align_free(draw->gs.tgsi.machine->Primitives);
  703.       tgsi_exec_machine_destroy(draw->gs.tgsi.machine);
  704.    }
  705. }
  706.  
  707. struct draw_geometry_shader *
  708. draw_create_geometry_shader(struct draw_context *draw,
  709.                             const struct pipe_shader_state *state)
  710. {
  711. #ifdef HAVE_LLVM
  712.    boolean use_llvm = draw_get_option_use_llvm();
  713.    struct llvm_geometry_shader *llvm_gs;
  714. #endif
  715.    struct draw_geometry_shader *gs;
  716.    unsigned i;
  717.  
  718. #ifdef HAVE_LLVM
  719.    if (use_llvm) {
  720.       llvm_gs = CALLOC_STRUCT(llvm_geometry_shader);
  721.  
  722.       if (llvm_gs == NULL)
  723.          return NULL;
  724.  
  725.       gs = &llvm_gs->base;
  726.  
  727.       make_empty_list(&llvm_gs->variants);
  728.    } else
  729. #endif
  730.    {
  731.       gs = CALLOC_STRUCT(draw_geometry_shader);
  732.    }
  733.  
  734.    if (!gs)
  735.       return NULL;
  736.  
  737.    gs->draw = draw;
  738.    gs->state = *state;
  739.    gs->state.tokens = tgsi_dup_tokens(state->tokens);
  740.    if (!gs->state.tokens) {
  741.       FREE(gs);
  742.       return NULL;
  743.    }
  744.  
  745.    tgsi_scan_shader(state->tokens, &gs->info);
  746.  
  747.    /* setup the defaults */
  748.    gs->input_primitive = PIPE_PRIM_TRIANGLES;
  749.    gs->output_primitive = PIPE_PRIM_TRIANGLE_STRIP;
  750.    gs->max_output_vertices = 32;
  751.    gs->max_out_prims = 0;
  752.  
  753. #ifdef HAVE_LLVM
  754.    if (use_llvm) {
  755.       /* TODO: change the input array to handle the following
  756.          vector length, instead of the currently hardcoded
  757.          TGSI_NUM_CHANNELS
  758.       gs->vector_length = lp_native_vector_width / 32;*/
  759.       gs->vector_length = TGSI_NUM_CHANNELS;
  760.    } else
  761. #endif
  762.    {
  763.       gs->vector_length = 1;
  764.    }
  765.  
  766.    for (i = 0; i < gs->info.num_properties; ++i) {
  767.       if (gs->info.properties[i].name ==
  768.           TGSI_PROPERTY_GS_INPUT_PRIM)
  769.          gs->input_primitive = gs->info.properties[i].data[0];
  770.       else if (gs->info.properties[i].name ==
  771.                TGSI_PROPERTY_GS_OUTPUT_PRIM)
  772.          gs->output_primitive = gs->info.properties[i].data[0];
  773.       else if (gs->info.properties[i].name ==
  774.                TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES)
  775.          gs->max_output_vertices = gs->info.properties[i].data[0];
  776.    }
  777.    /* Primitive boundary is bigger than max_output_vertices by one, because
  778.     * the specification says that the geometry shader should exit if the
  779.     * number of emitted vertices is bigger or equal to max_output_vertices and
  780.     * we can't do that because we're running in the SoA mode, which means that
  781.     * our storing routines will keep getting called on channels that have
  782.     * overflown.
  783.     * So we need some scratch area where we can keep writing the overflown
  784.     * vertices without overwriting anything important or crashing.
  785.     */
  786.    gs->primitive_boundary = gs->max_output_vertices + 1;
  787.  
  788.    for (i = 0; i < gs->info.num_outputs; i++) {
  789.       if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION &&
  790.           gs->info.output_semantic_index[i] == 0)
  791.          gs->position_output = i;
  792.       if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX)
  793.          gs->viewport_index_output = i;
  794.       if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) {
  795.          debug_assert(gs->info.output_semantic_index[i] <
  796.                       PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT);
  797.          gs->clipdistance_output[gs->info.output_semantic_index[i]] = i;
  798.       }
  799.       if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_CULLDIST) {
  800.          debug_assert(gs->info.output_semantic_index[i] <
  801.                       PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT);
  802.          gs->culldistance_output[gs->info.output_semantic_index[i]] = i;
  803.       }
  804.    }
  805.  
  806.    gs->machine = draw->gs.tgsi.machine;
  807.  
  808. #ifdef HAVE_LLVM
  809.    if (use_llvm) {
  810.       int vector_size = gs->vector_length * sizeof(float);
  811.       gs->gs_input = align_malloc(sizeof(struct draw_gs_inputs), 16);
  812.       memset(gs->gs_input, 0, sizeof(struct draw_gs_inputs));
  813.       gs->llvm_prim_lengths = 0;
  814.  
  815.       gs->llvm_emitted_primitives = align_malloc(vector_size, vector_size);
  816.       gs->llvm_emitted_vertices = align_malloc(vector_size, vector_size);
  817.       gs->llvm_prim_ids = align_malloc(vector_size, vector_size);
  818.  
  819.       gs->fetch_outputs = llvm_fetch_gs_outputs;
  820.       gs->fetch_inputs = llvm_fetch_gs_input;
  821.       gs->prepare = llvm_gs_prepare;
  822.       gs->run = llvm_gs_run;
  823.  
  824.       gs->jit_context = &draw->llvm->gs_jit_context;
  825.  
  826.  
  827.       llvm_gs->variant_key_size =
  828.          draw_gs_llvm_variant_key_size(
  829.             MAX2(gs->info.file_max[TGSI_FILE_SAMPLER]+1,
  830.                  gs->info.file_max[TGSI_FILE_SAMPLER_VIEW]+1));
  831.    } else
  832. #endif
  833.    {
  834.       gs->fetch_outputs = tgsi_fetch_gs_outputs;
  835.       gs->fetch_inputs = tgsi_fetch_gs_input;
  836.       gs->prepare = tgsi_gs_prepare;
  837.       gs->run = tgsi_gs_run;
  838.    }
  839.  
  840.    return gs;
  841. }
  842.  
  843. void draw_bind_geometry_shader(struct draw_context *draw,
  844.                                struct draw_geometry_shader *dgs)
  845. {
  846.    draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE);
  847.  
  848.    if (dgs) {
  849.       draw->gs.geometry_shader = dgs;
  850.       draw->gs.num_gs_outputs = dgs->info.num_outputs;
  851.       draw->gs.position_output = dgs->position_output;
  852.       draw_geometry_shader_prepare(dgs, draw);
  853.    }
  854.    else {
  855.       draw->gs.geometry_shader = NULL;
  856.       draw->gs.num_gs_outputs = 0;
  857.    }
  858. }
  859.  
  860. void draw_delete_geometry_shader(struct draw_context *draw,
  861.                                  struct draw_geometry_shader *dgs)
  862. {
  863.    if (!dgs) {
  864.       return;
  865.    }
  866. #ifdef HAVE_LLVM
  867.    if (draw_get_option_use_llvm()) {
  868.       struct llvm_geometry_shader *shader = llvm_geometry_shader(dgs);
  869.       struct draw_gs_llvm_variant_list_item *li;
  870.  
  871.       li = first_elem(&shader->variants);
  872.       while(!at_end(&shader->variants, li)) {
  873.          struct draw_gs_llvm_variant_list_item *next = next_elem(li);
  874.          draw_gs_llvm_destroy_variant(li->base);
  875.          li = next;
  876.       }
  877.  
  878.       assert(shader->variants_cached == 0);
  879.  
  880.       if (dgs->llvm_prim_lengths) {
  881.          unsigned i;
  882.          for (i = 0; i < dgs->max_out_prims; ++i) {
  883.             align_free(dgs->llvm_prim_lengths[i]);
  884.          }
  885.          FREE(dgs->llvm_prim_lengths);
  886.       }
  887.       align_free(dgs->llvm_emitted_primitives);
  888.       align_free(dgs->llvm_emitted_vertices);
  889.       align_free(dgs->llvm_prim_ids);
  890.  
  891.       align_free(dgs->gs_input);
  892.    }
  893. #endif
  894.  
  895.    FREE(dgs->primitive_lengths);
  896.    FREE((void*) dgs->state.tokens);
  897.    FREE(dgs);
  898. }
  899.  
  900.  
  901. #ifdef HAVE_LLVM
  902. void draw_gs_set_current_variant(struct draw_geometry_shader *shader,
  903.                                  struct draw_gs_llvm_variant *variant)
  904. {
  905.    shader->current_variant = variant;
  906. }
  907. #endif
  908.  
  909. /*
  910.  * Called at the very begin of the draw call with a new instance
  911.  * Used to reset state that should persist between primitive restart.
  912.  */
  913. void
  914. draw_geometry_shader_new_instance(struct draw_geometry_shader *gs)
  915. {
  916.    if (!gs)
  917.       return;
  918.  
  919.    gs->in_prim_idx = 0;
  920. }
  921.