Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
3770 | Serge | 1 | /************************************************************************** |
2 | * |
||
3 | * Copyright 2009 VMware, Inc. |
||
4 | * All Rights Reserved. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the |
||
8 | * "Software"), to deal in the Software without restriction, including |
||
9 | * without limitation the rights to use, copy, modify, merge, publish, |
||
10 | * distribute, sub license, and/or sell copies of the Software, and to |
||
11 | * permit persons to whom the Software is furnished to do so, subject to |
||
12 | * the following conditions: |
||
13 | * |
||
14 | * The above copyright notice and this permission notice (including the |
||
15 | * next paragraph) shall be included in all copies or substantial portions |
||
16 | * of the Software. |
||
17 | * |
||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
||
21 | * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR |
||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | * |
||
26 | **************************************************************************/ |
||
27 | |||
28 | #include "draw_gs.h" |
||
29 | |||
30 | #include "draw_private.h" |
||
31 | #include "draw_context.h" |
||
32 | #ifdef HAVE_LLVM |
||
33 | #include "draw_llvm.h" |
||
34 | #endif |
||
35 | |||
36 | #include "tgsi/tgsi_parse.h" |
||
37 | #include "tgsi/tgsi_exec.h" |
||
38 | |||
39 | #include "pipe/p_shader_tokens.h" |
||
40 | |||
41 | #include "util/u_math.h" |
||
42 | #include "util/u_memory.h" |
||
43 | #include "util/u_prim.h" |
||
44 | |||
45 | /* fixme: move it from here */ |
||
46 | #define MAX_PRIMITIVES 64 |
||
47 | |||
48 | static INLINE int |
||
49 | draw_gs_get_input_index(int semantic, int index, |
||
50 | const struct tgsi_shader_info *input_info) |
||
51 | { |
||
52 | int i; |
||
53 | const ubyte *input_semantic_names = input_info->output_semantic_name; |
||
54 | const ubyte *input_semantic_indices = input_info->output_semantic_index; |
||
55 | for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { |
||
56 | if (input_semantic_names[i] == semantic && |
||
57 | input_semantic_indices[i] == index) |
||
58 | return i; |
||
59 | } |
||
60 | return -1; |
||
61 | } |
||
62 | |||
63 | /** |
||
64 | * We execute geometry shaders in the SOA mode, so ideally we want to |
||
65 | * flush when the number of currently fetched primitives is equal to |
||
66 | * the number of elements in the SOA vector. This ensures that the |
||
67 | * throughput is optimized for the given vector instrunction set. |
||
68 | */ |
||
69 | static INLINE boolean |
||
70 | draw_gs_should_flush(struct draw_geometry_shader *shader) |
||
71 | { |
||
72 | return (shader->fetched_prim_count == shader->vector_length); |
||
73 | } |
||
74 | |||
75 | /*#define DEBUG_OUTPUTS 1*/ |
||
76 | static void |
||
77 | tgsi_fetch_gs_outputs(struct draw_geometry_shader *shader, |
||
78 | unsigned num_primitives, |
||
79 | float (**p_output)[4]) |
||
80 | { |
||
81 | struct tgsi_exec_machine *machine = shader->machine; |
||
82 | unsigned prim_idx, j, slot; |
||
83 | unsigned current_idx = 0; |
||
84 | float (*output)[4]; |
||
85 | |||
86 | output = *p_output; |
||
87 | |||
88 | /* Unswizzle all output results. |
||
89 | */ |
||
90 | |||
91 | for (prim_idx = 0; prim_idx < num_primitives; ++prim_idx) { |
||
92 | unsigned num_verts_per_prim = machine->Primitives[prim_idx]; |
||
93 | shader->primitive_lengths[prim_idx + shader->emitted_primitives] = |
||
94 | machine->Primitives[prim_idx]; |
||
95 | shader->emitted_vertices += num_verts_per_prim; |
||
96 | for (j = 0; j < num_verts_per_prim; j++, current_idx++) { |
||
97 | int idx = current_idx * shader->info.num_outputs; |
||
98 | #ifdef DEBUG_OUTPUTS |
||
99 | debug_printf("%d) Output vert:\n", idx / shader->info.num_outputs); |
||
100 | #endif |
||
101 | for (slot = 0; slot < shader->info.num_outputs; slot++) { |
||
102 | output[slot][0] = machine->Outputs[idx + slot].xyzw[0].f[0]; |
||
103 | output[slot][1] = machine->Outputs[idx + slot].xyzw[1].f[0]; |
||
104 | output[slot][2] = machine->Outputs[idx + slot].xyzw[2].f[0]; |
||
105 | output[slot][3] = machine->Outputs[idx + slot].xyzw[3].f[0]; |
||
106 | #ifdef DEBUG_OUTPUTS |
||
107 | debug_printf("\t%d: %f %f %f %f\n", slot, |
||
108 | output[slot][0], |
||
109 | output[slot][1], |
||
110 | output[slot][2], |
||
111 | output[slot][3]); |
||
112 | #endif |
||
113 | debug_assert(!util_is_inf_or_nan(output[slot][0])); |
||
114 | } |
||
115 | output = (float (*)[4])((char *)output + shader->vertex_size); |
||
116 | } |
||
117 | } |
||
118 | *p_output = output; |
||
119 | shader->emitted_primitives += num_primitives; |
||
120 | } |
||
121 | |||
122 | /*#define DEBUG_INPUTS 1*/ |
||
123 | static void tgsi_fetch_gs_input(struct draw_geometry_shader *shader, |
||
124 | unsigned *indices, |
||
125 | unsigned num_vertices, |
||
126 | unsigned prim_idx) |
||
127 | { |
||
128 | struct tgsi_exec_machine *machine = shader->machine; |
||
129 | unsigned slot, vs_slot, i; |
||
130 | unsigned input_vertex_stride = shader->input_vertex_stride; |
||
131 | const float (*input_ptr)[4]; |
||
132 | |||
133 | input_ptr = shader->input; |
||
134 | |||
135 | for (i = 0; i < num_vertices; ++i) { |
||
136 | const float (*input)[4]; |
||
137 | #if DEBUG_INPUTS |
||
138 | debug_printf("%d) vertex index = %d (prim idx = %d)\n", |
||
139 | i, indices[i], prim_idx); |
||
140 | #endif |
||
141 | input = (const float (*)[4])( |
||
142 | (const char *)input_ptr + (indices[i] * input_vertex_stride)); |
||
143 | for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) { |
||
144 | unsigned idx = i * TGSI_EXEC_MAX_INPUT_ATTRIBS + slot; |
||
145 | if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) { |
||
146 | machine->Inputs[idx].xyzw[0].f[prim_idx] = |
||
147 | (float)shader->in_prim_idx; |
||
148 | machine->Inputs[idx].xyzw[1].f[prim_idx] = |
||
149 | (float)shader->in_prim_idx; |
||
150 | machine->Inputs[idx].xyzw[2].f[prim_idx] = |
||
151 | (float)shader->in_prim_idx; |
||
152 | machine->Inputs[idx].xyzw[3].f[prim_idx] = |
||
153 | (float)shader->in_prim_idx; |
||
154 | } else { |
||
155 | vs_slot = draw_gs_get_input_index( |
||
156 | shader->info.input_semantic_name[slot], |
||
157 | shader->info.input_semantic_index[slot], |
||
158 | shader->input_info); |
||
159 | if (vs_slot < 0) { |
||
160 | debug_printf("VS/GS signature mismatch!\n"); |
||
161 | machine->Inputs[idx].xyzw[0].f[prim_idx] = 0; |
||
162 | machine->Inputs[idx].xyzw[1].f[prim_idx] = 0; |
||
163 | machine->Inputs[idx].xyzw[2].f[prim_idx] = 0; |
||
164 | machine->Inputs[idx].xyzw[3].f[prim_idx] = 0; |
||
165 | } else { |
||
166 | #if DEBUG_INPUTS |
||
167 | debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n", |
||
168 | slot, vs_slot, idx); |
||
169 | assert(!util_is_inf_or_nan(input[vs_slot][0])); |
||
170 | assert(!util_is_inf_or_nan(input[vs_slot][1])); |
||
171 | assert(!util_is_inf_or_nan(input[vs_slot][2])); |
||
172 | assert(!util_is_inf_or_nan(input[vs_slot][3])); |
||
173 | #endif |
||
174 | machine->Inputs[idx].xyzw[0].f[prim_idx] = input[vs_slot][0]; |
||
175 | machine->Inputs[idx].xyzw[1].f[prim_idx] = input[vs_slot][1]; |
||
176 | machine->Inputs[idx].xyzw[2].f[prim_idx] = input[vs_slot][2]; |
||
177 | machine->Inputs[idx].xyzw[3].f[prim_idx] = input[vs_slot][3]; |
||
178 | #if DEBUG_INPUTS |
||
179 | debug_printf("\t\t%f %f %f %f\n", |
||
180 | machine->Inputs[idx].xyzw[0].f[prim_idx], |
||
181 | machine->Inputs[idx].xyzw[1].f[prim_idx], |
||
182 | machine->Inputs[idx].xyzw[2].f[prim_idx], |
||
183 | machine->Inputs[idx].xyzw[3].f[prim_idx]); |
||
184 | #endif |
||
185 | ++vs_slot; |
||
186 | } |
||
187 | } |
||
188 | } |
||
189 | } |
||
190 | } |
||
191 | |||
192 | static void tgsi_gs_prepare(struct draw_geometry_shader *shader, |
||
193 | const void *constants[PIPE_MAX_CONSTANT_BUFFERS], |
||
194 | const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS]) |
||
195 | { |
||
196 | struct tgsi_exec_machine *machine = shader->machine; |
||
197 | |||
198 | tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS, |
||
199 | constants, constants_size); |
||
200 | } |
||
201 | |||
202 | static unsigned tgsi_gs_run(struct draw_geometry_shader *shader, |
||
203 | unsigned input_primitives) |
||
204 | { |
||
205 | struct tgsi_exec_machine *machine = shader->machine; |
||
206 | |||
207 | tgsi_set_exec_mask(machine, |
||
208 | 1, |
||
209 | input_primitives > 1, |
||
210 | input_primitives > 2, |
||
211 | input_primitives > 3); |
||
212 | |||
213 | /* run interpreter */ |
||
214 | tgsi_exec_machine_run(machine); |
||
215 | |||
216 | return |
||
217 | machine->Temps[TGSI_EXEC_TEMP_PRIMITIVE_I].xyzw[TGSI_EXEC_TEMP_PRIMITIVE_C].u[0]; |
||
218 | } |
||
219 | |||
220 | #ifdef HAVE_LLVM |
||
221 | |||
222 | static void |
||
223 | llvm_fetch_gs_input(struct draw_geometry_shader *shader, |
||
224 | unsigned *indices, |
||
225 | unsigned num_vertices, |
||
226 | unsigned prim_idx) |
||
227 | { |
||
228 | unsigned slot, vs_slot, i; |
||
229 | unsigned input_vertex_stride = shader->input_vertex_stride; |
||
230 | const float (*input_ptr)[4]; |
||
231 | float (*input_data)[6][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS][TGSI_NUM_CHANNELS] = &shader->gs_input->data; |
||
232 | |||
233 | shader->llvm_prim_ids[shader->fetched_prim_count] = |
||
234 | shader->in_prim_idx; |
||
235 | |||
236 | input_ptr = shader->input; |
||
237 | |||
238 | for (i = 0; i < num_vertices; ++i) { |
||
239 | const float (*input)[4]; |
||
240 | #if DEBUG_INPUTS |
||
241 | debug_printf("%d) vertex index = %d (prim idx = %d)\n", |
||
242 | i, indices[i], prim_idx); |
||
243 | #endif |
||
244 | input = (const float (*)[4])( |
||
245 | (const char *)input_ptr + (indices[i] * input_vertex_stride)); |
||
246 | for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) { |
||
247 | if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) { |
||
248 | /* skip. we handle system values through gallivm */ |
||
249 | } else { |
||
250 | vs_slot = draw_gs_get_input_index( |
||
251 | shader->info.input_semantic_name[slot], |
||
252 | shader->info.input_semantic_index[slot], |
||
253 | shader->input_info); |
||
254 | if (vs_slot < 0) { |
||
255 | debug_printf("VS/GS signature mismatch!\n"); |
||
256 | (*input_data)[i][slot][0][prim_idx] = 0; |
||
257 | (*input_data)[i][slot][1][prim_idx] = 0; |
||
258 | (*input_data)[i][slot][2][prim_idx] = 0; |
||
259 | (*input_data)[i][slot][3][prim_idx] = 0; |
||
260 | } else { |
||
261 | #if DEBUG_INPUTS |
||
262 | debug_printf("\tSlot = %d, vs_slot = %d, i = %d:\n", |
||
263 | slot, vs_slot, i); |
||
264 | assert(!util_is_inf_or_nan(input[vs_slot][0])); |
||
265 | assert(!util_is_inf_or_nan(input[vs_slot][1])); |
||
266 | assert(!util_is_inf_or_nan(input[vs_slot][2])); |
||
267 | assert(!util_is_inf_or_nan(input[vs_slot][3])); |
||
268 | #endif |
||
269 | (*input_data)[i][slot][0][prim_idx] = input[vs_slot][0]; |
||
270 | (*input_data)[i][slot][1][prim_idx] = input[vs_slot][1]; |
||
271 | (*input_data)[i][slot][2][prim_idx] = input[vs_slot][2]; |
||
272 | (*input_data)[i][slot][3][prim_idx] = input[vs_slot][3]; |
||
273 | #if DEBUG_INPUTS |
||
274 | debug_printf("\t\t%f %f %f %f\n", |
||
275 | (*input_data)[i][slot][0][prim_idx], |
||
276 | (*input_data)[i][slot][1][prim_idx], |
||
277 | (*input_data)[i][slot][2][prim_idx], |
||
278 | (*input_data)[i][slot][3][prim_idx]); |
||
279 | #endif |
||
280 | ++vs_slot; |
||
281 | } |
||
282 | } |
||
283 | } |
||
284 | } |
||
285 | } |
||
286 | |||
287 | static void |
||
288 | llvm_fetch_gs_outputs(struct draw_geometry_shader *shader, |
||
289 | unsigned num_primitives, |
||
290 | float (**p_output)[4]) |
||
291 | { |
||
292 | int total_verts = 0; |
||
293 | int vertex_count = 0; |
||
294 | int total_prims = 0; |
||
295 | int max_prims_per_invocation = 0; |
||
296 | char *output_ptr = (char*)shader->gs_output; |
||
297 | int i, j, prim_idx; |
||
298 | unsigned next_prim_boundary = shader->primitive_boundary; |
||
299 | |||
300 | for (i = 0; i < shader->vector_length; ++i) { |
||
301 | int prims = shader->llvm_emitted_primitives[i]; |
||
302 | total_prims += prims; |
||
303 | max_prims_per_invocation = MAX2(max_prims_per_invocation, prims); |
||
304 | } |
||
305 | for (i = 0; i < shader->vector_length; ++i) { |
||
306 | total_verts += shader->llvm_emitted_vertices[i]; |
||
307 | } |
||
308 | |||
309 | output_ptr += shader->emitted_vertices * shader->vertex_size; |
||
310 | for (i = 0; i < shader->vector_length - 1; ++i) { |
||
311 | int current_verts = shader->llvm_emitted_vertices[i]; |
||
312 | int next_verts = shader->llvm_emitted_vertices[i + 1]; |
||
313 | #if 0 |
||
314 | int j; |
||
315 | for (j = 0; j < current_verts; ++j) { |
||
316 | struct vertex_header *vh = (struct vertex_header *) |
||
317 | (output_ptr + shader->vertex_size * (i * next_prim_boundary + j)); |
||
318 | debug_printf("--- %d) [%f, %f, %f, %f]\n", j + vertex_count, |
||
319 | vh->data[0][0], vh->data[0][1], vh->data[0][2], vh->data[0][3]); |
||
320 | |||
321 | } |
||
322 | #endif |
||
323 | debug_assert(current_verts <= shader->max_output_vertices); |
||
324 | debug_assert(next_verts <= shader->max_output_vertices); |
||
325 | if (next_verts) { |
||
326 | memmove(output_ptr + (vertex_count + current_verts) * shader->vertex_size, |
||
327 | output_ptr + ((i + 1) * next_prim_boundary) * shader->vertex_size, |
||
328 | shader->vertex_size * next_verts); |
||
329 | } |
||
330 | vertex_count += current_verts; |
||
331 | } |
||
332 | |||
333 | #if 0 |
||
334 | { |
||
335 | int i; |
||
336 | for (i = 0; i < total_verts; ++i) { |
||
337 | struct vertex_header *vh = (struct vertex_header *)(output_ptr + shader->vertex_size * i); |
||
338 | debug_printf("%d) Vertex:\n", i); |
||
339 | for (j = 0; j < shader->info.num_outputs; ++j) { |
||
340 | unsigned *udata = (unsigned*)vh->data[j]; |
||
341 | debug_printf(" %d) [%f, %f, %f, %f] [%d, %d, %d, %d]\n", j, |
||
342 | vh->data[j][0], vh->data[j][1], vh->data[j][2], vh->data[j][3], |
||
343 | udata[0], udata[1], udata[2], udata[3]); |
||
344 | } |
||
345 | |||
346 | } |
||
347 | } |
||
348 | #endif |
||
349 | |||
350 | prim_idx = 0; |
||
351 | for (i = 0; i < shader->vector_length; ++i) { |
||
352 | int num_prims = shader->llvm_emitted_primitives[i]; |
||
353 | for (j = 0; j < num_prims; ++j) { |
||
354 | int prim_length = |
||
355 | shader->llvm_prim_lengths[j][i]; |
||
356 | shader->primitive_lengths[shader->emitted_primitives + prim_idx] = |
||
357 | prim_length; |
||
358 | ++prim_idx; |
||
359 | } |
||
360 | } |
||
361 | |||
362 | shader->emitted_primitives += total_prims; |
||
363 | shader->emitted_vertices += total_verts; |
||
364 | } |
||
365 | |||
366 | static void |
||
367 | llvm_gs_prepare(struct draw_geometry_shader *shader, |
||
368 | const void *constants[PIPE_MAX_CONSTANT_BUFFERS], |
||
369 | const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS]) |
||
370 | { |
||
371 | } |
||
372 | |||
373 | static unsigned |
||
374 | llvm_gs_run(struct draw_geometry_shader *shader, |
||
375 | unsigned input_primitives) |
||
376 | { |
||
377 | unsigned ret; |
||
378 | char *input = (char*)shader->gs_output; |
||
379 | |||
380 | input += (shader->emitted_vertices * shader->vertex_size); |
||
381 | |||
382 | ret = shader->current_variant->jit_func( |
||
383 | shader->jit_context, shader->gs_input->data, |
||
384 | (struct vertex_header*)input, |
||
385 | input_primitives, |
||
386 | shader->draw->instance_id, |
||
387 | shader->llvm_prim_ids); |
||
388 | |||
389 | return ret; |
||
390 | } |
||
391 | |||
392 | #endif |
||
393 | |||
394 | static void gs_flush(struct draw_geometry_shader *shader) |
||
395 | { |
||
396 | unsigned out_prim_count; |
||
397 | |||
398 | unsigned input_primitives = shader->fetched_prim_count; |
||
399 | |||
400 | if (shader->draw->collect_statistics) { |
||
401 | shader->draw->statistics.gs_invocations += input_primitives; |
||
402 | } |
||
403 | |||
404 | debug_assert(input_primitives > 0 && |
||
405 | input_primitives <= 4); |
||
406 | |||
407 | out_prim_count = shader->run(shader, input_primitives); |
||
408 | shader->fetch_outputs(shader, out_prim_count, |
||
409 | &shader->tmp_output); |
||
410 | |||
411 | #if 0 |
||
412 | debug_printf("PRIM emitted prims = %d (verts=%d), cur prim count = %d\n", |
||
413 | shader->emitted_primitives, shader->emitted_vertices, |
||
414 | out_prim_count); |
||
415 | #endif |
||
416 | |||
417 | shader->fetched_prim_count = 0; |
||
418 | } |
||
419 | |||
420 | static void gs_point(struct draw_geometry_shader *shader, |
||
421 | int idx) |
||
422 | { |
||
423 | unsigned indices[1]; |
||
424 | |||
425 | indices[0] = idx; |
||
426 | |||
427 | shader->fetch_inputs(shader, indices, 1, |
||
428 | shader->fetched_prim_count); |
||
429 | ++shader->in_prim_idx; |
||
430 | ++shader->fetched_prim_count; |
||
431 | |||
432 | if (draw_gs_should_flush(shader)) |
||
433 | gs_flush(shader); |
||
434 | } |
||
435 | |||
436 | static void gs_line(struct draw_geometry_shader *shader, |
||
437 | int i0, int i1) |
||
438 | { |
||
439 | unsigned indices[2]; |
||
440 | |||
441 | indices[0] = i0; |
||
442 | indices[1] = i1; |
||
443 | |||
444 | shader->fetch_inputs(shader, indices, 2, |
||
445 | shader->fetched_prim_count); |
||
446 | ++shader->in_prim_idx; |
||
447 | ++shader->fetched_prim_count; |
||
448 | |||
449 | if (draw_gs_should_flush(shader)) |
||
450 | gs_flush(shader); |
||
451 | } |
||
452 | |||
453 | static void gs_line_adj(struct draw_geometry_shader *shader, |
||
454 | int i0, int i1, int i2, int i3) |
||
455 | { |
||
456 | unsigned indices[4]; |
||
457 | |||
458 | indices[0] = i0; |
||
459 | indices[1] = i1; |
||
460 | indices[2] = i2; |
||
461 | indices[3] = i3; |
||
462 | |||
463 | shader->fetch_inputs(shader, indices, 4, |
||
464 | shader->fetched_prim_count); |
||
465 | ++shader->in_prim_idx; |
||
466 | ++shader->fetched_prim_count; |
||
467 | |||
468 | if (draw_gs_should_flush(shader)) |
||
469 | gs_flush(shader); |
||
470 | } |
||
471 | |||
472 | static void gs_tri(struct draw_geometry_shader *shader, |
||
473 | int i0, int i1, int i2) |
||
474 | { |
||
475 | unsigned indices[3]; |
||
476 | |||
477 | indices[0] = i0; |
||
478 | indices[1] = i1; |
||
479 | indices[2] = i2; |
||
480 | |||
481 | shader->fetch_inputs(shader, indices, 3, |
||
482 | shader->fetched_prim_count); |
||
483 | ++shader->in_prim_idx; |
||
484 | ++shader->fetched_prim_count; |
||
485 | |||
486 | if (draw_gs_should_flush(shader)) |
||
487 | gs_flush(shader); |
||
488 | } |
||
489 | |||
490 | static void gs_tri_adj(struct draw_geometry_shader *shader, |
||
491 | int i0, int i1, int i2, |
||
492 | int i3, int i4, int i5) |
||
493 | { |
||
494 | unsigned indices[6]; |
||
495 | |||
496 | indices[0] = i0; |
||
497 | indices[1] = i1; |
||
498 | indices[2] = i2; |
||
499 | indices[3] = i3; |
||
500 | indices[4] = i4; |
||
501 | indices[5] = i5; |
||
502 | |||
503 | shader->fetch_inputs(shader, indices, 6, |
||
504 | shader->fetched_prim_count); |
||
505 | ++shader->in_prim_idx; |
||
506 | ++shader->fetched_prim_count; |
||
507 | |||
508 | if (draw_gs_should_flush(shader)) |
||
509 | gs_flush(shader); |
||
510 | } |
||
511 | |||
512 | #define FUNC gs_run |
||
513 | #define GET_ELT(idx) (idx) |
||
514 | #include "draw_gs_tmp.h" |
||
515 | |||
516 | |||
517 | #define FUNC gs_run_elts |
||
518 | #define LOCAL_VARS const ushort *elts = input_prims->elts; |
||
519 | #define GET_ELT(idx) (elts[idx]) |
||
520 | #include "draw_gs_tmp.h" |
||
521 | |||
522 | |||
523 | /** |
||
524 | * Execute geometry shader. |
||
525 | */ |
||
526 | int draw_geometry_shader_run(struct draw_geometry_shader *shader, |
||
527 | const void *constants[PIPE_MAX_CONSTANT_BUFFERS], |
||
528 | const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], |
||
529 | const struct draw_vertex_info *input_verts, |
||
530 | const struct draw_prim_info *input_prim, |
||
531 | const struct tgsi_shader_info *input_info, |
||
532 | struct draw_vertex_info *output_verts, |
||
533 | struct draw_prim_info *output_prims ) |
||
534 | { |
||
535 | const float (*input)[4] = (const float (*)[4])input_verts->verts->data; |
||
536 | unsigned input_stride = input_verts->vertex_size; |
||
537 | unsigned num_outputs = shader->info.num_outputs; |
||
538 | unsigned vertex_size = sizeof(struct vertex_header) + num_outputs * 4 * sizeof(float); |
||
539 | unsigned num_input_verts = input_prim->linear ? |
||
540 | input_verts->count : |
||
541 | input_prim->count; |
||
542 | unsigned num_in_primitives = |
||
543 | align( |
||
544 | MAX2(u_decomposed_prims_for_vertices(input_prim->prim, |
||
545 | num_input_verts), |
||
546 | u_decomposed_prims_for_vertices(shader->input_primitive, |
||
547 | num_input_verts)), |
||
548 | shader->vector_length); |
||
549 | unsigned max_out_prims = |
||
550 | u_decomposed_prims_for_vertices(shader->output_primitive, |
||
551 | shader->max_output_vertices) |
||
552 | * num_in_primitives; |
||
553 | |||
554 | //Assume at least one primitive |
||
555 | max_out_prims = MAX2(max_out_prims, 1); |
||
556 | |||
557 | |||
558 | output_verts->vertex_size = vertex_size; |
||
559 | output_verts->stride = output_verts->vertex_size; |
||
560 | /* we allocate exactly one extra vertex per primitive to allow the GS to emit |
||
561 | * overflown vertices into some area where they won't harm anyone */ |
||
562 | output_verts->verts = |
||
563 | (struct vertex_header *)MALLOC(output_verts->vertex_size * |
||
564 | max_out_prims * |
||
565 | shader->primitive_boundary); |
||
566 | |||
567 | #if 0 |
||
568 | debug_printf("%s count = %d (in prims # = %d)\n", |
||
569 | __FUNCTION__, num_input_verts, num_in_primitives); |
||
570 | debug_printf("\tlinear = %d, prim_info->count = %d\n", |
||
571 | input_prim->linear, input_prim->count); |
||
572 | debug_printf("\tprim pipe = %s, shader in = %s, shader out = %s, max out = %d\n", |
||
573 | u_prim_name(input_prim->prim), |
||
574 | u_prim_name(shader->input_primitive), |
||
575 | u_prim_name(shader->output_primitive), |
||
576 | shader->max_output_vertices); |
||
577 | #endif |
||
578 | |||
579 | shader->emitted_vertices = 0; |
||
580 | shader->emitted_primitives = 0; |
||
581 | shader->vertex_size = vertex_size; |
||
582 | shader->tmp_output = (float (*)[4])output_verts->verts->data; |
||
583 | shader->fetched_prim_count = 0; |
||
584 | shader->input_vertex_stride = input_stride; |
||
585 | shader->input = input; |
||
586 | shader->input_info = input_info; |
||
587 | FREE(shader->primitive_lengths); |
||
588 | shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned)); |
||
589 | |||
590 | |||
591 | #ifdef HAVE_LLVM |
||
592 | if (draw_get_option_use_llvm()) { |
||
593 | shader->gs_output = output_verts->verts; |
||
594 | if (max_out_prims > shader->max_out_prims) { |
||
595 | unsigned i; |
||
596 | if (shader->llvm_prim_lengths) { |
||
597 | for (i = 0; i < shader->max_out_prims; ++i) { |
||
598 | align_free(shader->llvm_prim_lengths[i]); |
||
599 | } |
||
600 | FREE(shader->llvm_prim_lengths); |
||
601 | } |
||
602 | |||
603 | shader->llvm_prim_lengths = MALLOC(max_out_prims * sizeof(unsigned*)); |
||
604 | for (i = 0; i < max_out_prims; ++i) { |
||
605 | int vector_size = shader->vector_length * sizeof(unsigned); |
||
606 | shader->llvm_prim_lengths[i] = |
||
607 | align_malloc(vector_size, vector_size); |
||
608 | } |
||
609 | |||
610 | shader->max_out_prims = max_out_prims; |
||
611 | } |
||
612 | shader->jit_context->prim_lengths = shader->llvm_prim_lengths; |
||
613 | shader->jit_context->emitted_vertices = shader->llvm_emitted_vertices; |
||
614 | shader->jit_context->emitted_prims = shader->llvm_emitted_primitives; |
||
615 | } |
||
616 | #endif |
||
617 | |||
618 | shader->prepare(shader, constants, constants_size); |
||
619 | |||
620 | if (input_prim->linear) |
||
621 | gs_run(shader, input_prim, input_verts, |
||
622 | output_prims, output_verts); |
||
623 | else |
||
624 | gs_run_elts(shader, input_prim, input_verts, |
||
625 | output_prims, output_verts); |
||
626 | |||
627 | /* Flush the remaining primitives. Will happen if |
||
628 | * num_input_primitives % 4 != 0 |
||
629 | */ |
||
630 | if (shader->fetched_prim_count > 0) { |
||
631 | gs_flush(shader); |
||
632 | } |
||
633 | |||
634 | debug_assert(shader->fetched_prim_count == 0); |
||
635 | |||
636 | /* Update prim_info: |
||
637 | */ |
||
638 | output_prims->linear = TRUE; |
||
639 | output_prims->elts = NULL; |
||
640 | output_prims->start = 0; |
||
641 | output_prims->count = shader->emitted_vertices; |
||
642 | output_prims->prim = shader->output_primitive; |
||
643 | output_prims->flags = 0x0; |
||
644 | output_prims->primitive_lengths = shader->primitive_lengths; |
||
645 | output_prims->primitive_count = shader->emitted_primitives; |
||
646 | output_verts->count = shader->emitted_vertices; |
||
647 | |||
648 | if (shader->draw->collect_statistics) { |
||
649 | unsigned i; |
||
650 | for (i = 0; i < shader->emitted_primitives; ++i) { |
||
651 | shader->draw->statistics.gs_primitives += |
||
652 | u_decomposed_prims_for_vertices(shader->output_primitive, |
||
653 | shader->primitive_lengths[i]); |
||
654 | } |
||
655 | } |
||
656 | |||
657 | #if 0 |
||
658 | debug_printf("GS finished, prims = %d, verts = %d\n", |
||
659 | output_prims->primitive_count, |
||
660 | output_verts->count); |
||
661 | #endif |
||
662 | |||
663 | return shader->emitted_vertices; |
||
664 | } |
||
665 | |||
666 | void draw_geometry_shader_prepare(struct draw_geometry_shader *shader, |
||
667 | struct draw_context *draw) |
||
668 | { |
||
669 | #ifdef HAVE_LLVM |
||
670 | boolean use_llvm = draw_get_option_use_llvm(); |
||
671 | #else |
||
672 | boolean use_llvm = FALSE; |
||
673 | #endif |
||
674 | if (!use_llvm && shader && shader->machine->Tokens != shader->state.tokens) { |
||
675 | tgsi_exec_machine_bind_shader(shader->machine, |
||
676 | shader->state.tokens, |
||
677 | draw->gs.tgsi.sampler); |
||
678 | } |
||
679 | } |
||
680 | |||
681 | |||
682 | boolean |
||
683 | draw_gs_init( struct draw_context *draw ) |
||
684 | { |
||
685 | draw->gs.tgsi.machine = tgsi_exec_machine_create(); |
||
686 | if (!draw->gs.tgsi.machine) |
||
687 | return FALSE; |
||
688 | |||
689 | draw->gs.tgsi.machine->Primitives = align_malloc( |
||
690 | MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector), 16); |
||
691 | if (!draw->gs.tgsi.machine->Primitives) |
||
692 | return FALSE; |
||
693 | memset(draw->gs.tgsi.machine->Primitives, 0, |
||
694 | MAX_PRIMITIVES * sizeof(struct tgsi_exec_vector)); |
||
695 | |||
696 | return TRUE; |
||
697 | } |
||
698 | |||
699 | void draw_gs_destroy( struct draw_context *draw ) |
||
700 | { |
||
701 | if (draw->gs.tgsi.machine) { |
||
702 | align_free(draw->gs.tgsi.machine->Primitives); |
||
703 | tgsi_exec_machine_destroy(draw->gs.tgsi.machine); |
||
704 | } |
||
705 | } |
||
706 | |||
707 | struct draw_geometry_shader * |
||
708 | draw_create_geometry_shader(struct draw_context *draw, |
||
709 | const struct pipe_shader_state *state) |
||
710 | { |
||
711 | #ifdef HAVE_LLVM |
||
712 | boolean use_llvm = draw_get_option_use_llvm(); |
||
713 | struct llvm_geometry_shader *llvm_gs; |
||
714 | #endif |
||
715 | struct draw_geometry_shader *gs; |
||
716 | unsigned i; |
||
717 | |||
718 | #ifdef HAVE_LLVM |
||
719 | if (use_llvm) { |
||
720 | llvm_gs = CALLOC_STRUCT(llvm_geometry_shader); |
||
721 | |||
722 | if (llvm_gs == NULL) |
||
723 | return NULL; |
||
724 | |||
725 | gs = &llvm_gs->base; |
||
726 | |||
727 | make_empty_list(&llvm_gs->variants); |
||
728 | } else |
||
729 | #endif |
||
730 | { |
||
731 | gs = CALLOC_STRUCT(draw_geometry_shader); |
||
732 | } |
||
733 | |||
734 | if (!gs) |
||
735 | return NULL; |
||
736 | |||
737 | gs->draw = draw; |
||
738 | gs->state = *state; |
||
739 | gs->state.tokens = tgsi_dup_tokens(state->tokens); |
||
740 | if (!gs->state.tokens) { |
||
741 | FREE(gs); |
||
742 | return NULL; |
||
743 | } |
||
744 | |||
745 | tgsi_scan_shader(state->tokens, &gs->info); |
||
746 | |||
747 | /* setup the defaults */ |
||
748 | gs->input_primitive = PIPE_PRIM_TRIANGLES; |
||
749 | gs->output_primitive = PIPE_PRIM_TRIANGLE_STRIP; |
||
750 | gs->max_output_vertices = 32; |
||
751 | gs->max_out_prims = 0; |
||
752 | |||
753 | #ifdef HAVE_LLVM |
||
754 | if (use_llvm) { |
||
755 | /* TODO: change the input array to handle the following |
||
756 | vector length, instead of the currently hardcoded |
||
757 | TGSI_NUM_CHANNELS |
||
758 | gs->vector_length = lp_native_vector_width / 32;*/ |
||
759 | gs->vector_length = TGSI_NUM_CHANNELS; |
||
760 | } else |
||
761 | #endif |
||
762 | { |
||
763 | gs->vector_length = 1; |
||
764 | } |
||
765 | |||
766 | for (i = 0; i < gs->info.num_properties; ++i) { |
||
767 | if (gs->info.properties[i].name == |
||
768 | TGSI_PROPERTY_GS_INPUT_PRIM) |
||
769 | gs->input_primitive = gs->info.properties[i].data[0]; |
||
770 | else if (gs->info.properties[i].name == |
||
771 | TGSI_PROPERTY_GS_OUTPUT_PRIM) |
||
772 | gs->output_primitive = gs->info.properties[i].data[0]; |
||
773 | else if (gs->info.properties[i].name == |
||
774 | TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) |
||
775 | gs->max_output_vertices = gs->info.properties[i].data[0]; |
||
776 | } |
||
777 | /* Primitive boundary is bigger than max_output_vertices by one, because |
||
778 | * the specification says that the geometry shader should exit if the |
||
779 | * number of emitted vertices is bigger or equal to max_output_vertices and |
||
780 | * we can't do that because we're running in the SoA mode, which means that |
||
781 | * our storing routines will keep getting called on channels that have |
||
782 | * overflown. |
||
783 | * So we need some scratch area where we can keep writing the overflown |
||
784 | * vertices without overwriting anything important or crashing. |
||
785 | */ |
||
786 | gs->primitive_boundary = gs->max_output_vertices + 1; |
||
787 | |||
788 | for (i = 0; i < gs->info.num_outputs; i++) { |
||
789 | if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION && |
||
790 | gs->info.output_semantic_index[i] == 0) |
||
791 | gs->position_output = i; |
||
792 | if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX) |
||
793 | gs->viewport_index_output = i; |
||
794 | if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) { |
||
795 | debug_assert(gs->info.output_semantic_index[i] < |
||
796 | PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT); |
||
797 | gs->clipdistance_output[gs->info.output_semantic_index[i]] = i; |
||
798 | } |
||
799 | if (gs->info.output_semantic_name[i] == TGSI_SEMANTIC_CULLDIST) { |
||
800 | debug_assert(gs->info.output_semantic_index[i] < |
||
801 | PIPE_MAX_CLIP_OR_CULL_DISTANCE_ELEMENT_COUNT); |
||
802 | gs->culldistance_output[gs->info.output_semantic_index[i]] = i; |
||
803 | } |
||
804 | } |
||
805 | |||
806 | gs->machine = draw->gs.tgsi.machine; |
||
807 | |||
808 | #ifdef HAVE_LLVM |
||
809 | if (use_llvm) { |
||
810 | int vector_size = gs->vector_length * sizeof(float); |
||
811 | gs->gs_input = align_malloc(sizeof(struct draw_gs_inputs), 16); |
||
812 | memset(gs->gs_input, 0, sizeof(struct draw_gs_inputs)); |
||
813 | gs->llvm_prim_lengths = 0; |
||
814 | |||
815 | gs->llvm_emitted_primitives = align_malloc(vector_size, vector_size); |
||
816 | gs->llvm_emitted_vertices = align_malloc(vector_size, vector_size); |
||
817 | gs->llvm_prim_ids = align_malloc(vector_size, vector_size); |
||
818 | |||
819 | gs->fetch_outputs = llvm_fetch_gs_outputs; |
||
820 | gs->fetch_inputs = llvm_fetch_gs_input; |
||
821 | gs->prepare = llvm_gs_prepare; |
||
822 | gs->run = llvm_gs_run; |
||
823 | |||
824 | gs->jit_context = &draw->llvm->gs_jit_context; |
||
825 | |||
826 | |||
827 | llvm_gs->variant_key_size = |
||
828 | draw_gs_llvm_variant_key_size( |
||
829 | MAX2(gs->info.file_max[TGSI_FILE_SAMPLER]+1, |
||
830 | gs->info.file_max[TGSI_FILE_SAMPLER_VIEW]+1)); |
||
831 | } else |
||
832 | #endif |
||
833 | { |
||
834 | gs->fetch_outputs = tgsi_fetch_gs_outputs; |
||
835 | gs->fetch_inputs = tgsi_fetch_gs_input; |
||
836 | gs->prepare = tgsi_gs_prepare; |
||
837 | gs->run = tgsi_gs_run; |
||
838 | } |
||
839 | |||
840 | return gs; |
||
841 | } |
||
842 | |||
843 | void draw_bind_geometry_shader(struct draw_context *draw, |
||
844 | struct draw_geometry_shader *dgs) |
||
845 | { |
||
846 | draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); |
||
847 | |||
848 | if (dgs) { |
||
849 | draw->gs.geometry_shader = dgs; |
||
850 | draw->gs.num_gs_outputs = dgs->info.num_outputs; |
||
851 | draw->gs.position_output = dgs->position_output; |
||
852 | draw_geometry_shader_prepare(dgs, draw); |
||
853 | } |
||
854 | else { |
||
855 | draw->gs.geometry_shader = NULL; |
||
856 | draw->gs.num_gs_outputs = 0; |
||
857 | } |
||
858 | } |
||
859 | |||
860 | void draw_delete_geometry_shader(struct draw_context *draw, |
||
861 | struct draw_geometry_shader *dgs) |
||
862 | { |
||
863 | if (!dgs) { |
||
864 | return; |
||
865 | } |
||
866 | #ifdef HAVE_LLVM |
||
867 | if (draw_get_option_use_llvm()) { |
||
868 | struct llvm_geometry_shader *shader = llvm_geometry_shader(dgs); |
||
869 | struct draw_gs_llvm_variant_list_item *li; |
||
870 | |||
871 | li = first_elem(&shader->variants); |
||
872 | while(!at_end(&shader->variants, li)) { |
||
873 | struct draw_gs_llvm_variant_list_item *next = next_elem(li); |
||
874 | draw_gs_llvm_destroy_variant(li->base); |
||
875 | li = next; |
||
876 | } |
||
877 | |||
878 | assert(shader->variants_cached == 0); |
||
879 | |||
880 | if (dgs->llvm_prim_lengths) { |
||
881 | unsigned i; |
||
882 | for (i = 0; i < dgs->max_out_prims; ++i) { |
||
883 | align_free(dgs->llvm_prim_lengths[i]); |
||
884 | } |
||
885 | FREE(dgs->llvm_prim_lengths); |
||
886 | } |
||
887 | align_free(dgs->llvm_emitted_primitives); |
||
888 | align_free(dgs->llvm_emitted_vertices); |
||
889 | align_free(dgs->llvm_prim_ids); |
||
890 | |||
891 | align_free(dgs->gs_input); |
||
892 | } |
||
893 | #endif |
||
894 | |||
895 | FREE(dgs->primitive_lengths); |
||
896 | FREE((void*) dgs->state.tokens); |
||
897 | FREE(dgs); |
||
898 | } |
||
899 | |||
900 | |||
901 | #ifdef HAVE_LLVM |
||
902 | void draw_gs_set_current_variant(struct draw_geometry_shader *shader, |
||
903 | struct draw_gs_llvm_variant *variant) |
||
904 | { |
||
905 | shader->current_variant = variant; |
||
906 | } |
||
907 | #endif |
||
908 | |||
909 | /* |
||
910 | * Called at the very begin of the draw call with a new instance |
||
911 | * Used to reset state that should persist between primitive restart. |
||
912 | */ |
||
913 | void |
||
914 | draw_geometry_shader_new_instance(struct draw_geometry_shader *gs) |
||
915 | { |
||
916 | if (!gs) |
||
917 | return; |
||
918 | |||
919 | gs->in_prim_idx = 0; |
||
920 | }> |