Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5563 | serge | 1 | /************************************************************************** |
2 | * |
||
3 | * Copyright 2009 VMware, Inc. |
||
4 | * All Rights Reserved. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the |
||
8 | * "Software"), to deal in the Software without restriction, including |
||
9 | * without limitation the rights to use, copy, modify, merge, publish, |
||
10 | * distribute, sub license, and/or sell copies of the Software, and to |
||
11 | * permit persons to whom the Software is furnished to do so, subject to |
||
12 | * the following conditions: |
||
13 | * |
||
14 | * The above copyright notice and this permission notice (including the |
||
15 | * next paragraph) shall be included in all copies or substantial portions |
||
16 | * of the Software. |
||
17 | * |
||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
||
21 | * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR |
||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | * |
||
26 | **************************************************************************/ |
||
27 | |||
28 | #include <limits.h> |
||
29 | #include "util/u_memory.h" |
||
30 | #include "util/u_math.h" |
||
31 | #include "util/u_rect.h" |
||
32 | #include "util/u_surface.h" |
||
33 | #include "util/u_pack_color.h" |
||
34 | |||
35 | #include "os/os_time.h" |
||
36 | |||
37 | #include "lp_scene_queue.h" |
||
38 | #include "lp_debug.h" |
||
39 | #include "lp_fence.h" |
||
40 | #include "lp_perf.h" |
||
41 | #include "lp_query.h" |
||
42 | #include "lp_rast.h" |
||
43 | #include "lp_rast_priv.h" |
||
44 | #include "gallivm/lp_bld_debug.h" |
||
45 | #include "lp_scene.h" |
||
46 | #include "lp_tex_sample.h" |
||
47 | |||
48 | |||
#ifdef DEBUG
/*
 * Debug-build-only globals.
 * NOTE(review): presumably updated by the BEGIN_JIT_CALL()/END_JIT_CALL()
 * macros used around the JIT shader invocations below so that the last
 * state/task handed to JIT code can be inspected -- confirm against the
 * macro definitions (not visible in this file).
 */
int jit_line = 0;
const struct lp_rast_state *jit_state = NULL;
const struct lp_rasterizer_task *jit_task = NULL;
#endif
||
54 | |||
55 | |||
56 | /** |
||
57 | * Begin rasterizing a scene. |
||
58 | * Called once per scene by one thread. |
||
59 | */ |
||
60 | static void |
||
61 | lp_rast_begin( struct lp_rasterizer *rast, |
||
62 | struct lp_scene *scene ) |
||
63 | { |
||
64 | rast->curr_scene = scene; |
||
65 | |||
66 | LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); |
||
67 | |||
68 | lp_scene_begin_rasterization( scene ); |
||
69 | lp_scene_bin_iter_begin( scene ); |
||
70 | } |
||
71 | |||
72 | |||
73 | static void |
||
74 | lp_rast_end( struct lp_rasterizer *rast ) |
||
75 | { |
||
76 | lp_scene_end_rasterization( rast->curr_scene ); |
||
77 | |||
78 | rast->curr_scene = NULL; |
||
79 | } |
||
80 | |||
81 | |||
82 | /** |
||
83 | * Begining rasterization of a tile. |
||
84 | * \param x window X position of the tile, in pixels |
||
85 | * \param y window Y position of the tile, in pixels |
||
86 | */ |
||
87 | static void |
||
88 | lp_rast_tile_begin(struct lp_rasterizer_task *task, |
||
89 | const struct cmd_bin *bin, |
||
90 | int x, int y) |
||
91 | { |
||
92 | LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); |
||
93 | |||
94 | task->bin = bin; |
||
95 | task->x = x * TILE_SIZE; |
||
96 | task->y = y * TILE_SIZE; |
||
97 | task->width = TILE_SIZE + x * TILE_SIZE > task->scene->fb.width ? |
||
98 | task->scene->fb.width - x * TILE_SIZE : TILE_SIZE; |
||
99 | task->height = TILE_SIZE + y * TILE_SIZE > task->scene->fb.height ? |
||
100 | task->scene->fb.height - y * TILE_SIZE : TILE_SIZE; |
||
101 | |||
102 | task->thread_data.vis_counter = 0; |
||
103 | task->ps_invocations = 0; |
||
104 | |||
105 | /* reset pointers to color and depth tile(s) */ |
||
106 | memset(task->color_tiles, 0, sizeof(task->color_tiles)); |
||
107 | task->depth_tile = NULL; |
||
108 | } |
||
109 | |||
110 | |||
111 | /** |
||
112 | * Clear the rasterizer's current color tile. |
||
113 | * This is a bin command called during bin processing. |
||
114 | * Clear commands always clear all bound layers. |
||
115 | */ |
||
116 | static void |
||
117 | lp_rast_clear_color(struct lp_rasterizer_task *task, |
||
118 | const union lp_rast_cmd_arg arg) |
||
119 | { |
||
120 | const struct lp_scene *scene = task->scene; |
||
121 | |||
122 | if (scene->fb.nr_cbufs) { |
||
123 | unsigned i; |
||
124 | union util_color uc; |
||
125 | |||
126 | if (util_format_is_pure_integer(scene->fb.cbufs[0]->format)) { |
||
127 | /* |
||
128 | * We expect int/uint clear values here, though some APIs |
||
129 | * might disagree (but in any case util_pack_color() |
||
130 | * couldn't handle it)... |
||
131 | */ |
||
132 | LP_DBG(DEBUG_RAST, "%s pure int 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, |
||
133 | arg.clear_color.ui[0], |
||
134 | arg.clear_color.ui[1], |
||
135 | arg.clear_color.ui[2], |
||
136 | arg.clear_color.ui[3]); |
||
137 | |||
138 | for (i = 0; i < scene->fb.nr_cbufs; i++) { |
||
139 | enum pipe_format format = scene->fb.cbufs[i]->format; |
||
140 | |||
141 | if (util_format_is_pure_sint(format)) { |
||
142 | util_format_write_4i(format, arg.clear_color.i, 0, &uc, 0, 0, 0, 1, 1); |
||
143 | } |
||
144 | else { |
||
145 | assert(util_format_is_pure_uint(format)); |
||
146 | util_format_write_4ui(format, arg.clear_color.ui, 0, &uc, 0, 0, 0, 1, 1); |
||
147 | } |
||
148 | |||
149 | util_fill_box(scene->cbufs[i].map, |
||
150 | format, |
||
151 | scene->cbufs[i].stride, |
||
152 | scene->cbufs[i].layer_stride, |
||
153 | task->x, |
||
154 | task->y, |
||
155 | 0, |
||
156 | task->width, |
||
157 | task->height, |
||
158 | scene->fb_max_layer + 1, |
||
159 | &uc); |
||
160 | } |
||
161 | } |
||
162 | else { |
||
163 | uint8_t clear_color[4]; |
||
164 | |||
165 | for (i = 0; i < 4; ++i) { |
||
166 | clear_color[i] = float_to_ubyte(arg.clear_color.f[i]); |
||
167 | } |
||
168 | |||
169 | LP_DBG(DEBUG_RAST, "%s 0x%x,0x%x,0x%x,0x%x\n", __FUNCTION__, |
||
170 | clear_color[0], |
||
171 | clear_color[1], |
||
172 | clear_color[2], |
||
173 | clear_color[3]); |
||
174 | |||
175 | for (i = 0; i < scene->fb.nr_cbufs; i++) { |
||
176 | util_pack_color(arg.clear_color.f, |
||
177 | scene->fb.cbufs[i]->format, &uc); |
||
178 | |||
179 | util_fill_box(scene->cbufs[i].map, |
||
180 | scene->fb.cbufs[i]->format, |
||
181 | scene->cbufs[i].stride, |
||
182 | scene->cbufs[i].layer_stride, |
||
183 | task->x, |
||
184 | task->y, |
||
185 | 0, |
||
186 | task->width, |
||
187 | task->height, |
||
188 | scene->fb_max_layer + 1, |
||
189 | &uc); |
||
190 | } |
||
191 | } |
||
192 | } |
||
193 | |||
194 | LP_COUNT(nr_color_tile_clear); |
||
195 | } |
||
196 | |||
197 | |||
198 | |||
199 | |||
200 | /** |
||
201 | * Clear the rasterizer's current z/stencil tile. |
||
202 | * This is a bin command called during bin processing. |
||
203 | * Clear commands always clear all bound layers. |
||
204 | */ |
||
205 | static void |
||
206 | lp_rast_clear_zstencil(struct lp_rasterizer_task *task, |
||
207 | const union lp_rast_cmd_arg arg) |
||
208 | { |
||
209 | const struct lp_scene *scene = task->scene; |
||
210 | uint64_t clear_value64 = arg.clear_zstencil.value; |
||
211 | uint64_t clear_mask64 = arg.clear_zstencil.mask; |
||
212 | uint32_t clear_value = (uint32_t) clear_value64; |
||
213 | uint32_t clear_mask = (uint32_t) clear_mask64; |
||
214 | const unsigned height = task->height; |
||
215 | const unsigned width = task->width; |
||
216 | const unsigned dst_stride = scene->zsbuf.stride; |
||
217 | uint8_t *dst; |
||
218 | unsigned i, j; |
||
219 | unsigned block_size; |
||
220 | |||
221 | LP_DBG(DEBUG_RAST, "%s: value=0x%08x, mask=0x%08x\n", |
||
222 | __FUNCTION__, clear_value, clear_mask); |
||
223 | |||
224 | /* |
||
225 | * Clear the area of the depth/depth buffer matching this tile. |
||
226 | */ |
||
227 | |||
228 | if (scene->fb.zsbuf) { |
||
229 | unsigned layer; |
||
230 | uint8_t *dst_layer = lp_rast_get_unswizzled_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE); |
||
231 | block_size = util_format_get_blocksize(scene->fb.zsbuf->format); |
||
232 | |||
233 | clear_value &= clear_mask; |
||
234 | |||
235 | for (layer = 0; layer <= scene->fb_max_layer; layer++) { |
||
236 | dst = dst_layer; |
||
237 | |||
238 | switch (block_size) { |
||
239 | case 1: |
||
240 | assert(clear_mask == 0xff); |
||
241 | memset(dst, (uint8_t) clear_value, height * width); |
||
242 | break; |
||
243 | case 2: |
||
244 | if (clear_mask == 0xffff) { |
||
245 | for (i = 0; i < height; i++) { |
||
246 | uint16_t *row = (uint16_t *)dst; |
||
247 | for (j = 0; j < width; j++) |
||
248 | *row++ = (uint16_t) clear_value; |
||
249 | dst += dst_stride; |
||
250 | } |
||
251 | } |
||
252 | else { |
||
253 | for (i = 0; i < height; i++) { |
||
254 | uint16_t *row = (uint16_t *)dst; |
||
255 | for (j = 0; j < width; j++) { |
||
256 | uint16_t tmp = ~clear_mask & *row; |
||
257 | *row++ = clear_value | tmp; |
||
258 | } |
||
259 | dst += dst_stride; |
||
260 | } |
||
261 | } |
||
262 | break; |
||
263 | case 4: |
||
264 | if (clear_mask == 0xffffffff) { |
||
265 | for (i = 0; i < height; i++) { |
||
266 | uint32_t *row = (uint32_t *)dst; |
||
267 | for (j = 0; j < width; j++) |
||
268 | *row++ = clear_value; |
||
269 | dst += dst_stride; |
||
270 | } |
||
271 | } |
||
272 | else { |
||
273 | for (i = 0; i < height; i++) { |
||
274 | uint32_t *row = (uint32_t *)dst; |
||
275 | for (j = 0; j < width; j++) { |
||
276 | uint32_t tmp = ~clear_mask & *row; |
||
277 | *row++ = clear_value | tmp; |
||
278 | } |
||
279 | dst += dst_stride; |
||
280 | } |
||
281 | } |
||
282 | break; |
||
283 | case 8: |
||
284 | clear_value64 &= clear_mask64; |
||
285 | if (clear_mask64 == 0xffffffffffULL) { |
||
286 | for (i = 0; i < height; i++) { |
||
287 | uint64_t *row = (uint64_t *)dst; |
||
288 | for (j = 0; j < width; j++) |
||
289 | *row++ = clear_value64; |
||
290 | dst += dst_stride; |
||
291 | } |
||
292 | } |
||
293 | else { |
||
294 | for (i = 0; i < height; i++) { |
||
295 | uint64_t *row = (uint64_t *)dst; |
||
296 | for (j = 0; j < width; j++) { |
||
297 | uint64_t tmp = ~clear_mask64 & *row; |
||
298 | *row++ = clear_value64 | tmp; |
||
299 | } |
||
300 | dst += dst_stride; |
||
301 | } |
||
302 | } |
||
303 | break; |
||
304 | |||
305 | default: |
||
306 | assert(0); |
||
307 | break; |
||
308 | } |
||
309 | dst_layer += scene->zsbuf.layer_stride; |
||
310 | } |
||
311 | } |
||
312 | } |
||
313 | |||
314 | |||
315 | |||
316 | /** |
||
317 | * Run the shader on all blocks in a tile. This is used when a tile is |
||
318 | * completely contained inside a triangle. |
||
319 | * This is a bin command called during bin processing. |
||
320 | */ |
||
321 | static void |
||
322 | lp_rast_shade_tile(struct lp_rasterizer_task *task, |
||
323 | const union lp_rast_cmd_arg arg) |
||
324 | { |
||
325 | const struct lp_scene *scene = task->scene; |
||
326 | const struct lp_rast_shader_inputs *inputs = arg.shade_tile; |
||
327 | const struct lp_rast_state *state; |
||
328 | struct lp_fragment_shader_variant *variant; |
||
329 | const unsigned tile_x = task->x, tile_y = task->y; |
||
330 | unsigned x, y; |
||
331 | |||
332 | if (inputs->disable) { |
||
333 | /* This command was partially binned and has been disabled */ |
||
334 | return; |
||
335 | } |
||
336 | |||
337 | LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); |
||
338 | |||
339 | state = task->state; |
||
340 | assert(state); |
||
341 | if (!state) { |
||
342 | return; |
||
343 | } |
||
344 | variant = state->variant; |
||
345 | |||
346 | /* render the whole 64x64 tile in 4x4 chunks */ |
||
347 | for (y = 0; y < task->height; y += 4){ |
||
348 | for (x = 0; x < task->width; x += 4) { |
||
349 | uint8_t *color[PIPE_MAX_COLOR_BUFS]; |
||
350 | unsigned stride[PIPE_MAX_COLOR_BUFS]; |
||
351 | uint8_t *depth = NULL; |
||
352 | unsigned depth_stride = 0; |
||
353 | unsigned i; |
||
354 | |||
355 | /* color buffer */ |
||
356 | for (i = 0; i < scene->fb.nr_cbufs; i++){ |
||
357 | stride[i] = scene->cbufs[i].stride; |
||
358 | color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, tile_x + x, |
||
359 | tile_y + y, inputs->layer); |
||
360 | } |
||
361 | |||
362 | /* depth buffer */ |
||
363 | if (scene->zsbuf.map) { |
||
364 | depth = lp_rast_get_unswizzled_depth_block_pointer(task, tile_x + x, |
||
365 | tile_y + y, inputs->layer); |
||
366 | depth_stride = scene->zsbuf.stride; |
||
367 | } |
||
368 | |||
369 | /* run shader on 4x4 block */ |
||
370 | BEGIN_JIT_CALL(state, task); |
||
371 | variant->jit_function[RAST_WHOLE]( &state->jit_context, |
||
372 | tile_x + x, tile_y + y, |
||
373 | inputs->frontfacing, |
||
374 | GET_A0(inputs), |
||
375 | GET_DADX(inputs), |
||
376 | GET_DADY(inputs), |
||
377 | color, |
||
378 | depth, |
||
379 | 0xffff, |
||
380 | &task->thread_data, |
||
381 | stride, |
||
382 | depth_stride); |
||
383 | END_JIT_CALL(); |
||
384 | } |
||
385 | } |
||
386 | } |
||
387 | |||
388 | |||
389 | /** |
||
390 | * Run the shader on all blocks in a tile. This is used when a tile is |
||
391 | * completely contained inside a triangle, and the shader is opaque. |
||
392 | * This is a bin command called during bin processing. |
||
393 | */ |
||
394 | static void |
||
395 | lp_rast_shade_tile_opaque(struct lp_rasterizer_task *task, |
||
396 | const union lp_rast_cmd_arg arg) |
||
397 | { |
||
398 | LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); |
||
399 | |||
400 | assert(task->state); |
||
401 | if (!task->state) { |
||
402 | return; |
||
403 | } |
||
404 | |||
405 | lp_rast_shade_tile(task, arg); |
||
406 | } |
||
407 | |||
408 | |||
409 | /** |
||
410 | * Compute shading for a 4x4 block of pixels inside a triangle. |
||
411 | * This is a bin command called during bin processing. |
||
412 | * \param x X position of quad in window coords |
||
413 | * \param y Y position of quad in window coords |
||
414 | */ |
||
415 | void |
||
416 | lp_rast_shade_quads_mask(struct lp_rasterizer_task *task, |
||
417 | const struct lp_rast_shader_inputs *inputs, |
||
418 | unsigned x, unsigned y, |
||
419 | unsigned mask) |
||
420 | { |
||
421 | const struct lp_rast_state *state = task->state; |
||
422 | struct lp_fragment_shader_variant *variant = state->variant; |
||
423 | const struct lp_scene *scene = task->scene; |
||
424 | uint8_t *color[PIPE_MAX_COLOR_BUFS]; |
||
425 | unsigned stride[PIPE_MAX_COLOR_BUFS]; |
||
426 | uint8_t *depth = NULL; |
||
427 | unsigned depth_stride = 0; |
||
428 | unsigned i; |
||
429 | |||
430 | assert(state); |
||
431 | |||
432 | /* Sanity checks */ |
||
433 | assert(x < scene->tiles_x * TILE_SIZE); |
||
434 | assert(y < scene->tiles_y * TILE_SIZE); |
||
435 | assert(x % TILE_VECTOR_WIDTH == 0); |
||
436 | assert(y % TILE_VECTOR_HEIGHT == 0); |
||
437 | |||
438 | assert((x % 4) == 0); |
||
439 | assert((y % 4) == 0); |
||
440 | |||
441 | /* color buffer */ |
||
442 | for (i = 0; i < scene->fb.nr_cbufs; i++) { |
||
443 | stride[i] = scene->cbufs[i].stride; |
||
444 | color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, x, y, inputs->layer); |
||
445 | } |
||
446 | |||
447 | /* depth buffer */ |
||
448 | if (scene->zsbuf.map) { |
||
449 | depth_stride = scene->zsbuf.stride; |
||
450 | depth = lp_rast_get_unswizzled_depth_block_pointer(task, x, y, inputs->layer); |
||
451 | } |
||
452 | |||
453 | assert(lp_check_alignment(state->jit_context.u8_blend_color, 16)); |
||
454 | |||
455 | /* |
||
456 | * The rasterizer may produce fragments outside our |
||
457 | * allocated 4x4 blocks hence need to filter them out here. |
||
458 | */ |
||
459 | if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) { |
||
460 | /* not very accurate would need a popcount on the mask */ |
||
461 | /* always count this not worth bothering? */ |
||
462 | task->ps_invocations++; |
||
463 | |||
464 | /* run shader on 4x4 block */ |
||
465 | BEGIN_JIT_CALL(state, task); |
||
466 | variant->jit_function[RAST_EDGE_TEST](&state->jit_context, |
||
467 | x, y, |
||
468 | inputs->frontfacing, |
||
469 | GET_A0(inputs), |
||
470 | GET_DADX(inputs), |
||
471 | GET_DADY(inputs), |
||
472 | color, |
||
473 | depth, |
||
474 | mask, |
||
475 | &task->thread_data, |
||
476 | stride, |
||
477 | depth_stride); |
||
478 | END_JIT_CALL(); |
||
479 | } |
||
480 | } |
||
481 | |||
482 | |||
483 | |||
484 | /** |
||
485 | * Begin a new occlusion query. |
||
486 | * This is a bin command put in all bins. |
||
487 | * Called per thread. |
||
488 | */ |
||
489 | static void |
||
490 | lp_rast_begin_query(struct lp_rasterizer_task *task, |
||
491 | const union lp_rast_cmd_arg arg) |
||
492 | { |
||
493 | struct llvmpipe_query *pq = arg.query_obj; |
||
494 | |||
495 | switch (pq->type) { |
||
496 | case PIPE_QUERY_OCCLUSION_COUNTER: |
||
497 | case PIPE_QUERY_OCCLUSION_PREDICATE: |
||
498 | pq->start[task->thread_index] = task->thread_data.vis_counter; |
||
499 | break; |
||
500 | case PIPE_QUERY_PIPELINE_STATISTICS: |
||
501 | pq->start[task->thread_index] = task->ps_invocations; |
||
502 | break; |
||
503 | default: |
||
504 | assert(0); |
||
505 | break; |
||
506 | } |
||
507 | } |
||
508 | |||
509 | |||
510 | /** |
||
511 | * End the current occlusion query. |
||
512 | * This is a bin command put in all bins. |
||
513 | * Called per thread. |
||
514 | */ |
||
515 | static void |
||
516 | lp_rast_end_query(struct lp_rasterizer_task *task, |
||
517 | const union lp_rast_cmd_arg arg) |
||
518 | { |
||
519 | struct llvmpipe_query *pq = arg.query_obj; |
||
520 | |||
521 | switch (pq->type) { |
||
522 | case PIPE_QUERY_OCCLUSION_COUNTER: |
||
523 | case PIPE_QUERY_OCCLUSION_PREDICATE: |
||
524 | pq->end[task->thread_index] += |
||
525 | task->thread_data.vis_counter - pq->start[task->thread_index]; |
||
526 | pq->start[task->thread_index] = 0; |
||
527 | break; |
||
528 | case PIPE_QUERY_TIMESTAMP: |
||
529 | pq->end[task->thread_index] = os_time_get_nano(); |
||
530 | break; |
||
531 | case PIPE_QUERY_PIPELINE_STATISTICS: |
||
532 | pq->end[task->thread_index] += |
||
533 | task->ps_invocations - pq->start[task->thread_index]; |
||
534 | pq->start[task->thread_index] = 0; |
||
535 | break; |
||
536 | default: |
||
537 | assert(0); |
||
538 | break; |
||
539 | } |
||
540 | } |
||
541 | |||
542 | |||
/**
 * Make the state carried by the command argument the task's current
 * state.  This is a bin command (it appears in the dispatch table); the
 * shading commands later read task->state.
 *
 * \param task  task whose current state is replaced
 * \param arg   command argument carrying the new state pointer
 */
void
lp_rast_set_state(struct lp_rasterizer_task *task,
                  const union lp_rast_cmd_arg arg)
{
   task->state = arg.state;
}
||
549 | |||
550 | |||
551 | |||
552 | /** |
||
553 | * Called when we're done writing to a color tile. |
||
554 | */ |
||
555 | static void |
||
556 | lp_rast_tile_end(struct lp_rasterizer_task *task) |
||
557 | { |
||
558 | unsigned i; |
||
559 | |||
560 | for (i = 0; i < task->scene->num_active_queries; ++i) { |
||
561 | lp_rast_end_query(task, lp_rast_arg_query(task->scene->active_queries[i])); |
||
562 | } |
||
563 | |||
564 | /* debug */ |
||
565 | memset(task->color_tiles, 0, sizeof(task->color_tiles)); |
||
566 | task->depth_tile = NULL; |
||
567 | |||
568 | task->bin = NULL; |
||
569 | } |
||
570 | |||
/*
 * Bin command dispatch table, indexed by the opcode stored in
 * cmd_block::cmd[] (see do_rasterize_bin()).
 * NOTE(review): the entry order must match the numeric values of the
 * LP_RAST_OP_* opcodes, which are declared outside this file -- confirm
 * before adding or reordering entries.
 */
static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] =
{
   lp_rast_clear_color,
   lp_rast_clear_zstencil,
   lp_rast_triangle_1,
   lp_rast_triangle_2,
   lp_rast_triangle_3,
   lp_rast_triangle_4,
   lp_rast_triangle_5,
   lp_rast_triangle_6,
   lp_rast_triangle_7,
   lp_rast_triangle_8,
   lp_rast_triangle_3_4,
   lp_rast_triangle_3_16,
   lp_rast_triangle_4_16,
   lp_rast_shade_tile,
   lp_rast_shade_tile_opaque,
   lp_rast_begin_query,
   lp_rast_end_query,
   lp_rast_set_state,
};
||
592 | |||
593 | |||
594 | static void |
||
595 | do_rasterize_bin(struct lp_rasterizer_task *task, |
||
596 | const struct cmd_bin *bin, |
||
597 | int x, int y) |
||
598 | { |
||
599 | const struct cmd_block *block; |
||
600 | unsigned k; |
||
601 | |||
602 | if (0) |
||
603 | lp_debug_bin(bin, x, y); |
||
604 | |||
605 | for (block = bin->head; block; block = block->next) { |
||
606 | for (k = 0; k < block->count; k++) { |
||
607 | dispatch[block->cmd[k]]( task, block->arg[k] ); |
||
608 | } |
||
609 | } |
||
610 | } |
||
611 | |||
612 | |||
613 | |||
614 | /** |
||
615 | * Rasterize commands for a single bin. |
||
616 | * \param x, y position of the bin's tile in the framebuffer |
||
617 | * Must be called between lp_rast_begin() and lp_rast_end(). |
||
618 | * Called per thread. |
||
619 | */ |
||
620 | static void |
||
621 | rasterize_bin(struct lp_rasterizer_task *task, |
||
622 | const struct cmd_bin *bin, int x, int y ) |
||
623 | { |
||
624 | lp_rast_tile_begin( task, bin, x, y ); |
||
625 | |||
626 | do_rasterize_bin(task, bin, x, y); |
||
627 | |||
628 | lp_rast_tile_end(task); |
||
629 | |||
630 | |||
631 | /* Debug/Perf flags: |
||
632 | */ |
||
633 | if (bin->head->count == 1) { |
||
634 | if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE_OPAQUE) |
||
635 | LP_COUNT(nr_pure_shade_opaque_64); |
||
636 | else if (bin->head->cmd[0] == LP_RAST_OP_SHADE_TILE) |
||
637 | LP_COUNT(nr_pure_shade_64); |
||
638 | } |
||
639 | } |
||
640 | |||
641 | |||
642 | /* An empty bin is one that just loads the contents of the tile and |
||
643 | * stores them again unchanged. This typically happens when bins have |
||
644 | * been flushed for some reason in the middle of a frame, or when |
||
645 | * incremental updates are being made to a render target. |
||
646 | * |
||
647 | * Try to avoid doing pointless work in this case. |
||
648 | */ |
||
649 | static boolean |
||
650 | is_empty_bin( const struct cmd_bin *bin ) |
||
651 | { |
||
652 | return bin->head == NULL; |
||
653 | } |
||
654 | |||
655 | |||
656 | /** |
||
657 | * Rasterize/execute all bins within a scene. |
||
658 | * Called per thread. |
||
659 | */ |
||
660 | static void |
||
661 | rasterize_scene(struct lp_rasterizer_task *task, |
||
662 | struct lp_scene *scene) |
||
663 | { |
||
664 | task->scene = scene; |
||
665 | |||
666 | if (!task->rast->no_rast && !scene->discard) { |
||
667 | /* loop over scene bins, rasterize each */ |
||
668 | { |
||
669 | struct cmd_bin *bin; |
||
670 | int i, j; |
||
671 | |||
672 | assert(scene); |
||
673 | while ((bin = lp_scene_bin_iter_next(scene, &i, &j))) { |
||
674 | if (!is_empty_bin( bin )) |
||
675 | rasterize_bin(task, bin, i, j); |
||
676 | } |
||
677 | } |
||
678 | } |
||
679 | |||
680 | |||
681 | if (scene->fence) { |
||
682 | lp_fence_signal(scene->fence); |
||
683 | } |
||
684 | |||
685 | task->scene = NULL; |
||
686 | } |
||
687 | |||
688 | |||
689 | /** |
||
690 | * Called by setup module when it has something for us to render. |
||
691 | */ |
||
692 | void |
||
693 | lp_rast_queue_scene( struct lp_rasterizer *rast, |
||
694 | struct lp_scene *scene) |
||
695 | { |
||
696 | LP_DBG(DEBUG_SETUP, "%s\n", __FUNCTION__); |
||
697 | |||
698 | if (rast->num_threads == 0) { |
||
699 | /* no threading */ |
||
700 | |||
701 | lp_rast_begin( rast, scene ); |
||
702 | |||
703 | rasterize_scene( &rast->tasks[0], scene ); |
||
704 | |||
705 | lp_rast_end( rast ); |
||
706 | |||
707 | rast->curr_scene = NULL; |
||
708 | } |
||
709 | else { |
||
710 | /* threaded rendering! */ |
||
711 | unsigned i; |
||
712 | |||
713 | lp_scene_enqueue( rast->full_scenes, scene ); |
||
714 | |||
715 | /* signal the threads that there's work to do */ |
||
716 | for (i = 0; i < rast->num_threads; i++) { |
||
717 | pipe_semaphore_signal(&rast->tasks[i].work_ready); |
||
718 | } |
||
719 | } |
||
720 | |||
721 | LP_DBG(DEBUG_SETUP, "%s done \n", __FUNCTION__); |
||
722 | } |
||
723 | |||
724 | |||
725 | void |
||
726 | lp_rast_finish( struct lp_rasterizer *rast ) |
||
727 | { |
||
728 | if (rast->num_threads == 0) { |
||
729 | /* nothing to do */ |
||
730 | } |
||
731 | else { |
||
732 | int i; |
||
733 | |||
734 | /* wait for work to complete */ |
||
735 | for (i = 0; i < rast->num_threads; i++) { |
||
736 | pipe_semaphore_wait(&rast->tasks[i].work_done); |
||
737 | } |
||
738 | } |
||
739 | } |
||
740 | |||
741 | |||
/**
 * This is the thread's main entrypoint.
 * It's a simple loop:
 *   1. wait for work
 *   2. do work
 *   3. signal that we're done
 *
 * \param init_data  pointer to this thread's struct lp_rasterizer_task
 * \return NULL once the rasterizer's exit_flag has been observed
 */
static PIPE_THREAD_ROUTINE( thread_function, init_data )
{
   struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
   struct lp_rasterizer *rast = task->rast;
   /* compile-time switch for the debug_printf() tracing below */
   boolean debug = false;
   unsigned fpstate = util_fpstate_get();

   /* Make sure that denorms are treated like zeros. This is
    * the behavior required by D3D10. OpenGL doesn't care.
    */
   util_fpstate_set_denorms_to_zero(fpstate);

   while (1) {
      /* wait for work */
      if (debug)
         debug_printf("thread %d waiting for work\n", task->thread_index);
      pipe_semaphore_wait(&task->work_ready);

      /* lp_rast_destroy() sets exit_flag before signalling work_ready */
      if (rast->exit_flag)
         break;

      if (task->thread_index == 0) {
         /* thread[0]:
          *  - get next scene to rasterize
          *  - map the framebuffer surfaces
          */
         lp_rast_begin( rast,
                        lp_scene_dequeue( rast->full_scenes, TRUE ) );
      }

      /* Wait for all threads to get here so that threads[1+] don't
       * get a null rast->curr_scene pointer.
       */
      pipe_barrier_wait( &rast->barrier );

      /* do work */
      if (debug)
         debug_printf("thread %d doing work\n", task->thread_index);

      rasterize_scene(task,
                      rast->curr_scene);

      /* wait for all threads to finish with this scene */
      pipe_barrier_wait( &rast->barrier );

      /* XXX: shouldn't be necessary:
       */
      if (task->thread_index == 0) {
         /* only thread[0] ends the scene, after the barrier above */
         lp_rast_end( rast );
      }

      /* signal done with work */
      if (debug)
         debug_printf("thread %d done working\n", task->thread_index);

      /* lp_rast_finish() waits on this semaphore */
      pipe_semaphore_signal(&task->work_done);
   }

   return NULL;
}
||
809 | |||
810 | |||
811 | /** |
||
812 | * Initialize semaphores and spawn the threads. |
||
813 | */ |
||
814 | static void |
||
815 | create_rast_threads(struct lp_rasterizer *rast) |
||
816 | { |
||
817 | unsigned i; |
||
818 | |||
819 | /* NOTE: if num_threads is zero, we won't use any threads */ |
||
820 | for (i = 0; i < rast->num_threads; i++) { |
||
821 | pipe_semaphore_init(&rast->tasks[i].work_ready, 0); |
||
822 | pipe_semaphore_init(&rast->tasks[i].work_done, 0); |
||
823 | rast->threads[i] = pipe_thread_create(thread_function, |
||
824 | (void *) &rast->tasks[i]); |
||
825 | } |
||
826 | } |
||
827 | |||
828 | |||
829 | |||
830 | /** |
||
831 | * Create new lp_rasterizer. If num_threads is zero, don't create any |
||
832 | * new threads, do rendering synchronously. |
||
833 | * \param num_threads number of rasterizer threads to create |
||
834 | */ |
||
835 | struct lp_rasterizer * |
||
836 | lp_rast_create( unsigned num_threads ) |
||
837 | { |
||
838 | struct lp_rasterizer *rast; |
||
839 | unsigned i; |
||
840 | |||
841 | rast = CALLOC_STRUCT(lp_rasterizer); |
||
842 | if (!rast) { |
||
843 | goto no_rast; |
||
844 | } |
||
845 | |||
846 | rast->full_scenes = lp_scene_queue_create(); |
||
847 | if (!rast->full_scenes) { |
||
848 | goto no_full_scenes; |
||
849 | } |
||
850 | |||
851 | for (i = 0; i < Elements(rast->tasks); i++) { |
||
852 | struct lp_rasterizer_task *task = &rast->tasks[i]; |
||
853 | task->rast = rast; |
||
854 | task->thread_index = i; |
||
855 | } |
||
856 | |||
857 | rast->num_threads = num_threads; |
||
858 | |||
859 | rast->no_rast = debug_get_bool_option("LP_NO_RAST", FALSE); |
||
860 | |||
861 | create_rast_threads(rast); |
||
862 | |||
863 | /* for synchronizing rasterization threads */ |
||
864 | pipe_barrier_init( &rast->barrier, rast->num_threads ); |
||
865 | |||
866 | memset(lp_dummy_tile, 0, sizeof lp_dummy_tile); |
||
867 | |||
868 | return rast; |
||
869 | |||
870 | no_full_scenes: |
||
871 | FREE(rast); |
||
872 | no_rast: |
||
873 | return NULL; |
||
874 | } |
||
875 | |||
876 | |||
877 | /* Shutdown: |
||
878 | */ |
||
879 | void lp_rast_destroy( struct lp_rasterizer *rast ) |
||
880 | { |
||
881 | unsigned i; |
||
882 | |||
883 | /* Set exit_flag and signal each thread's work_ready semaphore. |
||
884 | * Each thread will be woken up, notice that the exit_flag is set and |
||
885 | * break out of its main loop. The thread will then exit. |
||
886 | */ |
||
887 | rast->exit_flag = TRUE; |
||
888 | for (i = 0; i < rast->num_threads; i++) { |
||
889 | pipe_semaphore_signal(&rast->tasks[i].work_ready); |
||
890 | } |
||
891 | |||
892 | /* Wait for threads to terminate before cleaning up per-thread data */ |
||
893 | for (i = 0; i < rast->num_threads; i++) { |
||
894 | pipe_thread_wait(rast->threads[i]); |
||
895 | } |
||
896 | |||
897 | /* Clean up per-thread data */ |
||
898 | for (i = 0; i < rast->num_threads; i++) { |
||
899 | pipe_semaphore_destroy(&rast->tasks[i].work_ready); |
||
900 | pipe_semaphore_destroy(&rast->tasks[i].work_done); |
||
901 | } |
||
902 | |||
903 | /* for synchronizing rasterization threads */ |
||
904 | pipe_barrier_destroy( &rast->barrier ); |
||
905 | |||
906 | lp_scene_queue_destroy(rast->full_scenes); |
||
907 | |||
908 | FREE(rast); |
||
909 | } |
||
910 | |||
911 | |||
912 | /** Return number of rasterization threads */ |
||
913 | unsigned |
||
914 | lp_rast_get_num_threads( struct lp_rasterizer *rast ) |
||
915 | { |
||
916 | return rast->num_threads; |
||
917 | }>>>>>>>>>>>>>>>>>>>>>>>>>>>>>=>>>> |
||
918 |