Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | * Copyright 2010 Jerome Glisse |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * on the rights to use, copy, modify, merge, publish, distribute, sub |
||
8 | * license, and/or sell copies of the Software, and to permit persons to whom |
||
9 | * the Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
||
19 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
||
20 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
||
21 | * USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
22 | * |
||
23 | * Authors: |
||
24 | * Jerome Glisse |
||
25 | */ |
||
26 | #include "r600_hw_context_priv.h" |
||
27 | #include "radeonsi_pm4.h" |
||
28 | #include "radeonsi_pipe.h" |
||
29 | #include "sid.h" |
||
30 | #include "util/u_memory.h" |
||
31 | #include |
||
32 | |||
33 | #define GROUP_FORCE_NEW_BLOCK 0 |
||
34 | |||
35 | /* Get backends mask */ |
||
36 | void si_get_backend_mask(struct r600_context *ctx) |
||
37 | { |
||
38 | struct radeon_winsys_cs *cs = ctx->cs; |
||
39 | struct si_resource *buffer; |
||
40 | uint32_t *results; |
||
41 | unsigned num_backends = ctx->screen->info.r600_num_backends; |
||
42 | unsigned i, mask = 0; |
||
43 | |||
44 | /* if backend_map query is supported by the kernel */ |
||
45 | if (ctx->screen->info.r600_backend_map_valid) { |
||
46 | unsigned num_tile_pipes = ctx->screen->info.r600_num_tile_pipes; |
||
47 | unsigned backend_map = ctx->screen->info.r600_backend_map; |
||
48 | unsigned item_width = 4, item_mask = 0x7; |
||
49 | |||
50 | while(num_tile_pipes--) { |
||
51 | i = backend_map & item_mask; |
||
52 | mask |= (1< |
||
53 | backend_map >>= item_width; |
||
54 | } |
||
55 | if (mask != 0) { |
||
56 | ctx->backend_mask = mask; |
||
57 | return; |
||
58 | } |
||
59 | } |
||
60 | |||
61 | /* otherwise backup path for older kernels */ |
||
62 | |||
63 | /* create buffer for event data */ |
||
64 | buffer = si_resource_create_custom(&ctx->screen->screen, |
||
65 | PIPE_USAGE_STAGING, |
||
66 | ctx->max_db*16); |
||
67 | if (!buffer) |
||
68 | goto err; |
||
69 | |||
70 | /* initialize buffer with zeroes */ |
||
71 | results = ctx->ws->buffer_map(buffer->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE); |
||
72 | if (results) { |
||
73 | uint64_t va = 0; |
||
74 | |||
75 | memset(results, 0, ctx->max_db * 4 * 4); |
||
76 | ctx->ws->buffer_unmap(buffer->cs_buf); |
||
77 | |||
78 | /* emit EVENT_WRITE for ZPASS_DONE */ |
||
79 | va = r600_resource_va(&ctx->screen->screen, (void *)buffer); |
||
80 | cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); |
||
81 | cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); |
||
82 | cs->buf[cs->cdw++] = va; |
||
83 | cs->buf[cs->cdw++] = va >> 32; |
||
84 | |||
85 | cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); |
||
86 | cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, buffer, RADEON_USAGE_WRITE); |
||
87 | |||
88 | /* analyze results */ |
||
89 | results = ctx->ws->buffer_map(buffer->cs_buf, ctx->cs, PIPE_TRANSFER_READ); |
||
90 | if (results) { |
||
91 | for(i = 0; i < ctx->max_db; i++) { |
||
92 | /* at least highest bit will be set if backend is used */ |
||
93 | if (results[i*4 + 1]) |
||
94 | mask |= (1< |
||
95 | } |
||
96 | ctx->ws->buffer_unmap(buffer->cs_buf); |
||
97 | } |
||
98 | } |
||
99 | |||
100 | si_resource_reference(&buffer, NULL); |
||
101 | |||
102 | if (mask != 0) { |
||
103 | ctx->backend_mask = mask; |
||
104 | return; |
||
105 | } |
||
106 | |||
107 | err: |
||
108 | /* fallback to old method - set num_backends lower bits to 1 */ |
||
109 | ctx->backend_mask = (~((uint32_t)0))>>(32-num_backends); |
||
110 | return; |
||
111 | } |
||
112 | |||
113 | /* initialize */ |
||
114 | void si_need_cs_space(struct r600_context *ctx, unsigned num_dw, |
||
115 | boolean count_draw_in) |
||
116 | { |
||
117 | /* The number of dwords we already used in the CS so far. */ |
||
118 | num_dw += ctx->cs->cdw; |
||
119 | |||
120 | if (count_draw_in) { |
||
121 | /* The number of dwords all the dirty states would take. */ |
||
122 | num_dw += ctx->pm4_dirty_cdwords; |
||
123 | |||
124 | /* The upper-bound of how much a draw command would take. */ |
||
125 | num_dw += SI_MAX_DRAW_CS_DWORDS; |
||
126 | } |
||
127 | |||
128 | /* Count in queries_suspend. */ |
||
129 | num_dw += ctx->num_cs_dw_queries_suspend; |
||
130 | |||
131 | /* Count in streamout_end at the end of CS. */ |
||
132 | num_dw += ctx->num_cs_dw_streamout_end; |
||
133 | |||
134 | /* Count in render_condition(NULL) at the end of CS. */ |
||
135 | if (ctx->predicate_drawing) { |
||
136 | num_dw += 3; |
||
137 | } |
||
138 | |||
139 | /* Count in framebuffer cache flushes at the end of CS. */ |
||
140 | num_dw += 7; /* one SURFACE_SYNC and CACHE_FLUSH_AND_INV (r6xx-only) */ |
||
141 | |||
142 | /* Save 16 dwords for the fence mechanism. */ |
||
143 | num_dw += 16; |
||
144 | |||
145 | #if R600_TRACE_CS |
||
146 | if (ctx->screen->trace_bo) { |
||
147 | num_dw += R600_TRACE_CS_DWORDS; |
||
148 | } |
||
149 | #endif |
||
150 | |||
151 | /* Flush if there's not enough space. */ |
||
152 | if (num_dw > RADEON_MAX_CMDBUF_DWORDS) { |
||
153 | radeonsi_flush(&ctx->context, NULL, RADEON_FLUSH_ASYNC); |
||
154 | } |
||
155 | } |
||
156 | |||
157 | static void r600_flush_framebuffer(struct r600_context *ctx) |
||
158 | { |
||
159 | struct si_pm4_state *pm4; |
||
160 | |||
161 | if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY)) |
||
162 | return; |
||
163 | |||
164 | pm4 = si_pm4_alloc_state(ctx); |
||
165 | |||
166 | if (pm4 == NULL) |
||
167 | return; |
||
168 | |||
169 | si_cmd_surface_sync(pm4, S_0085F0_CB0_DEST_BASE_ENA(1) | |
||
170 | S_0085F0_CB1_DEST_BASE_ENA(1) | |
||
171 | S_0085F0_CB2_DEST_BASE_ENA(1) | |
||
172 | S_0085F0_CB3_DEST_BASE_ENA(1) | |
||
173 | S_0085F0_CB4_DEST_BASE_ENA(1) | |
||
174 | S_0085F0_CB5_DEST_BASE_ENA(1) | |
||
175 | S_0085F0_CB6_DEST_BASE_ENA(1) | |
||
176 | S_0085F0_CB7_DEST_BASE_ENA(1) | |
||
177 | S_0085F0_DB_ACTION_ENA(1) | |
||
178 | S_0085F0_DB_DEST_BASE_ENA(1)); |
||
179 | si_pm4_emit(ctx, pm4); |
||
180 | si_pm4_free_state(ctx, pm4, ~0); |
||
181 | |||
182 | ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY; |
||
183 | } |
||
184 | |||
/* Submit the current command stream to the kernel.
 *
 * Order matters here: active queries are suspended first (streamout
 * suspend is present but disabled), framebuffer caches are flushed,
 * a PS partial flush is emitted as a lockup workaround, and only then
 * is the CS handed to the winsys.  Afterwards all PM4 state is marked
 * dirty so it gets re-emitted into the fresh CS, and queries resume.
 *
 * flags: RADEON_FLUSH_* bits forwarded to the winsys (KEEP_TILING_FLAGS
 *        is always forced on).
 */
void si_context_flush(struct r600_context *ctx, unsigned flags)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	bool queries_suspended = false;

#if 0
	bool streamout_suspended = false;
#endif

	/* Empty CS: nothing to submit. */
	if (!cs->cdw)
		return;

	/* suspend queries */
	if (ctx->num_cs_dw_queries_suspend) {
		r600_context_queries_suspend(ctx);
		queries_suspended = true;
	}

#if 0
	/* Streamout suspend across flushes is currently disabled. */
	if (ctx->num_cs_dw_streamout_end) {
		r600_context_streamout_end(ctx);
		streamout_suspended = true;
	}
#endif

	r600_flush_framebuffer(ctx);

	/* partial flush is needed to avoid lockups on some chips with user fences */
	cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
	cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);

	/* force to keep tiling flags */
	flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;

#if R600_TRACE_CS
	/* Dump every dword of the CS before submitting it. */
	if (ctx->screen->trace_bo) {
		struct r600_screen *rscreen = ctx->screen;
		unsigned i;

		for (i = 0; i < cs->cdw; i++) {
			fprintf(stderr, "[%4d] [%5d] 0x%08x\n", rscreen->cs_count, i, cs->buf[i]);
		}
		rscreen->cs_count++;
	}
#endif

	/* Flush the CS. */
	ctx->ws->cs_flush(ctx->cs, flags, 0);

#if R600_TRACE_CS
	/* Poll the trace BO to detect whether the CS executed or hung.
	 * NOTE(review): usleep(5) sleeps microseconds while the message
	 * below reports "i * 5" as ms — confirm the intended units. */
	if (ctx->screen->trace_bo) {
		struct r600_screen *rscreen = ctx->screen;
		unsigned i;

		for (i = 0; i < 10; i++) {
			usleep(5);
			if (!ctx->ws->buffer_is_busy(rscreen->trace_bo->buf, RADEON_USAGE_READWRITE)) {
				break;
			}
		}
		if (i == 10) {
			fprintf(stderr, "timeout on cs lockup likely happen at cs %d dw %d\n",
				rscreen->trace_ptr[1], rscreen->trace_ptr[0]);
		} else {
			fprintf(stderr, "cs %d executed in %dms\n", rscreen->trace_ptr[1], i * 5);
		}
	}
#endif

	/* The new CS starts with no dirty PM4 dwords and no pending flags. */
	ctx->pm4_dirty_cdwords = 0;
	ctx->flags = 0;

#if 0
	if (streamout_suspended) {
		ctx->streamout_start = TRUE;
		ctx->streamout_append_bitmask = ~0;
	}
#endif

	/* resume queries */
	if (queries_suspended) {
		r600_context_queries_resume(ctx);
	}

	/* set all valid group as dirty so they get reemited on
	 * next draw command
	 */
	si_pm4_reset_emitted(ctx);
}
||
274 | |||
/* Emit a fence: once all prior GPU work reaches end-of-pipe, write
 * 'value' into 'fence_bo' at dword offset 'offset'.
 *
 * Reserves its own CS space (10 dwords) before emitting.
 */
void si_context_emit_fence(struct r600_context *ctx, struct si_resource *fence_bo, unsigned offset, unsigned value)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	uint64_t va;

	si_need_cs_space(ctx, 10, FALSE);

	/* GPU address of the fence slot; 'offset' counts dwords, hence << 2. */
	va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo);
	va = va + (offset << 2);

	/* Drain the pixel shader first. */
	cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
	cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
	/* EVENT_WRITE_EOP writes DATA_LO/DATA_HI to 'va' when the event retires. */
	cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
	cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
	cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL;       /* ADDRESS_LO */
	/* DATA_SEL | INT_EN | ADDRESS_HI — DATA_SEL=1 selects the 32-bit value. */
	cs->buf[cs->cdw++] = (1 << 29) | (0 << 24) | ((va >> 32UL) & 0xFF);
	cs->buf[cs->cdw++] = value;                   /* DATA_LO */
	cs->buf[cs->cdw++] = 0;                       /* DATA_HI */
	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, fence_bo, RADEON_USAGE_WRITE);
}
||
297 | |||
/* Read one begin/end counter pair out of a mapped query buffer and
 * return their difference.
 *
 * map:             mapped result block; counters are 64-bit values stored
 *                  as two consecutive little-endian 32-bit words.
 * start_index/end_index: 32-bit word indices of the begin/end counters.
 * test_status_bit: when true, return 0 unless bit 63 of BOTH counters is
 *                  set (the GPU sets it once the value is valid).
 */
static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
				       bool test_status_bit)
{
	uint32_t *words = (uint32_t*)map;
	uint64_t begin, finish;

	begin  = ((uint64_t)words[start_index + 1] << 32) | words[start_index];
	finish = ((uint64_t)words[end_index + 1] << 32) | words[end_index];

	if (test_status_bit &&
	    (!(begin & 0x8000000000000000UL) || !(finish & 0x8000000000000000UL)))
		return 0;

	return finish - begin;
}
||
315 | |||
/* Accumulate every completed result block of 'query' from its ring
 * buffer into query->result, then advance results_start to results_end.
 *
 * wait: when FALSE the map is non-blocking (PIPE_TRANSFER_DONTBLOCK)
 *       and the function returns FALSE if the GPU is still writing.
 * Returns TRUE once results have been folded in.
 */
static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait)
{
	unsigned results_base = query->results_start;
	char *map;

	map = ctx->ws->buffer_map(query->buffer->cs_buf, ctx->cs,
				  PIPE_TRANSFER_READ |
				  (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
	if (!map)
		return FALSE;

	/* count all results across all data blocks */
	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
		/* Steps 16 bytes at a time: one begin/end pair per DB, so a
		 * whole result block (16 * max_db bytes) sums every DB. */
		while (results_base != query->results_end) {
			query->result.u64 +=
				r600_query_read_result(map + results_base, 0, 2, true);
			results_base = (results_base + 16) % query->buffer->b.b.width0;
		}
		break;
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		/* Predicate is true if any sample passed in any block. */
		while (results_base != query->results_end) {
			query->result.b = query->result.b ||
				r600_query_read_result(map + results_base, 0, 2, true) != 0;
			results_base = (results_base + 16) % query->buffer->b.b.width0;
		}
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		/* Timestamps carry no status bit, hence test_status_bit=false. */
		while (results_base != query->results_end) {
			query->result.u64 +=
				r600_query_read_result(map + results_base, 0, 2, false);
			results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
		}
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
		/* SAMPLE_STREAMOUTSTATS stores this structure:
		 * {
		 *    u64 NumPrimitivesWritten;
		 *    u64 PrimitiveStorageNeeded;
		 * }
		 * We only need NumPrimitivesWritten here. */
		while (results_base != query->results_end) {
			query->result.u64 +=
				r600_query_read_result(map + results_base, 2, 6, true);
			results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
		}
		break;
	case PIPE_QUERY_PRIMITIVES_GENERATED:
		/* Here we read PrimitiveStorageNeeded. */
		while (results_base != query->results_end) {
			query->result.u64 +=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
		}
		break;
	case PIPE_QUERY_SO_STATISTICS:
		/* Both streamout counters are accumulated. */
		while (results_base != query->results_end) {
			query->result.so.num_primitives_written +=
				r600_query_read_result(map + results_base, 2, 6, true);
			query->result.so.primitives_storage_needed +=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
		}
		break;
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		/* Overflow happened if written != needed in any block. */
		while (results_base != query->results_end) {
			query->result.b = query->result.b ||
				r600_query_read_result(map + results_base, 2, 6, true) !=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
		}
		break;
	default:
		assert(0);
	}

	/* Consumed everything up to results_end. */
	query->results_start = query->results_end;
	ctx->ws->buffer_unmap(query->buffer->cs_buf);
	return TRUE;
}
||
396 | |||
397 | void r600_query_begin(struct r600_context *ctx, struct r600_query *query) |
||
398 | { |
||
399 | struct radeon_winsys_cs *cs = ctx->cs; |
||
400 | unsigned new_results_end, i; |
||
401 | uint32_t *results; |
||
402 | uint64_t va; |
||
403 | |||
404 | si_need_cs_space(ctx, query->num_cs_dw * 2, TRUE); |
||
405 | |||
406 | new_results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0; |
||
407 | |||
408 | /* collect current results if query buffer is full */ |
||
409 | if (new_results_end == query->results_start) { |
||
410 | r600_query_result(ctx, query, TRUE); |
||
411 | } |
||
412 | |||
413 | switch (query->type) { |
||
414 | case PIPE_QUERY_OCCLUSION_COUNTER: |
||
415 | case PIPE_QUERY_OCCLUSION_PREDICATE: |
||
416 | results = ctx->ws->buffer_map(query->buffer->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE); |
||
417 | if (results) { |
||
418 | results = (uint32_t*)((char*)results + query->results_end); |
||
419 | memset(results, 0, query->result_size); |
||
420 | |||
421 | /* Set top bits for unused backends */ |
||
422 | for (i = 0; i < ctx->max_db; i++) { |
||
423 | if (!(ctx->backend_mask & (1< |
||
424 | results[(i * 4)+1] = 0x80000000; |
||
425 | results[(i * 4)+3] = 0x80000000; |
||
426 | } |
||
427 | } |
||
428 | ctx->ws->buffer_unmap(query->buffer->cs_buf); |
||
429 | } |
||
430 | break; |
||
431 | case PIPE_QUERY_TIME_ELAPSED: |
||
432 | break; |
||
433 | case PIPE_QUERY_PRIMITIVES_EMITTED: |
||
434 | case PIPE_QUERY_PRIMITIVES_GENERATED: |
||
435 | case PIPE_QUERY_SO_STATISTICS: |
||
436 | case PIPE_QUERY_SO_OVERFLOW_PREDICATE: |
||
437 | results = ctx->ws->buffer_map(query->buffer->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE); |
||
438 | results = (uint32_t*)((char*)results + query->results_end); |
||
439 | memset(results, 0, query->result_size); |
||
440 | ctx->ws->buffer_unmap(query->buffer->cs_buf); |
||
441 | break; |
||
442 | default: |
||
443 | assert(0); |
||
444 | } |
||
445 | |||
446 | /* emit begin query */ |
||
447 | va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer); |
||
448 | va += query->results_end; |
||
449 | |||
450 | switch (query->type) { |
||
451 | case PIPE_QUERY_OCCLUSION_COUNTER: |
||
452 | case PIPE_QUERY_OCCLUSION_PREDICATE: |
||
453 | cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); |
||
454 | cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); |
||
455 | cs->buf[cs->cdw++] = va; |
||
456 | cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF; |
||
457 | break; |
||
458 | case PIPE_QUERY_PRIMITIVES_EMITTED: |
||
459 | case PIPE_QUERY_PRIMITIVES_GENERATED: |
||
460 | case PIPE_QUERY_SO_STATISTICS: |
||
461 | case PIPE_QUERY_SO_OVERFLOW_PREDICATE: |
||
462 | cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0); |
||
463 | cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3); |
||
464 | cs->buf[cs->cdw++] = query->results_end; |
||
465 | cs->buf[cs->cdw++] = 0; |
||
466 | break; |
||
467 | case PIPE_QUERY_TIME_ELAPSED: |
||
468 | cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); |
||
469 | cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); |
||
470 | cs->buf[cs->cdw++] = va; |
||
471 | cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF); |
||
472 | cs->buf[cs->cdw++] = 0; |
||
473 | cs->buf[cs->cdw++] = 0; |
||
474 | break; |
||
475 | default: |
||
476 | assert(0); |
||
477 | } |
||
478 | cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); |
||
479 | cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE); |
||
480 | |||
481 | ctx->num_cs_dw_queries_suspend += query->num_cs_dw; |
||
482 | } |
||
483 | |||
/* Stop a query: emit the end-query packets for the current result
 * block, advance results_end to the next block, and release the CS
 * space reserved by r600_query_begin().
 */
void r600_query_end(struct r600_context *ctx, struct r600_query *query)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	uint64_t va;

	va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
	/* emit end query */
	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		/* End counters sit 8 bytes after the begin counters. */
		va += query->results_end + 8;
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		/* Second half of the block holds the end snapshot; this event
		 * takes a buffer offset, not a VA. */
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
		cs->buf[cs->cdw++] = query->results_end + query->result_size/2;
		cs->buf[cs->cdw++] = 0;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		/* DATA_SEL=3: 64-bit end-of-pipe timestamp into the second half. */
		va += query->results_end + query->result_size/2;
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
		cs->buf[cs->cdw++] = 0;
		cs->buf[cs->cdw++] = 0;
		break;
	default:
		assert(0);
	}
	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE);

	/* Move on to the next ring-buffer block and drop the reservation. */
	query->results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0;
	ctx->num_cs_dw_queries_suspend -= query->num_cs_dw;
}
||
527 | |||
/* Emit SET_PREDICATION packets so subsequent draws are conditioned on
 * the outcome of 'query'.
 *
 * operation: PRED_OP code; PREDICATION_OP_CLEAR disables predication
 *            (query may then be irrelevant).
 * flag_wait: non-zero makes the CP wait for unfinished results instead
 *            of drawing unconditionally.
 */
void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation,
			    int flag_wait)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	uint64_t va;

	if (operation == PREDICATION_OP_CLEAR) {
		si_need_cs_space(ctx, 3, FALSE);

		cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
		cs->buf[cs->cdw++] = 0;
		cs->buf[cs->cdw++] = PRED_OP(PREDICATION_OP_CLEAR);
	} else {
		unsigned results_base = query->results_start;
		unsigned count;
		uint32_t op;

		/* find count of the query data blocks */
		count = (query->buffer->b.b.width0 + query->results_end - query->results_start) % query->buffer->b.b.width0;
		count /= query->result_size;

		/* 5 dwords per block: SET_PREDICATION + NOP/reloc pair. */
		si_need_cs_space(ctx, 5 * count, TRUE);

		op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
		     (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
		va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);

		/* emit predicate packets for all data blocks */
		while (results_base != query->results_end) {
			cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
			cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL;
			cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF);
			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
			cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer,
								   RADEON_USAGE_READ);
			results_base = (results_base + query->result_size) % query->buffer->b.b.width0;

			/* set CONTINUE bit for all packets except the first */
			op |= PREDICATION_CONTINUE;
		}
	}
}
||
570 | |||
571 | struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type) |
||
572 | { |
||
573 | struct r600_query *query; |
||
574 | unsigned buffer_size = 4096; |
||
575 | |||
576 | query = CALLOC_STRUCT(r600_query); |
||
577 | if (query == NULL) |
||
578 | return NULL; |
||
579 | |||
580 | query->type = query_type; |
||
581 | |||
582 | switch (query_type) { |
||
583 | case PIPE_QUERY_OCCLUSION_COUNTER: |
||
584 | case PIPE_QUERY_OCCLUSION_PREDICATE: |
||
585 | query->result_size = 16 * ctx->max_db; |
||
586 | query->num_cs_dw = 6; |
||
587 | break; |
||
588 | case PIPE_QUERY_TIME_ELAPSED: |
||
589 | query->result_size = 16; |
||
590 | query->num_cs_dw = 8; |
||
591 | break; |
||
592 | case PIPE_QUERY_PRIMITIVES_EMITTED: |
||
593 | case PIPE_QUERY_PRIMITIVES_GENERATED: |
||
594 | case PIPE_QUERY_SO_STATISTICS: |
||
595 | case PIPE_QUERY_SO_OVERFLOW_PREDICATE: |
||
596 | /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ |
||
597 | query->result_size = 32; |
||
598 | query->num_cs_dw = 6; |
||
599 | break; |
||
600 | default: |
||
601 | assert(0); |
||
602 | FREE(query); |
||
603 | return NULL; |
||
604 | } |
||
605 | |||
606 | /* adjust buffer size to simplify offsets wrapping math */ |
||
607 | buffer_size -= buffer_size % query->result_size; |
||
608 | |||
609 | /* Queries are normally read by the CPU after |
||
610 | * being written by the gpu, hence staging is probably a good |
||
611 | * usage pattern. |
||
612 | */ |
||
613 | query->buffer = si_resource_create_custom(&ctx->screen->screen, |
||
614 | PIPE_USAGE_STAGING, |
||
615 | buffer_size); |
||
616 | if (!query->buffer) { |
||
617 | FREE(query); |
||
618 | return NULL; |
||
619 | } |
||
620 | return query; |
||
621 | } |
||
622 | |||
623 | void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query) |
||
624 | { |
||
625 | si_resource_reference(&query->buffer, NULL); |
||
626 | free(query); |
||
627 | } |
||
628 | |||
629 | boolean r600_context_query_result(struct r600_context *ctx, |
||
630 | struct r600_query *query, |
||
631 | boolean wait, void *vresult) |
||
632 | { |
||
633 | boolean *result_b = (boolean*)vresult; |
||
634 | uint64_t *result_u64 = (uint64_t*)vresult; |
||
635 | struct pipe_query_data_so_statistics *result_so = |
||
636 | (struct pipe_query_data_so_statistics*)vresult; |
||
637 | |||
638 | if (!r600_query_result(ctx, query, wait)) |
||
639 | return FALSE; |
||
640 | |||
641 | switch (query->type) { |
||
642 | case PIPE_QUERY_OCCLUSION_COUNTER: |
||
643 | case PIPE_QUERY_PRIMITIVES_EMITTED: |
||
644 | case PIPE_QUERY_PRIMITIVES_GENERATED: |
||
645 | *result_u64 = query->result.u64; |
||
646 | break; |
||
647 | case PIPE_QUERY_OCCLUSION_PREDICATE: |
||
648 | case PIPE_QUERY_SO_OVERFLOW_PREDICATE: |
||
649 | *result_b = query->result.b; |
||
650 | break; |
||
651 | case PIPE_QUERY_TIME_ELAPSED: |
||
652 | *result_u64 = (1000000 * query->result.u64) / ctx->screen->info.r600_clock_crystal_freq; |
||
653 | break; |
||
654 | case PIPE_QUERY_SO_STATISTICS: |
||
655 | *result_so = query->result.so; |
||
656 | break; |
||
657 | default: |
||
658 | assert(0); |
||
659 | } |
||
660 | return TRUE; |
||
661 | } |
||
662 | |||
/* End every active query so its counters survive a CS flush;
 * r600_context_queries_resume() restarts them in the new CS.
 */
void r600_context_queries_suspend(struct r600_context *ctx)
{
	struct r600_query *query;

	LIST_FOR_EACH_ENTRY(query, &ctx->active_query_list, list) {
		r600_query_end(ctx, query);
	}
	/* Each r600_query_end() subtracted its num_cs_dw reservation. */
	assert(ctx->num_cs_dw_queries_suspend == 0);
}
||
672 | |||
/* Restart every active query in the (new) CS after a flush; the inverse
 * of r600_context_queries_suspend().
 */
void r600_context_queries_resume(struct r600_context *ctx)
{
	struct r600_query *query;

	/* Nothing may be pending before the restart. */
	assert(ctx->num_cs_dw_queries_suspend == 0);

	LIST_FOR_EACH_ENTRY(query, &ctx->active_query_list, list) {
		r600_query_begin(ctx, query);
	}
}
||
683 | |||
/* Program the VGT registers used by DRAW_OPAQUE: zero the opaque offset
 * and set the vertex stride (in dwords) from the streamout target.
 *
 * The COPY_DW that would load VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE
 * from t->filled_size is currently disabled (#if 0); only the read reloc
 * for t->filled_size is emitted.
 */
void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_target *t)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	/* Reserve room for this sequence plus the following draw
	 * (14 + 21 dwords — presumably the draw's worst case; confirm). */
	si_need_cs_space(ctx, 14 + 21, TRUE);

	cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
	cs->buf[cs->cdw++] = (R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET - SI_CONTEXT_REG_OFFSET) >> 2;
	cs->buf[cs->cdw++] = 0;

	cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
	cs->buf[cs->cdw++] = (R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE - SI_CONTEXT_REG_OFFSET) >> 2;
	cs->buf[cs->cdw++] = t->stride >> 2;

#if 0
	cs->buf[cs->cdw++] = PKT3(PKT3_COPY_DW, 4, 0);
	cs->buf[cs->cdw++] = COPY_DW_SRC_IS_MEM | COPY_DW_DST_IS_REG;
	cs->buf[cs->cdw++] = 0; /* src address lo */
	cs->buf[cs->cdw++] = 0; /* src address hi */
	cs->buf[cs->cdw++] = R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2; /* dst register */
	cs->buf[cs->cdw++] = 0; /* unused */
#endif

	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, t->filled_size, RADEON_USAGE_READ);

}
||
710 | |||
#if R600_TRACE_CS
/* Append a WRITE_DATA packet storing the current CS dword count and a
 * per-screen sequence number into the trace BO, so that after a hang
 * the last CS the GPU processed can be identified (see the polling code
 * in si_context_flush()).
 */
void r600_trace_emit(struct r600_context *rctx)
{
	struct r600_screen *rscreen = rctx->screen;
	struct radeon_winsys_cs *cs = rctx->cs;
	uint64_t va;

	va = r600_resource_va(&rscreen->screen, (void*)rscreen->trace_bo);
	r600_context_bo_reloc(rctx, rscreen->trace_bo, RADEON_USAGE_READWRITE);
	cs->buf[cs->cdw++] = PKT3(PKT3_WRITE_DATA, 4, 0);
	cs->buf[cs->cdw++] = PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
			     PKT3_WRITE_DATA_WR_CONFIRM |
			     PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME);
	cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL;
	cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFFFFFFFUL;
	/* NOTE(review): cs->cdw is both incremented and read in this one
	 * expression — the side effect is unsequenced relative to the value
	 * computation (undefined behavior per C11 6.5p2), so the stored
	 * value is compiler-dependent. Confirm intended value and split
	 * into two statements. */
	cs->buf[cs->cdw++] = cs->cdw;
	cs->buf[cs->cdw++] = rscreen->cs_count;
}
#endif
729 | #endif><>><>>><>><>><>><>><>>> |