Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | * Copyright 2010 Jerome Glisse |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * on the rights to use, copy, modify, merge, publish, distribute, sub |
||
8 | * license, and/or sell copies of the Software, and to permit persons to whom |
||
9 | * the Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
||
19 | * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
||
20 | * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
||
21 | * USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
22 | */ |
||
23 | #include "r600_pipe.h" |
||
24 | #include "r600d.h" |
||
25 | #include "util/u_memory.h" |
||
26 | |||
27 | static bool r600_is_timer_query(unsigned type) |
||
28 | { |
||
29 | return type == PIPE_QUERY_TIME_ELAPSED || |
||
30 | type == PIPE_QUERY_TIMESTAMP || |
||
31 | type == PIPE_QUERY_TIMESTAMP_DISJOINT; |
||
32 | } |
||
33 | |||
34 | static bool r600_query_needs_begin(unsigned type) |
||
35 | { |
||
36 | return type != PIPE_QUERY_GPU_FINISHED && |
||
37 | type != PIPE_QUERY_TIMESTAMP; |
||
38 | } |
||
39 | |||
40 | static struct r600_resource *r600_new_query_buffer(struct r600_context *ctx, unsigned type) |
||
41 | { |
||
42 | unsigned j, i, num_results, buf_size = 4096; |
||
43 | uint32_t *results; |
||
44 | |||
45 | /* Non-GPU queries. */ |
||
46 | switch (type) { |
||
47 | case R600_QUERY_DRAW_CALLS: |
||
48 | case R600_QUERY_REQUESTED_VRAM: |
||
49 | case R600_QUERY_REQUESTED_GTT: |
||
50 | case R600_QUERY_BUFFER_WAIT_TIME: |
||
51 | return NULL; |
||
52 | } |
||
53 | |||
54 | /* Queries are normally read by the CPU after |
||
55 | * being written by the gpu, hence staging is probably a good |
||
56 | * usage pattern. |
||
57 | */ |
||
58 | struct r600_resource *buf = (struct r600_resource*) |
||
59 | pipe_buffer_create(&ctx->screen->screen, PIPE_BIND_CUSTOM, |
||
60 | PIPE_USAGE_STAGING, buf_size); |
||
61 | |||
62 | switch (type) { |
||
63 | case PIPE_QUERY_OCCLUSION_COUNTER: |
||
64 | case PIPE_QUERY_OCCLUSION_PREDICATE: |
||
65 | results = r600_buffer_mmap_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE); |
||
66 | memset(results, 0, buf_size); |
||
67 | |||
68 | /* Set top bits for unused backends. */ |
||
69 | num_results = buf_size / (16 * ctx->max_db); |
||
70 | for (j = 0; j < num_results; j++) { |
||
71 | for (i = 0; i < ctx->max_db; i++) { |
||
72 | if (!(ctx->backend_mask & (1< |
||
73 | results[(i * 4)+1] = 0x80000000; |
||
74 | results[(i * 4)+3] = 0x80000000; |
||
75 | } |
||
76 | } |
||
77 | results += 4 * ctx->max_db; |
||
78 | } |
||
79 | ctx->ws->buffer_unmap(buf->cs_buf); |
||
80 | break; |
||
81 | case PIPE_QUERY_TIME_ELAPSED: |
||
82 | case PIPE_QUERY_TIMESTAMP: |
||
83 | break; |
||
84 | case PIPE_QUERY_PRIMITIVES_EMITTED: |
||
85 | case PIPE_QUERY_PRIMITIVES_GENERATED: |
||
86 | case PIPE_QUERY_SO_STATISTICS: |
||
87 | case PIPE_QUERY_SO_OVERFLOW_PREDICATE: |
||
88 | case PIPE_QUERY_PIPELINE_STATISTICS: |
||
89 | results = r600_buffer_mmap_sync_with_rings(ctx, buf, PIPE_TRANSFER_WRITE); |
||
90 | memset(results, 0, buf_size); |
||
91 | ctx->ws->buffer_unmap(buf->cs_buf); |
||
92 | break; |
||
93 | default: |
||
94 | assert(0); |
||
95 | } |
||
96 | return buf; |
||
97 | } |
||
98 | |||
99 | static void r600_update_occlusion_query_state(struct r600_context *rctx, |
||
100 | unsigned type, int diff) |
||
101 | { |
||
102 | if (type == PIPE_QUERY_OCCLUSION_COUNTER || |
||
103 | type == PIPE_QUERY_OCCLUSION_PREDICATE) { |
||
104 | bool enable; |
||
105 | |||
106 | rctx->num_occlusion_queries += diff; |
||
107 | assert(rctx->num_occlusion_queries >= 0); |
||
108 | |||
109 | enable = rctx->num_occlusion_queries != 0; |
||
110 | |||
111 | if (rctx->db_misc_state.occlusion_query_enabled != enable) { |
||
112 | rctx->db_misc_state.occlusion_query_enabled = enable; |
||
113 | rctx->db_misc_state.atom.dirty = true; |
||
114 | } |
||
115 | } |
||
116 | } |
||
117 | |||
/* Emit the PM4 packets that start GPU result collection for a query at the
 * current write offset of its result buffer.  Grows the buffer chain when
 * the current buffer is full, and accounts the CS space the matching "end"
 * will need on the nontimer suspend counter. */
static void r600_emit_query_begin(struct r600_context *ctx, struct r600_query *query)
{
	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
	uint64_t va;

	r600_update_occlusion_query_state(ctx, query->type, 1);
	/* Reserve space for both this begin and the matching end packets. */
	r600_need_cs_space(ctx, query->num_cs_dw * 2, TRUE);

	/* Get a new query buffer if needed. */
	if (query->buffer.results_end + query->result_size > query->buffer.buf->b.b.width0) {
		/* NOTE(review): MALLOC_STRUCT result is not NULL-checked here —
		 * presumably OOM is considered fatal elsewhere; confirm. */
		struct r600_query_buffer *qbuf = MALLOC_STRUCT(r600_query_buffer);
		/* The filled buffer becomes the head of the "previous" chain. */
		*qbuf = query->buffer;
		query->buffer.buf = r600_new_query_buffer(ctx, query->type);
		query->buffer.results_end = 0;
		query->buffer.previous = qbuf;
	}

	/* emit begin query */
	va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer.buf);
	va += query->buffer.results_end;

	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		/* ZPASS_DONE writes the per-DB begin counters at va. */
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		/* Snapshot the streamout statistics counters. */
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		/* EVENT_WRITE_EOP writes a 64-bit timestamp after the event;
		 * (3 << 29) selects the timestamp data format. */
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
		cs->buf[cs->cdw++] = 0;
		cs->buf[cs->cdw++] = 0;
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
		/* Start the pipeline-stat counters only for the first
		 * concurrently active pipeline statistics query. */
		if (!ctx->num_pipelinestat_queries) {
			cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
			cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_START) | EVENT_INDEX(0);
		}
		ctx->num_pipelinestat_queries++;
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	default:
		assert(0);
	}
	/* Relocation so the kernel keeps the result buffer resident. */
	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE);

	if (!r600_is_timer_query(query->type)) {
		ctx->num_cs_dw_nontimer_queries_suspend += query->num_cs_dw;
	}
}
||
185 | |||
/* Emit the PM4 packets that stop GPU result collection for a query and
 * advance the result buffer write offset past the finished begin/end pair. */
static void r600_emit_query_end(struct r600_context *ctx, struct r600_query *query)
{
	struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
	uint64_t va;

	/* The queries which need begin already called this in begin_query. */
	if (!r600_query_needs_begin(query->type)) {
		r600_need_cs_space(ctx, query->num_cs_dw, FALSE);
	}

	va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer.buf);
	/* emit end query */
	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		/* End counters live 8 bytes after the begin counters. */
		va += query->buffer.results_end + 8;
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		/* The end snapshot occupies the second half of the slot. */
		va += query->buffer.results_end + query->result_size/2;
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		va += query->buffer.results_end + query->result_size/2;
		/* fall through */
	case PIPE_QUERY_TIMESTAMP:
		/* (3 << 29) selects the 64-bit timestamp data format. */
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
		cs->buf[cs->cdw++] = 0;
		cs->buf[cs->cdw++] = 0;
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
		/* Stop the counters only when the last concurrently active
		 * pipeline statistics query ends. */
		assert(ctx->num_pipelinestat_queries > 0);
		ctx->num_pipelinestat_queries--;
		if (!ctx->num_pipelinestat_queries) {
			cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
			cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PIPELINESTAT_STOP) | EVENT_INDEX(0);
		}
		va += query->buffer.results_end + query->result_size/2;
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	default:
		assert(0);
	}
	/* Relocation so the kernel keeps the result buffer resident. */
	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, query->buffer.buf, RADEON_USAGE_WRITE);

	query->buffer.results_end += query->result_size;

	/* Undo the suspend-space accounting done by the matching begin. */
	if (r600_query_needs_begin(query->type)) {
		if (!r600_is_timer_query(query->type)) {
			ctx->num_cs_dw_nontimer_queries_suspend -= query->num_cs_dw;
		}
	}

	r600_update_occlusion_query_state(ctx, query->type, -1);
}
||
257 | |||
258 | static void r600_emit_query_predication(struct r600_context *ctx, struct r600_query *query, |
||
259 | int operation, bool flag_wait) |
||
260 | { |
||
261 | struct radeon_winsys_cs *cs = ctx->rings.gfx.cs; |
||
262 | |||
263 | if (operation == PREDICATION_OP_CLEAR) { |
||
264 | r600_need_cs_space(ctx, 3, FALSE); |
||
265 | |||
266 | cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0); |
||
267 | cs->buf[cs->cdw++] = 0; |
||
268 | cs->buf[cs->cdw++] = PRED_OP(PREDICATION_OP_CLEAR); |
||
269 | } else { |
||
270 | struct r600_query_buffer *qbuf; |
||
271 | unsigned count; |
||
272 | uint32_t op; |
||
273 | |||
274 | /* Find how many results there are. */ |
||
275 | count = 0; |
||
276 | for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) { |
||
277 | count += qbuf->results_end / query->result_size; |
||
278 | } |
||
279 | |||
280 | r600_need_cs_space(ctx, 5 * count, TRUE); |
||
281 | |||
282 | op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE | |
||
283 | (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW); |
||
284 | |||
285 | /* emit predicate packets for all data blocks */ |
||
286 | for (qbuf = &query->buffer; qbuf; qbuf = qbuf->previous) { |
||
287 | unsigned results_base = 0; |
||
288 | uint64_t va = r600_resource_va(&ctx->screen->screen, &qbuf->buf->b.b); |
||
289 | |||
290 | while (results_base < qbuf->results_end) { |
||
291 | cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0); |
||
292 | cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL; |
||
293 | cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF); |
||
294 | cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0); |
||
295 | cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, &ctx->rings.gfx, qbuf->buf, RADEON_USAGE_READ); |
||
296 | results_base += query->result_size; |
||
297 | |||
298 | /* set CONTINUE bit for all packets except the first */ |
||
299 | op |= PREDICATION_CONTINUE; |
||
300 | } |
||
301 | } while (qbuf); |
||
302 | } |
||
303 | } |
||
304 | |||
305 | static struct pipe_query *r600_create_query(struct pipe_context *ctx, unsigned query_type) |
||
306 | { |
||
307 | struct r600_context *rctx = (struct r600_context *)ctx; |
||
308 | struct r600_query *query; |
||
309 | bool skip_allocation = false; |
||
310 | |||
311 | query = CALLOC_STRUCT(r600_query); |
||
312 | if (query == NULL) |
||
313 | return NULL; |
||
314 | |||
315 | query->type = query_type; |
||
316 | |||
317 | switch (query_type) { |
||
318 | case PIPE_QUERY_OCCLUSION_COUNTER: |
||
319 | case PIPE_QUERY_OCCLUSION_PREDICATE: |
||
320 | query->result_size = 16 * rctx->max_db; |
||
321 | query->num_cs_dw = 6; |
||
322 | break; |
||
323 | case PIPE_QUERY_TIME_ELAPSED: |
||
324 | query->result_size = 16; |
||
325 | query->num_cs_dw = 8; |
||
326 | break; |
||
327 | case PIPE_QUERY_TIMESTAMP: |
||
328 | query->result_size = 8; |
||
329 | query->num_cs_dw = 8; |
||
330 | break; |
||
331 | case PIPE_QUERY_PRIMITIVES_EMITTED: |
||
332 | case PIPE_QUERY_PRIMITIVES_GENERATED: |
||
333 | case PIPE_QUERY_SO_STATISTICS: |
||
334 | case PIPE_QUERY_SO_OVERFLOW_PREDICATE: |
||
335 | /* NumPrimitivesWritten, PrimitiveStorageNeeded. */ |
||
336 | query->result_size = 32; |
||
337 | query->num_cs_dw = 6; |
||
338 | break; |
||
339 | case PIPE_QUERY_PIPELINE_STATISTICS: |
||
340 | /* 11 values on EG, 8 on R600. */ |
||
341 | query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16; |
||
342 | query->num_cs_dw = 8; |
||
343 | break; |
||
344 | /* Non-GPU queries. */ |
||
345 | case R600_QUERY_DRAW_CALLS: |
||
346 | case R600_QUERY_REQUESTED_VRAM: |
||
347 | case R600_QUERY_REQUESTED_GTT: |
||
348 | case R600_QUERY_BUFFER_WAIT_TIME: |
||
349 | skip_allocation = true; |
||
350 | break; |
||
351 | default: |
||
352 | assert(0); |
||
353 | FREE(query); |
||
354 | return NULL; |
||
355 | } |
||
356 | |||
357 | if (!skip_allocation) { |
||
358 | query->buffer.buf = r600_new_query_buffer(rctx, query_type); |
||
359 | if (!query->buffer.buf) { |
||
360 | FREE(query); |
||
361 | return NULL; |
||
362 | } |
||
363 | } |
||
364 | return (struct pipe_query*)query; |
||
365 | } |
||
366 | |||
367 | static void r600_destroy_query(struct pipe_context *ctx, struct pipe_query *query) |
||
368 | { |
||
369 | struct r600_query *rquery = (struct r600_query*)query; |
||
370 | struct r600_query_buffer *prev = rquery->buffer.previous; |
||
371 | |||
372 | /* Release all query buffers. */ |
||
373 | while (prev) { |
||
374 | struct r600_query_buffer *qbuf = prev; |
||
375 | prev = prev->previous; |
||
376 | pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL); |
||
377 | FREE(qbuf); |
||
378 | } |
||
379 | |||
380 | pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL); |
||
381 | FREE(query); |
||
382 | } |
||
383 | |||
/* pipe_context::begin_query — snapshot the begin value for software
 * queries, or reset the result buffer chain and emit the GPU begin packets
 * for hardware queries. */
static void r600_begin_query(struct pipe_context *ctx, struct pipe_query *query)
{
	struct r600_context *rctx = (struct r600_context *)ctx;
	struct r600_query *rquery = (struct r600_query *)query;
	struct r600_query_buffer *prev = rquery->buffer.previous;

	/* End-only query types must not be begun. */
	if (!r600_query_needs_begin(rquery->type)) {
		assert(0);
		return;
	}

	/* Non-GPU queries. */
	switch (rquery->type) {
	case R600_QUERY_DRAW_CALLS:
		rquery->begin_result = rctx->num_draw_calls;
		return;
	case R600_QUERY_REQUESTED_VRAM:
	case R600_QUERY_REQUESTED_GTT:
		/* Memory queries report the absolute value at end_query. */
		rquery->begin_result = 0;
		return;
	case R600_QUERY_BUFFER_WAIT_TIME:
		rquery->begin_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS);
		return;
	}

	/* Discard the old query buffers. */
	while (prev) {
		struct r600_query_buffer *qbuf = prev;
		prev = prev->previous;
		pipe_resource_reference((struct pipe_resource**)&qbuf->buf, NULL);
		FREE(qbuf);
	}

	/* Obtain a new buffer if the current one can't be mapped without a stall. */
	if (r600_rings_is_buffer_referenced(rctx, rquery->buffer.buf->cs_buf, RADEON_USAGE_READWRITE) ||
	    rctx->ws->buffer_is_busy(rquery->buffer.buf->buf, RADEON_USAGE_READWRITE)) {
		pipe_resource_reference((struct pipe_resource**)&rquery->buffer.buf, NULL);
		rquery->buffer.buf = r600_new_query_buffer(rctx, rquery->type);
	}

	rquery->buffer.results_end = 0;
	rquery->buffer.previous = NULL;

	r600_emit_query_begin(rctx, rquery);

	/* Nontimer queries must be suspended/resumed around CS flushes. */
	if (!r600_is_timer_query(rquery->type)) {
		LIST_ADDTAIL(&rquery->list, &rctx->active_nontimer_queries);
	}
}
||
433 | |||
434 | static void r600_end_query(struct pipe_context *ctx, struct pipe_query *query) |
||
435 | { |
||
436 | struct r600_context *rctx = (struct r600_context *)ctx; |
||
437 | struct r600_query *rquery = (struct r600_query *)query; |
||
438 | |||
439 | /* Non-GPU queries. */ |
||
440 | switch (rquery->type) { |
||
441 | case R600_QUERY_DRAW_CALLS: |
||
442 | rquery->end_result = rctx->num_draw_calls; |
||
443 | return; |
||
444 | case R600_QUERY_REQUESTED_VRAM: |
||
445 | rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_VRAM_MEMORY); |
||
446 | return; |
||
447 | case R600_QUERY_REQUESTED_GTT: |
||
448 | rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_REQUESTED_GTT_MEMORY); |
||
449 | return; |
||
450 | case R600_QUERY_BUFFER_WAIT_TIME: |
||
451 | rquery->end_result = rctx->ws->query_value(rctx->ws, RADEON_BUFFER_WAIT_TIME_NS); |
||
452 | return; |
||
453 | } |
||
454 | |||
455 | r600_emit_query_end(rctx, rquery); |
||
456 | |||
457 | if (r600_query_needs_begin(rquery->type) && !r600_is_timer_query(rquery->type)) { |
||
458 | LIST_DELINIT(&rquery->list); |
||
459 | } |
||
460 | } |
||
461 | |||
/* Read one 64-bit begin/end counter pair from a mapped result buffer
 * (indices are in 32-bit words) and return end - start.  When
 * test_status_bit is set, the pair only counts if the top bit of both
 * values is set (the GPU's "result written" marker); otherwise 0 is
 * returned. */
static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
				       bool test_status_bit)
{
	uint32_t *words = (uint32_t*)map;
	uint64_t begin, finish;

	begin  = (uint64_t)words[start_index] | ((uint64_t)words[start_index + 1] << 32);
	finish = (uint64_t)words[end_index]   | ((uint64_t)words[end_index + 1]   << 32);

	if (test_status_bit &&
	    (!(begin & 0x8000000000000000UL) || !(finish & 0x8000000000000000UL))) {
		/* One of the halves was never written by the GPU. */
		return 0;
	}
	return finish - begin;
}
||
479 | |||
/* Accumulate the results stored in one buffer of a query's chain into
 * *result.  Maps the buffer (optionally without blocking) and walks every
 * begin/end slot written so far.  Returns FALSE only when a non-blocking
 * map would have stalled. */
static boolean r600_get_query_buffer_result(struct r600_context *ctx,
					    struct r600_query *query,
					    struct r600_query_buffer *qbuf,
					    boolean wait,
					    union pipe_query_result *result)
{
	unsigned results_base = 0;
	char *map;

	/* Non-GPU queries. */
	switch (query->type) {
	case R600_QUERY_DRAW_CALLS:
	case R600_QUERY_REQUESTED_VRAM:
	case R600_QUERY_REQUESTED_GTT:
	case R600_QUERY_BUFFER_WAIT_TIME:
		result->u64 = query->end_result - query->begin_result;
		return TRUE;
	}

	map = r600_buffer_mmap_sync_with_rings(ctx, qbuf->buf,
						PIPE_TRANSFER_READ |
						(wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
	if (!map)
		return FALSE;

	/* count all results across all data blocks */
	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
		/* Sum the 16-byte per-DB begin/end pairs; the status bit
		 * filters out DBs that never wrote a result. */
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 0, 2, true);
			results_base += 16;
		}
		break;
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		while (results_base != qbuf->results_end) {
			result->b = result->b ||
				r600_query_read_result(map + results_base, 0, 2, true) != 0;
			results_base += 16;
		}
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 0, 2, false);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_TIMESTAMP:
	{
		/* End-only query: a single 64-bit value at offset 0. */
		uint32_t *current_result = (uint32_t*)map;
		result->u64 = (uint64_t)current_result[0] |
			      (uint64_t)current_result[1] << 32;
		break;
	}
	case PIPE_QUERY_PRIMITIVES_EMITTED:
		/* SAMPLE_STREAMOUTSTATS stores this structure:
		 * {
		 *    u64 NumPrimitivesWritten;
		 *    u64 PrimitiveStorageNeeded;
		 * }
		 * We only need NumPrimitivesWritten here. */
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 2, 6, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_PRIMITIVES_GENERATED:
		/* Here we read PrimitiveStorageNeeded. */
		while (results_base != qbuf->results_end) {
			result->u64 +=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_SO_STATISTICS:
		while (results_base != qbuf->results_end) {
			result->so_statistics.num_primitives_written +=
				r600_query_read_result(map + results_base, 2, 6, true);
			result->so_statistics.primitives_storage_needed +=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		/* Overflow occurred iff fewer primitives were written than
		 * needed storage. */
		while (results_base != qbuf->results_end) {
			result->b = result->b ||
				r600_query_read_result(map + results_base, 2, 6, true) !=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base += query->result_size;
		}
		break;
	case PIPE_QUERY_PIPELINE_STATISTICS:
		/* The word offsets differ per chip generation: EG stores 11
		 * 64-bit counters per snapshot, R600 stores 8. */
		if (ctx->chip_class >= EVERGREEN) {
			while (results_base != qbuf->results_end) {
				result->pipeline_statistics.ps_invocations +=
					r600_query_read_result(map + results_base, 0, 22, false);
				result->pipeline_statistics.c_primitives +=
					r600_query_read_result(map + results_base, 2, 24, false);
				result->pipeline_statistics.c_invocations +=
					r600_query_read_result(map + results_base, 4, 26, false);
				result->pipeline_statistics.vs_invocations +=
					r600_query_read_result(map + results_base, 6, 28, false);
				result->pipeline_statistics.gs_invocations +=
					r600_query_read_result(map + results_base, 8, 30, false);
				result->pipeline_statistics.gs_primitives +=
					r600_query_read_result(map + results_base, 10, 32, false);
				result->pipeline_statistics.ia_primitives +=
					r600_query_read_result(map + results_base, 12, 34, false);
				result->pipeline_statistics.ia_vertices +=
					r600_query_read_result(map + results_base, 14, 36, false);
				result->pipeline_statistics.hs_invocations +=
					r600_query_read_result(map + results_base, 16, 38, false);
				result->pipeline_statistics.ds_invocations +=
					r600_query_read_result(map + results_base, 18, 40, false);
				result->pipeline_statistics.cs_invocations +=
					r600_query_read_result(map + results_base, 20, 42, false);
				results_base += query->result_size;
			}
		} else {
			while (results_base != qbuf->results_end) {
				result->pipeline_statistics.ps_invocations +=
					r600_query_read_result(map + results_base, 0, 16, false);
				result->pipeline_statistics.c_primitives +=
					r600_query_read_result(map + results_base, 2, 18, false);
				result->pipeline_statistics.c_invocations +=
					r600_query_read_result(map + results_base, 4, 20, false);
				result->pipeline_statistics.vs_invocations +=
					r600_query_read_result(map + results_base, 6, 22, false);
				result->pipeline_statistics.gs_invocations +=
					r600_query_read_result(map + results_base, 8, 24, false);
				result->pipeline_statistics.gs_primitives +=
					r600_query_read_result(map + results_base, 10, 26, false);
				result->pipeline_statistics.ia_primitives +=
					r600_query_read_result(map + results_base, 12, 28, false);
				result->pipeline_statistics.ia_vertices +=
					r600_query_read_result(map + results_base, 14, 30, false);
				results_base += query->result_size;
			}
		}
#if 0 /* for testing */
		printf("Pipeline stats: IA verts=%llu, IA prims=%llu, VS=%llu, HS=%llu, "
		       "DS=%llu, GS=%llu, GS prims=%llu, Clipper=%llu, "
		       "Clipper prims=%llu, PS=%llu, CS=%llu\n",
		       result->pipeline_statistics.ia_vertices,
		       result->pipeline_statistics.ia_primitives,
		       result->pipeline_statistics.vs_invocations,
		       result->pipeline_statistics.hs_invocations,
		       result->pipeline_statistics.ds_invocations,
		       result->pipeline_statistics.gs_invocations,
		       result->pipeline_statistics.gs_primitives,
		       result->pipeline_statistics.c_invocations,
		       result->pipeline_statistics.c_primitives,
		       result->pipeline_statistics.ps_invocations,
		       result->pipeline_statistics.cs_invocations);
#endif
		break;
	default:
		assert(0);
	}

	ctx->ws->buffer_unmap(qbuf->buf->cs_buf);
	return TRUE;
}
||
645 | |||
646 | static boolean r600_get_query_result(struct pipe_context *ctx, |
||
647 | struct pipe_query *query, |
||
648 | boolean wait, union pipe_query_result *result) |
||
649 | { |
||
650 | struct r600_context *rctx = (struct r600_context *)ctx; |
||
651 | struct r600_query *rquery = (struct r600_query *)query; |
||
652 | struct r600_query_buffer *qbuf; |
||
653 | |||
654 | util_query_clear_result(result, rquery->type); |
||
655 | |||
656 | for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous) { |
||
657 | if (!r600_get_query_buffer_result(rctx, rquery, qbuf, wait, result)) { |
||
658 | return FALSE; |
||
659 | } |
||
660 | } |
||
661 | |||
662 | /* Convert the time to expected units. */ |
||
663 | if (rquery->type == PIPE_QUERY_TIME_ELAPSED || |
||
664 | rquery->type == PIPE_QUERY_TIMESTAMP) { |
||
665 | result->u64 = (1000000 * result->u64) / rctx->screen->info.r600_clock_crystal_freq; |
||
666 | } |
||
667 | return TRUE; |
||
668 | } |
||
669 | |||
670 | static void r600_render_condition(struct pipe_context *ctx, |
||
671 | struct pipe_query *query, |
||
672 | boolean condition, |
||
673 | uint mode) |
||
674 | { |
||
675 | struct r600_context *rctx = (struct r600_context *)ctx; |
||
676 | struct r600_query *rquery = (struct r600_query *)query; |
||
677 | bool wait_flag = false; |
||
678 | |||
679 | rctx->current_render_cond = query; |
||
680 | rctx->current_render_cond_cond = condition; |
||
681 | rctx->current_render_cond_mode = mode; |
||
682 | |||
683 | if (query == NULL) { |
||
684 | if (rctx->predicate_drawing) { |
||
685 | rctx->predicate_drawing = false; |
||
686 | r600_emit_query_predication(rctx, NULL, PREDICATION_OP_CLEAR, false); |
||
687 | } |
||
688 | return; |
||
689 | } |
||
690 | |||
691 | if (mode == PIPE_RENDER_COND_WAIT || |
||
692 | mode == PIPE_RENDER_COND_BY_REGION_WAIT) { |
||
693 | wait_flag = true; |
||
694 | } |
||
695 | |||
696 | rctx->predicate_drawing = true; |
||
697 | |||
698 | switch (rquery->type) { |
||
699 | case PIPE_QUERY_OCCLUSION_COUNTER: |
||
700 | case PIPE_QUERY_OCCLUSION_PREDICATE: |
||
701 | r600_emit_query_predication(rctx, rquery, PREDICATION_OP_ZPASS, wait_flag); |
||
702 | break; |
||
703 | case PIPE_QUERY_PRIMITIVES_EMITTED: |
||
704 | case PIPE_QUERY_PRIMITIVES_GENERATED: |
||
705 | case PIPE_QUERY_SO_STATISTICS: |
||
706 | case PIPE_QUERY_SO_OVERFLOW_PREDICATE: |
||
707 | r600_emit_query_predication(rctx, rquery, PREDICATION_OP_PRIMCOUNT, wait_flag); |
||
708 | break; |
||
709 | default: |
||
710 | assert(0); |
||
711 | } |
||
712 | } |
||
713 | |||
714 | void r600_suspend_nontimer_queries(struct r600_context *ctx) |
||
715 | { |
||
716 | struct r600_query *query; |
||
717 | |||
718 | LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) { |
||
719 | r600_emit_query_end(ctx, query); |
||
720 | } |
||
721 | assert(ctx->num_cs_dw_nontimer_queries_suspend == 0); |
||
722 | } |
||
723 | |||
724 | void r600_resume_nontimer_queries(struct r600_context *ctx) |
||
725 | { |
||
726 | struct r600_query *query; |
||
727 | |||
728 | assert(ctx->num_cs_dw_nontimer_queries_suspend == 0); |
||
729 | |||
730 | LIST_FOR_EACH_ENTRY(query, &ctx->active_nontimer_queries, list) { |
||
731 | r600_emit_query_begin(ctx, query); |
||
732 | } |
||
733 | } |
||
734 | |||
735 | void r600_init_query_functions(struct r600_context *rctx) |
||
736 | { |
||
737 | rctx->context.create_query = r600_create_query; |
||
738 | rctx->context.destroy_query = r600_destroy_query; |
||
739 | rctx->context.begin_query = r600_begin_query; |
||
740 | rctx->context.end_query = r600_end_query; |
||
741 | rctx->context.get_query_result = r600_get_query_result; |
||
742 | |||
743 | if (rctx->screen->info.r600_num_backends > 0) |
||
744 | rctx->context.render_condition = r600_render_condition; |
||
745 | }><>><>><>>><>><>>> |