Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Copyright © 2008 Intel Corporation |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
||
21 | * IN THE SOFTWARE. |
||
22 | * |
||
23 | * Authors: |
||
24 | * Eric Anholt |
||
25 | * |
||
26 | */ |
||
27 | |||
28 | /** @file brw_queryobj.c |
||
29 | * |
||
30 | * Support for query objects (GL_ARB_occlusion_query, GL_ARB_timer_query, |
||
31 | * GL_EXT_transform_feedback, and friends). |
||
32 | * |
||
33 | * The hardware provides a PIPE_CONTROL command that can report the number of |
||
34 | * fragments that passed the depth test, or the hardware timer. They are |
||
35 | * appropriately synced with the stage of the pipeline for our extensions' |
||
36 | * needs. |
||
37 | */ |
||
38 | #include "main/imports.h" |
||
39 | |||
40 | #include "brw_context.h" |
||
41 | #include "brw_defines.h" |
||
42 | #include "brw_state.h" |
||
43 | #include "intel_batchbuffer.h" |
||
44 | #include "intel_reg.h" |
||
45 | |||
46 | /** |
||
47 | * Emit PIPE_CONTROLs to write the current GPU timestamp into a buffer. |
||
48 | */ |
||
49 | void |
||
50 | brw_write_timestamp(struct brw_context *brw, drm_intel_bo *query_bo, int idx) |
||
51 | { |
||
52 | if (brw->gen == 6) { |
||
53 | /* Emit Sandybridge workaround flush: */ |
||
54 | brw_emit_pipe_control_flush(brw, |
||
55 | PIPE_CONTROL_CS_STALL | |
||
56 | PIPE_CONTROL_STALL_AT_SCOREBOARD); |
||
57 | } |
||
58 | |||
59 | brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_TIMESTAMP, |
||
60 | query_bo, idx * sizeof(uint64_t), 0, 0); |
||
61 | } |
||
62 | |||
63 | /** |
||
64 | * Emit PIPE_CONTROLs to write the PS_DEPTH_COUNT register into a buffer. |
||
65 | */ |
||
66 | void |
||
67 | brw_write_depth_count(struct brw_context *brw, drm_intel_bo *query_bo, int idx) |
||
68 | { |
||
69 | uint32_t flags; |
||
70 | |||
71 | flags = (PIPE_CONTROL_WRITE_DEPTH_COUNT | |
||
72 | PIPE_CONTROL_DEPTH_STALL); |
||
73 | |||
74 | /* Needed to ensure the memory is coherent for the MI_LOAD_REGISTER_MEM |
||
75 | * command when loading the values into the predicate source registers for |
||
76 | * conditional rendering. |
||
77 | */ |
||
78 | if (brw->predicate.supported) |
||
79 | flags |= PIPE_CONTROL_FLUSH_ENABLE; |
||
80 | |||
81 | brw_emit_pipe_control_write(brw, flags, query_bo, |
||
82 | idx * sizeof(uint64_t), 0, 0); |
||
83 | } |
||
84 | |||
85 | /** |
||
86 | * Wait on the query object's BO and calculate the final result. |
||
87 | */ |
||
88 | static void |
||
89 | brw_queryobj_get_results(struct gl_context *ctx, |
||
90 | struct brw_query_object *query) |
||
91 | { |
||
92 | struct brw_context *brw = brw_context(ctx); |
||
93 | |||
94 | int i; |
||
95 | uint64_t *results; |
||
96 | |||
97 | assert(brw->gen < 6); |
||
98 | |||
99 | if (query->bo == NULL) |
||
100 | return; |
||
101 | |||
102 | /* If the application has requested the query result, but this batch is |
||
103 | * still contributing to it, flush it now so the results will be present |
||
104 | * when mapped. |
||
105 | */ |
||
106 | if (drm_intel_bo_references(brw->batch.bo, query->bo)) |
||
107 | intel_batchbuffer_flush(brw); |
||
108 | |||
109 | if (unlikely(brw->perf_debug)) { |
||
110 | if (drm_intel_bo_busy(query->bo)) { |
||
111 | perf_debug("Stalling on the GPU waiting for a query object.\n"); |
||
112 | } |
||
113 | } |
||
114 | |||
115 | drm_intel_bo_map(query->bo, false); |
||
116 | results = query->bo->virtual; |
||
117 | switch (query->Base.Target) { |
||
118 | case GL_TIME_ELAPSED_EXT: |
||
119 | /* The query BO contains the starting and ending timestamps. |
||
120 | * Subtract the two and convert to nanoseconds. |
||
121 | */ |
||
122 | query->Base.Result += 1000 * ((results[1] >> 32) - (results[0] >> 32)); |
||
123 | break; |
||
124 | |||
125 | case GL_TIMESTAMP: |
||
126 | /* The query BO contains a single timestamp value in results[0]. */ |
||
127 | query->Base.Result = 1000 * (results[0] >> 32); |
||
128 | break; |
||
129 | |||
130 | case GL_SAMPLES_PASSED_ARB: |
||
131 | /* Loop over pairs of values from the BO, which are the PS_DEPTH_COUNT |
||
132 | * value at the start and end of the batchbuffer. Subtract them to |
||
133 | * get the number of fragments which passed the depth test in each |
||
134 | * individual batch, and add those differences up to get the number |
||
135 | * of fragments for the entire query. |
||
136 | * |
||
137 | * Note that query->Base.Result may already be non-zero. We may have |
||
138 | * run out of space in the query's BO and allocated a new one. If so, |
||
139 | * this function was already called to accumulate the results so far. |
||
140 | */ |
||
141 | for (i = 0; i < query->last_index; i++) { |
||
142 | query->Base.Result += results[i * 2 + 1] - results[i * 2]; |
||
143 | } |
||
144 | break; |
||
145 | |||
146 | case GL_ANY_SAMPLES_PASSED: |
||
147 | case GL_ANY_SAMPLES_PASSED_CONSERVATIVE: |
||
148 | /* If the starting and ending PS_DEPTH_COUNT from any of the batches |
||
149 | * differ, then some fragments passed the depth test. |
||
150 | */ |
||
151 | for (i = 0; i < query->last_index; i++) { |
||
152 | if (results[i * 2 + 1] != results[i * 2]) { |
||
153 | query->Base.Result = GL_TRUE; |
||
154 | break; |
||
155 | } |
||
156 | } |
||
157 | break; |
||
158 | |||
159 | default: |
||
160 | unreachable("Unrecognized query target in brw_queryobj_get_results()"); |
||
161 | } |
||
162 | drm_intel_bo_unmap(query->bo); |
||
163 | |||
164 | /* Now that we've processed the data stored in the query's buffer object, |
||
165 | * we can release it. |
||
166 | */ |
||
167 | drm_intel_bo_unreference(query->bo); |
||
168 | query->bo = NULL; |
||
169 | } |
||
170 | |||
171 | /** |
||
172 | * The NewQueryObject() driver hook. |
||
173 | * |
||
174 | * Allocates and initializes a new query object. |
||
175 | */ |
||
176 | static struct gl_query_object * |
||
177 | brw_new_query_object(struct gl_context *ctx, GLuint id) |
||
178 | { |
||
179 | struct brw_query_object *query; |
||
180 | |||
181 | query = calloc(1, sizeof(struct brw_query_object)); |
||
182 | |||
183 | query->Base.Id = id; |
||
184 | query->Base.Result = 0; |
||
185 | query->Base.Active = false; |
||
186 | query->Base.Ready = true; |
||
187 | |||
188 | return &query->Base; |
||
189 | } |
||
190 | |||
191 | /** |
||
192 | * The DeleteQuery() driver hook. |
||
193 | */ |
||
194 | static void |
||
195 | brw_delete_query(struct gl_context *ctx, struct gl_query_object *q) |
||
196 | { |
||
197 | struct brw_query_object *query = (struct brw_query_object *)q; |
||
198 | |||
199 | drm_intel_bo_unreference(query->bo); |
||
200 | free(query); |
||
201 | } |
||
202 | |||
203 | /** |
||
204 | * Gen4-5 driver hook for glBeginQuery(). |
||
205 | * |
||
206 | * Initializes driver structures and emits any GPU commands required to begin |
||
207 | * recording data for the query. |
||
208 | */ |
||
209 | static void |
||
210 | brw_begin_query(struct gl_context *ctx, struct gl_query_object *q) |
||
211 | { |
||
212 | struct brw_context *brw = brw_context(ctx); |
||
213 | struct brw_query_object *query = (struct brw_query_object *)q; |
||
214 | |||
215 | assert(brw->gen < 6); |
||
216 | |||
217 | switch (query->Base.Target) { |
||
218 | case GL_TIME_ELAPSED_EXT: |
||
219 | /* For timestamp queries, we record the starting time right away so that |
||
220 | * we measure the full time between BeginQuery and EndQuery. There's |
||
221 | * some debate about whether this is the right thing to do. Our decision |
||
222 | * is based on the following text from the ARB_timer_query extension: |
||
223 | * |
||
224 | * "(5) Should the extension measure total time elapsed between the full |
||
225 | * completion of the BeginQuery and EndQuery commands, or just time |
||
226 | * spent in the graphics library? |
||
227 | * |
||
228 | * RESOLVED: This extension will measure the total time elapsed |
||
229 | * between the full completion of these commands. Future extensions |
||
230 | * may implement a query to determine time elapsed at different stages |
||
231 | * of the graphics pipeline." |
||
232 | * |
||
233 | * We write a starting timestamp now (at index 0). At EndQuery() time, |
||
234 | * we'll write a second timestamp (at index 1), and subtract the two to |
||
235 | * obtain the time elapsed. Notably, this includes time elapsed while |
||
236 | * the system was doing other work, such as running other applications. |
||
237 | */ |
||
238 | drm_intel_bo_unreference(query->bo); |
||
239 | query->bo = drm_intel_bo_alloc(brw->bufmgr, "timer query", 4096, 4096); |
||
240 | brw_write_timestamp(brw, query->bo, 0); |
||
241 | break; |
||
242 | |||
243 | case GL_ANY_SAMPLES_PASSED: |
||
244 | case GL_ANY_SAMPLES_PASSED_CONSERVATIVE: |
||
245 | case GL_SAMPLES_PASSED_ARB: |
||
246 | /* For occlusion queries, we delay taking an initial sample until the |
||
247 | * first drawing occurs in this batch. See the reasoning in the comments |
||
248 | * for brw_emit_query_begin() below. |
||
249 | * |
||
250 | * Since we're starting a new query, we need to be sure to throw away |
||
251 | * any previous occlusion query results. |
||
252 | */ |
||
253 | drm_intel_bo_unreference(query->bo); |
||
254 | query->bo = NULL; |
||
255 | query->last_index = -1; |
||
256 | |||
257 | brw->query.obj = query; |
||
258 | |||
259 | /* Depth statistics on Gen4 require strange workarounds, so we try to |
||
260 | * avoid them when necessary. They're required for occlusion queries, |
||
261 | * so turn them on now. |
||
262 | */ |
||
263 | brw->stats_wm++; |
||
264 | brw->ctx.NewDriverState |= BRW_NEW_STATS_WM; |
||
265 | break; |
||
266 | |||
267 | default: |
||
268 | unreachable("Unrecognized query target in brw_begin_query()"); |
||
269 | } |
||
270 | } |
||
271 | |||
272 | /** |
||
273 | * Gen4-5 driver hook for glEndQuery(). |
||
274 | * |
||
275 | * Emits GPU commands to record a final query value, ending any data capturing. |
||
276 | * However, the final result isn't necessarily available until the GPU processes |
||
277 | * those commands. brw_queryobj_get_results() processes the captured data to |
||
278 | * produce the final result. |
||
279 | */ |
||
280 | static void |
||
281 | brw_end_query(struct gl_context *ctx, struct gl_query_object *q) |
||
282 | { |
||
283 | struct brw_context *brw = brw_context(ctx); |
||
284 | struct brw_query_object *query = (struct brw_query_object *)q; |
||
285 | |||
286 | assert(brw->gen < 6); |
||
287 | |||
288 | switch (query->Base.Target) { |
||
289 | case GL_TIME_ELAPSED_EXT: |
||
290 | /* Write the final timestamp. */ |
||
291 | brw_write_timestamp(brw, query->bo, 1); |
||
292 | break; |
||
293 | |||
294 | case GL_ANY_SAMPLES_PASSED: |
||
295 | case GL_ANY_SAMPLES_PASSED_CONSERVATIVE: |
||
296 | case GL_SAMPLES_PASSED_ARB: |
||
297 | |||
298 | /* No query->bo means that EndQuery was called after BeginQuery with no |
||
299 | * intervening drawing. Rather than doing nothing at all here in this |
||
300 | * case, we emit the query_begin and query_end state to the |
||
301 | * hardware. This is to guarantee that waiting on the result of this |
||
302 | * empty state will cause all previous queries to complete at all, as |
||
303 | * required by the specification: |
||
304 | * |
||
305 | * It must always be true that if any query object |
||
306 | * returns a result available of TRUE, all queries of the |
||
307 | * same type issued prior to that query must also return |
||
308 | * TRUE. [Open GL 4.3 (Core Profile) Section 4.2.1] |
||
309 | */ |
||
310 | if (!query->bo) { |
||
311 | brw_emit_query_begin(brw); |
||
312 | } |
||
313 | |||
314 | assert(query->bo); |
||
315 | |||
316 | brw_emit_query_end(brw); |
||
317 | |||
318 | brw->query.obj = NULL; |
||
319 | |||
320 | brw->stats_wm--; |
||
321 | brw->ctx.NewDriverState |= BRW_NEW_STATS_WM; |
||
322 | break; |
||
323 | |||
324 | default: |
||
325 | unreachable("Unrecognized query target in brw_end_query()"); |
||
326 | } |
||
327 | } |
||
328 | |||
329 | /** |
||
330 | * The Gen4-5 WaitQuery() driver hook. |
||
331 | * |
||
332 | * Wait for a query result to become available and return it. This is the |
||
333 | * backing for glGetQueryObjectiv() with the GL_QUERY_RESULT pname. |
||
334 | */ |
||
335 | static void brw_wait_query(struct gl_context *ctx, struct gl_query_object *q) |
||
336 | { |
||
337 | struct brw_query_object *query = (struct brw_query_object *)q; |
||
338 | |||
339 | assert(brw_context(ctx)->gen < 6); |
||
340 | |||
341 | brw_queryobj_get_results(ctx, query); |
||
342 | query->Base.Ready = true; |
||
343 | } |
||
344 | |||
345 | /** |
||
346 | * The Gen4-5 CheckQuery() driver hook. |
||
347 | * |
||
348 | * Checks whether a query result is ready yet. If not, flushes. |
||
349 | * This is the backing for glGetQueryObjectiv()'s QUERY_RESULT_AVAILABLE pname. |
||
350 | */ |
||
351 | static void brw_check_query(struct gl_context *ctx, struct gl_query_object *q) |
||
352 | { |
||
353 | struct brw_context *brw = brw_context(ctx); |
||
354 | struct brw_query_object *query = (struct brw_query_object *)q; |
||
355 | |||
356 | assert(brw->gen < 6); |
||
357 | |||
358 | /* From the GL_ARB_occlusion_query spec: |
||
359 | * |
||
360 | * "Instead of allowing for an infinite loop, performing a |
||
361 | * QUERY_RESULT_AVAILABLE_ARB will perform a flush if the result is |
||
362 | * not ready yet on the first time it is queried. This ensures that |
||
363 | * the async query will return true in finite time. |
||
364 | */ |
||
365 | if (query->bo && drm_intel_bo_references(brw->batch.bo, query->bo)) |
||
366 | intel_batchbuffer_flush(brw); |
||
367 | |||
368 | if (query->bo == NULL || !drm_intel_bo_busy(query->bo)) { |
||
369 | brw_queryobj_get_results(ctx, query); |
||
370 | query->Base.Ready = true; |
||
371 | } |
||
372 | } |
||
373 | |||
374 | /** |
||
375 | * Ensure there query's BO has enough space to store a new pair of values. |
||
376 | * |
||
377 | * If not, gather the existing BO's results and create a new buffer of the |
||
378 | * same size. |
||
379 | */ |
||
380 | static void |
||
381 | ensure_bo_has_space(struct gl_context *ctx, struct brw_query_object *query) |
||
382 | { |
||
383 | struct brw_context *brw = brw_context(ctx); |
||
384 | |||
385 | assert(brw->gen < 6); |
||
386 | |||
387 | if (!query->bo || query->last_index * 2 + 1 >= 4096 / sizeof(uint64_t)) { |
||
388 | |||
389 | if (query->bo != NULL) { |
||
390 | /* The old query BO did not have enough space, so we allocated a new |
||
391 | * one. Gather the results so far (adding up the differences) and |
||
392 | * release the old BO. |
||
393 | */ |
||
394 | brw_queryobj_get_results(ctx, query); |
||
395 | } |
||
396 | |||
397 | query->bo = drm_intel_bo_alloc(brw->bufmgr, "query", 4096, 1); |
||
398 | query->last_index = 0; |
||
399 | } |
||
400 | } |
||
401 | |||
402 | /** |
||
403 | * Record the PS_DEPTH_COUNT value (for occlusion queries) just before |
||
404 | * primitive drawing. |
||
405 | * |
||
406 | * In a pre-hardware context world, the single PS_DEPTH_COUNT register is |
||
407 | * shared among all applications using the GPU. However, our query value |
||
408 | * needs to only include fragments generated by our application/GL context. |
||
409 | * |
||
410 | * To accommodate this, we record PS_DEPTH_COUNT at the start and end of |
||
411 | * each batchbuffer (technically, the first primitive drawn and flush time). |
||
412 | * Subtracting each pair of values calculates the change in PS_DEPTH_COUNT |
||
413 | * caused by a batchbuffer. Since there is no preemption inside batches, |
||
414 | * this is guaranteed to only measure the effects of our current application. |
||
415 | * |
||
416 | * Adding each of these differences (in case drawing is done over many batches) |
||
417 | * produces the final expected value. |
||
418 | * |
||
419 | * In a world with hardware contexts, PS_DEPTH_COUNT is saved and restored |
||
420 | * as part of the context state, so this is unnecessary, and skipped. |
||
421 | */ |
||
422 | void |
||
423 | brw_emit_query_begin(struct brw_context *brw) |
||
424 | { |
||
425 | struct gl_context *ctx = &brw->ctx; |
||
426 | struct brw_query_object *query = brw->query.obj; |
||
427 | |||
428 | if (brw->hw_ctx) |
||
429 | return; |
||
430 | |||
431 | /* Skip if we're not doing any queries, or we've already recorded the |
||
432 | * initial query value for this batchbuffer. |
||
433 | */ |
||
434 | if (!query || brw->query.begin_emitted) |
||
435 | return; |
||
436 | |||
437 | ensure_bo_has_space(ctx, query); |
||
438 | |||
439 | brw_write_depth_count(brw, query->bo, query->last_index * 2); |
||
440 | |||
441 | brw->query.begin_emitted = true; |
||
442 | } |
||
443 | |||
444 | /** |
||
445 | * Called at batchbuffer flush to get an ending PS_DEPTH_COUNT |
||
446 | * (for non-hardware context platforms). |
||
447 | * |
||
448 | * See the explanation in brw_emit_query_begin(). |
||
449 | */ |
||
450 | void |
||
451 | brw_emit_query_end(struct brw_context *brw) |
||
452 | { |
||
453 | struct brw_query_object *query = brw->query.obj; |
||
454 | |||
455 | if (brw->hw_ctx) |
||
456 | return; |
||
457 | |||
458 | if (!brw->query.begin_emitted) |
||
459 | return; |
||
460 | |||
461 | brw_write_depth_count(brw, query->bo, query->last_index * 2 + 1); |
||
462 | |||
463 | brw->query.begin_emitted = false; |
||
464 | query->last_index++; |
||
465 | } |
||
466 | |||
467 | /** |
||
468 | * Driver hook for glQueryCounter(). |
||
469 | * |
||
470 | * This handles GL_TIMESTAMP queries, which perform a pipelined read of the |
||
471 | * current GPU time. This is unlike GL_TIME_ELAPSED, which measures the |
||
472 | * time while the query is active. |
||
473 | */ |
||
474 | static void |
||
475 | brw_query_counter(struct gl_context *ctx, struct gl_query_object *q) |
||
476 | { |
||
477 | struct brw_context *brw = brw_context(ctx); |
||
478 | struct brw_query_object *query = (struct brw_query_object *) q; |
||
479 | |||
480 | assert(q->Target == GL_TIMESTAMP); |
||
481 | |||
482 | drm_intel_bo_unreference(query->bo); |
||
483 | query->bo = drm_intel_bo_alloc(brw->bufmgr, "timestamp query", 4096, 4096); |
||
484 | brw_write_timestamp(brw, query->bo, 0); |
||
485 | |||
486 | query->flushed = false; |
||
487 | } |
||
488 | |||
489 | /** |
||
490 | * Read the TIMESTAMP register immediately (in a non-pipelined fashion). |
||
491 | * |
||
492 | * This is used to implement the GetTimestamp() driver hook. |
||
493 | */ |
||
494 | static uint64_t |
||
495 | brw_get_timestamp(struct gl_context *ctx) |
||
496 | { |
||
497 | struct brw_context *brw = brw_context(ctx); |
||
498 | uint64_t result = 0; |
||
499 | |||
500 | drm_intel_reg_read(brw->bufmgr, TIMESTAMP, &result); |
||
501 | |||
502 | /* See logic in brw_queryobj_get_results() */ |
||
503 | result = result >> 32; |
||
504 | result *= 80; |
||
505 | result &= (1ull << 36) - 1; |
||
506 | |||
507 | return result; |
||
508 | } |
||
509 | |||
510 | /* Initialize query object functions used on all generations. */ |
||
511 | void brw_init_common_queryobj_functions(struct dd_function_table *functions) |
||
512 | { |
||
513 | functions->NewQueryObject = brw_new_query_object; |
||
514 | functions->DeleteQuery = brw_delete_query; |
||
515 | functions->QueryCounter = brw_query_counter; |
||
516 | functions->GetTimestamp = brw_get_timestamp; |
||
517 | } |
||
518 | |||
519 | /* Initialize Gen4/5-specific query object functions. */ |
||
520 | void gen4_init_queryobj_functions(struct dd_function_table *functions) |
||
521 | { |
||
522 | functions->BeginQuery = brw_begin_query; |
||
523 | functions->EndQuery = brw_end_query; |
||
524 | functions->CheckQuery = brw_check_query; |
||
525 | functions->WaitQuery = brw_wait_query; |
||
526 | }><>>>>>>>>> |