WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen6_queryobj.c

Rev	Author	Line No.	Line
4358	Serge	1	/*
		2	* Copyright © 2008 Intel Corporation
		3	*
		4	* Permission is hereby granted, free of charge, to any person obtaining a
		5	* copy of this software and associated documentation files (the "Software"),
		6	* to deal in the Software without restriction, including without limitation
		7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
		8	* and/or sell copies of the Software, and to permit persons to whom the
		9	* Software is furnished to do so, subject to the following conditions:
		10	*
		11	* The above copyright notice and this permission notice (including the next
		12	* paragraph) shall be included in all copies or substantial portions of the
		13	* Software.
		14	*
		15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
		16	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
		18	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
		19	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
		20	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
		21	* IN THE SOFTWARE.
		22	*
		23	* Authors:
		24	* Eric Anholt
		25	* Kenneth Graunke
		26	*/
		27
		28	/** @file gen6_queryobj.c
		29	*
		30	* Support for query objects (GL_ARB_occlusion_query, GL_ARB_timer_query,
		31	* GL_EXT_transform_feedback, and friends) on platforms that support
		32	* hardware contexts (Gen6+).
		33	*/
		34	#include "main/imports.h"
		35
		36	#include "brw_context.h"
		37	#include "brw_defines.h"
		38	#include "brw_state.h"
		39	#include "intel_batchbuffer.h"
		40	#include "intel_reg.h"
		41
		42	/**
		43	* Emit PIPE_CONTROLs to write the current GPU timestamp into a buffer.
		44	*/
		45	static void
		46	write_timestamp(struct brw_context brw, drm_intel_bo query_bo, int idx)
		47	{
		48	/* Emit workaround flushes: */
		49	if (brw->gen == 6) {
		50	/* The timestamp write below is a non-zero post-sync op, which on
		51	* Gen6 necessitates a CS stall. CS stalls need stall at scoreboard
		52	* set. See the comments for intel_emit_post_sync_nonzero_flush().
		53	*/
		54	BEGIN_BATCH(4);
		55	OUT_BATCH(_3DSTATE_PIPE_CONTROL \| (4 - 2));
		56	OUT_BATCH(PIPE_CONTROL_CS_STALL \| PIPE_CONTROL_STALL_AT_SCOREBOARD);
		57	OUT_BATCH(0);
		58	OUT_BATCH(0);
		59	ADVANCE_BATCH();
		60	}
		61
		62	BEGIN_BATCH(5);
		63	OUT_BATCH(_3DSTATE_PIPE_CONTROL \| (5 - 2));
		64	OUT_BATCH(PIPE_CONTROL_WRITE_TIMESTAMP);
		65	OUT_RELOC(query_bo,
		66	I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
		67	PIPE_CONTROL_GLOBAL_GTT_WRITE \|
		68	idx * sizeof(uint64_t));
		69	OUT_BATCH(0);
		70	OUT_BATCH(0);
		71	ADVANCE_BATCH();
		72	}
		73
		74	/**
		75	* Emit PIPE_CONTROLs to write the PS_DEPTH_COUNT register into a buffer.
		76	*/
		77	static void
		78	write_depth_count(struct brw_context brw, drm_intel_bo query_bo, int idx)
		79	{
		80	/* Emit Sandybridge workaround flush: */
		81	if (brw->gen == 6)
		82	intel_emit_post_sync_nonzero_flush(brw);
		83
		84	BEGIN_BATCH(5);
		85	OUT_BATCH(_3DSTATE_PIPE_CONTROL \| (5 - 2));
		86	OUT_BATCH(PIPE_CONTROL_DEPTH_STALL \|
		87	PIPE_CONTROL_WRITE_DEPTH_COUNT);
		88	OUT_RELOC(query_bo,
		89	I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
		90	PIPE_CONTROL_GLOBAL_GTT_WRITE \|
		91	(idx * sizeof(uint64_t)));
		92	OUT_BATCH(0);
		93	OUT_BATCH(0);
		94	ADVANCE_BATCH();
		95	}
		96
		97	/*
		98	* Write an arbitrary 64-bit register to a buffer via MI_STORE_REGISTER_MEM.
		99	*
		100	* Only TIMESTAMP and PS_DEPTH_COUNT have special PIPE_CONTROL support; other
		101	* counters have to be read via the generic MI_STORE_REGISTER_MEM. This
		102	* function also performs a pipeline flush for proper synchronization.
		103	*/
		104	static void
		105	write_reg(struct brw_context *brw,
		106	drm_intel_bo *query_bo, uint32_t reg, int idx)
		107	{
		108	assert(brw->gen >= 6);
		109
		110	intel_batchbuffer_emit_mi_flush(brw);
		111
		112	/* MI_STORE_REGISTER_MEM only stores a single 32-bit value, so to
		113	* read a full 64-bit register, we need to do two of them.
		114	*/
		115	BEGIN_BATCH(3);
		116	OUT_BATCH(MI_STORE_REGISTER_MEM \| (3 - 2));
		117	OUT_BATCH(reg);
		118	OUT_RELOC(query_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		119	idx * sizeof(uint64_t));
		120	ADVANCE_BATCH();
		121
		122	BEGIN_BATCH(3);
		123	OUT_BATCH(MI_STORE_REGISTER_MEM \| (3 - 2));
		124	OUT_BATCH(reg + sizeof(uint32_t));
		125	OUT_RELOC(query_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		126	sizeof(uint32_t) + idx * sizeof(uint64_t));
		127	ADVANCE_BATCH();
		128	}
		129
		130	static void
		131	write_primitives_generated(struct brw_context *brw,
		132	drm_intel_bo *query_bo, int idx)
		133	{
		134	write_reg(brw, query_bo, CL_INVOCATION_COUNT, idx);
		135	}
		136
		137	static void
		138	write_xfb_primitives_written(struct brw_context *brw,
		139	drm_intel_bo *query_bo, int idx)
		140	{
		141	if (brw->gen >= 7) {
		142	write_reg(brw, query_bo, SO_NUM_PRIMS_WRITTEN0_IVB, idx);
		143	} else {
		144	write_reg(brw, query_bo, SO_NUM_PRIMS_WRITTEN, idx);
		145	}
		146	}
		147
		148	/**
		149	* Wait on the query object's BO and calculate the final result.
		150	*/
		151	static void
		152	gen6_queryobj_get_results(struct gl_context *ctx,
		153	struct brw_query_object *query)
		154	{
		155	struct brw_context *brw = brw_context(ctx);
		156
		157	if (query->bo == NULL)
		158	return;
		159
		160	/* If the application has requested the query result, but this batch is
		161	* still contributing to it, flush it now so the results will be present
		162	* when mapped.
		163	*/
		164	if (drm_intel_bo_references(brw->batch.bo, query->bo))
		165	intel_batchbuffer_flush(brw);
		166
		167	if (unlikely(brw->perf_debug)) {
		168	if (drm_intel_bo_busy(query->bo)) {
		169	perf_debug("Stalling on the GPU waiting for a query object.\n");
		170	}
		171	}
		172
		173	drm_intel_bo_map(query->bo, false);
		174	uint64_t *results = query->bo->virtual;
		175	switch (query->Base.Target) {
		176	case GL_TIME_ELAPSED:
		177	/* The query BO contains the starting and ending timestamps.
		178	* Subtract the two and convert to nanoseconds.
		179	*/
		180	query->Base.Result += 80 * (results[1] - results[0]);
		181	break;
		182
		183	case GL_TIMESTAMP:
		184	/* Our timer is a clock that increments every 80ns (regardless of
		185	* other clock scaling in the system). The timestamp register we can
		186	* read for glGetTimestamp() masks out the top 32 bits, so we do that
		187	* here too to let the two counters be compared against each other.
		188	*
		189	* If we just multiplied that 32 bits of data by 80, it would roll
		190	* over at a non-power-of-two, so an application couldn't use
		191	* GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we
		192	* report 36 bits and truncate at that (rolling over 5 times as often
		193	* as the HW counter), and when the 32-bit counter rolls over, it
		194	* happens to also be at a rollover in the reported value from near
		195	* (1<<36) to 0.
		196	*
		197	* The low 32 bits rolls over in ~343 seconds. Our 36-bit result
		198	* rolls over every ~69 seconds.
		199	*
		200	* The query BO contains a single timestamp value in results[0].
		201	*/
		202	query->Base.Result = 80 * (results[0] & 0xffffffff);
		203	query->Base.Result &= (1ull << 36) - 1;
		204	break;
		205
		206	case GL_SAMPLES_PASSED_ARB:
		207	/* We need to use += rather than = here since some BLT-based operations
		208	* may have added additional samples to our occlusion query value.
		209	*/
		210	query->Base.Result += results[1] - results[0];
		211	break;
		212
		213	case GL_ANY_SAMPLES_PASSED:
		214	case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
		215	if (results[0] != results[1])
		216	query->Base.Result = true;
		217	break;
		218
		219	case GL_PRIMITIVES_GENERATED:
		220	case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
		221	query->Base.Result = results[1] - results[0];
		222	break;
		223
		224	default:
		225	assert(!"Unrecognized query target in brw_queryobj_get_results()");
		226	break;
		227	}
		228	drm_intel_bo_unmap(query->bo);
		229
		230	/* Now that we've processed the data stored in the query's buffer object,
		231	* we can release it.
		232	*/
		233	drm_intel_bo_unreference(query->bo);
		234	query->bo = NULL;
		235	}
		236
		237	/**
		238	* Driver hook for glBeginQuery().
		239	*
		240	* Initializes driver structures and emits any GPU commands required to begin
		241	* recording data for the query.
		242	*/
		243	static void
		244	gen6_begin_query(struct gl_context ctx, struct gl_query_object q)
		245	{
		246	struct brw_context *brw = brw_context(ctx);
		247	struct brw_query_object query = (struct brw_query_object )q;
		248
		249	/* Since we're starting a new query, we need to throw away old results. */
		250	drm_intel_bo_unreference(query->bo);
		251	query->bo = drm_intel_bo_alloc(brw->bufmgr, "query results", 4096, 4096);
		252
		253	switch (query->Base.Target) {
		254	case GL_TIME_ELAPSED:
		255	/* For timestamp queries, we record the starting time right away so that
		256	* we measure the full time between BeginQuery and EndQuery. There's
		257	* some debate about whether this is the right thing to do. Our decision
		258	* is based on the following text from the ARB_timer_query extension:
		259	*
		260	* "(5) Should the extension measure total time elapsed between the full
		261	* completion of the BeginQuery and EndQuery commands, or just time
		262	* spent in the graphics library?
		263	*
		264	* RESOLVED: This extension will measure the total time elapsed
		265	* between the full completion of these commands. Future extensions
		266	* may implement a query to determine time elapsed at different stages
		267	* of the graphics pipeline."
		268	*
		269	* We write a starting timestamp now (at index 0). At EndQuery() time,
		270	* we'll write a second timestamp (at index 1), and subtract the two to
		271	* obtain the time elapsed. Notably, this includes time elapsed while
		272	* the system was doing other work, such as running other applications.
		273	*/
		274	write_timestamp(brw, query->bo, 0);
		275	break;
		276
		277	case GL_ANY_SAMPLES_PASSED:
		278	case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
		279	case GL_SAMPLES_PASSED_ARB:
		280	write_depth_count(brw, query->bo, 0);
		281	break;
		282
		283	case GL_PRIMITIVES_GENERATED:
		284	write_primitives_generated(brw, query->bo, 0);
		285	break;
		286
		287	case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
		288	write_xfb_primitives_written(brw, query->bo, 0);
		289	break;
		290
		291	default:
		292	assert(!"Unrecognized query target in brw_begin_query()");
		293	break;
		294	}
		295	}
		296
		297	/**
		298	* Driver hook for glEndQuery().
		299	*
		300	* Emits GPU commands to record a final query value, ending any data capturing.
		301	* However, the final result isn't necessarily available until the GPU processes
		302	* those commands. brw_queryobj_get_results() processes the captured data to
		303	* produce the final result.
		304	*/
		305	static void
		306	gen6_end_query(struct gl_context ctx, struct gl_query_object q)
		307	{
		308	struct brw_context *brw = brw_context(ctx);
		309	struct brw_query_object query = (struct brw_query_object )q;
		310
		311	switch (query->Base.Target) {
		312	case GL_TIME_ELAPSED:
		313	write_timestamp(brw, query->bo, 1);
		314	break;
		315
		316	case GL_ANY_SAMPLES_PASSED:
		317	case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
		318	case GL_SAMPLES_PASSED_ARB:
		319	write_depth_count(brw, query->bo, 1);
		320	break;
		321
		322	case GL_PRIMITIVES_GENERATED:
		323	write_primitives_generated(brw, query->bo, 1);
		324	break;
		325
		326	case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
		327	write_xfb_primitives_written(brw, query->bo, 1);
		328	break;
		329
		330	default:
		331	assert(!"Unrecognized query target in brw_end_query()");
		332	break;
		333	}
		334	}
		335
		336	/**
		337	* The WaitQuery() driver hook.
		338	*
		339	* Wait for a query result to become available and return it. This is the
		340	* backing for glGetQueryObjectiv() with the GL_QUERY_RESULT pname.
		341	*/
		342	static void gen6_wait_query(struct gl_context ctx, struct gl_query_object q)
		343	{
		344	struct brw_query_object query = (struct brw_query_object )q;
		345
		346	gen6_queryobj_get_results(ctx, query);
		347	query->Base.Ready = true;
		348	}
		349
		350	/**
		351	* The CheckQuery() driver hook.
		352	*
		353	* Checks whether a query result is ready yet. If not, flushes.
		354	* This is the backing for glGetQueryObjectiv()'s QUERY_RESULT_AVAILABLE pname.
		355	*/
		356	static void gen6_check_query(struct gl_context ctx, struct gl_query_object q)
		357	{
		358	struct brw_context *brw = brw_context(ctx);
		359	struct brw_query_object query = (struct brw_query_object )q;
		360
		361	/* From the GL_ARB_occlusion_query spec:
		362	*
		363	* "Instead of allowing for an infinite loop, performing a
		364	* QUERY_RESULT_AVAILABLE_ARB will perform a flush if the result is
		365	* not ready yet on the first time it is queried. This ensures that
		366	* the async query will return true in finite time.
		367	*/
		368	if (query->bo && drm_intel_bo_references(brw->batch.bo, query->bo))
		369	intel_batchbuffer_flush(brw);
		370
		371	if (query->bo == NULL \|\| !drm_intel_bo_busy(query->bo)) {
		372	gen6_queryobj_get_results(ctx, query);
		373	query->Base.Ready = true;
		374	}
		375	}
		376
		377	/* Initialize Gen6+-specific query object functions. */
		378	void gen6_init_queryobj_functions(struct dd_function_table *functions)
		379	{
		380	functions->BeginQuery = gen6_begin_query;
		381	functions->EndQuery = gen6_end_query;
		382	functions->CheckQuery = gen6_check_query;
		383	functions->WaitQuery = gen6_wait_query;
		384	}

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/gen6_queryobj.c – Rev 4358