#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
#include "intel_drv.h"
//#include

#undef mb
#undef rmb
#undef wmb
#define mb()    asm volatile("mfence")
#define rmb()   asm volatile("lfence")
#define wmb()   asm volatile("sfence")


typedef struct
{
	struct drm_i915_gem_object *batch;
	struct list_head objects;
	u32 exec_start;
	u32 exec_len;

} batchbuffer_t;

struct change_domains {
	uint32_t invalidate_domains;
	uint32_t flush_domains;
	uint32_t flush_rings;
	uint32_t flips;
};

/*
 * Set the next domain for the specified object. This
 * may not actually perform the necessary flushing/invalidating though,
 * as that may want to be batched with other set_domain operations
 *
 * This is (we hope) the only really tricky part of gem. The goal
 * is fairly simple -- track which caches hold bits of the object
 * and make sure they remain coherent. A few concrete examples may
 * help to explain how it works. For shorthand, we use the notation
 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
 * a pair of read and write domain masks.
 *
 * Case 1: the batch buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Mapped to GTT
 *	4. Read by GPU
 *	5. Unmapped from GTT
 *	6. Freed
 *
 *	Let's take these a step at a time
 *
 *	1. Allocated
 *		Pages allocated from the kernel may still have
 *		cache contents, so we set them to (CPU, CPU) always.
 *	2. Written by CPU (using pwrite)
 *		The pwrite function calls set_domain (CPU, CPU) and
 *		this function does nothing (as nothing changes)
 *	3. Mapped to GTT
 *		This function asserts that the object is not
 *		currently in any GPU-based read or write domains
 *	4. Read by GPU
 *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
 *		As write_domain is zero, this function adds in the
 *		current read domains (CPU+COMMAND, 0).
 *		flush_domains is set to CPU.
 *		invalidate_domains is set to COMMAND
 *		clflush is run to get data out of the CPU caches
 *		then i915_dev_set_domain calls i915_gem_flush to
 *		emit an MI_FLUSH and drm_agp_chipset_flush
 *	5. Unmapped from GTT
 *		i915_gem_object_unbind calls set_domain (CPU, CPU)
 *		flush_domains and invalidate_domains end up both zero
 *		so no flushing/invalidating happens
 *	6. Freed
 *		yay, done
 *
 * Case 2: The shared render buffer
 *
 *	1. Allocated
 *	2. Mapped to GTT
 *	3. Read/written by GPU
 *	4. set_domain to (CPU,CPU)
 *	5. Read/written by CPU
 *	6. Read/written by GPU
 *
 *	1. Allocated
 *		Same as last example, (CPU, CPU)
 *	2. Mapped to GTT
 *		Nothing changes (assertions find that it is not in the GPU)
 *	3. Read/written by GPU
 *		execbuffer calls set_domain (RENDER, RENDER)
 *		flush_domains gets CPU
 *		invalidate_domains gets GPU
 *		clflush (obj)
 *		MI_FLUSH and drm_agp_chipset_flush
 *	4. set_domain (CPU, CPU)
 *		flush_domains gets GPU
 *		invalidate_domains gets CPU
 *		wait_rendering (obj) to make sure all drawing is complete.
 *		This will include an MI_FLUSH to get the data from GPU
 *		to memory
 *		clflush (obj) to invalidate the CPU cache
 *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
 *	5. Read/written by CPU
 *		cache lines are loaded and dirtied
 *	6. Read/written by GPU
 *		Same as last GPU access
 *
 * Case 3: The constant buffer
 *
 *	1. Allocated
 *	2. Written by CPU
 *	3. Read by GPU
 *	4. Updated (written) by CPU again
 *	5. Read by GPU
 *
 *	1. Allocated
 *		(CPU, CPU)
 *	2. Written by CPU
 *		(CPU, CPU)
 *	3. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 *	4. Updated (written) by CPU again
 *		(CPU, CPU)
 *		flush_domains = 0 (no previous write domain)
 *		invalidate_domains = 0 (no new read domains)
 *	5. Read by GPU
 *		(CPU+RENDER, 0)
 *		flush_domains = CPU
 *		invalidate_domains = RENDER
 *		clflush (obj)
 *		MI_FLUSH
 *		drm_agp_chipset_flush
 */
static void
i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
				  struct intel_ring_buffer *ring,
				  struct change_domains *cd)
{
	uint32_t invalidate_domains = 0, flush_domains = 0;

	/*
	 * If the object isn't moving to a new write domain,
	 * let the object stay in multiple read domains
	 */
	if (obj->base.pending_write_domain == 0)
		obj->base.pending_read_domains |= obj->base.read_domains;

	/*
	 * Flush the current write domain if
	 * the new read domains don't match. Invalidate
	 * any read domains which differ from the old
	 * write domain
	 */
	if (obj->base.write_domain &&
	    (((obj->base.write_domain != obj->base.pending_read_domains ||
	       obj->ring != ring)) ||
	     (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
		flush_domains |= obj->base.write_domain;
		invalidate_domains |=
			obj->base.pending_read_domains & ~obj->base.write_domain;
	}
	/*
	 * Invalidate any read caches which may have
	 * stale data. That is, any new read domains.
	 */
	invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
	if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
		i915_gem_clflush_object(obj);

	if (obj->base.pending_write_domain)
		cd->flips |= atomic_read(&obj->pending_flip);

	/* The actual obj->write_domain will be updated with
	 * pending_write_domain after we emit the accumulated flush for all
	 * of our domain changes in execbuffers (which clears objects'
	 * write_domains). So if we have a current write domain that we
	 * aren't changing, set pending_write_domain to that.
	 */
	if (flush_domains == 0 && obj->base.pending_write_domain == 0)
		obj->base.pending_write_domain = obj->base.write_domain;

	cd->invalidate_domains |= invalidate_domains;
	cd->flush_domains |= flush_domains;
	if (flush_domains & I915_GEM_GPU_DOMAINS)
		cd->flush_rings |= obj->ring->id;
	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
		cd->flush_rings |= ring->id;
}
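
#if 0
/* Illustrative sketch only, not part of the driver: walks "Case 3: The
 * constant buffer", step 3, through the same bitmask arithmetic used by
 * i915_gem_object_set_to_gpu_domain() above (the fence and ring checks
 * are left out). The domain values are assumptions taken from the
 * comment block, not live object state. */
static void domain_mask_example(void)
{
	uint32_t read_domains = I915_GEM_DOMAIN_CPU;	/* current read domains */
	uint32_t write_domain = I915_GEM_DOMAIN_CPU;	/* current write domain */
	uint32_t pending_read = I915_GEM_DOMAIN_CPU |
				I915_GEM_DOMAIN_RENDER; /* requested by execbuffer */
	uint32_t invalidate_domains = 0, flush_domains = 0;

	/* The old write domain differs from the new read domains, so it
	 * must be flushed and the differing read domains invalidated. */
	if (write_domain && write_domain != pending_read) {
		flush_domains      |= write_domain;			/* = CPU    */
		invalidate_domains |= pending_read & ~write_domain;	/* = RENDER */
	}

	/* Any brand-new read domain also needs invalidating. */
	invalidate_domains |= pending_read & ~read_domains;		/* = RENDER */

	/* Result matches the comment: flush_domains = CPU and
	 * invalidate_domains = RENDER, so clflush + MI_FLUSH follow. */
	(void)flush_domains;
	(void)invalidate_domains;
}
#endif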

static int
i915_gem_execbuffer_flush(struct drm_device *dev,
			  uint32_t invalidate_domains,
			  uint32_t flush_domains,
			  uint32_t flush_rings)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int i, ret;

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		intel_gtt_chipset_flush();

	if (flush_domains & I915_GEM_DOMAIN_GTT)
		wmb();

	if ((flush_domains | invalidate_domains) & I915_GEM_GPU_DOMAINS) {
		for (i = 0; i < I915_NUM_RINGS; i++)
			if (flush_rings & (1 << i)) {
				ret = i915_gem_flush_ring(&dev_priv->ring[i],
							  invalidate_domains,
							  flush_domains);
				if (ret)
					return ret;
			}
	}

	return 0;
}

static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
				struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	struct change_domains cd;
	int ret;

	memset(&cd, 0, sizeof(cd));
	list_for_each_entry(obj, objects, exec_list)
		i915_gem_object_set_to_gpu_domain(obj, ring, &cd);

	if (cd.invalidate_domains | cd.flush_domains) {
		ret = i915_gem_execbuffer_flush(ring->dev,
						cd.invalidate_domains,
						cd.flush_domains,
						cd.flush_rings);
		if (ret)
			return ret;
	}

//	if (cd.flips) {
//		ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
//		if (ret)
//			return ret;
//	}

//	list_for_each_entry(obj, objects, exec_list) {
//		ret = i915_gem_execbuffer_sync_rings(obj, ring);
//		if (ret)
//			return ret;
//	}

	return 0;
}

static void
i915_gem_execbuffer_move_to_active(struct list_head *objects,
				   struct intel_ring_buffer *ring,
				   u32 seqno)
{
	struct drm_i915_gem_object *obj;

	list_for_each_entry(obj, objects, exec_list) {
		u32 old_read = obj->base.read_domains;
		u32 old_write = obj->base.write_domain;

		obj->base.read_domains = obj->base.pending_read_domains;
		obj->base.write_domain = obj->base.pending_write_domain;
		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;

		i915_gem_object_move_to_active(obj, ring, seqno);
		if (obj->base.write_domain) {
			obj->dirty = 1;
			obj->pending_gpu_write = true;
			list_move_tail(&obj->gpu_write_list,
				       &ring->gpu_write_list);
//			intel_mark_busy(ring->dev, obj);
		}

//		trace_i915_gem_object_change_domain(obj, old_read, old_write);
	}
}

static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
				    struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_request *request;
	u32 invalidate;

	/*
	 * Ensure that the commands in the batch buffer are
	 * finished before the interrupt fires.
	 *
	 * The sampler always gets flushed on i965 (sigh).
	 */
	invalidate = I915_GEM_DOMAIN_COMMAND;
	if (INTEL_INFO(dev)->gen >= 4)
		invalidate |= I915_GEM_DOMAIN_SAMPLER;
	if (ring->flush(ring, invalidate, 0)) {
		i915_gem_next_request_seqno(ring);
		return;
	}

	/* Add a breadcrumb for the completion of the batch buffer */
	request = kzalloc(sizeof(*request), GFP_KERNEL);
	if (request == NULL || i915_add_request(ring, NULL, request)) {
		i915_gem_next_request_seqno(ring);
		kfree(request);
	}
}


int exec_batch(struct drm_device *dev, struct intel_ring_buffer *ring,
	       batchbuffer_t *exec)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;

	u32 seqno;
	int i;
	int ret;

	ring = &dev_priv->ring[RCS];

	mutex_lock(&dev->struct_mutex);

	list_for_each_entry(obj, &exec->objects, exec_list)
	{
		obj->base.pending_read_domains = 0;
		obj->base.pending_write_domain = 0;
	}

	exec->batch->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

	ret = i915_gem_execbuffer_move_to_gpu(ring, &exec->objects);
	if (ret)
		goto err;

	seqno = i915_gem_next_request_seqno(ring);
//	for (i = 0; i < ARRAY_SIZE(ring->sync_seqno); i++) {
//		if (seqno < ring->sync_seqno[i]) {
			/* The GPU can not handle its semaphore value wrapping,
			 * so every billion or so execbuffers, we need to stall
			 * the GPU in order to reset the counters.
			 */
//			ret = i915_gpu_idle(dev);
//			if (ret)
//				goto err;

//			BUG_ON(ring->sync_seqno[i]);
//		}
//	}

	ret = ring->dispatch_execbuffer(ring, exec->exec_start, exec->exec_len);
	if (ret)
		goto err;

	i915_gem_execbuffer_move_to_active(&exec->objects, ring, seqno);
	i915_gem_execbuffer_retire_commands(dev, ring);

err:
	mutex_unlock(&dev->struct_mutex);

	return ret;
}
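
#if 0
/* Minimal usage sketch, not part of the driver: shows how a caller might
 * describe a batch with batchbuffer_t and submit it through exec_batch().
 * The names submit_example, batch_obj, gtt_offset and batch_len are
 * assumptions for illustration; a real caller must pin its objects and
 * take exec_start/exec_len from the batch object's actual GTT mapping. */
static int submit_example(struct drm_device *dev,
			  struct drm_i915_gem_object *batch_obj,
			  u32 gtt_offset, u32 batch_len)
{
	batchbuffer_t bb;

	bb.batch      = batch_obj;
	bb.exec_start = gtt_offset;	/* start of the command stream in the GTT */
	bb.exec_len   = batch_len;	/* length of the command stream in bytes  */

	INIT_LIST_HEAD(&bb.objects);
	list_add_tail(&batch_obj->exec_list, &bb.objects);

	/* exec_batch() selects the render ring internally, so the ring
	 * argument passed here is effectively ignored. */
	return exec_batch(dev, NULL, &bb);
}
#endif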