WebSVN – Kolibri OS – Blame – /drivers/video/drm/i915/intel_lrc.c

Rev	Author	Line No.	Line
5354	serge	1	/*
		2	* Copyright © 2014 Intel Corporation
		3	*
		4	* Permission is hereby granted, free of charge, to any person obtaining a
		5	* copy of this software and associated documentation files (the "Software"),
		6	* to deal in the Software without restriction, including without limitation
		7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
		8	* and/or sell copies of the Software, and to permit persons to whom the
		9	* Software is furnished to do so, subject to the following conditions:
		10	*
		11	* The above copyright notice and this permission notice (including the next
		12	* paragraph) shall be included in all copies or substantial portions of the
		13	* Software.
		14	*
		15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
		16	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
		18	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
		19	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
		20	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
		21	* IN THE SOFTWARE.
		22	*
		23	* Authors:
		24	* Ben Widawsky
		25	* Michel Thierry
		26	* Thomas Daniel
		27	* Oscar Mateo
		28	*
		29	*/
		30
		31	/**
		32	* DOC: Logical Rings, Logical Ring Contexts and Execlists
		33	*
		34	* Motivation:
		35	* GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
		36	* These expanded contexts enable a number of new abilities, especially
		37	* "Execlists" (also implemented in this file).
		38	*
		39	* One of the main differences with the legacy HW contexts is that logical
		40	* ring contexts incorporate many more things to the context's state, like
		41	* PDPs or ringbuffer control registers:
		42	*
		43	* The reason why PDPs are included in the context is straightforward: as
		44	* PPGTTs (per-process GTTs) are actually per-context, having the PDPs
		45	* contained there mean you don't need to do a ppgtt->switch_mm yourself,
		46	* instead, the GPU will do it for you on the context switch.
		47	*
		48	* But, what about the ringbuffer control registers (head, tail, etc..)?
		49	* shouldn't we just need a set of those per engine command streamer? This is
		50	* where the name "Logical Rings" starts to make sense: by virtualizing the
		51	* rings, the engine cs shifts to a new "ring buffer" with every context
		52	* switch. When you want to submit a workload to the GPU you: A) choose your
		53	* context, B) find its appropriate virtualized ring, C) write commands to it
		54	* and then, finally, D) tell the GPU to switch to that context.
		55	*
		56	* Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
		57	* to a context is via a context execution list, ergo "Execlists".
		58	*
		59	* LRC implementation:
		60	* Regarding the creation of contexts, we have:
		61	*
		62	* - One global default context.
		63	* - One local default context for each opened fd.
		64	* - One local extra context for each context create ioctl call.
		65	*
		66	* Now that ringbuffers belong per-context (and not per-engine, like before)
		67	* and that contexts are uniquely tied to a given engine (and not reusable,
		68	* like before) we need:
		69	*
		70	* - One ringbuffer per-engine inside each context.
		71	* - One backing object per-engine inside each context.
		72	*
		73	* The global default context starts its life with these new objects fully
		74	* allocated and populated. The local default context for each opened fd is
		75	* more complex, because we don't know at creation time which engine is going
		76	* to use them. To handle this, we have implemented a deferred creation of LR
		77	* contexts:
		78	*
		79	* The local context starts its life as a hollow or blank holder, that only
		80	* gets populated for a given engine once we receive an execbuffer. If later
		81	* on we receive another execbuffer ioctl for the same context but a different
		82	* engine, we allocate/populate a new ringbuffer and context backing object and
		83	* so on.
		84	*
		85	* Finally, regarding local contexts created using the ioctl call: as they are
		86	* only allowed with the render ring, we can allocate & populate them right
		87	* away (no need to defer anything, at least for now).
		88	*
		89	* Execlists implementation:
		90	* Execlists are the new method by which, on gen8+ hardware, workloads are
		91	* submitted for execution (as opposed to the legacy, ringbuffer-based, method).
		92	* This method works as follows:
		93	*
		94	* When a request is committed, its commands (the BB start and any leading or
		95	* trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
		96	* for the appropriate context. The tail pointer in the hardware context is not
		97	* updated at this time, but instead, kept by the driver in the ringbuffer
		98	* structure. A structure representing this request is added to a request queue
		99	* for the appropriate engine: this structure contains a copy of the context's
		100	* tail after the request was written to the ring buffer and a pointer to the
		101	* context itself.
		102	*
		103	* If the engine's request queue was empty before the request was added, the
		104	* queue is processed immediately. Otherwise the queue will be processed during
		105	* a context switch interrupt. In any case, elements on the queue will get sent
		106	* (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
		107	* globally unique 20-bits submission ID.
		108	*
		109	* When execution of a request completes, the GPU updates the context status
		110	* buffer with a context complete event and generates a context switch interrupt.
		111	* During the interrupt handling, the driver examines the events in the buffer:
		112	* for each context complete event, if the announced ID matches that on the head
		113	* of the request queue, then that request is retired and removed from the queue.
		114	*
		115	* After processing, if any requests were retired and the queue is not empty
		116	* then a new execution list can be submitted. The two requests at the front of
		117	* the queue are next to be submitted but since a context may not occur twice in
		118	* an execution list, if subsequent requests have the same ID as the first then
		119	* the two requests must be combined. This is done simply by discarding requests
		120	* at the head of the queue until either only one request is left (in which case
		121	* we use a NULL second context) or the first two requests have unique IDs.
		122	*
		123	* By always executing the first two requests in the queue the driver ensures
		124	* that the GPU is kept as busy as possible. In the case where a single context
		125	* completes but a second context is still executing, the request for this second
		126	* context will be at the head of the queue when we remove the first one. This
		127	* request will then be resubmitted along with a new request for a different context,
		128	* which will cause the hardware to continue executing the second request and queue
		129	* the new request (the GPU detects the condition of a context getting preempted
		130	* with the same context and optimizes the context switch flow by not doing
		131	* preemption, but just sampling the new tail pointer).
		132	*
		133	*/
		134
		135	#include
		136	#include
		137	#include "intel_drv.h"
		138	#include "i915_drv.h"
		139
		/*
		 * Logical ring context image sizes, expressed in pages.  Judging by the
		 * names these are the per-generation render sizes plus one size for the
		 * other engines; they are not referenced in the visible part of this file.
		 */
		#define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
		#define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
		#define GEN8_LR_CONTEXT_OTHER_SIZE	(2 * PAGE_SIZE)

		/*
		 * Execlist status bits.  Presumably fields of RING_EXECLIST_STATUS; they
		 * are unused in the visible part of this file -- TODO confirm.
		 */
		#define RING_EXECLIST_QFULL		(1 << 2)
		#define RING_EXECLIST1_VALID		(1 << 3)
		#define RING_EXECLIST0_VALID		(1 << 4)
		#define RING_EXECLIST_ACTIVE_STATUS	(3 << 14)
		#define RING_EXECLIST1_ACTIVE		(1 << 17)
		#define RING_EXECLIST0_ACTIVE		(1 << 18)

		/* Event bits of the status dword read from the context status buffer. */
		#define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
		#define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
		#define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
		#define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
		#define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
		#define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)

		/*
		 * Dword indices into the register state page of a context image.  The
		 * visible code stores a register's value at "index + 1" (see
		 * execlists_update_context()).
		 */
		#define CTX_LRI_HEADER_0		0x01
		#define CTX_CONTEXT_CONTROL		0x02
		#define CTX_RING_HEAD			0x04
		#define CTX_RING_TAIL			0x06
		#define CTX_RING_BUFFER_START		0x08
		#define CTX_RING_BUFFER_CONTROL		0x0a
		#define CTX_BB_HEAD_U			0x0c
		#define CTX_BB_HEAD_L			0x0e
		#define CTX_BB_STATE			0x10
		#define CTX_SECOND_BB_HEAD_U		0x12
		#define CTX_SECOND_BB_HEAD_L		0x14
		#define CTX_SECOND_BB_STATE		0x16
		#define CTX_BB_PER_CTX_PTR		0x18
		#define CTX_RCS_INDIRECT_CTX		0x1a
		#define CTX_RCS_INDIRECT_CTX_OFFSET	0x1c
		#define CTX_LRI_HEADER_1		0x21
		#define CTX_CTX_TIMESTAMP		0x22
		#define CTX_PDP3_UDW			0x24
		#define CTX_PDP3_LDW			0x26
		#define CTX_PDP2_UDW			0x28
		#define CTX_PDP2_LDW			0x2a
		#define CTX_PDP1_UDW			0x2c
		#define CTX_PDP1_LDW			0x2e
		#define CTX_PDP0_UDW			0x30
		#define CTX_PDP0_LDW			0x32
		#define CTX_LRI_HEADER_2		0x41
		#define CTX_R_PWR_CLK_STATE		0x42
		#define CTX_GPGPU_CSR_BASE_ADDRESS	0x44

		/* Flag bits of the 64-bit context descriptor. */
		#define GEN8_CTX_VALID			(1 << 0)
		#define GEN8_CTX_FORCE_PD_RESTORE	(1 << 1)
		#define GEN8_CTX_FORCE_RESTORE		(1 << 2)
		#define GEN8_CTX_L3LLC_COHERENT		(1 << 5)
		#define GEN8_CTX_PRIVILEGE		(1 << 8)

		/* Addressing mode, shifted into the descriptor by GEN8_CTX_MODE_SHIFT. */
		enum {
			ADVANCED_CONTEXT	= 0,
			LEGACY_CONTEXT		= 1,
			ADVANCED_AD_CONTEXT	= 2,
			LEGACY_64B_CONTEXT	= 3
		};
		#define GEN8_CTX_MODE_SHIFT		3

		/* Fault handling modes (not referenced in the visible part of this file). */
		enum {
			FAULT_AND_HANG		= 0,
			FAULT_AND_HALT		= 1,	/* Debug only */
			FAULT_AND_STREAM	= 2,
			FAULT_AND_CONTINUE	= 3	/* Unsupported */
		};

		/* Bit position of the execlists context ID inside the descriptor. */
		#define GEN8_CTX_ID_SHIFT		32
		206
		207	static int intel_lr_context_pin(struct intel_engine_cs *ring,
		208	struct intel_context *ctx);
		209
		210	/**
		211	* intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
		212	* @dev: DRM device.
		213	* @enable_execlists: value of i915.enable_execlists module parameter.
		214	*
		215	* Only certain platforms support Execlists (the prerequisites being
		216	* support for Logical Ring Contexts and Aliasing PPGTT or better),
		217	* and only when enabled via module parameter.
		218	*
		219	* Return: 1 if Execlists is supported and has to be enabled.
		220	*/
		221	int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists)
		222	{
		223	WARN_ON(i915.enable_ppgtt == -1);
		224
		225	if (INTEL_INFO(dev)->gen >= 9)
		226	return 1;
		227
		228	if (enable_execlists == 0)
		229	return 0;
		230
		231	if (HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev) &&
		232	i915.use_mmio_flip >= 0)
		233	return 1;
		234
		235	return 0;
		236	}
		237
		238	/**
		239	* intel_execlists_ctx_id() - get the Execlists Context ID
		240	* @ctx_obj: Logical Ring Context backing object.
		241	*
		242	* Do not confuse with ctx->id! Unfortunately we have a name overload
		243	* here: the old context ID we pass to userspace as a handler so that
		244	* they can refer to a context, and the new context ID we pass to the
		245	* ELSP so that the GPU can inform us of the context status via
		246	* interrupts.
		247	*
		248	* Return: 20-bits globally unique context ID.
		249	*/
		250	u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
		251	{
		252	u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj);
		253
		254	/* LRCA is required to be 4K aligned so the more significant 20 bits
		255	* are globally unique */
		256	return lrca >> 12;
		257	}
		258
		259	static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj)
		260	{
		261	uint64_t desc;
		262	uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj);
		263
		264	WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
		265
		266	desc = GEN8_CTX_VALID;
		267	desc \|= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT;
		268	desc \|= GEN8_CTX_L3LLC_COHERENT;
		269	desc \|= GEN8_CTX_PRIVILEGE;
		270	desc \|= lrca;
		271	desc \|= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
		272
		273	/* TODO: WaDisableLiteRestore when we start using semaphore
		274	* signalling between Command Streamers */
		275	/* desc \|= GEN8_CTX_FORCE_RESTORE; */
		276
		277	return desc;
		278	}
		279
		280	static void execlists_elsp_write(struct intel_engine_cs *ring,
		281	struct drm_i915_gem_object *ctx_obj0,
		282	struct drm_i915_gem_object *ctx_obj1)
		283	{
		284	struct drm_device *dev = ring->dev;
		285	struct drm_i915_private *dev_priv = dev->dev_private;
		286	uint64_t temp = 0;
		287	uint32_t desc[4];
		288	unsigned long flags;
		289
		290	/* XXX: You must always write both descriptors in the order below. */
		291	if (ctx_obj1)
		292	temp = execlists_ctx_descriptor(ctx_obj1);
		293	else
		294	temp = 0;
		295	desc[1] = (u32)(temp >> 32);
		296	desc[0] = (u32)temp;
		297
		298	temp = execlists_ctx_descriptor(ctx_obj0);
		299	desc[3] = (u32)(temp >> 32);
		300	desc[2] = (u32)temp;
		301
		302	/* Set Force Wakeup bit to prevent GT from entering C6 while ELSP writes
		303	* are in progress.
		304	*
		305	* The other problem is that we can't just call gen6_gt_force_wake_get()
		306	* because that function calls intel_runtime_pm_get(), which might sleep.
		307	* Instead, we do the runtime_pm_get/put when creating/destroying requests.
		308	*/
		309	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
		310	if (IS_CHERRYVIEW(dev) \|\| INTEL_INFO(dev)->gen >= 9) {
		311	if (dev_priv->uncore.fw_rendercount++ == 0)
		312	dev_priv->uncore.funcs.force_wake_get(dev_priv,
		313	FORCEWAKE_RENDER);
		314	if (dev_priv->uncore.fw_mediacount++ == 0)
		315	dev_priv->uncore.funcs.force_wake_get(dev_priv,
		316	FORCEWAKE_MEDIA);
		317	if (INTEL_INFO(dev)->gen >= 9) {
		318	if (dev_priv->uncore.fw_blittercount++ == 0)
		319	dev_priv->uncore.funcs.force_wake_get(dev_priv,
		320	FORCEWAKE_BLITTER);
		321	}
		322	} else {
		323	if (dev_priv->uncore.forcewake_count++ == 0)
		324	dev_priv->uncore.funcs.force_wake_get(dev_priv,
		325	FORCEWAKE_ALL);
		326	}
		327	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
		328
		329	I915_WRITE(RING_ELSP(ring), desc[1]);
		330	I915_WRITE(RING_ELSP(ring), desc[0]);
		331	I915_WRITE(RING_ELSP(ring), desc[3]);
		332	/* The context is automatically loaded after the following */
		333	I915_WRITE(RING_ELSP(ring), desc[2]);
		334
		335	/* ELSP is a wo register, so use another nearby reg for posting instead */
		336	POSTING_READ(RING_EXECLIST_STATUS(ring));
		337
		338	/* Release Force Wakeup (see the big comment above). */
		339	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
		340	if (IS_CHERRYVIEW(dev) \|\| INTEL_INFO(dev)->gen >= 9) {
		341	if (--dev_priv->uncore.fw_rendercount == 0)
		342	dev_priv->uncore.funcs.force_wake_put(dev_priv,
		343	FORCEWAKE_RENDER);
		344	if (--dev_priv->uncore.fw_mediacount == 0)
		345	dev_priv->uncore.funcs.force_wake_put(dev_priv,
		346	FORCEWAKE_MEDIA);
		347	if (INTEL_INFO(dev)->gen >= 9) {
		348	if (--dev_priv->uncore.fw_blittercount == 0)
		349	dev_priv->uncore.funcs.force_wake_put(dev_priv,
		350	FORCEWAKE_BLITTER);
		351	}
		352	} else {
		353	if (--dev_priv->uncore.forcewake_count == 0)
		354	dev_priv->uncore.funcs.force_wake_put(dev_priv,
		355	FORCEWAKE_ALL);
		356	}
		357
		358	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
		359	}
		360
		361	static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
		362	struct drm_i915_gem_object *ring_obj,
		363	u32 tail)
		364	{
		365	struct page *page;
		366	uint32_t *reg_state;
		367
		368	page = i915_gem_object_get_page(ctx_obj, 1);
		369	reg_state = kmap_atomic(page);
		370
		371	reg_state[CTX_RING_TAIL+1] = tail;
		372	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
		373
		374	kunmap_atomic(reg_state);
		375
		376	return 0;
		377	}
		378
		379	static void execlists_submit_contexts(struct intel_engine_cs *ring,
		380	struct intel_context *to0, u32 tail0,
		381	struct intel_context *to1, u32 tail1)
		382	{
		383	struct drm_i915_gem_object *ctx_obj0 = to0->engine[ring->id].state;
		384	struct intel_ringbuffer *ringbuf0 = to0->engine[ring->id].ringbuf;
		385	struct drm_i915_gem_object *ctx_obj1 = NULL;
		386	struct intel_ringbuffer *ringbuf1 = NULL;
		387
		388	BUG_ON(!ctx_obj0);
		389	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0));
		390	WARN_ON(!i915_gem_obj_is_pinned(ringbuf0->obj));
		391
		392	execlists_update_context(ctx_obj0, ringbuf0->obj, tail0);
		393
		394	if (to1) {
		395	ringbuf1 = to1->engine[ring->id].ringbuf;
		396	ctx_obj1 = to1->engine[ring->id].state;
		397	BUG_ON(!ctx_obj1);
		398	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj1));
		399	WARN_ON(!i915_gem_obj_is_pinned(ringbuf1->obj));
		400
		401	execlists_update_context(ctx_obj1, ringbuf1->obj, tail1);
		402	}
		403
		404	execlists_elsp_write(ring, ctx_obj0, ctx_obj1);
		405	}
		406
		407	static void execlists_context_unqueue(struct intel_engine_cs *ring)
		408	{
		409	struct intel_ctx_submit_request req0 = NULL, req1 = NULL;
		410	struct intel_ctx_submit_request cursor = NULL, tmp = NULL;
		411
		412	assert_spin_locked(&ring->execlist_lock);
		413
		414	if (list_empty(&ring->execlist_queue))
		415	return;
		416
		417	/* Try to read in pairs */
		418	list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue,
		419	execlist_link) {
		420	if (!req0) {
		421	req0 = cursor;
		422	} else if (req0->ctx == cursor->ctx) {
		423	/* Same ctx: ignore first request, as second request
		424	* will update tail past first request's workload */
		425	cursor->elsp_submitted = req0->elsp_submitted;
		426	list_del(&req0->execlist_link);
		427	list_add_tail(&req0->execlist_link,
		428	&ring->execlist_retired_req_list);
		429	req0 = cursor;
		430	} else {
		431	req1 = cursor;
		432	break;
		433	}
		434	}
		435
		436	WARN_ON(req1 && req1->elsp_submitted);
		437
		438	execlists_submit_contexts(ring, req0->ctx, req0->tail,
		439	req1 ? req1->ctx : NULL,
		440	req1 ? req1->tail : 0);
		441
		442	req0->elsp_submitted++;
		443	if (req1)
		444	req1->elsp_submitted++;
		445	}
		446
		447	static bool execlists_check_remove_request(struct intel_engine_cs *ring,
		448	u32 request_id)
		449	{
		450	struct intel_ctx_submit_request *head_req;
		451
		452	assert_spin_locked(&ring->execlist_lock);
		453
		454	head_req = list_first_entry_or_null(&ring->execlist_queue,
		455	struct intel_ctx_submit_request,
		456	execlist_link);
		457
		458	if (head_req != NULL) {
		459	struct drm_i915_gem_object *ctx_obj =
		460	head_req->ctx->engine[ring->id].state;
		461	if (intel_execlists_ctx_id(ctx_obj) == request_id) {
		462	WARN(head_req->elsp_submitted == 0,
		463	"Never submitted head request\n");
		464
		465	if (--head_req->elsp_submitted <= 0) {
		466	list_del(&head_req->execlist_link);
		467	list_add_tail(&head_req->execlist_link,
		468	&ring->execlist_retired_req_list);
		469	return true;
		470	}
		471	}
		472	}
		473
		474	return false;
		475	}
		476
		477	/**
		478	* intel_execlists_handle_ctx_events() - handle Context Switch interrupts
		479	* @ring: Engine Command Streamer to handle.
		480	*
		481	* Check the unread Context Status Buffers and manage the submission of new
		482	* contexts to the ELSP accordingly.
		483	*/
		484	void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring)
		485	{
		486	struct drm_i915_private *dev_priv = ring->dev->dev_private;
		487	u32 status_pointer;
		488	u8 read_pointer;
		489	u8 write_pointer;
		490	u32 status;
		491	u32 status_id;
		492	u32 submit_contexts = 0;
		493
		494	status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
		495
		496	read_pointer = ring->next_context_status_buffer;
		497	write_pointer = status_pointer & 0x07;
		498	if (read_pointer > write_pointer)
		499	write_pointer += 6;
		500
		501	spin_lock(&ring->execlist_lock);
		502
		503	while (read_pointer < write_pointer) {
		504	read_pointer++;
		505	status = I915_READ(RING_CONTEXT_STATUS_BUF(ring) +
		506	(read_pointer % 6) * 8);
		507	status_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) +
		508	(read_pointer % 6) * 8 + 4);
		509
		510	if (status & GEN8_CTX_STATUS_PREEMPTED) {
		511	if (status & GEN8_CTX_STATUS_LITE_RESTORE) {
		512	if (execlists_check_remove_request(ring, status_id))
		513	WARN(1, "Lite Restored request removed from queue\n");
		514	} else
		515	WARN(1, "Preemption without Lite Restore\n");
		516	}
		517
		518	if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) \|\|
		519	(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)) {
		520	if (execlists_check_remove_request(ring, status_id))
		521	submit_contexts++;
		522	}
		523	}
		524
		525	if (submit_contexts != 0)
		526	execlists_context_unqueue(ring);
		527
		528	spin_unlock(&ring->execlist_lock);
		529
		530	WARN(submit_contexts > 2, "More than two context complete events?\n");
		531	ring->next_context_status_buffer = write_pointer % 6;
		532
		533	I915_WRITE(RING_CONTEXT_STATUS_PTR(ring),
		534	((u32)ring->next_context_status_buffer & 0x07) << 8);
		535	}
		536
		537	static int execlists_context_queue(struct intel_engine_cs *ring,
		538	struct intel_context *to,
		539	u32 tail)
		540	{
		541	struct intel_ctx_submit_request req = NULL, cursor;
		542	struct drm_i915_private *dev_priv = ring->dev->dev_private;
		543	unsigned long flags;
		544	int num_elements = 0;
		545
		546	req = kzalloc(sizeof(*req), GFP_KERNEL);
		547	if (req == NULL)
		548	return -ENOMEM;
		549	req->ctx = to;
		550	i915_gem_context_reference(req->ctx);
		551
		552	if (to != ring->default_context)
		553	intel_lr_context_pin(ring, to);
		554
		555	req->ring = ring;
		556	req->tail = tail;
		557
		558	intel_runtime_pm_get(dev_priv);
		559
		560	spin_lock_irqsave(&ring->execlist_lock, flags);
		561
		562	list_for_each_entry(cursor, &ring->execlist_queue, execlist_link)
		563	if (++num_elements > 2)
		564	break;
		565
		566	if (num_elements > 2) {
		567	struct intel_ctx_submit_request *tail_req;
		568
		569	tail_req = list_last_entry(&ring->execlist_queue,
		570	struct intel_ctx_submit_request,
		571	execlist_link);
		572
		573	if (to == tail_req->ctx) {
		574	WARN(tail_req->elsp_submitted != 0,
		575	"More than 2 already-submitted reqs queued\n");
		576	list_del(&tail_req->execlist_link);
		577	list_add_tail(&tail_req->execlist_link,
		578	&ring->execlist_retired_req_list);
		579	}
		580	}
		581
		582	list_add_tail(&req->execlist_link, &ring->execlist_queue);
		583	if (num_elements == 0)
		584	execlists_context_unqueue(ring);
		585
		586	spin_unlock_irqrestore(&ring->execlist_lock, flags);
		587
		588	return 0;
		589	}
		590
		591	static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
		592	{
		593	struct intel_engine_cs *ring = ringbuf->ring;
		594	uint32_t flush_domains;
		595	int ret;
		596
		597	flush_domains = 0;
		598	if (ring->gpu_caches_dirty)
		599	flush_domains = I915_GEM_GPU_DOMAINS;
		600
		601	ret = ring->emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains);
		602	if (ret)
		603	return ret;
		604
		605	ring->gpu_caches_dirty = false;
		606	return 0;
		607	}
		608
		609	static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
		610	struct list_head *vmas)
		611	{
		612	struct intel_engine_cs *ring = ringbuf->ring;
		613	struct i915_vma *vma;
		614	uint32_t flush_domains = 0;
		615	bool flush_chipset = false;
		616	int ret;
		617
		618	list_for_each_entry(vma, vmas, exec_list) {
		619	struct drm_i915_gem_object *obj = vma->obj;
		620
		621	ret = i915_gem_object_sync(obj, ring);
		622	if (ret)
		623	return ret;
		624
		625	if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
		626	flush_chipset \|= i915_gem_clflush_object(obj, false);
		627
		628	flush_domains \|= obj->base.write_domain;
		629	}
		630
		631	if (flush_domains & I915_GEM_DOMAIN_GTT)
		632	wmb();
		633
		634	/* Unconditionally invalidate gpu caches and ensure that we do flush
		635	* any residual writes from the previous batch.
		636	*/
		637	return logical_ring_invalidate_all_caches(ringbuf);
		638	}
		639
		640	/**
		641	* execlists_submission() - submit a batchbuffer for execution, Execlists style
		642	* @dev: DRM device.
		643	* @file: DRM file.
		644	* @ring: Engine Command Streamer to submit to.
		645	* @ctx: Context to employ for this submission.
		646	* @args: execbuffer call arguments.
		647	* @vmas: list of vmas.
		648	* @batch_obj: the batchbuffer to submit.
		649	* @exec_start: batchbuffer start virtual address pointer.
		650	* @flags: translated execbuffer call flags.
		651	*
		652	* This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts
		653	* away the submission details of the execbuffer ioctl call.
		654	*
		655	* Return: non-zero if the submission fails.
		656	*/
		657	int intel_execlists_submission(struct drm_device dev, struct drm_file file,
		658	struct intel_engine_cs *ring,
		659	struct intel_context *ctx,
		660	struct drm_i915_gem_execbuffer2 *args,
		661	struct list_head *vmas,
		662	struct drm_i915_gem_object *batch_obj,
		663	u64 exec_start, u32 flags)
		664	{
		665	struct drm_i915_private *dev_priv = dev->dev_private;
		666	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
		667	int instp_mode;
		668	u32 instp_mask;
		669	int ret;
		670
		671	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
		672	instp_mask = I915_EXEC_CONSTANTS_MASK;
		673	switch (instp_mode) {
		674	case I915_EXEC_CONSTANTS_REL_GENERAL:
		675	case I915_EXEC_CONSTANTS_ABSOLUTE:
		676	case I915_EXEC_CONSTANTS_REL_SURFACE:
		677	if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
		678	DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
		679	return -EINVAL;
		680	}
		681
		682	if (instp_mode != dev_priv->relative_constants_mode) {
		683	if (instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
		684	DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
		685	return -EINVAL;
		686	}
		687
		688	/* The HW changed the meaning on this bit on gen6 */
		689	instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
		690	}
		691	break;
		692	default:
		693	DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
		694	return -EINVAL;
		695	}
		696
		697	if (args->num_cliprects != 0) {
		698	DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
		699	return -EINVAL;
		700	} else {
		701	if (args->DR4 == 0xffffffff) {
		702	DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
		703	args->DR4 = 0;
		704	}
		705
		706	if (args->DR1 \|\| args->DR4 \|\| args->cliprects_ptr) {
		707	DRM_DEBUG("0 cliprects but dirt in cliprects fields\n");
		708	return -EINVAL;
		709	}
		710	}
		711
		712	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
		713	DRM_DEBUG("sol reset is gen7 only\n");
		714	return -EINVAL;
		715	}
		716
		717	ret = execlists_move_to_gpu(ringbuf, vmas);
		718	if (ret)
		719	return ret;
		720
		721	if (ring == &dev_priv->ring[RCS] &&
		722	instp_mode != dev_priv->relative_constants_mode) {
		723	ret = intel_logical_ring_begin(ringbuf, 4);
		724	if (ret)
		725	return ret;
		726
		727	intel_logical_ring_emit(ringbuf, MI_NOOP);
		728	intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
		729	intel_logical_ring_emit(ringbuf, INSTPM);
		730	intel_logical_ring_emit(ringbuf, instp_mask << 16 \| instp_mode);
		731	intel_logical_ring_advance(ringbuf);
		732
		733	dev_priv->relative_constants_mode = instp_mode;
		734	}
		735
		736	ret = ring->emit_bb_start(ringbuf, exec_start, flags);
		737	if (ret)
		738	return ret;
		739
		740	i915_gem_execbuffer_move_to_active(vmas, ring);
		741	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
		742
		743	return 0;
		744	}
		745
		746	void intel_execlists_retire_requests(struct intel_engine_cs *ring)
		747	{
		748	struct intel_ctx_submit_request req, tmp;
		749	struct drm_i915_private *dev_priv = ring->dev->dev_private;
		750	unsigned long flags;
		751	struct list_head retired_list;
		752
		753	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
		754	if (list_empty(&ring->execlist_retired_req_list))
		755	return;
		756
		757	INIT_LIST_HEAD(&retired_list);
		758	spin_lock_irqsave(&ring->execlist_lock, flags);
		759	list_replace_init(&ring->execlist_retired_req_list, &retired_list);
		760	spin_unlock_irqrestore(&ring->execlist_lock, flags);
		761
		762	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
		763	struct intel_context *ctx = req->ctx;
		764	struct drm_i915_gem_object *ctx_obj =
		765	ctx->engine[ring->id].state;
		766
		767	if (ctx_obj && (ctx != ring->default_context))
		768	intel_lr_context_unpin(ring, ctx);
		769	intel_runtime_pm_put(dev_priv);
		770	i915_gem_context_unreference(req->ctx);
		771	list_del(&req->execlist_link);
		772	kfree(req);
		773	}
		774	}
		775
		776	void intel_logical_ring_stop(struct intel_engine_cs *ring)
		777	{
		778	struct drm_i915_private *dev_priv = ring->dev->dev_private;
		779	int ret;
		780
		781	if (!intel_ring_initialized(ring))
		782	return;
		783
		784	ret = intel_ring_idle(ring);
		785	if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
		786	DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
		787	ring->name, ret);
		788
		789	/* TODO: Is this correct with Execlists enabled? */
		790	I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
		791	if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
		792	DRM_ERROR("%s :timed out trying to stop ring\n", ring->name);
		793	return;
		794	}
		795	I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
		796	}
		797
		798	int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf)
		799	{
		800	struct intel_engine_cs *ring = ringbuf->ring;
		801	int ret;
		802
		803	if (!ring->gpu_caches_dirty)
		804	return 0;
		805
		806	ret = ring->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS);
		807	if (ret)
		808	return ret;
		809
		810	ring->gpu_caches_dirty = false;
		811	return 0;
		812	}
		813
		814	/**
		815	* intel_logical_ring_advance_and_submit() - advance the tail and submit the workload
		816	* @ringbuf: Logical Ringbuffer to advance.
		817	*
		818	* The tail is updated in our logical ringbuffer struct, not in the actual context. What
		819	* really happens during submission is that the context and current tail will be placed
		820	* on a queue waiting for the ELSP to be ready to accept a new context submission. At that
		821	* point, the tail inside the context is updated and the ELSP written to.
		822	*/
		823	void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
		824	{
		825	struct intel_engine_cs *ring = ringbuf->ring;
		826	struct intel_context *ctx = ringbuf->FIXME_lrc_ctx;
		827
		828	intel_logical_ring_advance(ringbuf);
		829
		830	if (intel_ring_stopped(ring))
		831	return;
		832
		833	execlists_context_queue(ring, ctx, ringbuf->tail);
		834	}
		835
		836	static int intel_lr_context_pin(struct intel_engine_cs *ring,
		837	struct intel_context *ctx)
		838	{
		839	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
		840	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
		841	int ret = 0;
		842
		843	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
		844	if (ctx->engine[ring->id].unpin_count++ == 0) {
		845	ret = i915_gem_obj_ggtt_pin(ctx_obj,
		846	GEN8_LR_CONTEXT_ALIGN, 0);
		847	if (ret)
		848	goto reset_unpin_count;
		849
		850	ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
		851	if (ret)
		852	goto unpin_ctx_obj;
		853	}
		854
		855	return ret;
		856
		857	unpin_ctx_obj:
		858	i915_gem_object_ggtt_unpin(ctx_obj);
		859	reset_unpin_count:
		860	ctx->engine[ring->id].unpin_count = 0;
		861
		862	return ret;
		863	}
		864
		865	void intel_lr_context_unpin(struct intel_engine_cs *ring,
		866	struct intel_context *ctx)
		867	{
		868	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
		869	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
		870
		871	if (ctx_obj) {
		872	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
		873	if (--ctx->engine[ring->id].unpin_count == 0) {
		874	intel_unpin_ringbuffer_obj(ringbuf);
		875	i915_gem_object_ggtt_unpin(ctx_obj);
		876	}
		877	}
		878	}
		879
		880	static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
		881	struct intel_context *ctx)
		882	{
		883	int ret;
		884
		885	if (ring->outstanding_lazy_seqno)
		886	return 0;
		887
		888	if (ring->preallocated_lazy_request == NULL) {
		889	struct drm_i915_gem_request *request;
		890
		891	request = kmalloc(sizeof(*request), GFP_KERNEL);
		892	if (request == NULL)
		893	return -ENOMEM;
		894
		895	if (ctx != ring->default_context) {
		896	ret = intel_lr_context_pin(ring, ctx);
		897	if (ret) {
		898	kfree(request);
		899	return ret;
		900	}
		901	}
		902
		903	/* Hold a reference to the context this request belongs to
		904	* (we will need it when the time comes to emit/retire the
		905	* request).
		906	*/
		907	request->ctx = ctx;
		908	i915_gem_context_reference(request->ctx);
		909
		910	ring->preallocated_lazy_request = request;
		911	}
		912
		913	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
		914	}
		915
		916	static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
		917	int bytes)
		918	{
		919	struct intel_engine_cs *ring = ringbuf->ring;
		920	struct drm_i915_gem_request *request;
		921	u32 seqno = 0;
		922	int ret;
		923
		924	if (ringbuf->last_retired_head != -1) {
		925	ringbuf->head = ringbuf->last_retired_head;
		926	ringbuf->last_retired_head = -1;
		927
		928	ringbuf->space = intel_ring_space(ringbuf);
		929	if (ringbuf->space >= bytes)
		930	return 0;
		931	}
		932
		933	list_for_each_entry(request, &ring->request_list, list) {
		934	if (__intel_ring_space(request->tail, ringbuf->tail,
		935	ringbuf->size) >= bytes) {
		936	seqno = request->seqno;
		937	break;
		938	}
		939	}
		940
		941	if (seqno == 0)
		942	return -ENOSPC;
		943
		944	ret = i915_wait_seqno(ring, seqno);
		945	if (ret)
		946	return ret;
		947
		948	i915_gem_retire_requests_ring(ring);
		949	ringbuf->head = ringbuf->last_retired_head;
		950	ringbuf->last_retired_head = -1;
		951
		952	ringbuf->space = intel_ring_space(ringbuf);
		953	return 0;
		954	}
		955
		956	static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
		957	int bytes)
		958	{
		959	struct intel_engine_cs *ring = ringbuf->ring;
		960	struct drm_device *dev = ring->dev;
		961	struct drm_i915_private *dev_priv = dev->dev_private;
		962	unsigned long end;
		963	int ret;
		964
		965	ret = logical_ring_wait_request(ringbuf, bytes);
		966	if (ret != -ENOSPC)
		967	return ret;
		968
		969	/* Force the context submission in case we have been skipping it */
		970	intel_logical_ring_advance_and_submit(ringbuf);
		971
		972	/* With GEM the hangcheck timer should kick us out of the loop,
		973	* leaving it early runs the risk of corrupting GEM state (due
		974	* to running on almost untested codepaths). But on resume
		975	* timers don't work yet, so prevent a complete hang in that
		976	* case by choosing an insanely large timeout. */
		977	end = jiffies + 60 * HZ;
		978
		979	do {
		980	ringbuf->head = I915_READ_HEAD(ring);
		981	ringbuf->space = intel_ring_space(ringbuf);
		982	if (ringbuf->space >= bytes) {
		983	ret = 0;
		984	break;
		985	}
		986
		987	msleep(1);
		988
		989	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
		990	dev_priv->mm.interruptible);
		991	if (ret)
		992	break;
		993
		994	if (time_after(jiffies, end)) {
		995	ret = -EBUSY;
		996	break;
		997	}
		998	} while (1);
		999
		1000	return ret;
		1001	}
		1002
		1003	static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf)
		1004	{
		1005	uint32_t __iomem *virt;
		1006	int rem = ringbuf->size - ringbuf->tail;
		1007
		1008	if (ringbuf->space < rem) {
		1009	int ret = logical_ring_wait_for_space(ringbuf, rem);
		1010
		1011	if (ret)
		1012	return ret;
		1013	}
		1014
		1015	virt = ringbuf->virtual_start + ringbuf->tail;
		1016	rem /= 4;
		1017	while (rem--)
		1018	iowrite32(MI_NOOP, virt++);
		1019
		1020	ringbuf->tail = 0;
		1021	ringbuf->space = intel_ring_space(ringbuf);
		1022
		1023	return 0;
		1024	}
		1025
		1026	static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
		1027	{
		1028	int ret;
		1029
		1030	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
		1031	ret = logical_ring_wrap_buffer(ringbuf);
		1032	if (unlikely(ret))
		1033	return ret;
		1034	}
		1035
		1036	if (unlikely(ringbuf->space < bytes)) {
		1037	ret = logical_ring_wait_for_space(ringbuf, bytes);
		1038	if (unlikely(ret))
		1039	return ret;
		1040	}
		1041
		1042	return 0;
		1043	}
		1044
		1045	/**
		1046	* intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands
		1047	*
		1048	* @ringbuf: Logical ringbuffer.
		1049	* @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
		1050	*
		1051	* The ringbuffer might not be ready to accept the commands right away (maybe it needs to
		1052	* be wrapped, or wait a bit for the tail to be updated). This function takes care of that
		1053	* and also preallocates a request (every workload submission is still mediated through
		1054	* requests, same as it did with legacy ringbuffer submission).
		1055	*
		1056	* Return: non-zero if the ringbuffer is not ready to be written to.
		1057	*/
		1058	int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
		1059	{
		1060	struct intel_engine_cs *ring = ringbuf->ring;
		1061	struct drm_device *dev = ring->dev;
		1062	struct drm_i915_private *dev_priv = dev->dev_private;
		1063	int ret;
		1064
		1065	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
		1066	dev_priv->mm.interruptible);
		1067	if (ret)
		1068	return ret;
		1069
		1070	ret = logical_ring_prepare(ringbuf, num_dwords * sizeof(uint32_t));
		1071	if (ret)
		1072	return ret;
		1073
		1074	/* Preallocate the olr before touching the ring */
		1075	ret = logical_ring_alloc_seqno(ring, ringbuf->FIXME_lrc_ctx);
		1076	if (ret)
		1077	return ret;
		1078
		1079	ringbuf->space -= num_dwords * sizeof(uint32_t);
		1080	return 0;
		1081	}
		1082
		1083	static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring,
		1084	struct intel_context *ctx)
		1085	{
		1086	int ret, i;
		1087	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
		1088	struct drm_device *dev = ring->dev;
		1089	struct drm_i915_private *dev_priv = dev->dev_private;
		1090	struct i915_workarounds *w = &dev_priv->workarounds;
		1091
		1092	if (WARN_ON(w->count == 0))
		1093	return 0;
		1094
		1095	ring->gpu_caches_dirty = true;
		1096	ret = logical_ring_flush_all_caches(ringbuf);
		1097	if (ret)
		1098	return ret;
		1099
		1100	ret = intel_logical_ring_begin(ringbuf, w->count * 2 + 2);
		1101	if (ret)
		1102	return ret;
		1103
		1104	intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(w->count));
		1105	for (i = 0; i < w->count; i++) {
		1106	intel_logical_ring_emit(ringbuf, w->reg[i].addr);
		1107	intel_logical_ring_emit(ringbuf, w->reg[i].value);
		1108	}
		1109	intel_logical_ring_emit(ringbuf, MI_NOOP);
		1110
		1111	intel_logical_ring_advance(ringbuf);
		1112
		1113	ring->gpu_caches_dirty = true;
		1114	ret = logical_ring_flush_all_caches(ringbuf);
		1115	if (ret)
		1116	return ret;
		1117
		1118	return 0;
		1119	}
		1120
		1121	static int gen8_init_common_ring(struct intel_engine_cs *ring)
		1122	{
		1123	struct drm_device *dev = ring->dev;
		1124	struct drm_i915_private *dev_priv = dev->dev_private;
		1125
		1126	I915_WRITE_IMR(ring, ~(ring->irq_enable_mask \| ring->irq_keep_mask));
		1127	I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff);
		1128
		1129	I915_WRITE(RING_MODE_GEN7(ring),
		1130	_MASKED_BIT_DISABLE(GFX_REPLAY_MODE) \|
		1131	_MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
		1132	POSTING_READ(RING_MODE_GEN7(ring));
		1133	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", ring->name);
		1134
		1135	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
		1136
		1137	return 0;
		1138	}
		1139
		1140	static int gen8_init_render_ring(struct intel_engine_cs *ring)
		1141	{
		1142	struct drm_device *dev = ring->dev;
		1143	struct drm_i915_private *dev_priv = dev->dev_private;
		1144	int ret;
		1145
		1146	ret = gen8_init_common_ring(ring);
		1147	if (ret)
		1148	return ret;
		1149
		1150	/* We need to disable the AsyncFlip performance optimisations in order
		1151	* to use MI_WAIT_FOR_EVENT within the CS. It should already be
		1152	* programmed to '1' on all products.
		1153	*
		1154	* WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
		1155	*/
		1156	I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
		1157
		1158	ret = intel_init_pipe_control(ring);
		1159	if (ret)
		1160	return ret;
		1161
		1162	I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
		1163
		1164	return init_workarounds_ring(ring);
		1165	}
		1166
		1167	static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf,
		1168	u64 offset, unsigned flags)
		1169	{
		1170	bool ppgtt = !(flags & I915_DISPATCH_SECURE);
		1171	int ret;
		1172
		1173	ret = intel_logical_ring_begin(ringbuf, 4);
		1174	if (ret)
		1175	return ret;
		1176
		1177	/* FIXME(BDW): Address space and security selectors. */
		1178	intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 \| (ppgtt<<8));
		1179	intel_logical_ring_emit(ringbuf, lower_32_bits(offset));
		1180	intel_logical_ring_emit(ringbuf, upper_32_bits(offset));
		1181	intel_logical_ring_emit(ringbuf, MI_NOOP);
		1182	intel_logical_ring_advance(ringbuf);
		1183
		1184	return 0;
		1185	}
		1186
		1187	static bool gen8_logical_ring_get_irq(struct intel_engine_cs *ring)
		1188	{
		1189	struct drm_device *dev = ring->dev;
		1190	struct drm_i915_private *dev_priv = dev->dev_private;
		1191	unsigned long flags;
		1192
		1193	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
		1194	return false;
		1195
		1196	spin_lock_irqsave(&dev_priv->irq_lock, flags);
		1197	if (ring->irq_refcount++ == 0) {
		1198	I915_WRITE_IMR(ring, ~(ring->irq_enable_mask \| ring->irq_keep_mask));
		1199	POSTING_READ(RING_IMR(ring->mmio_base));
		1200	}
		1201	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
		1202
		1203	return true;
		1204	}
		1205
		1206	static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring)
		1207	{
		1208	struct drm_device *dev = ring->dev;
		1209	struct drm_i915_private *dev_priv = dev->dev_private;
		1210	unsigned long flags;
		1211
		1212	spin_lock_irqsave(&dev_priv->irq_lock, flags);
		1213	if (--ring->irq_refcount == 0) {
		1214	I915_WRITE_IMR(ring, ~ring->irq_keep_mask);
		1215	POSTING_READ(RING_IMR(ring->mmio_base));
		1216	}
		1217	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
		1218	}
		1219
		1220	static int gen8_emit_flush(struct intel_ringbuffer *ringbuf,
		1221	u32 invalidate_domains,
		1222	u32 unused)
		1223	{
		1224	struct intel_engine_cs *ring = ringbuf->ring;
		1225	struct drm_device *dev = ring->dev;
		1226	struct drm_i915_private *dev_priv = dev->dev_private;
		1227	uint32_t cmd;
		1228	int ret;
		1229
		1230	ret = intel_logical_ring_begin(ringbuf, 4);
		1231	if (ret)
		1232	return ret;
		1233
		1234	cmd = MI_FLUSH_DW + 1;
		1235
		1236	if (ring == &dev_priv->ring[VCS]) {
		1237	if (invalidate_domains & I915_GEM_GPU_DOMAINS)
		1238	cmd \|= MI_INVALIDATE_TLB \| MI_INVALIDATE_BSD \|
		1239	MI_FLUSH_DW_STORE_INDEX \|
		1240	MI_FLUSH_DW_OP_STOREDW;
		1241	} else {
		1242	if (invalidate_domains & I915_GEM_DOMAIN_RENDER)
		1243	cmd \|= MI_INVALIDATE_TLB \| MI_FLUSH_DW_STORE_INDEX \|
		1244	MI_FLUSH_DW_OP_STOREDW;
		1245	}
		1246
		1247	intel_logical_ring_emit(ringbuf, cmd);
		1248	intel_logical_ring_emit(ringbuf,
		1249	I915_GEM_HWS_SCRATCH_ADDR \|
		1250	MI_FLUSH_DW_USE_GTT);
		1251	intel_logical_ring_emit(ringbuf, 0); /* upper addr */
		1252	intel_logical_ring_emit(ringbuf, 0); /* value */
		1253	intel_logical_ring_advance(ringbuf);
		1254
		1255	return 0;
		1256	}
		1257
		1258	static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf,
		1259	u32 invalidate_domains,
		1260	u32 flush_domains)
		1261	{
		1262	struct intel_engine_cs *ring = ringbuf->ring;
		1263	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
		1264	u32 flags = 0;
		1265	int ret;
		1266
		1267	flags \|= PIPE_CONTROL_CS_STALL;
		1268
		1269	if (flush_domains) {
		1270	flags \|= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		1271	flags \|= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		1272	}
		1273
		1274	if (invalidate_domains) {
		1275	flags \|= PIPE_CONTROL_TLB_INVALIDATE;
		1276	flags \|= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		1277	flags \|= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		1278	flags \|= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		1279	flags \|= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		1280	flags \|= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		1281	flags \|= PIPE_CONTROL_QW_WRITE;
		1282	flags \|= PIPE_CONTROL_GLOBAL_GTT_IVB;
		1283	}
		1284
		1285	ret = intel_logical_ring_begin(ringbuf, 6);
		1286	if (ret)
		1287	return ret;
		1288
		1289	intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
		1290	intel_logical_ring_emit(ringbuf, flags);
		1291	intel_logical_ring_emit(ringbuf, scratch_addr);
		1292	intel_logical_ring_emit(ringbuf, 0);
		1293	intel_logical_ring_emit(ringbuf, 0);
		1294	intel_logical_ring_emit(ringbuf, 0);
		1295	intel_logical_ring_advance(ringbuf);
		1296
		1297	return 0;
		1298	}
		1299
		1300	static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
		1301	{
		1302	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
		1303	}
		1304
		1305	static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
		1306	{
		1307	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
		1308	}
		1309
		1310	static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
		1311	{
		1312	struct intel_engine_cs *ring = ringbuf->ring;
		1313	u32 cmd;
		1314	int ret;
		1315
		1316	ret = intel_logical_ring_begin(ringbuf, 6);
		1317	if (ret)
		1318	return ret;
		1319
		1320	cmd = MI_STORE_DWORD_IMM_GEN8;
		1321	cmd \|= MI_GLOBAL_GTT;
		1322
		1323	intel_logical_ring_emit(ringbuf, cmd);
		1324	intel_logical_ring_emit(ringbuf,
		1325	(ring->status_page.gfx_addr +
		1326	(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
		1327	intel_logical_ring_emit(ringbuf, 0);
		1328	intel_logical_ring_emit(ringbuf, ring->outstanding_lazy_seqno);
		1329	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
		1330	intel_logical_ring_emit(ringbuf, MI_NOOP);
		1331	intel_logical_ring_advance_and_submit(ringbuf);
		1332
		1333	return 0;
		1334	}
		1335
		1336	/**
		1337	* intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
		1338	*
		1339	* @ring: Engine Command Streamer.
		1340	*
		1341	*/
		1342	void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
		1343	{
		1344	struct drm_i915_private *dev_priv;
		1345
		1346	if (!intel_ring_initialized(ring))
		1347	return;
		1348
		1349	dev_priv = ring->dev->dev_private;
		1350
		1351	intel_logical_ring_stop(ring);
		1352	WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
		1353	ring->preallocated_lazy_request = NULL;
		1354	ring->outstanding_lazy_seqno = 0;
		1355
		1356	if (ring->cleanup)
		1357	ring->cleanup(ring);
		1358
		1359	i915_cmd_parser_fini_ring(ring);
		1360
		1361	if (ring->status_page.obj) {
		1362	kunmap(sg_page(ring->status_page.obj->pages->sgl));
		1363	ring->status_page.obj = NULL;
		1364	}
		1365	}
		1366
		1367	static int logical_ring_init(struct drm_device dev, struct intel_engine_cs ring)
		1368	{
		1369	int ret;
		1370
		1371	/* Intentionally left blank. */
		1372	ring->buffer = NULL;
		1373
		1374	ring->dev = dev;
		1375	INIT_LIST_HEAD(&ring->active_list);
		1376	INIT_LIST_HEAD(&ring->request_list);
		1377	init_waitqueue_head(&ring->irq_queue);
		1378
		1379	INIT_LIST_HEAD(&ring->execlist_queue);
		1380	INIT_LIST_HEAD(&ring->execlist_retired_req_list);
		1381	spin_lock_init(&ring->execlist_lock);
		1382	ring->next_context_status_buffer = 0;
		1383
		1384	ret = i915_cmd_parser_init_ring(ring);
		1385	if (ret)
		1386	return ret;
		1387
		1388	if (ring->init) {
		1389	ret = ring->init(ring);
		1390	if (ret)
		1391	return ret;
		1392	}
		1393
		1394	ret = intel_lr_context_deferred_create(ring->default_context, ring);
		1395
		1396	return ret;
		1397	}
		1398
		1399	static int logical_render_ring_init(struct drm_device *dev)
		1400	{
		1401	struct drm_i915_private *dev_priv = dev->dev_private;
		1402	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
		1403
		1404	ring->name = "render ring";
		1405	ring->id = RCS;
		1406	ring->mmio_base = RENDER_RING_BASE;
		1407	ring->irq_enable_mask =
		1408	GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
		1409	ring->irq_keep_mask =
		1410	GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
		1411	if (HAS_L3_DPF(dev))
		1412	ring->irq_keep_mask \|= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
		1413
		1414	ring->init = gen8_init_render_ring;
		1415	ring->init_context = intel_logical_ring_workarounds_emit;
		1416	ring->cleanup = intel_fini_pipe_control;
		1417	ring->get_seqno = gen8_get_seqno;
		1418	ring->set_seqno = gen8_set_seqno;
		1419	ring->emit_request = gen8_emit_request;
		1420	ring->emit_flush = gen8_emit_flush_render;
		1421	ring->irq_get = gen8_logical_ring_get_irq;
		1422	ring->irq_put = gen8_logical_ring_put_irq;
		1423	ring->emit_bb_start = gen8_emit_bb_start;
		1424
		1425	return logical_ring_init(dev, ring);
		1426	}
		1427
		1428	static int logical_bsd_ring_init(struct drm_device *dev)
		1429	{
		1430	struct drm_i915_private *dev_priv = dev->dev_private;
		1431	struct intel_engine_cs *ring = &dev_priv->ring[VCS];
		1432
		1433	ring->name = "bsd ring";
		1434	ring->id = VCS;
		1435	ring->mmio_base = GEN6_BSD_RING_BASE;
		1436	ring->irq_enable_mask =
		1437	GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
		1438	ring->irq_keep_mask =
		1439	GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
		1440
		1441	ring->init = gen8_init_common_ring;
		1442	ring->get_seqno = gen8_get_seqno;
		1443	ring->set_seqno = gen8_set_seqno;
		1444	ring->emit_request = gen8_emit_request;
		1445	ring->emit_flush = gen8_emit_flush;
		1446	ring->irq_get = gen8_logical_ring_get_irq;
		1447	ring->irq_put = gen8_logical_ring_put_irq;
		1448	ring->emit_bb_start = gen8_emit_bb_start;
		1449
		1450	return logical_ring_init(dev, ring);
		1451	}
		1452
		1453	static int logical_bsd2_ring_init(struct drm_device *dev)
		1454	{
		1455	struct drm_i915_private *dev_priv = dev->dev_private;
		1456	struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
		1457
		1458	ring->name = "bds2 ring";
		1459	ring->id = VCS2;
		1460	ring->mmio_base = GEN8_BSD2_RING_BASE;
		1461	ring->irq_enable_mask =
		1462	GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
		1463	ring->irq_keep_mask =
		1464	GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
		1465
		1466	ring->init = gen8_init_common_ring;
		1467	ring->get_seqno = gen8_get_seqno;
		1468	ring->set_seqno = gen8_set_seqno;
		1469	ring->emit_request = gen8_emit_request;
		1470	ring->emit_flush = gen8_emit_flush;
		1471	ring->irq_get = gen8_logical_ring_get_irq;
		1472	ring->irq_put = gen8_logical_ring_put_irq;
		1473	ring->emit_bb_start = gen8_emit_bb_start;
		1474
		1475	return logical_ring_init(dev, ring);
		1476	}
		1477
		1478	static int logical_blt_ring_init(struct drm_device *dev)
		1479	{
		1480	struct drm_i915_private *dev_priv = dev->dev_private;
		1481	struct intel_engine_cs *ring = &dev_priv->ring[BCS];
		1482
		1483	ring->name = "blitter ring";
		1484	ring->id = BCS;
		1485	ring->mmio_base = BLT_RING_BASE;
		1486	ring->irq_enable_mask =
		1487	GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
		1488	ring->irq_keep_mask =
		1489	GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
		1490
		1491	ring->init = gen8_init_common_ring;
		1492	ring->get_seqno = gen8_get_seqno;
		1493	ring->set_seqno = gen8_set_seqno;
		1494	ring->emit_request = gen8_emit_request;
		1495	ring->emit_flush = gen8_emit_flush;
		1496	ring->irq_get = gen8_logical_ring_get_irq;
		1497	ring->irq_put = gen8_logical_ring_put_irq;
		1498	ring->emit_bb_start = gen8_emit_bb_start;
		1499
		1500	return logical_ring_init(dev, ring);
		1501	}
		1502
		1503	static int logical_vebox_ring_init(struct drm_device *dev)
		1504	{
		1505	struct drm_i915_private *dev_priv = dev->dev_private;
		1506	struct intel_engine_cs *ring = &dev_priv->ring[VECS];
		1507
		1508	ring->name = "video enhancement ring";
		1509	ring->id = VECS;
		1510	ring->mmio_base = VEBOX_RING_BASE;
		1511	ring->irq_enable_mask =
		1512	GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
		1513	ring->irq_keep_mask =
		1514	GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
		1515
		1516	ring->init = gen8_init_common_ring;
		1517	ring->get_seqno = gen8_get_seqno;
		1518	ring->set_seqno = gen8_set_seqno;
		1519	ring->emit_request = gen8_emit_request;
		1520	ring->emit_flush = gen8_emit_flush;
		1521	ring->irq_get = gen8_logical_ring_get_irq;
		1522	ring->irq_put = gen8_logical_ring_put_irq;
		1523	ring->emit_bb_start = gen8_emit_bb_start;
		1524
		1525	return logical_ring_init(dev, ring);
		1526	}
		1527
		1528	/**
		1529	* intel_logical_rings_init() - allocate, populate and init the Engine Command Streamers
		1530	* @dev: DRM device.
		1531	*
		1532	* This function inits the engines for an Execlists submission style (the equivalent in the
		1533	* legacy ringbuffer submission world would be i915_gem_init_rings). It does it only for
		1534	* those engines that are present in the hardware.
		1535	*
		1536	* Return: non-zero if the initialization failed.
		1537	*/
		1538	int intel_logical_rings_init(struct drm_device *dev)
		1539	{
		1540	struct drm_i915_private *dev_priv = dev->dev_private;
		1541	int ret;
		1542
		1543	ret = logical_render_ring_init(dev);
		1544	if (ret)
		1545	return ret;
		1546
		1547	if (HAS_BSD(dev)) {
		1548	ret = logical_bsd_ring_init(dev);
		1549	if (ret)
		1550	goto cleanup_render_ring;
		1551	}
		1552
		1553	if (HAS_BLT(dev)) {
		1554	ret = logical_blt_ring_init(dev);
		1555	if (ret)
		1556	goto cleanup_bsd_ring;
		1557	}
		1558
		1559	if (HAS_VEBOX(dev)) {
		1560	ret = logical_vebox_ring_init(dev);
		1561	if (ret)
		1562	goto cleanup_blt_ring;
		1563	}
		1564
		1565	if (HAS_BSD2(dev)) {
		1566	ret = logical_bsd2_ring_init(dev);
		1567	if (ret)
		1568	goto cleanup_vebox_ring;
		1569	}
		1570
		1571	ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
		1572	if (ret)
		1573	goto cleanup_bsd2_ring;
		1574
		1575	return 0;
		1576
		1577	cleanup_bsd2_ring:
		1578	intel_logical_ring_cleanup(&dev_priv->ring[VCS2]);
		1579	cleanup_vebox_ring:
		1580	intel_logical_ring_cleanup(&dev_priv->ring[VECS]);
		1581	cleanup_blt_ring:
		1582	intel_logical_ring_cleanup(&dev_priv->ring[BCS]);
		1583	cleanup_bsd_ring:
		1584	intel_logical_ring_cleanup(&dev_priv->ring[VCS]);
		1585	cleanup_render_ring:
		1586	intel_logical_ring_cleanup(&dev_priv->ring[RCS]);
		1587
		1588	return ret;
		1589	}
		1590
		1591	int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
		1592	struct intel_context *ctx)
		1593	{
		1594	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
		1595	struct render_state so;
		1596	struct drm_i915_file_private *file_priv = ctx->file_priv;
		1597	struct drm_file *file = file_priv ? file_priv->file : NULL;
		1598	int ret;
		1599
		1600	ret = i915_gem_render_state_prepare(ring, &so);
		1601	if (ret)
		1602	return ret;
		1603
		1604	if (so.rodata == NULL)
		1605	return 0;
		1606
		1607	ret = ring->emit_bb_start(ringbuf,
		1608	so.ggtt_offset,
		1609	I915_DISPATCH_SECURE);
		1610	if (ret)
		1611	goto out;
		1612
		1613	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
		1614
		1615	ret = __i915_add_request(ring, file, so.obj, NULL);
		1616	/* intel_logical_ring_add_request moves object to inactive if it
		1617	* fails */
		1618	out:
		1619	i915_gem_render_state_fini(&so);
		1620	return ret;
		1621	}
		1622
		1623	static int
		1624	populate_lr_context(struct intel_context ctx, struct drm_i915_gem_object ctx_obj,
		1625	struct intel_engine_cs ring, struct intel_ringbuffer ringbuf)
		1626	{
		1627	struct drm_device *dev = ring->dev;
		1628	struct drm_i915_private *dev_priv = dev->dev_private;
		1629	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
		1630	struct page *page;
		1631	uint32_t *reg_state;
		1632	int ret;
		1633
		1634	if (!ppgtt)
		1635	ppgtt = dev_priv->mm.aliasing_ppgtt;
		1636
		1637	ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
		1638	if (ret) {
		1639	DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
		1640	return ret;
		1641	}
		1642
		1643	ret = i915_gem_object_get_pages(ctx_obj);
		1644	if (ret) {
		1645	DRM_DEBUG_DRIVER("Could not get object pages\n");
		1646	return ret;
		1647	}
		1648
		1649	i915_gem_object_pin_pages(ctx_obj);
		1650
		1651	/* The second page of the context object contains some fields which must
		1652	* be set up prior to the first execution. */
		1653	page = i915_gem_object_get_page(ctx_obj, 1);
		1654	reg_state = kmap_atomic(page);
		1655
		1656	/* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM
		1657	* commands followed by (reg, value) pairs. The values we are setting here are
		1658	* only for the first context restore: on a subsequent save, the GPU will
		1659	* recreate this batchbuffer with new values (including all the missing
		1660	* MI_LOAD_REGISTER_IMM commands that we are not initializing here). */
		1661	if (ring->id == RCS)
		1662	reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(14);
		1663	else
		1664	reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(11);
		1665	reg_state[CTX_LRI_HEADER_0] \|= MI_LRI_FORCE_POSTED;
		1666	reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring);
		1667	reg_state[CTX_CONTEXT_CONTROL+1] =
		1668	_MASKED_BIT_ENABLE((1<<3) \| MI_RESTORE_INHIBIT);
		1669	reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base);
		1670	reg_state[CTX_RING_HEAD+1] = 0;
		1671	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
		1672	reg_state[CTX_RING_TAIL+1] = 0;
		1673	reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
		1674	/* Ring buffer start address is not known until the buffer is pinned.
		1675	* It is written to the context image in execlists_update_context()
		1676	*/
		1677	reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base);
		1678	reg_state[CTX_RING_BUFFER_CONTROL+1] =
		1679	((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) \| RING_VALID;
		1680	reg_state[CTX_BB_HEAD_U] = ring->mmio_base + 0x168;
		1681	reg_state[CTX_BB_HEAD_U+1] = 0;
		1682	reg_state[CTX_BB_HEAD_L] = ring->mmio_base + 0x140;
		1683	reg_state[CTX_BB_HEAD_L+1] = 0;
		1684	reg_state[CTX_BB_STATE] = ring->mmio_base + 0x110;
		1685	reg_state[CTX_BB_STATE+1] = (1<<5);
		1686	reg_state[CTX_SECOND_BB_HEAD_U] = ring->mmio_base + 0x11c;
		1687	reg_state[CTX_SECOND_BB_HEAD_U+1] = 0;
		1688	reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114;
		1689	reg_state[CTX_SECOND_BB_HEAD_L+1] = 0;
		1690	reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
		1691	reg_state[CTX_SECOND_BB_STATE+1] = 0;
		1692	if (ring->id == RCS) {
		1693	/* TODO: according to BSpec, the register state context
		1694	* for CHV does not have these. OTOH, these registers do
		1695	* exist in CHV. I'm waiting for a clarification */
		1696	reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0;
		1697	reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
		1698	reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4;
		1699	reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
		1700	reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8;
		1701	reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
		1702	}
		1703	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
		1704	reg_state[CTX_LRI_HEADER_1] \|= MI_LRI_FORCE_POSTED;
		1705	reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8;
		1706	reg_state[CTX_CTX_TIMESTAMP+1] = 0;
		1707	reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3);
		1708	reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3);
		1709	reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2);
		1710	reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2);
		1711	reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1);
		1712	reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
		1713	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
		1714	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
		1715	reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[3]);
		1716	reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[3]);
		1717	reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[2]);
		1718	reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[2]);
		1719	reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[1]);
		1720	reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[1]);
		1721	reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[0]);
		1722	reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[0]);
		1723	if (ring->id == RCS) {
		1724	reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
		1725	reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8;
		1726	reg_state[CTX_R_PWR_CLK_STATE+1] = 0;
		1727	}
		1728
		1729	kunmap_atomic(reg_state);
		1730
		1731	ctx_obj->dirty = 1;
		1732	// set_page_dirty(page);
		1733	i915_gem_object_unpin_pages(ctx_obj);
		1734
		1735	return 0;
		1736	}
		1737
		1738	/**
		1739	* intel_lr_context_free() - free the LRC specific bits of a context
		1740	* @ctx: the LR context to free.
		1741	*
		1742	* The real context freeing is done in i915_gem_context_free: this only
		1743	* takes care of the bits that are LRC related: the per-engine backing
		1744	* objects and the logical ringbuffer.
		1745	*/
		1746	void intel_lr_context_free(struct intel_context *ctx)
		1747	{
		1748	int i;
		1749
		1750	for (i = 0; i < I915_NUM_RINGS; i++) {
		1751	struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
		1752
		1753	if (ctx_obj) {
		1754	struct intel_ringbuffer *ringbuf =
		1755	ctx->engine[i].ringbuf;
		1756	struct intel_engine_cs *ring = ringbuf->ring;
		1757
		1758	if (ctx == ring->default_context) {
		1759	intel_unpin_ringbuffer_obj(ringbuf);
		1760	i915_gem_object_ggtt_unpin(ctx_obj);
		1761	}
		1762	intel_destroy_ringbuffer_obj(ringbuf);
		1763	kfree(ringbuf);
		1764	drm_gem_object_unreference(&ctx_obj->base);
		1765	}
		1766	}
		1767	}
		1768
		1769	static uint32_t get_lr_context_size(struct intel_engine_cs *ring)
		1770	{
		1771	int ret = 0;
		1772
		1773	WARN_ON(INTEL_INFO(ring->dev)->gen < 8);
		1774
		1775	switch (ring->id) {
		1776	case RCS:
		1777	if (INTEL_INFO(ring->dev)->gen >= 9)
		1778	ret = GEN9_LR_CONTEXT_RENDER_SIZE;
		1779	else
		1780	ret = GEN8_LR_CONTEXT_RENDER_SIZE;
		1781	break;
		1782	case VCS:
		1783	case BCS:
		1784	case VECS:
		1785	case VCS2:
		1786	ret = GEN8_LR_CONTEXT_OTHER_SIZE;
		1787	break;
		1788	}
		1789
		1790	return ret;
		1791	}
		1792
		1793	static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
		1794	struct drm_i915_gem_object *default_ctx_obj)
		1795	{
		1796	struct drm_i915_private *dev_priv = ring->dev->dev_private;
		1797
		1798	/* The status page is offset 0 from the default context object
		1799	* in LRC mode. */
		1800	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj);
		1801	ring->status_page.page_addr =
		1802	kmap(sg_page(default_ctx_obj->pages->sgl));
		1803	ring->status_page.obj = default_ctx_obj;
		1804
		1805	I915_WRITE(RING_HWS_PGA(ring->mmio_base),
		1806	(u32)ring->status_page.gfx_addr);
		1807	POSTING_READ(RING_HWS_PGA(ring->mmio_base));
		1808	}
		1809
		1810	/**
		1811	* intel_lr_context_deferred_create() - create the LRC specific bits of a context
		1812	* @ctx: LR context to create.
		1813	* @ring: engine to be used with the context.
		1814	*
		1815	* This function can be called more than once, with different engines, if we plan
		1816	* to use the context with them. The context backing objects and the ringbuffers
		1817	* (specially the ringbuffer backing objects) suck a lot of memory up, and that's why
		1818	* the creation is a deferred call: it's better to make sure first that we need to use
		1819	* a given ring with the context.
		1820	*
		1821	* Return: non-zero on error.
		1822	*/
		1823	int intel_lr_context_deferred_create(struct intel_context *ctx,
		1824	struct intel_engine_cs *ring)
		1825	{
		1826	const bool is_global_default_ctx = (ctx == ring->default_context);
		1827	struct drm_device *dev = ring->dev;
		1828	struct drm_i915_gem_object *ctx_obj;
		1829	uint32_t context_size;
		1830	struct intel_ringbuffer *ringbuf;
		1831	int ret;
		1832
		1833	WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL);
		1834	if (ctx->engine[ring->id].state)
		1835	return 0;
		1836
		1837	context_size = round_up(get_lr_context_size(ring), 4096);
		1838
		1839	ctx_obj = i915_gem_alloc_context_obj(dev, context_size);
		1840	if (IS_ERR(ctx_obj)) {
		1841	ret = PTR_ERR(ctx_obj);
		1842	DRM_DEBUG_DRIVER("Alloc LRC backing obj failed: %d\n", ret);
		1843	return ret;
		1844	}
		1845
		1846	if (is_global_default_ctx) {
		1847	ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
		1848	if (ret) {
		1849	DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n",
		1850	ret);
		1851	drm_gem_object_unreference(&ctx_obj->base);
		1852	return ret;
		1853	}
		1854	}
		1855
		1856	ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
		1857	if (!ringbuf) {
		1858	DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
		1859	ring->name);
		1860	ret = -ENOMEM;
		1861	goto error_unpin_ctx;
		1862	}
		1863
		1864	ringbuf->ring = ring;
		1865	ringbuf->FIXME_lrc_ctx = ctx;
		1866
		1867	ringbuf->size = 32 * PAGE_SIZE;
		1868	ringbuf->effective_size = ringbuf->size;
		1869	ringbuf->head = 0;
		1870	ringbuf->tail = 0;
		1871	ringbuf->space = ringbuf->size;
		1872	ringbuf->last_retired_head = -1;
		1873
		1874	if (ringbuf->obj == NULL) {
		1875	ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
		1876	if (ret) {
		1877	DRM_DEBUG_DRIVER(
		1878	"Failed to allocate ringbuffer obj %s: %d\n",
		1879	ring->name, ret);
		1880	goto error_free_rbuf;
		1881	}
		1882
		1883	if (is_global_default_ctx) {
		1884	ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
		1885	if (ret) {
		1886	DRM_ERROR(
		1887	"Failed to pin and map ringbuffer %s: %d\n",
		1888	ring->name, ret);
		1889	goto error_destroy_rbuf;
		1890	}
		1891	}
		1892
		1893	}
		1894
		1895	ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf);
		1896	if (ret) {
		1897	DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
		1898	goto error;
		1899	}
		1900
		1901	ctx->engine[ring->id].ringbuf = ringbuf;
		1902	ctx->engine[ring->id].state = ctx_obj;
		1903
		1904	if (ctx == ring->default_context)
		1905	lrc_setup_hardware_status_page(ring, ctx_obj);
		1906
		1907	if (ring->id == RCS && !ctx->rcs_initialized) {
		1908	if (ring->init_context) {
		1909	ret = ring->init_context(ring, ctx);
		1910	if (ret)
		1911	DRM_ERROR("ring init context: %d\n", ret);
		1912	}
		1913
		1914	ret = intel_lr_context_render_state_init(ring, ctx);
		1915	if (ret) {
		1916	DRM_ERROR("Init render state failed: %d\n", ret);
		1917	ctx->engine[ring->id].ringbuf = NULL;
		1918	ctx->engine[ring->id].state = NULL;
		1919	goto error;
		1920	}
		1921	ctx->rcs_initialized = true;
		1922	}
		1923
		1924	return 0;
		1925
		1926	error:
		1927	if (is_global_default_ctx)
		1928	intel_unpin_ringbuffer_obj(ringbuf);
		1929	error_destroy_rbuf:
		1930	intel_destroy_ringbuffer_obj(ringbuf);
		1931	error_free_rbuf:
		1932	kfree(ringbuf);
		1933	error_unpin_ctx:
		1934	if (is_global_default_ctx)
		1935	i915_gem_object_ggtt_unpin(ctx_obj);
		1936	drm_gem_object_unreference(&ctx_obj->base);
		1937	return ret;
		1938	}

Subversion Repositories Kolibri OS

(root)/drivers/video/drm/i915/intel_lrc.c @ 6084 – Rev 5354