Subversion Repositories Kolibri OS

Rev

Rev 6660 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
5354 serge 1
/*
2
 * Copyright © 2014 Intel Corporation
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 * IN THE SOFTWARE.
22
 *
23
 * Authors:
24
 *    Ben Widawsky 
25
 *    Michel Thierry 
26
 *    Thomas Daniel 
27
 *    Oscar Mateo 
28
 *
29
 */
30
 
31
/**
32
 * DOC: Logical Rings, Logical Ring Contexts and Execlists
33
 *
34
 * Motivation:
35
 * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
36
 * These expanded contexts enable a number of new abilities, especially
37
 * "Execlists" (also implemented in this file).
38
 *
39
 * One of the main differences with the legacy HW contexts is that logical
40
 * ring contexts incorporate many more things to the context's state, like
41
 * PDPs or ringbuffer control registers:
42
 *
43
 * The reason why PDPs are included in the context is straightforward: as
44
 * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
45
 * contained there means you don't need to do a ppgtt->switch_mm yourself,
46
 * instead, the GPU will do it for you on the context switch.
47
 *
48
 * But, what about the ringbuffer control registers (head, tail, etc..)?
49
 * shouldn't we just need a set of those per engine command streamer? This is
50
 * where the name "Logical Rings" starts to make sense: by virtualizing the
51
 * rings, the engine cs shifts to a new "ring buffer" with every context
52
 * switch. When you want to submit a workload to the GPU you: A) choose your
53
 * context, B) find its appropriate virtualized ring, C) write commands to it
54
 * and then, finally, D) tell the GPU to switch to that context.
55
 *
56
 * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
57
 * to a context is via a context execution list, ergo "Execlists".
58
 *
59
 * LRC implementation:
60
 * Regarding the creation of contexts, we have:
61
 *
62
 * - One global default context.
63
 * - One local default context for each opened fd.
64
 * - One local extra context for each context create ioctl call.
65
 *
66
 * Now that ringbuffers belong per-context (and not per-engine, like before)
67
 * and that contexts are uniquely tied to a given engine (and not reusable,
68
 * like before) we need:
69
 *
70
 * - One ringbuffer per-engine inside each context.
71
 * - One backing object per-engine inside each context.
72
 *
73
 * The global default context starts its life with these new objects fully
74
 * allocated and populated. The local default context for each opened fd is
75
 * more complex, because we don't know at creation time which engine is going
76
 * to use them. To handle this, we have implemented a deferred creation of LR
77
 * contexts:
78
 *
79
 * The local context starts its life as a hollow or blank holder, that only
80
 * gets populated for a given engine once we receive an execbuffer. If later
81
 * on we receive another execbuffer ioctl for the same context but a different
82
 * engine, we allocate/populate a new ringbuffer and context backing object and
83
 * so on.
84
 *
85
 * Finally, regarding local contexts created using the ioctl call: as they are
86
 * only allowed with the render ring, we can allocate & populate them right
87
 * away (no need to defer anything, at least for now).
88
 *
89
 * Execlists implementation:
90
 * Execlists are the new method by which, on gen8+ hardware, workloads are
91
 * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
92
 * This method works as follows:
93
 *
94
 * When a request is committed, its commands (the BB start and any leading or
95
 * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
96
 * for the appropriate context. The tail pointer in the hardware context is not
97
 * updated at this time, but instead, kept by the driver in the ringbuffer
98
 * structure. A structure representing this request is added to a request queue
99
 * for the appropriate engine: this structure contains a copy of the context's
100
 * tail after the request was written to the ring buffer and a pointer to the
101
 * context itself.
102
 *
103
 * If the engine's request queue was empty before the request was added, the
104
 * queue is processed immediately. Otherwise the queue will be processed during
105
 * a context switch interrupt. In any case, elements on the queue will get sent
106
 * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
107
 * globally unique 20-bits submission ID.
108
 *
109
 * When execution of a request completes, the GPU updates the context status
110
 * buffer with a context complete event and generates a context switch interrupt.
111
 * During the interrupt handling, the driver examines the events in the buffer:
112
 * for each context complete event, if the announced ID matches that on the head
113
 * of the request queue, then that request is retired and removed from the queue.
114
 *
115
 * After processing, if any requests were retired and the queue is not empty
116
 * then a new execution list can be submitted. The two requests at the front of
117
 * the queue are next to be submitted but since a context may not occur twice in
118
 * an execution list, if subsequent requests have the same ID as the first then
119
 * the two requests must be combined. This is done simply by discarding requests
120
 * at the head of the queue until either only one request is left (in which case
121
 * we use a NULL second context) or the first two requests have unique IDs.
122
 *
123
 * By always executing the first two requests in the queue the driver ensures
124
 * that the GPU is kept as busy as possible. In the case where a single context
125
 * completes but a second context is still executing, the request for this second
126
 * context will be at the head of the queue when we remove the first one. This
127
 * request will then be resubmitted along with a new request for a different context,
128
 * which will cause the hardware to continue executing the second request and queue
129
 * the new request (the GPU detects the condition of a context getting preempted
130
 * with the same context and optimizes the context switch flow by not doing
131
 * preemption, but just sampling the new tail pointer).
132
 *
133
 */
134
 
135
#include <drm/drmP.h>
#include <drm/i915_drm.h>
6937 serge 137
#include "i915_drv.h"
6084 serge 138
#include "intel_mocs.h"
5354 serge 139
 
140
/*
 * Logical ring context sizes, expressed as a whole number of pages.
 * NOTE(review): going by the names, the first two are for the render engine
 * on gen9/gen8 and the last one for every other engine - confirm at the
 * users of these macros.
 */
#define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
#define GEN8_LR_CONTEXT_OTHER_SIZE	(2 * PAGE_SIZE)
143
 
144
/* Execlist status bit masks. */
#define RING_EXECLIST_QFULL		(1 << 2)
#define RING_EXECLIST1_VALID		(1 << 3)
#define RING_EXECLIST0_VALID		(1 << 4)
#define RING_EXECLIST_ACTIVE_STATUS	(3 << 14)
#define RING_EXECLIST1_ACTIVE		(1 << 17)
#define RING_EXECLIST0_ACTIVE		(1 << 18)

/*
 * Event bits found in the low dword of a context status buffer entry;
 * intel_lrc_irq_handler() tests them to decide what happened to a context.
 */
#define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
#define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
#define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
#define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
#define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)
157
 
158
/*
 * Indices into the uint32_t register-state array of a logical ring context.
 * As ASSIGN_CTX_REG() shows, slot [pos] holds the register offset and slot
 * [pos + 1] holds the value, which is why users write to "CTX_xxx + 1".
 */
#define CTX_LRI_HEADER_0		0x01
#define CTX_CONTEXT_CONTROL		0x02
#define CTX_RING_HEAD			0x04
#define CTX_RING_TAIL			0x06
#define CTX_RING_BUFFER_START		0x08
#define CTX_RING_BUFFER_CONTROL		0x0a
#define CTX_BB_HEAD_U			0x0c
#define CTX_BB_HEAD_L			0x0e
#define CTX_BB_STATE			0x10
#define CTX_SECOND_BB_HEAD_U		0x12
#define CTX_SECOND_BB_HEAD_L		0x14
#define CTX_SECOND_BB_STATE		0x16
#define CTX_BB_PER_CTX_PTR		0x18
#define CTX_RCS_INDIRECT_CTX		0x1a
#define CTX_RCS_INDIRECT_CTX_OFFSET	0x1c

#define CTX_LRI_HEADER_1		0x21
#define CTX_CTX_TIMESTAMP		0x22
#define CTX_PDP3_UDW			0x24
#define CTX_PDP3_LDW			0x26
#define CTX_PDP2_UDW			0x28
#define CTX_PDP2_LDW			0x2a
#define CTX_PDP1_UDW			0x2c
#define CTX_PDP1_LDW			0x2e
#define CTX_PDP0_UDW			0x30
#define CTX_PDP0_LDW			0x32

#define CTX_LRI_HEADER_2		0x41
#define CTX_R_PWR_CLK_STATE		0x42
#define CTX_GPGPU_CSR_BASE_ADDRESS	0x44
186
 
187
/* Flag bits OR-ed into the context descriptor by intel_lr_context_descriptor(). */
#define GEN8_CTX_VALID			(1 << 0)
#define GEN8_CTX_FORCE_PD_RESTORE	(1 << 1)
#define GEN8_CTX_FORCE_RESTORE		(1 << 2)
#define GEN8_CTX_L3LLC_COHERENT		(1 << 5)
#define GEN8_CTX_PRIVILEGE		(1 << 8)
6084 serge 192
 
6937 serge 193
/*
 * ASSIGN_CTX_REG() - fill one (register offset, value) pair of the context
 * register state.
 * @reg_state: uint32_t array holding the register state.
 * @pos: index of the pair (one of the CTX_* indices).
 * @reg: mmio register whose offset is stored in slot [pos].
 * @val: value stored in slot [pos + 1].
 *
 * Note that @pos is evaluated twice, so it must be free of side effects.
 */
#define ASSIGN_CTX_REG(reg_state, pos, reg, val) do { \
	(reg_state)[(pos)+0] = i915_mmio_reg_offset(reg); \
	(reg_state)[(pos)+1] = (val); \
} while (0)

/*
 * ASSIGN_CTX_PDP() - store the DMA address of page directory @n in the
 * CTX_PDP<n>_UDW / CTX_PDP<n>_LDW value slots.
 * @ppgtt: PPGTT that owns the page directories.
 * @reg_state: uint32_t array holding the register state.
 * @n: literal 0..3; it is token-pasted into the CTX_PDP<n>_* names, so it
 *     cannot be a variable or an expression.
 *
 * The address is read once into a local and both macro arguments are
 * parenthesised, so arbitrary expressions are safe for @ppgtt and @reg_state.
 */
#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do {		\
	const u64 _addr = i915_page_dir_dma_addr((ppgtt), (n));	\
	(reg_state)[CTX_PDP ## n ## _UDW+1] = upper_32_bits(_addr); \
	(reg_state)[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \
} while (0)

/*
 * ASSIGN_CTX_PML4() - store the DMA address of the PML4 in the PDP0 value
 * slots.
 * @ppgtt: PPGTT that owns the PML4.
 * @reg_state: uint32_t array holding the register state.
 *
 * px_dma() is evaluated once and the arguments are parenthesised, so that
 * @ppgtt may be any pointer expression.
 */
#define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \
	const u64 _addr = px_dma(&(ppgtt)->pml4); \
	(reg_state)[CTX_PDP0_UDW + 1] = upper_32_bits(_addr); \
	(reg_state)[CTX_PDP0_LDW + 1] = lower_32_bits(_addr); \
} while (0)
6084 serge 208
 
5354 serge 209
/* Addressing modes that can be encoded in the context descriptor. */
enum {
	ADVANCED_CONTEXT	= 0,
	LEGACY_32B_CONTEXT	= 1,
	ADVANCED_AD_CONTEXT	= 2,
	LEGACY_64B_CONTEXT	= 3
};

/* Bit position of the addressing mode inside the context descriptor. */
#define GEN8_CTX_ADDRESSING_MODE_SHIFT	3

/* Legacy 64b mode with full 48-bit PPGTT, legacy 32b mode otherwise. */
#define GEN8_CTX_ADDRESSING_MODE(dev) \
	(USES_FULL_48BIT_PPGTT(dev) ? LEGACY_64B_CONTEXT : LEGACY_32B_CONTEXT)

enum {
	FAULT_AND_HANG		= 0,
	FAULT_AND_HALT		= 1,	/* Debug only */
	FAULT_AND_STREAM	= 2,
	FAULT_AND_CONTINUE	= 3	/* Unsupported */
};

/* The execlists context ID lives in the upper dword of the descriptor. */
#define GEN8_CTX_ID_SHIFT			32
#define CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT	0x17
5354 serge 227
 
6084 serge 228
/* Forward declarations of file-local helpers. */
static int intel_lr_context_pin(struct drm_i915_gem_request *rq);
static void
lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
			       struct drm_i915_gem_object *default_ctx_obj);
5354 serge 231
 
6084 serge 232
 
5354 serge 233
/**
234
 * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
235
 * @dev: DRM device.
236
 * @enable_execlists: value of i915.enable_execlists module parameter.
237
 *
238
 * Only certain platforms support Execlists (the prerequisites being
6084 serge 239
 * support for Logical Ring Contexts and Aliasing PPGTT or better).
5354 serge 240
 *
241
 * Return: 1 if Execlists is supported and has to be enabled.
242
 */
243
int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists)
244
{
245
	WARN_ON(i915.enable_ppgtt == -1);
246
 
6084 serge 247
	/* On platforms with execlist available, vGPU will only
248
	 * support execlist mode, no ring buffer mode.
249
	 */
250
	if (HAS_LOGICAL_RING_CONTEXTS(dev) && intel_vgpu_active(dev))
251
		return 1;
252
 
5354 serge 253
	if (INTEL_INFO(dev)->gen >= 9)
254
		return 1;
255
 
256
	if (enable_execlists == 0)
257
		return 0;
258
 
259
	if (HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev) &&
260
	    i915.use_mmio_flip >= 0)
261
		return 1;
262
 
263
	return 0;
264
}
265
 
266
/**
267
 * intel_execlists_ctx_id() - get the Execlists Context ID
268
 * @ctx_obj: Logical Ring Context backing object.
269
 *
270
 * Do not confuse with ctx->id! Unfortunately we have a name overload
271
 * here: the old context ID we pass to userspace as a handler so that
272
 * they can refer to a context, and the new context ID we pass to the
273
 * ELSP so that the GPU can inform us of the context status via
274
 * interrupts.
275
 *
276
 * Return: 20-bits globally unique context ID.
277
 */
278
u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
279
{
6084 serge 280
	u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
281
			LRC_PPHWSP_PN * PAGE_SIZE;
5354 serge 282
 
283
	/* LRCA is required to be 4K aligned so the more significant 20 bits
284
	 * are globally unique */
285
	return lrca >> 12;
286
}
287
 
6084 serge 288
static bool disable_lite_restore_wa(struct intel_engine_cs *ring)
5354 serge 289
{
6084 serge 290
	struct drm_device *dev = ring->dev;
291
 
6937 serge 292
	return (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
293
		IS_BXT_REVID(dev, 0, BXT_REVID_A1)) &&
6084 serge 294
	       (ring->id == VCS || ring->id == VCS2);
295
}
296
 
297
uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
298
				     struct intel_engine_cs *ring)
299
{
300
	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
5354 serge 301
	uint64_t desc;
6084 serge 302
	uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
303
			LRC_PPHWSP_PN * PAGE_SIZE;
5354 serge 304
 
305
	WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
306
 
307
	desc = GEN8_CTX_VALID;
6084 serge 308
	desc |= GEN8_CTX_ADDRESSING_MODE(dev) << GEN8_CTX_ADDRESSING_MODE_SHIFT;
309
	if (IS_GEN8(ctx_obj->base.dev))
310
		desc |= GEN8_CTX_L3LLC_COHERENT;
5354 serge 311
	desc |= GEN8_CTX_PRIVILEGE;
312
	desc |= lrca;
313
	desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
314
 
315
	/* TODO: WaDisableLiteRestore when we start using semaphore
316
	 * signalling between Command Streamers */
317
	/* desc |= GEN8_CTX_FORCE_RESTORE; */
318
 
6084 serge 319
	/* WaEnableForceRestoreInCtxtDescForVCS:skl */
320
	/* WaEnableForceRestoreInCtxtDescForVCS:bxt */
321
	if (disable_lite_restore_wa(ring))
322
		desc |= GEN8_CTX_FORCE_RESTORE;
323
 
5354 serge 324
	return desc;
325
}
326
 
6084 serge 327
static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
328
				 struct drm_i915_gem_request *rq1)
5354 serge 329
{
6084 serge 330
 
331
	struct intel_engine_cs *ring = rq0->ring;
5354 serge 332
	struct drm_device *dev = ring->dev;
333
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 334
	uint64_t desc[2];
5354 serge 335
 
6084 serge 336
	if (rq1) {
337
		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->ring);
338
		rq1->elsp_submitted++;
5354 serge 339
	} else {
6084 serge 340
		desc[1] = 0;
5354 serge 341
	}
342
 
6084 serge 343
	desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->ring);
344
	rq0->elsp_submitted++;
5354 serge 345
 
6084 serge 346
	/* You must always write both descriptors in the order below. */
347
	spin_lock(&dev_priv->uncore.lock);
348
	intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);
349
	I915_WRITE_FW(RING_ELSP(ring), upper_32_bits(desc[1]));
350
	I915_WRITE_FW(RING_ELSP(ring), lower_32_bits(desc[1]));
5354 serge 351
 
6084 serge 352
	I915_WRITE_FW(RING_ELSP(ring), upper_32_bits(desc[0]));
353
	/* The context is automatically loaded after the following */
354
	I915_WRITE_FW(RING_ELSP(ring), lower_32_bits(desc[0]));
5354 serge 355
 
6084 serge 356
	/* ELSP is a wo register, use another nearby reg for posting */
357
	POSTING_READ_FW(RING_EXECLIST_STATUS_LO(ring));
358
	intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
359
	spin_unlock(&dev_priv->uncore.lock);
5354 serge 360
}
361
 
6084 serge 362
static int execlists_update_context(struct drm_i915_gem_request *rq)
5354 serge 363
{
6084 serge 364
	struct intel_engine_cs *ring = rq->ring;
365
	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
366
	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
367
	struct drm_i915_gem_object *rb_obj = rq->ringbuf->obj;
5354 serge 368
	struct page *page;
369
	uint32_t *reg_state;
370
 
6084 serge 371
	BUG_ON(!ctx_obj);
372
	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj));
373
	WARN_ON(!i915_gem_obj_is_pinned(rb_obj));
374
 
6937 serge 375
	page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
5354 serge 376
	reg_state = kmap_atomic(page);
377
 
6084 serge 378
	reg_state[CTX_RING_TAIL+1] = rq->tail;
379
	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj);
5354 serge 380
 
6084 serge 381
	if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
382
		/* True 32b PPGTT with dynamic page allocation: update PDP
383
		 * registers and point the unallocated PDPs to scratch page.
384
		 * PML4 is allocated during ppgtt init, so this is not needed
385
		 * in 48-bit mode.
386
		 */
387
		ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
388
		ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
389
		ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
390
		ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
391
	}
392
 
5354 serge 393
	kunmap_atomic(reg_state);
394
 
395
	return 0;
396
}
397
 
6084 serge 398
/*
 * Bring the context images of @rq0 and (when not NULL) @rq1 up to date,
 * then hand both requests to the ELSP.
 */
static void execlists_submit_requests(struct drm_i915_gem_request *rq0,
				      struct drm_i915_gem_request *rq1)
{
	execlists_update_context(rq0);
	if (rq1) {
		execlists_update_context(rq1);
	}

	execlists_elsp_write(rq0, rq1);
}
408
 
409
static void execlists_context_unqueue(struct intel_engine_cs *ring)
410
{
6084 serge 411
	struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
412
	struct drm_i915_gem_request *cursor = NULL, *tmp = NULL;
5354 serge 413
 
414
	assert_spin_locked(&ring->execlist_lock);
415
 
6084 serge 416
	/*
417
	 * If irqs are not active generate a warning as batches that finish
418
	 * without the irqs may get lost and a GPU Hang may occur.
419
	 */
420
	WARN_ON(!intel_irqs_enabled(ring->dev->dev_private));
421
 
5354 serge 422
	if (list_empty(&ring->execlist_queue))
423
		return;
424
 
425
	/* Try to read in pairs */
426
	list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue,
427
				 execlist_link) {
428
		if (!req0) {
429
			req0 = cursor;
430
		} else if (req0->ctx == cursor->ctx) {
431
			/* Same ctx: ignore first request, as second request
432
			 * will update tail past first request's workload */
433
			cursor->elsp_submitted = req0->elsp_submitted;
434
			list_del(&req0->execlist_link);
435
			list_add_tail(&req0->execlist_link,
436
				&ring->execlist_retired_req_list);
437
			req0 = cursor;
438
		} else {
439
			req1 = cursor;
440
			break;
441
		}
442
	}
443
 
6084 serge 444
	if (IS_GEN8(ring->dev) || IS_GEN9(ring->dev)) {
445
		/*
446
		 * WaIdleLiteRestore: make sure we never cause a lite
447
		 * restore with HEAD==TAIL
448
		 */
449
		if (req0->elsp_submitted) {
450
			/*
451
			 * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL
452
			 * as we resubmit the request. See gen8_emit_request()
453
			 * for where we prepare the padding after the end of the
454
			 * request.
455
			 */
456
			struct intel_ringbuffer *ringbuf;
457
 
458
			ringbuf = req0->ctx->engine[ring->id].ringbuf;
459
			req0->tail += 8;
460
			req0->tail &= ringbuf->size - 1;
461
		}
462
	}
463
 
5354 serge 464
	WARN_ON(req1 && req1->elsp_submitted);
465
 
6084 serge 466
	execlists_submit_requests(req0, req1);
5354 serge 467
}
468
 
469
static bool execlists_check_remove_request(struct intel_engine_cs *ring,
470
					   u32 request_id)
471
{
6084 serge 472
	struct drm_i915_gem_request *head_req;
5354 serge 473
 
474
	assert_spin_locked(&ring->execlist_lock);
475
 
476
	head_req = list_first_entry_or_null(&ring->execlist_queue,
6084 serge 477
					    struct drm_i915_gem_request,
5354 serge 478
					    execlist_link);
479
 
480
	if (head_req != NULL) {
481
		struct drm_i915_gem_object *ctx_obj =
482
				head_req->ctx->engine[ring->id].state;
483
		if (intel_execlists_ctx_id(ctx_obj) == request_id) {
484
			WARN(head_req->elsp_submitted == 0,
485
			     "Never submitted head request\n");
486
 
487
			if (--head_req->elsp_submitted <= 0) {
488
				list_del(&head_req->execlist_link);
489
				list_add_tail(&head_req->execlist_link,
490
					&ring->execlist_retired_req_list);
491
				return true;
492
			}
493
		}
494
	}
495
 
496
	return false;
497
}
498
 
499
/**
6084 serge 500
 * intel_lrc_irq_handler() - handle Context Switch interrupts
5354 serge 501
 * @ring: Engine Command Streamer to handle.
502
 *
503
 * Check the unread Context Status Buffers and manage the submission of new
504
 * contexts to the ELSP accordingly.
505
 */
6084 serge 506
void intel_lrc_irq_handler(struct intel_engine_cs *ring)
5354 serge 507
{
508
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
509
	u32 status_pointer;
510
	u8 read_pointer;
511
	u8 write_pointer;
6084 serge 512
	u32 status = 0;
5354 serge 513
	u32 status_id;
514
	u32 submit_contexts = 0;
515
 
516
	status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
517
 
518
	read_pointer = ring->next_context_status_buffer;
6084 serge 519
	write_pointer = status_pointer & GEN8_CSB_PTR_MASK;
5354 serge 520
	if (read_pointer > write_pointer)
6084 serge 521
		write_pointer += GEN8_CSB_ENTRIES;
5354 serge 522
 
523
	spin_lock(&ring->execlist_lock);
524
 
525
	while (read_pointer < write_pointer) {
526
		read_pointer++;
6084 serge 527
		status = I915_READ(RING_CONTEXT_STATUS_BUF_LO(ring, read_pointer % GEN8_CSB_ENTRIES));
528
		status_id = I915_READ(RING_CONTEXT_STATUS_BUF_HI(ring, read_pointer % GEN8_CSB_ENTRIES));
5354 serge 529
 
6084 serge 530
		if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
531
			continue;
532
 
5354 serge 533
		if (status & GEN8_CTX_STATUS_PREEMPTED) {
534
			if (status & GEN8_CTX_STATUS_LITE_RESTORE) {
535
				if (execlists_check_remove_request(ring, status_id))
536
					WARN(1, "Lite Restored request removed from queue\n");
537
			} else
538
				WARN(1, "Preemption without Lite Restore\n");
539
		}
540
 
541
		 if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
542
		     (status & GEN8_CTX_STATUS_ELEMENT_SWITCH)) {
543
			if (execlists_check_remove_request(ring, status_id))
544
				submit_contexts++;
545
		}
546
	}
547
 
6084 serge 548
	if (disable_lite_restore_wa(ring)) {
549
		/* Prevent a ctx to preempt itself */
550
		if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) &&
551
		    (submit_contexts != 0))
552
			execlists_context_unqueue(ring);
553
	} else if (submit_contexts != 0) {
5354 serge 554
		execlists_context_unqueue(ring);
6084 serge 555
	}
5354 serge 556
 
557
	spin_unlock(&ring->execlist_lock);
558
 
559
	WARN(submit_contexts > 2, "More than two context complete events?\n");
6084 serge 560
	ring->next_context_status_buffer = write_pointer % GEN8_CSB_ENTRIES;
5354 serge 561
 
562
	I915_WRITE(RING_CONTEXT_STATUS_PTR(ring),
6084 serge 563
		   _MASKED_FIELD(GEN8_CSB_PTR_MASK << 8,
564
				 ((u32)ring->next_context_status_buffer &
565
				  GEN8_CSB_PTR_MASK) << 8));
5354 serge 566
}
567
 
6084 serge 568
static int execlists_context_queue(struct drm_i915_gem_request *request)
5354 serge 569
{
6084 serge 570
	struct intel_engine_cs *ring = request->ring;
571
	struct drm_i915_gem_request *cursor;
5354 serge 572
	int num_elements = 0;
573
 
6084 serge 574
	if (request->ctx != ring->default_context)
575
		intel_lr_context_pin(request);
5354 serge 576
 
6084 serge 577
	i915_gem_request_reference(request);
5354 serge 578
 
6084 serge 579
	spin_lock_irq(&ring->execlist_lock);
5354 serge 580
 
581
	list_for_each_entry(cursor, &ring->execlist_queue, execlist_link)
582
		if (++num_elements > 2)
583
			break;
584
 
585
	if (num_elements > 2) {
6084 serge 586
		struct drm_i915_gem_request *tail_req;
5354 serge 587
 
588
		tail_req = list_last_entry(&ring->execlist_queue,
6084 serge 589
					   struct drm_i915_gem_request,
5354 serge 590
					   execlist_link);
591
 
6084 serge 592
		if (request->ctx == tail_req->ctx) {
5354 serge 593
			WARN(tail_req->elsp_submitted != 0,
594
				"More than 2 already-submitted reqs queued\n");
595
			list_del(&tail_req->execlist_link);
596
			list_add_tail(&tail_req->execlist_link,
597
				&ring->execlist_retired_req_list);
598
		}
599
	}
600
 
6084 serge 601
	list_add_tail(&request->execlist_link, &ring->execlist_queue);
5354 serge 602
	if (num_elements == 0)
603
		execlists_context_unqueue(ring);
604
 
6084 serge 605
	spin_unlock_irq(&ring->execlist_lock);
5354 serge 606
 
607
	return 0;
608
}
609
 
6084 serge 610
static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
5354 serge 611
{
6084 serge 612
	struct intel_engine_cs *ring = req->ring;
5354 serge 613
	uint32_t flush_domains;
614
	int ret;
615
 
616
	flush_domains = 0;
617
	if (ring->gpu_caches_dirty)
618
		flush_domains = I915_GEM_GPU_DOMAINS;
619
 
6084 serge 620
	ret = ring->emit_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
5354 serge 621
	if (ret)
622
		return ret;
623
 
624
	ring->gpu_caches_dirty = false;
625
	return 0;
626
}
627
 
6084 serge 628
static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
5354 serge 629
				 struct list_head *vmas)
630
{
6084 serge 631
	const unsigned other_rings = ~intel_ring_flag(req->ring);
5354 serge 632
	struct i915_vma *vma;
633
	uint32_t flush_domains = 0;
634
	bool flush_chipset = false;
635
	int ret;
636
 
637
	list_for_each_entry(vma, vmas, exec_list) {
638
		struct drm_i915_gem_object *obj = vma->obj;
639
 
6084 serge 640
		if (obj->active & other_rings) {
641
			ret = i915_gem_object_sync(obj, req->ring, &req);
642
			if (ret)
643
				return ret;
644
		}
5354 serge 645
 
646
		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
647
			flush_chipset |= i915_gem_clflush_object(obj, false);
648
 
649
		flush_domains |= obj->base.write_domain;
650
	}
651
 
652
	if (flush_domains & I915_GEM_DOMAIN_GTT)
653
		wmb();
654
 
655
	/* Unconditionally invalidate gpu caches and ensure that we do flush
656
	 * any residual writes from the previous batch.
657
	 */
6084 serge 658
	return logical_ring_invalidate_all_caches(req);
5354 serge 659
}
660
 
6084 serge 661
int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
662
{
663
	int ret;
664
 
665
	request->ringbuf = request->ctx->engine[request->ring->id].ringbuf;
666
 
667
	if (request->ctx != request->ring->default_context) {
668
		ret = intel_lr_context_pin(request);
669
		if (ret)
670
			return ret;
671
	}
672
 
673
	return 0;
674
}
675
 
676
static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
677
				       int bytes)
678
{
679
	struct intel_ringbuffer *ringbuf = req->ringbuf;
680
	struct intel_engine_cs *ring = req->ring;
681
	struct drm_i915_gem_request *target;
682
	unsigned space;
683
	int ret;
684
 
685
	if (intel_ring_space(ringbuf) >= bytes)
686
		return 0;
687
 
688
	/* The whole point of reserving space is to not wait! */
689
	WARN_ON(ringbuf->reserved_in_use);
690
 
691
	list_for_each_entry(target, &ring->request_list, list) {
692
		/*
693
		 * The request queue is per-engine, so can contain requests
694
		 * from multiple ringbuffers. Here, we must ignore any that
695
		 * aren't from the ringbuffer we're considering.
696
		 */
697
		if (target->ringbuf != ringbuf)
698
			continue;
699
 
700
		/* Would completion of this request free enough space? */
701
		space = __intel_ring_space(target->postfix, ringbuf->tail,
702
					   ringbuf->size);
703
		if (space >= bytes)
704
			break;
705
	}
706
 
707
	if (WARN_ON(&target->list == &ring->request_list))
708
		return -ENOSPC;
709
 
710
	ret = i915_wait_request(target);
711
	if (ret)
712
		return ret;
713
 
714
	ringbuf->space = space;
715
	return 0;
716
}
717
 
718
/*
719
 * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload
720
 * @request: Request to advance the logical ringbuffer of.
721
 *
722
 * The tail is updated in our logical ringbuffer struct, not in the actual context. What
723
 * really happens during submission is that the context and current tail will be placed
724
 * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
725
 * point, the tail *inside* the context is updated and the ELSP written to.
726
 */
727
static void
728
intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
729
{
730
	struct intel_engine_cs *ring = request->ring;
731
	struct drm_i915_private *dev_priv = request->i915;
732
 
733
	intel_logical_ring_advance(request->ringbuf);
734
 
735
	request->tail = request->ringbuf->tail;
736
 
737
	if (intel_ring_stopped(ring))
738
		return;
739
 
740
	if (dev_priv->guc.execbuf_client)
741
		i915_guc_submit(dev_priv->guc.execbuf_client, request);
742
	else
743
		execlists_context_queue(request);
744
}
745
 
746
static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
747
{
748
	uint32_t __iomem *virt;
749
	int rem = ringbuf->size - ringbuf->tail;
750
 
751
	virt = ringbuf->virtual_start + ringbuf->tail;
752
	rem /= 4;
753
	while (rem--)
754
		iowrite32(MI_NOOP, virt++);
755
 
756
	ringbuf->tail = 0;
757
	intel_ring_update_space(ringbuf);
758
}
759
 
760
static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes)
761
{
762
	struct intel_ringbuffer *ringbuf = req->ringbuf;
763
	int remain_usable = ringbuf->effective_size - ringbuf->tail;
764
	int remain_actual = ringbuf->size - ringbuf->tail;
765
	int ret, total_bytes, wait_bytes = 0;
766
	bool need_wrap = false;
767
 
768
	if (ringbuf->reserved_in_use)
769
		total_bytes = bytes;
770
	else
771
		total_bytes = bytes + ringbuf->reserved_size;
772
 
773
	if (unlikely(bytes > remain_usable)) {
774
		/*
775
		 * Not enough space for the basic request. So need to flush
776
		 * out the remainder and then wait for base + reserved.
777
		 */
778
		wait_bytes = remain_actual + total_bytes;
779
		need_wrap = true;
780
	} else {
781
		if (unlikely(total_bytes > remain_usable)) {
782
			/*
783
			 * The base request will fit but the reserved space
6660 serge 784
			 * falls off the end. So don't need an immediate wrap
785
			 * and only need to effectively wait for the reserved
786
			 * size space from the start of ringbuffer.
6084 serge 787
			 */
788
			wait_bytes = remain_actual + ringbuf->reserved_size;
789
		} else if (total_bytes > ringbuf->space) {
790
			/* No wrapping required, just waiting. */
791
			wait_bytes = total_bytes;
792
		}
793
	}
794
 
795
	if (wait_bytes) {
796
		ret = logical_ring_wait_for_space(req, wait_bytes);
797
		if (unlikely(ret))
798
			return ret;
799
 
800
		if (need_wrap)
801
			__wrap_ring_buffer(ringbuf);
802
	}
803
 
804
	return 0;
805
}
806
 
5354 serge 807
/**
6084 serge 808
 * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands
809
 *
810
 * @req: The request to start some new work for
811
 * @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
812
 *
813
 * The ringbuffer might not be ready to accept the commands right away (maybe it needs to
814
 * be wrapped, or wait a bit for the tail to be updated). This function takes care of that
815
 * and also preallocates a request (every workload submission is still mediated through
816
 * requests, same as it did with legacy ringbuffer submission).
817
 *
818
 * Return: non-zero if the ringbuffer is not ready to be written to.
819
 */
820
int intel_logical_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
821
{
822
	struct drm_i915_private *dev_priv;
823
	int ret;
824
 
825
	WARN_ON(req == NULL);
826
	dev_priv = req->ring->dev->dev_private;
827
 
828
	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
829
				   dev_priv->mm.interruptible);
830
	if (ret)
831
		return ret;
832
 
833
	ret = logical_ring_prepare(req, num_dwords * sizeof(uint32_t));
834
	if (ret)
835
		return ret;
836
 
837
	req->ringbuf->space -= num_dwords * sizeof(uint32_t);
838
	return 0;
839
}
840
 
841
int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request)
842
{
843
	/*
844
	 * The first call merely notes the reserve request and is common for
845
	 * all back ends. The subsequent localised _begin() call actually
846
	 * ensures that the reservation is available. Without the begin, if
847
	 * the request creator immediately submitted the request without
848
	 * adding any commands to it then there might not actually be
849
	 * sufficient room for the submission commands.
850
	 */
851
	intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
852
 
853
	return intel_logical_ring_begin(request, 0);
854
}
855
 
856
/**
5354 serge 857
 * execlists_submission() - submit a batchbuffer for execution, Execlists style
858
 * @dev: DRM device.
859
 * @file: DRM file.
860
 * @ring: Engine Command Streamer to submit to.
861
 * @ctx: Context to employ for this submission.
862
 * @args: execbuffer call arguments.
863
 * @vmas: list of vmas.
864
 * @batch_obj: the batchbuffer to submit.
865
 * @exec_start: batchbuffer start virtual address pointer.
6084 serge 866
 * @dispatch_flags: translated execbuffer call flags.
5354 serge 867
 *
868
 * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts
869
 * away the submission details of the execbuffer ioctl call.
870
 *
871
 * Return: non-zero if the submission fails.
872
 */
6084 serge 873
int intel_execlists_submission(struct i915_execbuffer_params *params,
5354 serge 874
			       struct drm_i915_gem_execbuffer2 *args,
6084 serge 875
			       struct list_head *vmas)
5354 serge 876
{
6084 serge 877
	struct drm_device       *dev = params->dev;
878
	struct intel_engine_cs  *ring = params->ring;
5354 serge 879
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 880
	struct intel_ringbuffer *ringbuf = params->ctx->engine[ring->id].ringbuf;
881
	u64 exec_start;
5354 serge 882
	int instp_mode;
883
	u32 instp_mask;
884
	int ret;
885
 
886
	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
887
	instp_mask = I915_EXEC_CONSTANTS_MASK;
888
	switch (instp_mode) {
889
	case I915_EXEC_CONSTANTS_REL_GENERAL:
890
	case I915_EXEC_CONSTANTS_ABSOLUTE:
891
	case I915_EXEC_CONSTANTS_REL_SURFACE:
892
		if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
893
			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
894
			return -EINVAL;
895
		}
896
 
897
		if (instp_mode != dev_priv->relative_constants_mode) {
898
			if (instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
899
				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
900
				return -EINVAL;
901
			}
902
 
903
			/* The HW changed the meaning on this bit on gen6 */
904
			instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
905
		}
906
		break;
907
	default:
908
		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
909
		return -EINVAL;
910
	}
911
 
912
	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
913
		DRM_DEBUG("sol reset is gen7 only\n");
914
		return -EINVAL;
915
	}
916
 
6084 serge 917
	ret = execlists_move_to_gpu(params->request, vmas);
5354 serge 918
	if (ret)
919
		return ret;
920
 
921
	if (ring == &dev_priv->ring[RCS] &&
922
	    instp_mode != dev_priv->relative_constants_mode) {
6084 serge 923
		ret = intel_logical_ring_begin(params->request, 4);
5354 serge 924
		if (ret)
925
			return ret;
926
 
927
		intel_logical_ring_emit(ringbuf, MI_NOOP);
928
		intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
6937 serge 929
		intel_logical_ring_emit_reg(ringbuf, INSTPM);
5354 serge 930
		intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode);
931
		intel_logical_ring_advance(ringbuf);
932
 
933
		dev_priv->relative_constants_mode = instp_mode;
934
	}
935
 
6084 serge 936
	exec_start = params->batch_obj_vm_offset +
937
		     args->batch_start_offset;
938
 
939
	ret = ring->emit_bb_start(params->request, exec_start, params->dispatch_flags);
5354 serge 940
	if (ret)
941
		return ret;
942
 
6084 serge 943
	trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
5354 serge 944
 
6084 serge 945
	i915_gem_execbuffer_move_to_active(vmas, params->request);
946
	i915_gem_execbuffer_retire_commands(params);
947
 
5354 serge 948
	return 0;
949
}
950
 
951
void intel_execlists_retire_requests(struct intel_engine_cs *ring)
952
{
6084 serge 953
	struct drm_i915_gem_request *req, *tmp;
5354 serge 954
	struct list_head retired_list;
955
 
956
	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
957
	if (list_empty(&ring->execlist_retired_req_list))
958
		return;
959
 
960
	INIT_LIST_HEAD(&retired_list);
6084 serge 961
	spin_lock_irq(&ring->execlist_lock);
5354 serge 962
	list_replace_init(&ring->execlist_retired_req_list, &retired_list);
6084 serge 963
	spin_unlock_irq(&ring->execlist_lock);
5354 serge 964
 
965
	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
966
		struct intel_context *ctx = req->ctx;
967
		struct drm_i915_gem_object *ctx_obj =
968
				ctx->engine[ring->id].state;
969
 
970
		if (ctx_obj && (ctx != ring->default_context))
6084 serge 971
			intel_lr_context_unpin(req);
5354 serge 972
		list_del(&req->execlist_link);
6084 serge 973
		i915_gem_request_unreference(req);
5354 serge 974
	}
975
}
976
 
977
void intel_logical_ring_stop(struct intel_engine_cs *ring)
978
{
979
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
980
	int ret;
981
 
982
	if (!intel_ring_initialized(ring))
983
		return;
984
 
985
	ret = intel_ring_idle(ring);
986
	if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
987
		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
988
			  ring->name, ret);
989
 
990
	/* TODO: Is this correct with Execlists enabled? */
991
	I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
992
	if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
993
		DRM_ERROR("%s :timed out trying to stop ring\n", ring->name);
994
		return;
995
	}
996
	I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
997
}
998
 
6084 serge 999
int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
5354 serge 1000
{
6084 serge 1001
	struct intel_engine_cs *ring = req->ring;
5354 serge 1002
	int ret;
1003
 
1004
	if (!ring->gpu_caches_dirty)
1005
		return 0;
1006
 
6084 serge 1007
	ret = ring->emit_flush(req, 0, I915_GEM_GPU_DOMAINS);
5354 serge 1008
	if (ret)
1009
		return ret;
1010
 
1011
	ring->gpu_caches_dirty = false;
1012
	return 0;
1013
}
1014
 
6084 serge 1015
static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
1016
		struct drm_i915_gem_object *ctx_obj,
1017
		struct intel_ringbuffer *ringbuf)
5354 serge 1018
{
6084 serge 1019
	struct drm_device *dev = ring->dev;
1020
	struct drm_i915_private *dev_priv = dev->dev_private;
1021
	int ret = 0;
5354 serge 1022
 
6084 serge 1023
	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
1024
	ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
1025
			PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
1026
	if (ret)
1027
		return ret;
5354 serge 1028
 
6084 serge 1029
	ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
1030
	if (ret)
1031
		goto unpin_ctx_obj;
5354 serge 1032
 
6084 serge 1033
	ctx_obj->dirty = true;
1034
 
1035
	/* Invalidate GuC TLB. */
1036
	if (i915.enable_guc_submission)
1037
		I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
1038
 
1039
	return ret;
1040
 
1041
unpin_ctx_obj:
1042
	i915_gem_object_ggtt_unpin(ctx_obj);
1043
 
1044
	return ret;
5354 serge 1045
}
1046
 
6084 serge 1047
static int intel_lr_context_pin(struct drm_i915_gem_request *rq)
5354 serge 1048
{
1049
	int ret = 0;
6084 serge 1050
	struct intel_engine_cs *ring = rq->ring;
1051
	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
1052
	struct intel_ringbuffer *ringbuf = rq->ringbuf;
5354 serge 1053
 
6084 serge 1054
	if (rq->ctx->engine[ring->id].pin_count++ == 0) {
1055
		ret = intel_lr_context_do_pin(ring, ctx_obj, ringbuf);
5354 serge 1056
		if (ret)
6084 serge 1057
			goto reset_pin_count;
5354 serge 1058
	}
1059
	return ret;
1060
 
6084 serge 1061
reset_pin_count:
1062
	rq->ctx->engine[ring->id].pin_count = 0;
5354 serge 1063
	return ret;
1064
}
1065
 
6084 serge 1066
void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
5354 serge 1067
{
6084 serge 1068
	struct intel_engine_cs *ring = rq->ring;
1069
	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
1070
	struct intel_ringbuffer *ringbuf = rq->ringbuf;
5354 serge 1071
 
1072
	if (ctx_obj) {
1073
		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
6084 serge 1074
		if (--rq->ctx->engine[ring->id].pin_count == 0) {
5354 serge 1075
			intel_unpin_ringbuffer_obj(ringbuf);
1076
			i915_gem_object_ggtt_unpin(ctx_obj);
1077
		}
1078
	}
1079
}
1080
 
6084 serge 1081
static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
5354 serge 1082
{
6084 serge 1083
	int ret, i;
1084
	struct intel_engine_cs *ring = req->ring;
1085
	struct intel_ringbuffer *ringbuf = req->ringbuf;
1086
	struct drm_device *dev = ring->dev;
1087
	struct drm_i915_private *dev_priv = dev->dev_private;
1088
	struct i915_workarounds *w = &dev_priv->workarounds;
5354 serge 1089
 
6084 serge 1090
	if (WARN_ON_ONCE(w->count == 0))
5354 serge 1091
		return 0;
1092
 
6084 serge 1093
	ring->gpu_caches_dirty = true;
1094
	ret = logical_ring_flush_all_caches(req);
1095
	if (ret)
1096
		return ret;
5354 serge 1097
 
6084 serge 1098
	ret = intel_logical_ring_begin(req, w->count * 2 + 2);
1099
	if (ret)
1100
		return ret;
5354 serge 1101
 
6084 serge 1102
	intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(w->count));
1103
	for (i = 0; i < w->count; i++) {
6937 serge 1104
		intel_logical_ring_emit_reg(ringbuf, w->reg[i].addr);
6084 serge 1105
		intel_logical_ring_emit(ringbuf, w->reg[i].value);
1106
	}
1107
	intel_logical_ring_emit(ringbuf, MI_NOOP);
5354 serge 1108
 
6084 serge 1109
	intel_logical_ring_advance(ringbuf);
5354 serge 1110
 
6084 serge 1111
	ring->gpu_caches_dirty = true;
1112
	ret = logical_ring_flush_all_caches(req);
1113
	if (ret)
1114
		return ret;
5354 serge 1115
 
6084 serge 1116
	return 0;
5354 serge 1117
}
1118
 
6084 serge 1119
/*
 * Store @cmd at batch[index] and post-increment @index (which must be an
 * lvalue). The batch is limited to one page of DWORDs.
 *
 * NOTE: on overflow this macro WARNs and executes "return -ENOSPC" in the
 * *calling* function, so it may only be used inside functions returning int.
 */
#define wa_ctx_emit(batch, index, cmd)					\
	do {								\
		int wa_slot_ = (index)++;				\
		if (WARN_ON(wa_slot_ >= (PAGE_SIZE / sizeof(uint32_t)))) \
			return -ENOSPC;					\
		(batch)[wa_slot_] = (cmd);				\
	} while (0)

/* Same as wa_ctx_emit(), but emits the MMIO offset of register @reg. */
#define wa_ctx_emit_reg(batch, index, reg)				\
	wa_ctx_emit((batch), (index), i915_mmio_reg_offset(reg))
6084 serge 1130
 
1131
/*
1132
 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
1133
 * PIPE_CONTROL instruction. This is required for the flush to happen correctly
1134
 * but there is a slight complication as this is applied in WA batch where the
1135
 * values are only initialized once so we cannot take register value at the
1136
 * beginning and reuse it further; hence we save its value to memory, upload a
1137
 * constant value with bit21 set and then we restore it back with the saved value.
1138
 * To simplify the WA, a constant value is formed by using the default value
1139
 * of this register. This shouldn't be a problem because we are only modifying
1140
 * it for a short period and this batch in non-premptible. We can ofcourse
1141
 * use additional instructions that read the actual value of the register
1142
 * at that time and set our bit of interest but it makes the WA complicated.
1143
 *
1144
 * This WA is also required for Gen9 so extracting as a function avoids
1145
 * code duplication.
1146
 */
1147
static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *ring,
1148
						uint32_t *const batch,
1149
						uint32_t index)
5354 serge 1150
{
6084 serge 1151
	uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES);
5354 serge 1152
 
6084 serge 1153
	/*
1154
	 * WaDisableLSQCROPERFforOCL:skl
1155
	 * This WA is implemented in skl_init_clock_gating() but since
1156
	 * this batch updates GEN8_L3SQCREG4 with default value we need to
1157
	 * set this bit here to retain the WA during flush.
1158
	 */
6937 serge 1159
	if (IS_SKL_REVID(ring->dev, 0, SKL_REVID_E0))
6084 serge 1160
		l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS;
5354 serge 1161
 
6084 serge 1162
	wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
1163
				   MI_SRM_LRM_GLOBAL_GTT));
6937 serge 1164
	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
6084 serge 1165
	wa_ctx_emit(batch, index, ring->scratch.gtt_offset + 256);
1166
	wa_ctx_emit(batch, index, 0);
5354 serge 1167
 
6084 serge 1168
	wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
6937 serge 1169
	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
6084 serge 1170
	wa_ctx_emit(batch, index, l3sqc4_flush);
5354 serge 1171
 
6084 serge 1172
	wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
1173
	wa_ctx_emit(batch, index, (PIPE_CONTROL_CS_STALL |
1174
				   PIPE_CONTROL_DC_FLUSH_ENABLE));
1175
	wa_ctx_emit(batch, index, 0);
1176
	wa_ctx_emit(batch, index, 0);
1177
	wa_ctx_emit(batch, index, 0);
1178
	wa_ctx_emit(batch, index, 0);
5354 serge 1179
 
6084 serge 1180
	wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 |
1181
				   MI_SRM_LRM_GLOBAL_GTT));
6937 serge 1182
	wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
6084 serge 1183
	wa_ctx_emit(batch, index, ring->scratch.gtt_offset + 256);
1184
	wa_ctx_emit(batch, index, 0);
5354 serge 1185
 
6084 serge 1186
	return index;
1187
}
5354 serge 1188
 
6084 serge 1189
/*
 * Record the start of a WA batch: round @offset up to @start_alignment,
 * store it in wa_ctx->offset and return the stored value.
 */
static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx,
				    uint32_t offset,
				    uint32_t start_alignment)
{
	wa_ctx->offset = ALIGN(offset, start_alignment);

	return wa_ctx->offset;
}
1195
 
1196
static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx,
1197
			     uint32_t offset,
1198
			     uint32_t size_alignment)
1199
{
1200
	wa_ctx->size = offset - wa_ctx->offset;
1201
 
1202
	WARN(wa_ctx->size % size_alignment,
1203
	     "wa_ctx_bb failed sanity checks: size %d is not aligned to %d\n",
1204
	     wa_ctx->size, size_alignment);
5354 serge 1205
	return 0;
1206
}
1207
 
6084 serge 1208
/**
1209
 * gen8_init_indirectctx_bb() - initialize indirect ctx batch with WA
1210
 *
1211
 * @ring: only applicable for RCS
1212
 * @wa_ctx: structure representing wa_ctx
1213
 *  offset: specifies start of the batch, should be cache-aligned. This is updated
1214
 *    with the offset value received as input.
1215
 *  size: size of the batch in DWORDS but HW expects in terms of cachelines
1216
 * @batch: page in which WA are loaded
1217
 * @offset: This field specifies the start of the batch, it should be
1218
 *  cache-aligned otherwise it is adjusted accordingly.
1219
 *  Typically we only have one indirect_ctx and per_ctx batch buffer which are
1220
 *  initialized at the beginning and shared across all contexts but this field
1221
 *  helps us to have multiple batches at different offsets and select them based
1222
 *  on a criteria. At the moment this batch always start at the beginning of the page
1223
 *  and at this point we don't have multiple wa_ctx batch buffers.
1224
 *
1225
 *  The number of WA applied are not known at the beginning; we use this field
1226
 *  to return the no of DWORDS written.
1227
 *
1228
 *  It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
1229
 *  so it adds NOOPs as padding to make it cacheline aligned.
1230
 *  MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
1231
 *  makes a complete batch buffer.
1232
 *
1233
 * Return: non-zero if we exceed the PAGE_SIZE limit.
1234
 */
1235
 
1236
static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
1237
				    struct i915_wa_ctx_bb *wa_ctx,
1238
				    uint32_t *const batch,
1239
				    uint32_t *offset)
5354 serge 1240
{
6084 serge 1241
	uint32_t scratch_addr;
1242
	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
5354 serge 1243
 
6084 serge 1244
	/* WaDisableCtxRestoreArbitration:bdw,chv */
1245
	wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
5354 serge 1246
 
6084 serge 1247
	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
1248
	if (IS_BROADWELL(ring->dev)) {
1249
		int rc = gen8_emit_flush_coherentl3_wa(ring, batch, index);
1250
		if (rc < 0)
1251
			return rc;
1252
		index = rc;
1253
	}
5354 serge 1254
 
6084 serge 1255
	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
1256
	/* Actual scratch location is at 128 bytes offset */
1257
	scratch_addr = ring->scratch.gtt_offset + 2*CACHELINE_BYTES;
5354 serge 1258
 
6084 serge 1259
	wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
1260
	wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
1261
				   PIPE_CONTROL_GLOBAL_GTT_IVB |
1262
				   PIPE_CONTROL_CS_STALL |
1263
				   PIPE_CONTROL_QW_WRITE));
1264
	wa_ctx_emit(batch, index, scratch_addr);
1265
	wa_ctx_emit(batch, index, 0);
1266
	wa_ctx_emit(batch, index, 0);
1267
	wa_ctx_emit(batch, index, 0);
5354 serge 1268
 
6084 serge 1269
	/* Pad to end of cacheline */
1270
	while (index % CACHELINE_DWORDS)
1271
		wa_ctx_emit(batch, index, MI_NOOP);
5354 serge 1272
 
6084 serge 1273
	/*
1274
	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
1275
	 * execution depends on the length specified in terms of cache lines
1276
	 * in the register CTX_RCS_INDIRECT_CTX
1277
	 */
5354 serge 1278
 
6084 serge 1279
	return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
1280
}
5354 serge 1281
 
6084 serge 1282
/**
1283
 * gen8_init_perctx_bb() - initialize per ctx batch with WA
1284
 *
1285
 * @ring: only applicable for RCS
1286
 * @wa_ctx: structure representing wa_ctx
1287
 *  offset: specifies start of the batch, should be cache-aligned.
1288
 *  size: size of the batch in DWORDS but HW expects in terms of cachelines
1289
 * @batch: page in which WA are loaded
1290
 * @offset: This field specifies the start of this batch.
1291
 *   This batch is started immediately after indirect_ctx batch. Since we ensure
1292
 *   that indirect_ctx ends on a cacheline this batch is aligned automatically.
1293
 *
1294
 *   The number of DWORDS written are returned using this field.
1295
 *
1296
 *  This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding
1297
 *  to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant.
1298
 */
1299
static int gen8_init_perctx_bb(struct intel_engine_cs *ring,
1300
			       struct i915_wa_ctx_bb *wa_ctx,
1301
			       uint32_t *const batch,
1302
			       uint32_t *offset)
1303
{
1304
	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
1305
 
1306
	/* WaDisableCtxRestoreArbitration:bdw,chv */
1307
	wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);
1308
 
1309
	wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
1310
 
1311
	return wa_ctx_end(wa_ctx, *offset = index, 1);
5354 serge 1312
}
1313
 
6084 serge 1314
static int gen9_init_indirectctx_bb(struct intel_engine_cs *ring,
1315
				    struct i915_wa_ctx_bb *wa_ctx,
1316
				    uint32_t *const batch,
1317
				    uint32_t *offset)
5354 serge 1318
{
6084 serge 1319
	int ret;
1320
	struct drm_device *dev = ring->dev;
1321
	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
5354 serge 1322
 
6084 serge 1323
	/* WaDisableCtxRestoreArbitration:skl,bxt */
6937 serge 1324
	if (IS_SKL_REVID(dev, 0, SKL_REVID_D0) ||
1325
	    IS_BXT_REVID(dev, 0, BXT_REVID_A1))
6084 serge 1326
		wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
5354 serge 1327
 
6084 serge 1328
	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt */
1329
	ret = gen8_emit_flush_coherentl3_wa(ring, batch, index);
1330
	if (ret < 0)
1331
		return ret;
1332
	index = ret;
5354 serge 1333
 
6084 serge 1334
	/* Pad to end of cacheline */
1335
	while (index % CACHELINE_DWORDS)
1336
		wa_ctx_emit(batch, index, MI_NOOP);
5354 serge 1337
 
6084 serge 1338
	return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
5354 serge 1339
}
1340
 
6084 serge 1341
static int gen9_init_perctx_bb(struct intel_engine_cs *ring,
1342
			       struct i915_wa_ctx_bb *wa_ctx,
1343
			       uint32_t *const batch,
1344
			       uint32_t *offset)
5354 serge 1345
{
6084 serge 1346
	struct drm_device *dev = ring->dev;
1347
	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
5354 serge 1348
 
6084 serge 1349
	/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
6937 serge 1350
	if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
1351
	    IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
6084 serge 1352
		wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
6937 serge 1353
		wa_ctx_emit_reg(batch, index, GEN9_SLICE_COMMON_ECO_CHICKEN0);
6084 serge 1354
		wa_ctx_emit(batch, index,
1355
			    _MASKED_BIT_ENABLE(DISABLE_PIXEL_MASK_CAMMING));
1356
		wa_ctx_emit(batch, index, MI_NOOP);
5354 serge 1357
	}
1358
 
6084 serge 1359
	/* WaDisableCtxRestoreArbitration:skl,bxt */
6937 serge 1360
	if (IS_SKL_REVID(dev, 0, SKL_REVID_D0) ||
1361
	    IS_BXT_REVID(dev, 0, BXT_REVID_A1))
6084 serge 1362
		wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);
5354 serge 1363
 
6084 serge 1364
	wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
1365
 
1366
	return wa_ctx_end(wa_ctx, *offset = index, 1);
5354 serge 1367
}
1368
 
6084 serge 1369
/*
 * Allocate the GEM object backing the ring's WA batch page and pin it into
 * the GGTT.
 *
 * @ring: engine whose wa_ctx.obj is set up
 * @size: requested size in bytes (rounded up to a page)
 *
 * Return: 0 on success, -ENOMEM if the allocation fails, or the error from
 * i915_gem_obj_ggtt_pin(). On any failure ring->wa_ctx.obj is left NULL.
 */
static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *ring, u32 size)
{
	int ret;

	ring->wa_ctx.obj = i915_gem_alloc_object(ring->dev, PAGE_ALIGN(size));
	if (!ring->wa_ctx.obj) {
		DRM_DEBUG_DRIVER("alloc LRC WA ctx backing obj failed.\n");
		return -ENOMEM;
	}

	ret = i915_gem_obj_ggtt_pin(ring->wa_ctx.obj, PAGE_SIZE, 0);
	if (ret) {
		DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
				 ret);
		drm_gem_object_unreference(&ring->wa_ctx.obj->base);
		/*
		 * Do not leave a pointer to the object we just dropped:
		 * lrc_destroy_wa_ctx_obj() treats a non-NULL wa_ctx.obj as a
		 * pinned object and would unpin and unreference it again.
		 */
		ring->wa_ctx.obj = NULL;
		return ret;
	}

	return 0;
}
1389
 
6084 serge 1390
static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *ring)
5354 serge 1391
{
6084 serge 1392
	if (ring->wa_ctx.obj) {
1393
		i915_gem_object_ggtt_unpin(ring->wa_ctx.obj);
1394
		drm_gem_object_unreference(&ring->wa_ctx.obj->base);
1395
		ring->wa_ctx.obj = NULL;
1396
	}
1397
}
5354 serge 1398
 
6084 serge 1399
static int intel_init_workaround_bb(struct intel_engine_cs *ring)
1400
{
1401
	int ret;
1402
	uint32_t *batch;
1403
	uint32_t offset;
1404
	struct page *page;
1405
	struct i915_ctx_workarounds *wa_ctx = &ring->wa_ctx;
1406
 
1407
	WARN_ON(ring->id != RCS);
1408
 
1409
	/* update this when WA for higher Gen are added */
1410
	if (INTEL_INFO(ring->dev)->gen > 9) {
1411
		DRM_ERROR("WA batch buffer is not initialized for Gen%d\n",
1412
			  INTEL_INFO(ring->dev)->gen);
5354 serge 1413
		return 0;
6084 serge 1414
	}
5354 serge 1415
 
6084 serge 1416
	/* some WA perform writes to scratch page, ensure it is valid */
1417
	if (ring->scratch.obj == NULL) {
1418
		DRM_ERROR("scratch page not allocated for %s\n", ring->name);
1419
		return -EINVAL;
1420
	}
5354 serge 1421
 
6084 serge 1422
	ret = lrc_setup_wa_ctx_obj(ring, PAGE_SIZE);
1423
	if (ret) {
1424
		DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
5354 serge 1425
		return ret;
6084 serge 1426
	}
5354 serge 1427
 
6937 serge 1428
	page = i915_gem_object_get_dirty_page(wa_ctx->obj, 0);
6084 serge 1429
	batch = kmap_atomic(page);
1430
	offset = 0;
1431
 
1432
	if (INTEL_INFO(ring->dev)->gen == 8) {
1433
		ret = gen8_init_indirectctx_bb(ring,
1434
					       &wa_ctx->indirect_ctx,
1435
					       batch,
1436
					       &offset);
1437
		if (ret)
1438
			goto out;
1439
 
1440
		ret = gen8_init_perctx_bb(ring,
1441
					  &wa_ctx->per_ctx,
1442
					  batch,
1443
					  &offset);
1444
		if (ret)
1445
			goto out;
1446
	} else if (INTEL_INFO(ring->dev)->gen == 9) {
1447
		ret = gen9_init_indirectctx_bb(ring,
1448
					       &wa_ctx->indirect_ctx,
1449
					       batch,
1450
					       &offset);
1451
		if (ret)
1452
			goto out;
1453
 
1454
		ret = gen9_init_perctx_bb(ring,
1455
					  &wa_ctx->per_ctx,
1456
					  batch,
1457
					  &offset);
1458
		if (ret)
1459
			goto out;
5354 serge 1460
	}
1461
 
6084 serge 1462
out:
1463
	kunmap_atomic(batch);
5354 serge 1464
	if (ret)
6084 serge 1465
		lrc_destroy_wa_ctx_obj(ring);
5354 serge 1466
 
6084 serge 1467
	return ret;
5354 serge 1468
}
1469
 
1470
static int gen8_init_common_ring(struct intel_engine_cs *ring)
1471
{
1472
	struct drm_device *dev = ring->dev;
1473
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 1474
	u8 next_context_status_buffer_hw;
5354 serge 1475
 
6084 serge 1476
	lrc_setup_hardware_status_page(ring,
1477
				ring->default_context->engine[ring->id].state);
1478
 
5354 serge 1479
	I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
1480
	I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff);
1481
 
1482
	I915_WRITE(RING_MODE_GEN7(ring),
1483
		   _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
1484
		   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
1485
	POSTING_READ(RING_MODE_GEN7(ring));
6084 serge 1486
 
1487
	/*
1488
	 * Instead of resetting the Context Status Buffer (CSB) read pointer to
1489
	 * zero, we need to read the write pointer from hardware and use its
1490
	 * value because "this register is power context save restored".
1491
	 * Effectively, these states have been observed:
1492
	 *
1493
	 *      | Suspend-to-idle (freeze) | Suspend-to-RAM (mem) |
1494
	 * BDW  | CSB regs not reset       | CSB regs reset       |
1495
	 * CHT  | CSB regs not reset       | CSB regs not reset   |
1496
	 */
1497
	next_context_status_buffer_hw = (I915_READ(RING_CONTEXT_STATUS_PTR(ring))
1498
						   & GEN8_CSB_PTR_MASK);
1499
 
1500
	/*
1501
	 * When the CSB registers are reset (also after power-up / gpu reset),
1502
	 * CSB write pointer is set to all 1's, which is not valid, use '5' in
1503
	 * this special case, so the first element read is CSB[0].
1504
	 */
1505
	if (next_context_status_buffer_hw == GEN8_CSB_PTR_MASK)
1506
		next_context_status_buffer_hw = (GEN8_CSB_ENTRIES - 1);
1507
 
1508
	ring->next_context_status_buffer = next_context_status_buffer_hw;
5354 serge 1509
	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", ring->name);
1510
 
1511
	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
1512
 
1513
	return 0;
1514
}
1515
 
1516
static int gen8_init_render_ring(struct intel_engine_cs *ring)
1517
{
1518
	struct drm_device *dev = ring->dev;
1519
	struct drm_i915_private *dev_priv = dev->dev_private;
1520
	int ret;
1521
 
1522
	ret = gen8_init_common_ring(ring);
1523
	if (ret)
1524
		return ret;
1525
 
1526
	/* We need to disable the AsyncFlip performance optimisations in order
1527
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
1528
	 * programmed to '1' on all products.
1529
	 *
1530
	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
1531
	 */
1532
	I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
1533
 
6084 serge 1534
	I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
1535
 
1536
	return init_workarounds_ring(ring);
1537
}
1538
 
1539
/*
 * Gen9 render-ring init: common ring setup followed by
 * init_workarounds_ring(). Returns the first non-zero error.
 */
static int gen9_init_render_ring(struct intel_engine_cs *ring)
{
	int ret = gen8_init_common_ring(ring);

	return ret ? ret : init_workarounds_ring(ring);
}
1549
 
6084 serge 1550
static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
5354 serge 1551
{
6084 serge 1552
	struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt;
1553
	struct intel_engine_cs *ring = req->ring;
1554
	struct intel_ringbuffer *ringbuf = req->ringbuf;
1555
	const int num_lri_cmds = GEN8_LEGACY_PDPES * 2;
1556
	int i, ret;
1557
 
1558
	ret = intel_logical_ring_begin(req, num_lri_cmds * 2 + 2);
1559
	if (ret)
1560
		return ret;
1561
 
1562
	intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(num_lri_cmds));
1563
	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
1564
		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
1565
 
6937 serge 1566
		intel_logical_ring_emit_reg(ringbuf, GEN8_RING_PDP_UDW(ring, i));
6084 serge 1567
		intel_logical_ring_emit(ringbuf, upper_32_bits(pd_daddr));
6937 serge 1568
		intel_logical_ring_emit_reg(ringbuf, GEN8_RING_PDP_LDW(ring, i));
6084 serge 1569
		intel_logical_ring_emit(ringbuf, lower_32_bits(pd_daddr));
1570
	}
1571
 
1572
	intel_logical_ring_emit(ringbuf, MI_NOOP);
1573
	intel_logical_ring_advance(ringbuf);
1574
 
1575
	return 0;
1576
}
1577
 
1578
static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
1579
			      u64 offset, unsigned dispatch_flags)
1580
{
1581
	struct intel_ringbuffer *ringbuf = req->ringbuf;
1582
	bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE);
5354 serge 1583
	int ret;
1584
 
6084 serge 1585
	/* Don't rely in hw updating PDPs, specially in lite-restore.
1586
	 * Ideally, we should set Force PD Restore in ctx descriptor,
1587
	 * but we can't. Force Restore would be a second option, but
1588
	 * it is unsafe in case of lite-restore (because the ctx is
1589
	 * not idle). PML4 is allocated during ppgtt init so this is
1590
	 * not needed in 48-bit.*/
1591
	if (req->ctx->ppgtt &&
1592
	    (intel_ring_flag(req->ring) & req->ctx->ppgtt->pd_dirty_rings)) {
1593
		if (!USES_FULL_48BIT_PPGTT(req->i915) &&
1594
		    !intel_vgpu_active(req->i915->dev)) {
1595
			ret = intel_logical_ring_emit_pdps(req);
1596
			if (ret)
1597
				return ret;
1598
		}
1599
 
1600
		req->ctx->ppgtt->pd_dirty_rings &= ~intel_ring_flag(req->ring);
1601
	}
1602
 
1603
	ret = intel_logical_ring_begin(req, 4);
5354 serge 1604
	if (ret)
1605
		return ret;
1606
 
1607
	/* FIXME(BDW): Address space and security selectors. */
6084 serge 1608
	intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 |
1609
				(ppgtt<<8) |
1610
				(dispatch_flags & I915_DISPATCH_RS ?
1611
				 MI_BATCH_RESOURCE_STREAMER : 0));
5354 serge 1612
	intel_logical_ring_emit(ringbuf, lower_32_bits(offset));
1613
	intel_logical_ring_emit(ringbuf, upper_32_bits(offset));
1614
	intel_logical_ring_emit(ringbuf, MI_NOOP);
1615
	intel_logical_ring_advance(ringbuf);
1616
 
1617
	return 0;
1618
}
1619
 
1620
static bool gen8_logical_ring_get_irq(struct intel_engine_cs *ring)
1621
{
1622
	struct drm_device *dev = ring->dev;
1623
	struct drm_i915_private *dev_priv = dev->dev_private;
1624
	unsigned long flags;
1625
 
1626
	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
1627
		return false;
1628
 
1629
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1630
	if (ring->irq_refcount++ == 0) {
1631
		I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
1632
		POSTING_READ(RING_IMR(ring->mmio_base));
1633
	}
1634
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1635
 
1636
	return true;
1637
}
1638
 
1639
static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring)
1640
{
1641
	struct drm_device *dev = ring->dev;
1642
	struct drm_i915_private *dev_priv = dev->dev_private;
1643
	unsigned long flags;
1644
 
1645
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1646
	if (--ring->irq_refcount == 0) {
1647
		I915_WRITE_IMR(ring, ~ring->irq_keep_mask);
1648
		POSTING_READ(RING_IMR(ring->mmio_base));
1649
	}
1650
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1651
}
1652
 
6084 serge 1653
static int gen8_emit_flush(struct drm_i915_gem_request *request,
5354 serge 1654
			   u32 invalidate_domains,
1655
			   u32 unused)
1656
{
6084 serge 1657
	struct intel_ringbuffer *ringbuf = request->ringbuf;
5354 serge 1658
	struct intel_engine_cs *ring = ringbuf->ring;
1659
	struct drm_device *dev = ring->dev;
1660
	struct drm_i915_private *dev_priv = dev->dev_private;
1661
	uint32_t cmd;
1662
	int ret;
1663
 
6084 serge 1664
	ret = intel_logical_ring_begin(request, 4);
5354 serge 1665
	if (ret)
1666
		return ret;
1667
 
1668
	cmd = MI_FLUSH_DW + 1;
1669
 
6084 serge 1670
	/* We always require a command barrier so that subsequent
1671
	 * commands, such as breadcrumb interrupts, are strictly ordered
1672
	 * wrt the contents of the write cache being flushed to memory
1673
	 * (and thus being coherent from the CPU).
1674
	 */
1675
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
1676
 
1677
	if (invalidate_domains & I915_GEM_GPU_DOMAINS) {
1678
		cmd |= MI_INVALIDATE_TLB;
1679
		if (ring == &dev_priv->ring[VCS])
1680
			cmd |= MI_INVALIDATE_BSD;
5354 serge 1681
	}
1682
 
1683
	intel_logical_ring_emit(ringbuf, cmd);
1684
	intel_logical_ring_emit(ringbuf,
1685
				I915_GEM_HWS_SCRATCH_ADDR |
1686
				MI_FLUSH_DW_USE_GTT);
1687
	intel_logical_ring_emit(ringbuf, 0); /* upper addr */
1688
	intel_logical_ring_emit(ringbuf, 0); /* value */
1689
	intel_logical_ring_advance(ringbuf);
1690
 
1691
	return 0;
1692
}
1693
 
6084 serge 1694
static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
5354 serge 1695
				  u32 invalidate_domains,
1696
				  u32 flush_domains)
1697
{
6084 serge 1698
	struct intel_ringbuffer *ringbuf = request->ringbuf;
5354 serge 1699
	struct intel_engine_cs *ring = ringbuf->ring;
1700
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
6084 serge 1701
	bool vf_flush_wa;
5354 serge 1702
	u32 flags = 0;
1703
	int ret;
1704
 
1705
	flags |= PIPE_CONTROL_CS_STALL;
1706
 
1707
	if (flush_domains) {
1708
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
1709
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
6320 serge 1710
		flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
6084 serge 1711
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
5354 serge 1712
	}
1713
 
1714
	if (invalidate_domains) {
1715
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
1716
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
1717
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
1718
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
1719
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
1720
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
1721
		flags |= PIPE_CONTROL_QW_WRITE;
1722
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
1723
	}
1724
 
6084 serge 1725
	/*
1726
	 * On GEN9+ Before VF_CACHE_INVALIDATE we need to emit a NULL pipe
1727
	 * control.
1728
	 */
1729
	vf_flush_wa = INTEL_INFO(ring->dev)->gen >= 9 &&
1730
		      flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
1731
 
1732
	ret = intel_logical_ring_begin(request, vf_flush_wa ? 12 : 6);
5354 serge 1733
	if (ret)
1734
		return ret;
1735
 
6084 serge 1736
	if (vf_flush_wa) {
1737
		intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
1738
		intel_logical_ring_emit(ringbuf, 0);
1739
		intel_logical_ring_emit(ringbuf, 0);
1740
		intel_logical_ring_emit(ringbuf, 0);
1741
		intel_logical_ring_emit(ringbuf, 0);
1742
		intel_logical_ring_emit(ringbuf, 0);
1743
	}
1744
 
5354 serge 1745
	intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
1746
	intel_logical_ring_emit(ringbuf, flags);
1747
	intel_logical_ring_emit(ringbuf, scratch_addr);
1748
	intel_logical_ring_emit(ringbuf, 0);
1749
	intel_logical_ring_emit(ringbuf, 0);
1750
	intel_logical_ring_emit(ringbuf, 0);
1751
	intel_logical_ring_advance(ringbuf);
1752
 
1753
	return 0;
1754
}
1755
 
1756
/*
 * Read the seqno stored at I915_GEM_HWS_INDEX of the engine's status page.
 * @lazy_coherency is not used by this variant.
 */
static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
{
	u32 seqno = intel_read_status_page(ring, I915_GEM_HWS_INDEX);

	return seqno;
}
1760
 
1761
/* Store @seqno at I915_GEM_HWS_INDEX of the engine's status page. */
static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
{
	const u32 value = seqno;

	intel_write_status_page(ring, I915_GEM_HWS_INDEX, value);
}
1765
 
6084 serge 1766
/*
 * Read the seqno from the status page, with the BXT A-stepping workaround.
 *
 * On BXT A steppings there is a HW coherency issue whereby the
 * MI_STORE_DATA_IMM storing the completed request's seqno occasionally
 * doesn't invalidate the CPU cache. To work around it, the corresponding
 * cacheline is clflushed whenever the caller wants coherency to be
 * guaranteed (i.e. @lazy_coherency is false). The cacheline is known to be
 * clean at this point, since it is only written in bxt_a_set_seqno(), which
 * also clflushes after the write, so in practice the clflush here acts as an
 * invalidate.
 */
static u32 bxt_a_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
{
	if (lazy_coherency)
		return intel_read_status_page(ring, I915_GEM_HWS_INDEX);

	intel_flush_status_page(ring, I915_GEM_HWS_INDEX);

	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}
1785
 
1786
/*
 * Store @seqno in the status page and flush the written entry afterwards;
 * bxt_a_get_seqno() relies on this flush (see the explanation there).
 */
static void bxt_a_set_seqno(struct intel_engine_cs *ring, u32 seqno)
{
	const int hws_index = I915_GEM_HWS_INDEX;

	intel_write_status_page(ring, hws_index, seqno);
	intel_flush_status_page(ring, hws_index);
}
1793
 
1794
static int gen8_emit_request(struct drm_i915_gem_request *request)
1795
{
1796
	struct intel_ringbuffer *ringbuf = request->ringbuf;
5354 serge 1797
	struct intel_engine_cs *ring = ringbuf->ring;
1798
	u32 cmd;
1799
	int ret;
1800
 
6084 serge 1801
	/*
1802
	 * Reserve space for 2 NOOPs at the end of each request to be
1803
	 * used as a workaround for not being allowed to do lite
1804
	 * restore with HEAD==TAIL (WaIdleLiteRestore).
1805
	 */
1806
	ret = intel_logical_ring_begin(request, 8);
5354 serge 1807
	if (ret)
1808
		return ret;
1809
 
6084 serge 1810
	cmd = MI_STORE_DWORD_IMM_GEN4;
5354 serge 1811
	cmd |= MI_GLOBAL_GTT;
1812
 
1813
	intel_logical_ring_emit(ringbuf, cmd);
1814
	intel_logical_ring_emit(ringbuf,
1815
				(ring->status_page.gfx_addr +
1816
				(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
1817
	intel_logical_ring_emit(ringbuf, 0);
6084 serge 1818
	intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request));
5354 serge 1819
	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
1820
	intel_logical_ring_emit(ringbuf, MI_NOOP);
6084 serge 1821
	intel_logical_ring_advance_and_submit(request);
5354 serge 1822
 
6084 serge 1823
	/*
1824
	 * Here we add two extra NOOPs as padding to avoid
1825
	 * lite restore of a context with HEAD==TAIL.
1826
	 */
1827
	intel_logical_ring_emit(ringbuf, MI_NOOP);
1828
	intel_logical_ring_emit(ringbuf, MI_NOOP);
1829
	intel_logical_ring_advance(ringbuf);
1830
 
5354 serge 1831
	return 0;
1832
}
1833
 
6084 serge 1834
static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req)
1835
{
1836
	struct render_state so;
1837
	int ret;
1838
 
1839
	ret = i915_gem_render_state_prepare(req->ring, &so);
1840
	if (ret)
1841
		return ret;
1842
 
1843
	if (so.rodata == NULL)
1844
		return 0;
1845
 
1846
	ret = req->ring->emit_bb_start(req, so.ggtt_offset,
1847
				       I915_DISPATCH_SECURE);
1848
	if (ret)
1849
		goto out;
1850
 
1851
	ret = req->ring->emit_bb_start(req,
1852
				       (so.ggtt_offset + so.aux_batch_offset),
1853
				       I915_DISPATCH_SECURE);
1854
	if (ret)
1855
		goto out;
1856
 
1857
	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
1858
 
1859
out:
1860
	i915_gem_render_state_fini(&so);
1861
	return ret;
1862
}
1863
 
1864
/*
 * Per-context initialisation for the render engine: emit the workarounds,
 * program the MOCS and emit the render state.
 *
 * Return: 0 on success, or the error from the workaround or render state
 * emission. A MOCS programming failure is only logged.
 */
static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
{
	int err;

	err = intel_logical_ring_workarounds_emit(req);
	if (err)
		return err;

	/*
	 * Failing to program the MOCS is non-fatal: the system will not run
	 * at peak performance, so report the error and carry on.
	 */
	if (intel_rcs_context_init_mocs(req))
		DRM_ERROR("MOCS failed to program: expect performance issues.\n");

	return intel_lr_context_render_state_init(req);
}
1882
 
5354 serge 1883
/**
1884
 * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
1885
 *
1886
 * @ring: Engine Command Streamer.
1887
 *
1888
 */
1889
void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
1890
{
1891
	struct drm_i915_private *dev_priv;
1892
 
1893
	if (!intel_ring_initialized(ring))
1894
		return;
1895
 
1896
	dev_priv = ring->dev->dev_private;
1897
 
6937 serge 1898
	if (ring->buffer) {
5354 serge 1899
	intel_logical_ring_stop(ring);
1900
	WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
6937 serge 1901
	}
5354 serge 1902
 
1903
	if (ring->cleanup)
1904
		ring->cleanup(ring);
1905
 
1906
	i915_cmd_parser_fini_ring(ring);
6084 serge 1907
	i915_gem_batch_pool_fini(&ring->batch_pool);
5354 serge 1908
 
1909
	if (ring->status_page.obj) {
1910
		kunmap(sg_page(ring->status_page.obj->pages->sgl));
1911
		ring->status_page.obj = NULL;
1912
	}
6084 serge 1913
 
1914
	lrc_destroy_wa_ctx_obj(ring);
6937 serge 1915
	ring->dev = NULL;
5354 serge 1916
}
1917
 
1918
static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring)
1919
{
1920
	int ret;
1921
 
1922
	/* Intentionally left blank. */
1923
	ring->buffer = NULL;
1924
 
1925
	ring->dev = dev;
1926
	INIT_LIST_HEAD(&ring->active_list);
1927
	INIT_LIST_HEAD(&ring->request_list);
6084 serge 1928
	i915_gem_batch_pool_init(dev, &ring->batch_pool);
5354 serge 1929
	init_waitqueue_head(&ring->irq_queue);
1930
 
6937 serge 1931
	INIT_LIST_HEAD(&ring->buffers);
5354 serge 1932
	INIT_LIST_HEAD(&ring->execlist_queue);
1933
	INIT_LIST_HEAD(&ring->execlist_retired_req_list);
1934
	spin_lock_init(&ring->execlist_lock);
1935
 
1936
	ret = i915_cmd_parser_init_ring(ring);
1937
	if (ret)
6937 serge 1938
		goto error;
5354 serge 1939
 
6084 serge 1940
	ret = intel_lr_context_deferred_alloc(ring->default_context, ring);
1941
	if (ret)
6937 serge 1942
		goto error;
6084 serge 1943
 
1944
	/* As this is the default context, always pin it */
1945
	ret = intel_lr_context_do_pin(
1946
			ring,
1947
			ring->default_context->engine[ring->id].state,
1948
			ring->default_context->engine[ring->id].ringbuf);
1949
	if (ret) {
1950
		DRM_ERROR(
1951
			"Failed to pin and map ringbuffer %s: %d\n",
1952
			ring->name, ret);
6937 serge 1953
		goto error;
5354 serge 1954
	}
1955
 
6937 serge 1956
	return 0;
1957
 
1958
error:
1959
	intel_logical_ring_cleanup(ring);
5354 serge 1960
	return ret;
1961
}
1962
 
1963
static int logical_render_ring_init(struct drm_device *dev)
1964
{
1965
	struct drm_i915_private *dev_priv = dev->dev_private;
1966
	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
6084 serge 1967
	int ret;
5354 serge 1968
 
1969
	ring->name = "render ring";
1970
	ring->id = RCS;
1971
	ring->mmio_base = RENDER_RING_BASE;
1972
	ring->irq_enable_mask =
1973
		GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
1974
	ring->irq_keep_mask =
1975
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
1976
	if (HAS_L3_DPF(dev))
1977
		ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
1978
 
6084 serge 1979
	if (INTEL_INFO(dev)->gen >= 9)
1980
		ring->init_hw = gen9_init_render_ring;
1981
	else
1982
		ring->init_hw = gen8_init_render_ring;
1983
	ring->init_context = gen8_init_rcs_context;
5354 serge 1984
	ring->cleanup = intel_fini_pipe_control;
6937 serge 1985
	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
6084 serge 1986
		ring->get_seqno = bxt_a_get_seqno;
1987
		ring->set_seqno = bxt_a_set_seqno;
1988
	} else {
1989
		ring->get_seqno = gen8_get_seqno;
1990
		ring->set_seqno = gen8_set_seqno;
1991
	}
5354 serge 1992
	ring->emit_request = gen8_emit_request;
1993
	ring->emit_flush = gen8_emit_flush_render;
1994
	ring->irq_get = gen8_logical_ring_get_irq;
1995
	ring->irq_put = gen8_logical_ring_put_irq;
1996
	ring->emit_bb_start = gen8_emit_bb_start;
1997
 
6084 serge 1998
	ring->dev = dev;
1999
 
2000
	ret = intel_init_pipe_control(ring);
2001
	if (ret)
2002
		return ret;
2003
 
2004
	ret = intel_init_workaround_bb(ring);
2005
	if (ret) {
2006
		/*
2007
		 * We continue even if we fail to initialize WA batch
2008
		 * because we only expect rare glitches but nothing
2009
		 * critical to prevent us from using GPU
2010
		 */
2011
		DRM_ERROR("WA batch buffer initialization failed: %d\n",
2012
			  ret);
2013
	}
2014
 
2015
	ret = logical_ring_init(dev, ring);
2016
	if (ret) {
2017
		lrc_destroy_wa_ctx_obj(ring);
2018
	}
2019
 
2020
	return ret;
5354 serge 2021
}
2022
 
2023
static int logical_bsd_ring_init(struct drm_device *dev)
2024
{
2025
	struct drm_i915_private *dev_priv = dev->dev_private;
2026
	struct intel_engine_cs *ring = &dev_priv->ring[VCS];
2027
 
2028
	ring->name = "bsd ring";
2029
	ring->id = VCS;
2030
	ring->mmio_base = GEN6_BSD_RING_BASE;
2031
	ring->irq_enable_mask =
2032
		GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
2033
	ring->irq_keep_mask =
2034
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
2035
 
6084 serge 2036
	ring->init_hw = gen8_init_common_ring;
6937 serge 2037
	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
6084 serge 2038
		ring->get_seqno = bxt_a_get_seqno;
2039
		ring->set_seqno = bxt_a_set_seqno;
2040
	} else {
2041
		ring->get_seqno = gen8_get_seqno;
2042
		ring->set_seqno = gen8_set_seqno;
2043
	}
5354 serge 2044
	ring->emit_request = gen8_emit_request;
2045
	ring->emit_flush = gen8_emit_flush;
2046
	ring->irq_get = gen8_logical_ring_get_irq;
2047
	ring->irq_put = gen8_logical_ring_put_irq;
2048
	ring->emit_bb_start = gen8_emit_bb_start;
2049
 
2050
	return logical_ring_init(dev, ring);
2051
}
2052
 
2053
static int logical_bsd2_ring_init(struct drm_device *dev)
2054
{
2055
	struct drm_i915_private *dev_priv = dev->dev_private;
2056
	struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
2057
 
2058
	ring->name = "bds2 ring";
2059
	ring->id = VCS2;
2060
	ring->mmio_base = GEN8_BSD2_RING_BASE;
2061
	ring->irq_enable_mask =
2062
		GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
2063
	ring->irq_keep_mask =
2064
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
2065
 
6084 serge 2066
	ring->init_hw = gen8_init_common_ring;
5354 serge 2067
	ring->get_seqno = gen8_get_seqno;
2068
	ring->set_seqno = gen8_set_seqno;
2069
	ring->emit_request = gen8_emit_request;
2070
	ring->emit_flush = gen8_emit_flush;
2071
	ring->irq_get = gen8_logical_ring_get_irq;
2072
	ring->irq_put = gen8_logical_ring_put_irq;
2073
	ring->emit_bb_start = gen8_emit_bb_start;
2074
 
2075
	return logical_ring_init(dev, ring);
2076
}
2077
 
2078
static int logical_blt_ring_init(struct drm_device *dev)
2079
{
2080
	struct drm_i915_private *dev_priv = dev->dev_private;
2081
	struct intel_engine_cs *ring = &dev_priv->ring[BCS];
2082
 
2083
	ring->name = "blitter ring";
2084
	ring->id = BCS;
2085
	ring->mmio_base = BLT_RING_BASE;
2086
	ring->irq_enable_mask =
2087
		GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
2088
	ring->irq_keep_mask =
2089
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
2090
 
6084 serge 2091
	ring->init_hw = gen8_init_common_ring;
6937 serge 2092
	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
6084 serge 2093
		ring->get_seqno = bxt_a_get_seqno;
2094
		ring->set_seqno = bxt_a_set_seqno;
2095
	} else {
2096
		ring->get_seqno = gen8_get_seqno;
2097
		ring->set_seqno = gen8_set_seqno;
2098
	}
5354 serge 2099
	ring->emit_request = gen8_emit_request;
2100
	ring->emit_flush = gen8_emit_flush;
2101
	ring->irq_get = gen8_logical_ring_get_irq;
2102
	ring->irq_put = gen8_logical_ring_put_irq;
2103
	ring->emit_bb_start = gen8_emit_bb_start;
2104
 
2105
	return logical_ring_init(dev, ring);
2106
}
2107
 
2108
static int logical_vebox_ring_init(struct drm_device *dev)
2109
{
2110
	struct drm_i915_private *dev_priv = dev->dev_private;
2111
	struct intel_engine_cs *ring = &dev_priv->ring[VECS];
2112
 
2113
	ring->name = "video enhancement ring";
2114
	ring->id = VECS;
2115
	ring->mmio_base = VEBOX_RING_BASE;
2116
	ring->irq_enable_mask =
2117
		GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
2118
	ring->irq_keep_mask =
2119
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
2120
 
6084 serge 2121
	ring->init_hw = gen8_init_common_ring;
6937 serge 2122
	if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
6084 serge 2123
		ring->get_seqno = bxt_a_get_seqno;
2124
		ring->set_seqno = bxt_a_set_seqno;
2125
	} else {
2126
		ring->get_seqno = gen8_get_seqno;
2127
		ring->set_seqno = gen8_set_seqno;
2128
	}
5354 serge 2129
	ring->emit_request = gen8_emit_request;
2130
	ring->emit_flush = gen8_emit_flush;
2131
	ring->irq_get = gen8_logical_ring_get_irq;
2132
	ring->irq_put = gen8_logical_ring_put_irq;
2133
	ring->emit_bb_start = gen8_emit_bb_start;
2134
 
2135
	return logical_ring_init(dev, ring);
2136
}
2137
 
2138
/**
2139
 * intel_logical_rings_init() - allocate, populate and init the Engine Command Streamers
2140
 * @dev: DRM device.
2141
 *
2142
 * This function inits the engines for an Execlists submission style (the equivalent in the
2143
 * legacy ringbuffer submission world would be i915_gem_init_rings). It does it only for
2144
 * those engines that are present in the hardware.
2145
 *
2146
 * Return: non-zero if the initialization failed.
2147
 */
2148
int intel_logical_rings_init(struct drm_device *dev)
2149
{
2150
	struct drm_i915_private *dev_priv = dev->dev_private;
2151
	int ret;
2152
 
2153
	ret = logical_render_ring_init(dev);
2154
	if (ret)
2155
		return ret;
2156
 
2157
	if (HAS_BSD(dev)) {
2158
		ret = logical_bsd_ring_init(dev);
2159
		if (ret)
2160
			goto cleanup_render_ring;
2161
	}
2162
 
2163
	if (HAS_BLT(dev)) {
2164
		ret = logical_blt_ring_init(dev);
2165
		if (ret)
2166
			goto cleanup_bsd_ring;
2167
	}
2168
 
2169
	if (HAS_VEBOX(dev)) {
2170
		ret = logical_vebox_ring_init(dev);
2171
		if (ret)
2172
			goto cleanup_blt_ring;
2173
	}
2174
 
2175
	if (HAS_BSD2(dev)) {
2176
		ret = logical_bsd2_ring_init(dev);
2177
		if (ret)
2178
			goto cleanup_vebox_ring;
2179
	}
2180
 
2181
	return 0;
2182
 
2183
cleanup_vebox_ring:
2184
	intel_logical_ring_cleanup(&dev_priv->ring[VECS]);
2185
cleanup_blt_ring:
2186
	intel_logical_ring_cleanup(&dev_priv->ring[BCS]);
2187
cleanup_bsd_ring:
2188
	intel_logical_ring_cleanup(&dev_priv->ring[VCS]);
2189
cleanup_render_ring:
2190
	intel_logical_ring_cleanup(&dev_priv->ring[RCS]);
2191
 
2192
	return ret;
2193
}
2194
 
6084 serge 2195
static u32
2196
make_rpcs(struct drm_device *dev)
5354 serge 2197
{
6084 serge 2198
	u32 rpcs = 0;
5354 serge 2199
 
6084 serge 2200
	/*
2201
	 * No explicit RPCS request is needed to ensure full
2202
	 * slice/subslice/EU enablement prior to Gen9.
2203
	*/
2204
	if (INTEL_INFO(dev)->gen < 9)
5354 serge 2205
		return 0;
2206
 
6084 serge 2207
	/*
2208
	 * Starting in Gen9, render power gating can leave
2209
	 * slice/subslice/EU in a partially enabled state. We
2210
	 * must make an explicit request through RPCS for full
2211
	 * enablement.
2212
	*/
2213
	if (INTEL_INFO(dev)->has_slice_pg) {
2214
		rpcs |= GEN8_RPCS_S_CNT_ENABLE;
2215
		rpcs |= INTEL_INFO(dev)->slice_total <<
2216
			GEN8_RPCS_S_CNT_SHIFT;
2217
		rpcs |= GEN8_RPCS_ENABLE;
2218
	}
5354 serge 2219
 
6084 serge 2220
	if (INTEL_INFO(dev)->has_subslice_pg) {
2221
		rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
2222
		rpcs |= INTEL_INFO(dev)->subslice_per_slice <<
2223
			GEN8_RPCS_SS_CNT_SHIFT;
2224
		rpcs |= GEN8_RPCS_ENABLE;
2225
	}
5354 serge 2226
 
6084 serge 2227
	if (INTEL_INFO(dev)->has_eu_pg) {
2228
		rpcs |= INTEL_INFO(dev)->eu_per_subslice <<
2229
			GEN8_RPCS_EU_MIN_SHIFT;
2230
		rpcs |= INTEL_INFO(dev)->eu_per_subslice <<
2231
			GEN8_RPCS_EU_MAX_SHIFT;
2232
		rpcs |= GEN8_RPCS_ENABLE;
2233
	}
2234
 
2235
	return rpcs;
5354 serge 2236
}
2237
 
2238
static int
2239
populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj,
2240
		    struct intel_engine_cs *ring, struct intel_ringbuffer *ringbuf)
2241
{
2242
	struct drm_device *dev = ring->dev;
2243
	struct drm_i915_private *dev_priv = dev->dev_private;
2244
	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
2245
	struct page *page;
2246
	uint32_t *reg_state;
2247
	int ret;
2248
 
2249
	if (!ppgtt)
2250
		ppgtt = dev_priv->mm.aliasing_ppgtt;
2251
 
2252
	ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
2253
	if (ret) {
2254
		DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
2255
		return ret;
2256
	}
2257
 
2258
	ret = i915_gem_object_get_pages(ctx_obj);
2259
	if (ret) {
2260
		DRM_DEBUG_DRIVER("Could not get object pages\n");
2261
		return ret;
2262
	}
2263
 
2264
	i915_gem_object_pin_pages(ctx_obj);
2265
 
2266
	/* The second page of the context object contains some fields which must
2267
	 * be set up prior to the first execution. */
6937 serge 2268
	page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
5354 serge 2269
	reg_state = kmap_atomic(page);
2270
 
2271
	/* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM
2272
	 * commands followed by (reg, value) pairs. The values we are setting here are
2273
	 * only for the first context restore: on a subsequent save, the GPU will
2274
	 * recreate this batchbuffer with new values (including all the missing
2275
	 * MI_LOAD_REGISTER_IMM commands that we are not initializing here). */
6937 serge 2276
	reg_state[CTX_LRI_HEADER_0] =
2277
		MI_LOAD_REGISTER_IMM(ring->id == RCS ? 14 : 11) | MI_LRI_FORCE_POSTED;
2278
	ASSIGN_CTX_REG(reg_state, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(ring),
6084 serge 2279
		_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
2280
				   CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
6937 serge 2281
					  CTX_CTRL_RS_CTX_ENABLE));
2282
	ASSIGN_CTX_REG(reg_state, CTX_RING_HEAD, RING_HEAD(ring->mmio_base), 0);
2283
	ASSIGN_CTX_REG(reg_state, CTX_RING_TAIL, RING_TAIL(ring->mmio_base), 0);
5354 serge 2284
	/* Ring buffer start address is not known until the buffer is pinned.
2285
	 * It is written to the context image in execlists_update_context()
2286
	 */
6937 serge 2287
	ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_START, RING_START(ring->mmio_base), 0);
2288
	ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL, RING_CTL(ring->mmio_base),
2289
		       ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID);
2290
	ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_U, RING_BBADDR_UDW(ring->mmio_base), 0);
2291
	ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_L, RING_BBADDR(ring->mmio_base), 0);
2292
	ASSIGN_CTX_REG(reg_state, CTX_BB_STATE, RING_BBSTATE(ring->mmio_base),
2293
		       RING_BB_PPGTT);
2294
	ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(ring->mmio_base), 0);
2295
	ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(ring->mmio_base), 0);
2296
	ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_STATE, RING_SBBSTATE(ring->mmio_base), 0);
5354 serge 2297
	if (ring->id == RCS) {
6937 serge 2298
		ASSIGN_CTX_REG(reg_state, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(ring->mmio_base), 0);
2299
		ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(ring->mmio_base), 0);
2300
		ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX_OFFSET, RING_INDIRECT_CTX_OFFSET(ring->mmio_base), 0);
6084 serge 2301
		if (ring->wa_ctx.obj) {
2302
			struct i915_ctx_workarounds *wa_ctx = &ring->wa_ctx;
2303
			uint32_t ggtt_offset = i915_gem_obj_ggtt_offset(wa_ctx->obj);
2304
 
2305
			reg_state[CTX_RCS_INDIRECT_CTX+1] =
2306
				(ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) |
2307
				(wa_ctx->indirect_ctx.size / CACHELINE_DWORDS);
2308
 
2309
			reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] =
2310
				CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT << 6;
2311
 
2312
			reg_state[CTX_BB_PER_CTX_PTR+1] =
2313
				(ggtt_offset + wa_ctx->per_ctx.offset * sizeof(uint32_t)) |
2314
				0x01;
2315
		}
5354 serge 2316
	}
6937 serge 2317
	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
2318
	ASSIGN_CTX_REG(reg_state, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(ring->mmio_base), 0);
2319
	/* PDP values well be assigned later if needed */
2320
	ASSIGN_CTX_REG(reg_state, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(ring, 3), 0);
2321
	ASSIGN_CTX_REG(reg_state, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(ring, 3), 0);
2322
	ASSIGN_CTX_REG(reg_state, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(ring, 2), 0);
2323
	ASSIGN_CTX_REG(reg_state, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(ring, 2), 0);
2324
	ASSIGN_CTX_REG(reg_state, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(ring, 1), 0);
2325
	ASSIGN_CTX_REG(reg_state, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(ring, 1), 0);
2326
	ASSIGN_CTX_REG(reg_state, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(ring, 0), 0);
2327
	ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(ring, 0), 0);
6084 serge 2328
 
2329
	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
2330
		/* 64b PPGTT (48bit canonical)
2331
		 * PDP0_DESCRIPTOR contains the base address to PML4 and
2332
		 * other PDP Descriptors are ignored.
2333
		 */
2334
		ASSIGN_CTX_PML4(ppgtt, reg_state);
2335
	} else {
2336
		/* 32b PPGTT
2337
		 * PDP*_DESCRIPTOR contains the base address of space supported.
2338
		 * With dynamic page allocation, PDPs may not be allocated at
2339
		 * this point. Point the unallocated PDPs to the scratch page
2340
		 */
2341
		ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
2342
		ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
2343
		ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
2344
		ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
2345
	}
2346
 
5354 serge 2347
	if (ring->id == RCS) {
2348
		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
6937 serge 2349
		ASSIGN_CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
2350
			       make_rpcs(dev));
5354 serge 2351
	}
2352
 
2353
	kunmap_atomic(reg_state);
6088 serge 2354
	i915_gem_object_unpin_pages(ctx_obj);
5354 serge 2355
 
2356
	return 0;
2357
}
2358
 
2359
/**
2360
 * intel_lr_context_free() - free the LRC specific bits of a context
2361
 * @ctx: the LR context to free.
2362
 *
2363
 * The real context freeing is done in i915_gem_context_free: this only
2364
 * takes care of the bits that are LRC related: the per-engine backing
2365
 * objects and the logical ringbuffer.
2366
 */
2367
void intel_lr_context_free(struct intel_context *ctx)
2368
{
2369
	int i;
2370
 
2371
	for (i = 0; i < I915_NUM_RINGS; i++) {
2372
		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
2373
 
2374
		if (ctx_obj) {
2375
			struct intel_ringbuffer *ringbuf =
2376
					ctx->engine[i].ringbuf;
2377
			struct intel_engine_cs *ring = ringbuf->ring;
2378
 
2379
			if (ctx == ring->default_context) {
2380
				intel_unpin_ringbuffer_obj(ringbuf);
2381
				i915_gem_object_ggtt_unpin(ctx_obj);
2382
			}
6084 serge 2383
			WARN_ON(ctx->engine[ring->id].pin_count);
2384
			intel_ringbuffer_free(ringbuf);
5354 serge 2385
			drm_gem_object_unreference(&ctx_obj->base);
2386
		}
2387
	}
2388
}
2389
 
2390
static uint32_t get_lr_context_size(struct intel_engine_cs *ring)
2391
{
2392
	int ret = 0;
2393
 
2394
	WARN_ON(INTEL_INFO(ring->dev)->gen < 8);
2395
 
2396
	switch (ring->id) {
2397
	case RCS:
2398
		if (INTEL_INFO(ring->dev)->gen >= 9)
2399
			ret = GEN9_LR_CONTEXT_RENDER_SIZE;
2400
		else
2401
			ret = GEN8_LR_CONTEXT_RENDER_SIZE;
2402
		break;
2403
	case VCS:
2404
	case BCS:
2405
	case VECS:
2406
	case VCS2:
2407
		ret = GEN8_LR_CONTEXT_OTHER_SIZE;
2408
		break;
2409
	}
2410
 
2411
	return ret;
2412
}
2413
 
2414
static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
2415
		struct drm_i915_gem_object *default_ctx_obj)
2416
{
2417
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
6084 serge 2418
	struct page *page;
5354 serge 2419
 
6084 serge 2420
	/* The HWSP is part of the default context object in LRC mode. */
2421
	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj)
2422
			+ LRC_PPHWSP_PN * PAGE_SIZE;
2423
	page = i915_gem_object_get_page(default_ctx_obj, LRC_PPHWSP_PN);
2424
	ring->status_page.page_addr = kmap(page);
5354 serge 2425
	ring->status_page.obj = default_ctx_obj;
2426
 
2427
	I915_WRITE(RING_HWS_PGA(ring->mmio_base),
2428
			(u32)ring->status_page.gfx_addr);
2429
	POSTING_READ(RING_HWS_PGA(ring->mmio_base));
2430
}
2431
 
2432
/**
6084 serge 2433
 * intel_lr_context_deferred_alloc() - create the LRC specific bits of a context
5354 serge 2434
 * @ctx: LR context to create.
2435
 * @ring: engine to be used with the context.
2436
 *
2437
 * This function can be called more than once, with different engines, if we plan
2438
 * to use the context with them. The context backing objects and the ringbuffers
2439
 * (specially the ringbuffer backing objects) suck a lot of memory up, and that's why
2440
 * the creation is a deferred call: it's better to make sure first that we need to use
2441
 * a given ring with the context.
2442
 *
2443
 * Return: non-zero on error.
2444
 */
6084 serge 2445
 
2446
int intel_lr_context_deferred_alloc(struct intel_context *ctx,
5354 serge 2447
				     struct intel_engine_cs *ring)
2448
{
2449
	struct drm_device *dev = ring->dev;
2450
	struct drm_i915_gem_object *ctx_obj;
2451
	uint32_t context_size;
2452
	struct intel_ringbuffer *ringbuf;
2453
	int ret;
2454
 
2455
	WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL);
6084 serge 2456
	WARN_ON(ctx->engine[ring->id].state);
5354 serge 2457
 
2458
	context_size = round_up(get_lr_context_size(ring), 4096);
2459
 
6084 serge 2460
	/* One extra page as the sharing data between driver and GuC */
2461
	context_size += PAGE_SIZE * LRC_PPHWSP_PN;
5354 serge 2462
 
6084 serge 2463
	ctx_obj = i915_gem_alloc_object(dev, context_size);
2464
	if (!ctx_obj) {
2465
		DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n");
2466
		return -ENOMEM;
5354 serge 2467
	}
2468
 
6084 serge 2469
	ringbuf = intel_engine_create_ringbuffer(ring, 4 * PAGE_SIZE);
2470
	if (IS_ERR(ringbuf)) {
2471
		ret = PTR_ERR(ringbuf);
2472
		goto error_deref_obj;
5354 serge 2473
	}
2474
 
2475
	ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf);
2476
	if (ret) {
2477
		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
6084 serge 2478
		goto error_ringbuf;
5354 serge 2479
	}
2480
 
2481
	ctx->engine[ring->id].ringbuf = ringbuf;
2482
	ctx->engine[ring->id].state = ctx_obj;
2483
 
6084 serge 2484
	if (ctx != ring->default_context && ring->init_context) {
2485
		struct drm_i915_gem_request *req;
5354 serge 2486
 
6084 serge 2487
		ret = i915_gem_request_alloc(ring,
2488
			ctx, &req);
2489
		if (ret) {
2490
			DRM_ERROR("ring create req: %d\n",
2491
				ret);
2492
			goto error_ringbuf;
5354 serge 2493
		}
2494
 
6084 serge 2495
		ret = ring->init_context(req);
5354 serge 2496
		if (ret) {
6084 serge 2497
			DRM_ERROR("ring init context: %d\n",
2498
				ret);
2499
			i915_gem_request_cancel(req);
2500
			goto error_ringbuf;
5354 serge 2501
		}
6084 serge 2502
		i915_add_request_no_flush(req);
5354 serge 2503
	}
2504
	return 0;
2505
 
6084 serge 2506
error_ringbuf:
2507
	intel_ringbuffer_free(ringbuf);
2508
error_deref_obj:
5354 serge 2509
	drm_gem_object_unreference(&ctx_obj->base);
6084 serge 2510
	ctx->engine[ring->id].ringbuf = NULL;
2511
	ctx->engine[ring->id].state = NULL;
5354 serge 2512
	return ret;
2513
}
6084 serge 2514
 
2515
void intel_lr_context_reset(struct drm_device *dev,
2516
			struct intel_context *ctx)
2517
{
2518
	struct drm_i915_private *dev_priv = dev->dev_private;
2519
	struct intel_engine_cs *ring;
2520
	int i;
2521
 
2522
	for_each_ring(ring, dev_priv, i) {
2523
		struct drm_i915_gem_object *ctx_obj =
2524
				ctx->engine[ring->id].state;
2525
		struct intel_ringbuffer *ringbuf =
2526
				ctx->engine[ring->id].ringbuf;
2527
		uint32_t *reg_state;
2528
		struct page *page;
2529
 
2530
		if (!ctx_obj)
2531
			continue;
2532
 
2533
		if (i915_gem_object_get_pages(ctx_obj)) {
2534
			WARN(1, "Failed get_pages for context obj\n");
2535
			continue;
2536
		}
6937 serge 2537
		page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
6084 serge 2538
		reg_state = kmap_atomic(page);
2539
 
2540
		reg_state[CTX_RING_HEAD+1] = 0;
2541
		reg_state[CTX_RING_TAIL+1] = 0;
2542
 
2543
		kunmap_atomic(reg_state);
2544
 
2545
		ringbuf->head = 0;
2546
		ringbuf->tail = 0;
2547
	}
2548
}