Subversion Repositories Kolibri OS

Rev

Rev 6084 | Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5354 serge 1
/*
2
 * Copyright © 2014 Intel Corporation
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 * IN THE SOFTWARE.
22
 *
23
 * Authors:
24
 *    Ben Widawsky 
25
 *    Michel Thierry 
26
 *    Thomas Daniel 
27
 *    Oscar Mateo 
28
 *
29
 */
30
 
31
/**
32
 * DOC: Logical Rings, Logical Ring Contexts and Execlists
33
 *
34
 * Motivation:
35
 * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
36
 * These expanded contexts enable a number of new abilities, especially
37
 * "Execlists" (also implemented in this file).
38
 *
39
 * One of the main differences with the legacy HW contexts is that logical
40
 * ring contexts incorporate many more things to the context's state, like
41
 * PDPs or ringbuffer control registers:
42
 *
43
 * The reason why PDPs are included in the context is straightforward: as
44
 * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
45
 * contained there means you don't need to do a ppgtt->switch_mm yourself,
46
 * instead, the GPU will do it for you on the context switch.
47
 *
48
 * But, what about the ringbuffer control registers (head, tail, etc..)?
49
 * shouldn't we just need a set of those per engine command streamer? This is
50
 * where the name "Logical Rings" starts to make sense: by virtualizing the
51
 * rings, the engine cs shifts to a new "ring buffer" with every context
52
 * switch. When you want to submit a workload to the GPU you: A) choose your
53
 * context, B) find its appropriate virtualized ring, C) write commands to it
54
 * and then, finally, D) tell the GPU to switch to that context.
55
 *
56
 * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
57
 * to a context is via a context execution list, ergo "Execlists".
58
 *
59
 * LRC implementation:
60
 * Regarding the creation of contexts, we have:
61
 *
62
 * - One global default context.
63
 * - One local default context for each opened fd.
64
 * - One local extra context for each context create ioctl call.
65
 *
66
 * Now that ringbuffers belong per-context (and not per-engine, like before)
67
 * and that contexts are uniquely tied to a given engine (and not reusable,
68
 * like before) we need:
69
 *
70
 * - One ringbuffer per-engine inside each context.
71
 * - One backing object per-engine inside each context.
72
 *
73
 * The global default context starts its life with these new objects fully
74
 * allocated and populated. The local default context for each opened fd is
75
 * more complex, because we don't know at creation time which engine is going
76
 * to use them. To handle this, we have implemented a deferred creation of LR
77
 * contexts:
78
 *
79
 * The local context starts its life as a hollow or blank holder, that only
80
 * gets populated for a given engine once we receive an execbuffer. If later
81
 * on we receive another execbuffer ioctl for the same context but a different
82
 * engine, we allocate/populate a new ringbuffer and context backing object and
83
 * so on.
84
 *
85
 * Finally, regarding local contexts created using the ioctl call: as they are
86
 * only allowed with the render ring, we can allocate & populate them right
87
 * away (no need to defer anything, at least for now).
88
 *
89
 * Execlists implementation:
90
 * Execlists are the new method by which, on gen8+ hardware, workloads are
91
 * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
92
 * This method works as follows:
93
 *
94
 * When a request is committed, its commands (the BB start and any leading or
95
 * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
96
 * for the appropriate context. The tail pointer in the hardware context is not
97
 * updated at this time, but instead, kept by the driver in the ringbuffer
98
 * structure. A structure representing this request is added to a request queue
99
 * for the appropriate engine: this structure contains a copy of the context's
100
 * tail after the request was written to the ring buffer and a pointer to the
101
 * context itself.
102
 *
103
 * If the engine's request queue was empty before the request was added, the
104
 * queue is processed immediately. Otherwise the queue will be processed during
105
 * a context switch interrupt. In any case, elements on the queue will get sent
106
 * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
107
 * globally unique 20-bits submission ID.
108
 *
109
 * When execution of a request completes, the GPU updates the context status
110
 * buffer with a context complete event and generates a context switch interrupt.
111
 * During the interrupt handling, the driver examines the events in the buffer:
112
 * for each context complete event, if the announced ID matches that on the head
113
 * of the request queue, then that request is retired and removed from the queue.
114
 *
115
 * After processing, if any requests were retired and the queue is not empty
116
 * then a new execution list can be submitted. The two requests at the front of
117
 * the queue are next to be submitted but since a context may not occur twice in
118
 * an execution list, if subsequent requests have the same ID as the first then
119
 * the two requests must be combined. This is done simply by discarding requests
120
 * at the head of the queue until either only one request is left (in which case
121
 * we use a NULL second context) or the first two requests have unique IDs.
122
 *
123
 * By always executing the first two requests in the queue the driver ensures
124
 * that the GPU is kept as busy as possible. In the case where a single context
125
 * completes but a second context is still executing, the request for this second
126
 * context will be at the head of the queue when we remove the first one. This
127
 * request will then be resubmitted along with a new request for a different context,
128
 * which will cause the hardware to continue executing the second request and queue
129
 * the new request (the GPU detects the condition of a context getting preempted
130
 * with the same context and optimizes the context switch flow by not doing
131
 * preemption, but just sampling the new tail pointer).
132
 *
133
 */
134
 
135
#include 
136
#include 
137
#include "intel_drv.h"
138
#include "i915_drv.h"
139
 
140
/* Logical ring context sizes, expressed as a number of pages. */
#define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
#define GEN8_LR_CONTEXT_OTHER_SIZE	(2 * PAGE_SIZE)

/* RING_EXECLIST_* bit masks. */
#define RING_EXECLIST_QFULL		(1 << 2)
#define RING_EXECLIST1_VALID		(1 << 3)
#define RING_EXECLIST0_VALID		(1 << 4)
#define RING_EXECLIST_ACTIVE_STATUS	(3 << 14)
#define RING_EXECLIST1_ACTIVE		(1 << 17)
#define RING_EXECLIST0_ACTIVE		(1 << 18)

/*
 * Status bits tested on the entries read back from the context status
 * buffer (see intel_execlists_handle_ctx_events()).
 */
#define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
#define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
#define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
#define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
#define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)

/*
 * Indices into the 32-bit register state array of a context image. The
 * value belonging to an entry is stored at index + 1 (see
 * execlists_update_context()).
 */
#define CTX_LRI_HEADER_0		0x01
#define CTX_CONTEXT_CONTROL		0x02
#define CTX_RING_HEAD			0x04
#define CTX_RING_TAIL			0x06
#define CTX_RING_BUFFER_START		0x08
#define CTX_RING_BUFFER_CONTROL		0x0a
#define CTX_BB_HEAD_U			0x0c
#define CTX_BB_HEAD_L			0x0e
#define CTX_BB_STATE			0x10
#define CTX_SECOND_BB_HEAD_U		0x12
#define CTX_SECOND_BB_HEAD_L		0x14
#define CTX_SECOND_BB_STATE		0x16
#define CTX_BB_PER_CTX_PTR		0x18
#define CTX_RCS_INDIRECT_CTX		0x1a
#define CTX_RCS_INDIRECT_CTX_OFFSET	0x1c
#define CTX_LRI_HEADER_1		0x21
#define CTX_CTX_TIMESTAMP		0x22
#define CTX_PDP3_UDW			0x24
#define CTX_PDP3_LDW			0x26
#define CTX_PDP2_UDW			0x28
#define CTX_PDP2_LDW			0x2a
#define CTX_PDP1_UDW			0x2c
#define CTX_PDP1_LDW			0x2e
#define CTX_PDP0_UDW			0x30
#define CTX_PDP0_LDW			0x32
#define CTX_LRI_HEADER_2		0x41
#define CTX_R_PWR_CLK_STATE		0x42
#define CTX_GPGPU_CSR_BASE_ADDRESS	0x44

/* Flag bits OR-ed into the 64-bit context descriptor. */
#define GEN8_CTX_VALID			(1 << 0)
#define GEN8_CTX_FORCE_PD_RESTORE	(1 << 1)
#define GEN8_CTX_FORCE_RESTORE		(1 << 2)
#define GEN8_CTX_L3LLC_COHERENT		(1 << 5)
#define GEN8_CTX_PRIVILEGE		(1 << 8)

/* Context mode values, placed in the descriptor at GEN8_CTX_MODE_SHIFT. */
enum {
	ADVANCED_CONTEXT	= 0,
	LEGACY_CONTEXT		= 1,
	ADVANCED_AD_CONTEXT	= 2,
	LEGACY_64B_CONTEXT	= 3
};
#define GEN8_CTX_MODE_SHIFT	3

enum {
	FAULT_AND_HANG		= 0,
	FAULT_AND_HALT		= 1,	/* Debug only */
	FAULT_AND_STREAM	= 2,
	FAULT_AND_CONTINUE	= 3	/* Unsupported */
};

/* Bit position of the execlists context ID inside the descriptor. */
#define GEN8_CTX_ID_SHIFT	32
206
 
207
static int intel_lr_context_pin(struct intel_engine_cs *ring,
208
		struct intel_context *ctx);
209
 
210
/**
211
 * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
212
 * @dev: DRM device.
213
 * @enable_execlists: value of i915.enable_execlists module parameter.
214
 *
215
 * Only certain platforms support Execlists (the prerequisites being
216
 * support for Logical Ring Contexts and Aliasing PPGTT or better),
217
 * and only when enabled via module parameter.
218
 *
219
 * Return: 1 if Execlists is supported and has to be enabled.
220
 */
221
int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists)
222
{
223
	WARN_ON(i915.enable_ppgtt == -1);
224
 
225
	if (INTEL_INFO(dev)->gen >= 9)
226
		return 1;
227
 
228
	if (enable_execlists == 0)
229
		return 0;
230
 
231
	if (HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev) &&
232
	    i915.use_mmio_flip >= 0)
233
		return 1;
234
 
235
	return 0;
236
}
237
 
238
/**
239
 * intel_execlists_ctx_id() - get the Execlists Context ID
240
 * @ctx_obj: Logical Ring Context backing object.
241
 *
242
 * Do not confuse with ctx->id! Unfortunately we have a name overload
243
 * here: the old context ID we pass to userspace as a handler so that
244
 * they can refer to a context, and the new context ID we pass to the
245
 * ELSP so that the GPU can inform us of the context status via
246
 * interrupts.
247
 *
248
 * Return: 20-bits globally unique context ID.
249
 */
250
u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
251
{
252
	u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj);
253
 
254
	/* LRCA is required to be 4K aligned so the more significant 20 bits
255
	 * are globally unique */
256
	return lrca >> 12;
257
}
258
 
259
static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj)
260
{
261
	uint64_t desc;
262
	uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj);
263
 
264
	WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
265
 
266
	desc = GEN8_CTX_VALID;
267
	desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT;
268
	desc |= GEN8_CTX_L3LLC_COHERENT;
269
	desc |= GEN8_CTX_PRIVILEGE;
270
	desc |= lrca;
271
	desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
272
 
273
	/* TODO: WaDisableLiteRestore when we start using semaphore
274
	 * signalling between Command Streamers */
275
	/* desc |= GEN8_CTX_FORCE_RESTORE; */
276
 
277
	return desc;
278
}
279
 
280
static void execlists_elsp_write(struct intel_engine_cs *ring,
281
				 struct drm_i915_gem_object *ctx_obj0,
282
				 struct drm_i915_gem_object *ctx_obj1)
283
{
284
	struct drm_device *dev = ring->dev;
285
	struct drm_i915_private *dev_priv = dev->dev_private;
286
	uint64_t temp = 0;
287
	uint32_t desc[4];
288
	unsigned long flags;
289
 
290
	/* XXX: You must always write both descriptors in the order below. */
291
	if (ctx_obj1)
292
		temp = execlists_ctx_descriptor(ctx_obj1);
293
	else
294
		temp = 0;
295
	desc[1] = (u32)(temp >> 32);
296
	desc[0] = (u32)temp;
297
 
298
	temp = execlists_ctx_descriptor(ctx_obj0);
299
	desc[3] = (u32)(temp >> 32);
300
	desc[2] = (u32)temp;
301
 
302
	/* Set Force Wakeup bit to prevent GT from entering C6 while ELSP writes
303
	 * are in progress.
304
	 *
305
	 * The other problem is that we can't just call gen6_gt_force_wake_get()
306
	 * because that function calls intel_runtime_pm_get(), which might sleep.
307
	 * Instead, we do the runtime_pm_get/put when creating/destroying requests.
308
	 */
309
	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
310
	if (IS_CHERRYVIEW(dev) || INTEL_INFO(dev)->gen >= 9) {
311
		if (dev_priv->uncore.fw_rendercount++ == 0)
312
			dev_priv->uncore.funcs.force_wake_get(dev_priv,
313
							      FORCEWAKE_RENDER);
314
		if (dev_priv->uncore.fw_mediacount++ == 0)
315
			dev_priv->uncore.funcs.force_wake_get(dev_priv,
316
							      FORCEWAKE_MEDIA);
317
		if (INTEL_INFO(dev)->gen >= 9) {
318
			if (dev_priv->uncore.fw_blittercount++ == 0)
319
				dev_priv->uncore.funcs.force_wake_get(dev_priv,
320
							FORCEWAKE_BLITTER);
321
		}
322
	} else {
323
		if (dev_priv->uncore.forcewake_count++ == 0)
324
			dev_priv->uncore.funcs.force_wake_get(dev_priv,
325
							      FORCEWAKE_ALL);
326
	}
327
	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
328
 
329
	I915_WRITE(RING_ELSP(ring), desc[1]);
330
	I915_WRITE(RING_ELSP(ring), desc[0]);
331
	I915_WRITE(RING_ELSP(ring), desc[3]);
332
	/* The context is automatically loaded after the following */
333
	I915_WRITE(RING_ELSP(ring), desc[2]);
334
 
335
	/* ELSP is a wo register, so use another nearby reg for posting instead */
336
	POSTING_READ(RING_EXECLIST_STATUS(ring));
337
 
338
	/* Release Force Wakeup (see the big comment above). */
339
	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
340
	if (IS_CHERRYVIEW(dev) || INTEL_INFO(dev)->gen >= 9) {
341
		if (--dev_priv->uncore.fw_rendercount == 0)
342
			dev_priv->uncore.funcs.force_wake_put(dev_priv,
343
							      FORCEWAKE_RENDER);
344
		if (--dev_priv->uncore.fw_mediacount == 0)
345
			dev_priv->uncore.funcs.force_wake_put(dev_priv,
346
							      FORCEWAKE_MEDIA);
347
		if (INTEL_INFO(dev)->gen >= 9) {
348
			if (--dev_priv->uncore.fw_blittercount == 0)
349
				dev_priv->uncore.funcs.force_wake_put(dev_priv,
350
							FORCEWAKE_BLITTER);
351
		}
352
	} else {
353
		if (--dev_priv->uncore.forcewake_count == 0)
354
			dev_priv->uncore.funcs.force_wake_put(dev_priv,
355
							      FORCEWAKE_ALL);
356
	}
357
 
358
	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
359
}
360
 
361
static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
362
				    struct drm_i915_gem_object *ring_obj,
363
				    u32 tail)
364
{
365
	struct page *page;
366
	uint32_t *reg_state;
367
 
368
	page = i915_gem_object_get_page(ctx_obj, 1);
369
	reg_state = kmap_atomic(page);
370
 
371
	reg_state[CTX_RING_TAIL+1] = tail;
372
	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
373
 
374
	kunmap_atomic(reg_state);
375
 
376
	return 0;
377
}
378
 
379
/*
 * Refresh the context images of @to0 (and of @to1, if not NULL) with their
 * new tails and hand both to the ELSP of @ring.
 *
 * BUGs if a context has no state object for this engine and warns if a
 * state object or ring buffer object is not pinned.
 */
static void execlists_submit_contexts(struct intel_engine_cs *ring,
				      struct intel_context *to0, u32 tail0,
				      struct intel_context *to1, u32 tail1)
{
	struct drm_i915_gem_object *state0, *state1 = NULL;
	struct intel_ringbuffer *rb0, *rb1 = NULL;

	state0 = to0->engine[ring->id].state;
	rb0 = to0->engine[ring->id].ringbuf;

	BUG_ON(!state0);
	WARN_ON(!i915_gem_obj_is_pinned(state0));
	WARN_ON(!i915_gem_obj_is_pinned(rb0->obj));

	execlists_update_context(state0, rb0->obj, tail0);

	if (to1 != NULL) {
		rb1 = to1->engine[ring->id].ringbuf;
		state1 = to1->engine[ring->id].state;

		BUG_ON(!state1);
		WARN_ON(!i915_gem_obj_is_pinned(state1));
		WARN_ON(!i915_gem_obj_is_pinned(rb1->obj));

		execlists_update_context(state1, rb1->obj, tail1);
	}

	execlists_elsp_write(ring, state0, state1);
}
406
 
407
static void execlists_context_unqueue(struct intel_engine_cs *ring)
408
{
409
	struct intel_ctx_submit_request *req0 = NULL, *req1 = NULL;
410
	struct intel_ctx_submit_request *cursor = NULL, *tmp = NULL;
411
 
412
	assert_spin_locked(&ring->execlist_lock);
413
 
414
	if (list_empty(&ring->execlist_queue))
415
		return;
416
 
417
	/* Try to read in pairs */
418
	list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue,
419
				 execlist_link) {
420
		if (!req0) {
421
			req0 = cursor;
422
		} else if (req0->ctx == cursor->ctx) {
423
			/* Same ctx: ignore first request, as second request
424
			 * will update tail past first request's workload */
425
			cursor->elsp_submitted = req0->elsp_submitted;
426
			list_del(&req0->execlist_link);
427
			list_add_tail(&req0->execlist_link,
428
				&ring->execlist_retired_req_list);
429
			req0 = cursor;
430
		} else {
431
			req1 = cursor;
432
			break;
433
		}
434
	}
435
 
436
	WARN_ON(req1 && req1->elsp_submitted);
437
 
438
	execlists_submit_contexts(ring, req0->ctx, req0->tail,
439
				  req1 ? req1->ctx : NULL,
440
				  req1 ? req1->tail : 0);
441
 
442
	req0->elsp_submitted++;
443
	if (req1)
444
		req1->elsp_submitted++;
445
}
446
 
447
static bool execlists_check_remove_request(struct intel_engine_cs *ring,
448
					   u32 request_id)
449
{
450
	struct intel_ctx_submit_request *head_req;
451
 
452
	assert_spin_locked(&ring->execlist_lock);
453
 
454
	head_req = list_first_entry_or_null(&ring->execlist_queue,
455
					    struct intel_ctx_submit_request,
456
					    execlist_link);
457
 
458
	if (head_req != NULL) {
459
		struct drm_i915_gem_object *ctx_obj =
460
				head_req->ctx->engine[ring->id].state;
461
		if (intel_execlists_ctx_id(ctx_obj) == request_id) {
462
			WARN(head_req->elsp_submitted == 0,
463
			     "Never submitted head request\n");
464
 
465
			if (--head_req->elsp_submitted <= 0) {
466
				list_del(&head_req->execlist_link);
467
				list_add_tail(&head_req->execlist_link,
468
					&ring->execlist_retired_req_list);
469
				return true;
470
			}
471
		}
472
	}
473
 
474
	return false;
475
}
476
 
477
/**
478
 * intel_execlists_handle_ctx_events() - handle Context Switch interrupts
479
 * @ring: Engine Command Streamer to handle.
480
 *
481
 * Check the unread Context Status Buffers and manage the submission of new
482
 * contexts to the ELSP accordingly.
483
 */
484
void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring)
485
{
486
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
487
	u32 status_pointer;
488
	u8 read_pointer;
489
	u8 write_pointer;
490
	u32 status;
491
	u32 status_id;
492
	u32 submit_contexts = 0;
493
 
494
	status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
495
 
496
	read_pointer = ring->next_context_status_buffer;
497
	write_pointer = status_pointer & 0x07;
498
	if (read_pointer > write_pointer)
499
		write_pointer += 6;
500
 
501
	spin_lock(&ring->execlist_lock);
502
 
503
	while (read_pointer < write_pointer) {
504
		read_pointer++;
505
		status = I915_READ(RING_CONTEXT_STATUS_BUF(ring) +
506
				(read_pointer % 6) * 8);
507
		status_id = I915_READ(RING_CONTEXT_STATUS_BUF(ring) +
508
				(read_pointer % 6) * 8 + 4);
509
 
510
		if (status & GEN8_CTX_STATUS_PREEMPTED) {
511
			if (status & GEN8_CTX_STATUS_LITE_RESTORE) {
512
				if (execlists_check_remove_request(ring, status_id))
513
					WARN(1, "Lite Restored request removed from queue\n");
514
			} else
515
				WARN(1, "Preemption without Lite Restore\n");
516
		}
517
 
518
		 if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
519
		     (status & GEN8_CTX_STATUS_ELEMENT_SWITCH)) {
520
			if (execlists_check_remove_request(ring, status_id))
521
				submit_contexts++;
522
		}
523
	}
524
 
525
	if (submit_contexts != 0)
526
		execlists_context_unqueue(ring);
527
 
528
	spin_unlock(&ring->execlist_lock);
529
 
530
	WARN(submit_contexts > 2, "More than two context complete events?\n");
531
	ring->next_context_status_buffer = write_pointer % 6;
532
 
533
	I915_WRITE(RING_CONTEXT_STATUS_PTR(ring),
534
		   ((u32)ring->next_context_status_buffer & 0x07) << 8);
535
}
536
 
537
static int execlists_context_queue(struct intel_engine_cs *ring,
538
				   struct intel_context *to,
539
				   u32 tail)
540
{
541
	struct intel_ctx_submit_request *req = NULL, *cursor;
542
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
543
	unsigned long flags;
544
	int num_elements = 0;
545
 
546
	req = kzalloc(sizeof(*req), GFP_KERNEL);
547
	if (req == NULL)
548
		return -ENOMEM;
549
	req->ctx = to;
550
	i915_gem_context_reference(req->ctx);
551
 
552
	if (to != ring->default_context)
553
		intel_lr_context_pin(ring, to);
554
 
555
	req->ring = ring;
556
	req->tail = tail;
557
 
558
	intel_runtime_pm_get(dev_priv);
559
 
560
	spin_lock_irqsave(&ring->execlist_lock, flags);
561
 
562
	list_for_each_entry(cursor, &ring->execlist_queue, execlist_link)
563
		if (++num_elements > 2)
564
			break;
565
 
566
	if (num_elements > 2) {
567
		struct intel_ctx_submit_request *tail_req;
568
 
569
		tail_req = list_last_entry(&ring->execlist_queue,
570
					   struct intel_ctx_submit_request,
571
					   execlist_link);
572
 
573
		if (to == tail_req->ctx) {
574
			WARN(tail_req->elsp_submitted != 0,
575
				"More than 2 already-submitted reqs queued\n");
576
			list_del(&tail_req->execlist_link);
577
			list_add_tail(&tail_req->execlist_link,
578
				&ring->execlist_retired_req_list);
579
		}
580
	}
581
 
582
	list_add_tail(&req->execlist_link, &ring->execlist_queue);
583
	if (num_elements == 0)
584
		execlists_context_unqueue(ring);
585
 
586
	spin_unlock_irqrestore(&ring->execlist_lock, flags);
587
 
588
	return 0;
589
}
590
 
591
static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
592
{
593
	struct intel_engine_cs *ring = ringbuf->ring;
594
	uint32_t flush_domains;
595
	int ret;
596
 
597
	flush_domains = 0;
598
	if (ring->gpu_caches_dirty)
599
		flush_domains = I915_GEM_GPU_DOMAINS;
600
 
601
	ret = ring->emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains);
602
	if (ret)
603
		return ret;
604
 
605
	ring->gpu_caches_dirty = false;
606
	return 0;
607
}
608
 
609
static int execlists_move_to_gpu(struct intel_ringbuffer *ringbuf,
610
				 struct list_head *vmas)
611
{
612
	struct intel_engine_cs *ring = ringbuf->ring;
613
	struct i915_vma *vma;
614
	uint32_t flush_domains = 0;
615
	bool flush_chipset = false;
616
	int ret;
617
 
618
	list_for_each_entry(vma, vmas, exec_list) {
619
		struct drm_i915_gem_object *obj = vma->obj;
620
 
621
		ret = i915_gem_object_sync(obj, ring);
622
		if (ret)
623
			return ret;
624
 
625
		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
626
			flush_chipset |= i915_gem_clflush_object(obj, false);
627
 
628
		flush_domains |= obj->base.write_domain;
629
	}
630
 
631
	if (flush_domains & I915_GEM_DOMAIN_GTT)
632
		wmb();
633
 
634
	/* Unconditionally invalidate gpu caches and ensure that we do flush
635
	 * any residual writes from the previous batch.
636
	 */
637
	return logical_ring_invalidate_all_caches(ringbuf);
638
}
639
 
640
/**
641
 * execlists_submission() - submit a batchbuffer for execution, Execlists style
642
 * @dev: DRM device.
643
 * @file: DRM file.
644
 * @ring: Engine Command Streamer to submit to.
645
 * @ctx: Context to employ for this submission.
646
 * @args: execbuffer call arguments.
647
 * @vmas: list of vmas.
648
 * @batch_obj: the batchbuffer to submit.
649
 * @exec_start: batchbuffer start virtual address pointer.
650
 * @flags: translated execbuffer call flags.
651
 *
652
 * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts
653
 * away the submission details of the execbuffer ioctl call.
654
 *
655
 * Return: non-zero if the submission fails.
656
 */
657
int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
658
			       struct intel_engine_cs *ring,
659
			       struct intel_context *ctx,
660
			       struct drm_i915_gem_execbuffer2 *args,
661
			       struct list_head *vmas,
662
			       struct drm_i915_gem_object *batch_obj,
663
			       u64 exec_start, u32 flags)
664
{
665
	struct drm_i915_private *dev_priv = dev->dev_private;
666
	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
667
	int instp_mode;
668
	u32 instp_mask;
669
	int ret;
670
 
671
	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
672
	instp_mask = I915_EXEC_CONSTANTS_MASK;
673
	switch (instp_mode) {
674
	case I915_EXEC_CONSTANTS_REL_GENERAL:
675
	case I915_EXEC_CONSTANTS_ABSOLUTE:
676
	case I915_EXEC_CONSTANTS_REL_SURFACE:
677
		if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
678
			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
679
			return -EINVAL;
680
		}
681
 
682
		if (instp_mode != dev_priv->relative_constants_mode) {
683
			if (instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
684
				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
685
				return -EINVAL;
686
			}
687
 
688
			/* The HW changed the meaning on this bit on gen6 */
689
			instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
690
		}
691
		break;
692
	default:
693
		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
694
		return -EINVAL;
695
	}
696
 
697
	if (args->num_cliprects != 0) {
698
		DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
699
		return -EINVAL;
700
	} else {
701
		if (args->DR4 == 0xffffffff) {
702
			DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
703
			args->DR4 = 0;
704
		}
705
 
706
		if (args->DR1 || args->DR4 || args->cliprects_ptr) {
707
			DRM_DEBUG("0 cliprects but dirt in cliprects fields\n");
708
			return -EINVAL;
709
		}
710
	}
711
 
712
	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
713
		DRM_DEBUG("sol reset is gen7 only\n");
714
		return -EINVAL;
715
	}
716
 
717
	ret = execlists_move_to_gpu(ringbuf, vmas);
718
	if (ret)
719
		return ret;
720
 
721
	if (ring == &dev_priv->ring[RCS] &&
722
	    instp_mode != dev_priv->relative_constants_mode) {
723
		ret = intel_logical_ring_begin(ringbuf, 4);
724
		if (ret)
725
			return ret;
726
 
727
		intel_logical_ring_emit(ringbuf, MI_NOOP);
728
		intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
729
		intel_logical_ring_emit(ringbuf, INSTPM);
730
		intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode);
731
		intel_logical_ring_advance(ringbuf);
732
 
733
		dev_priv->relative_constants_mode = instp_mode;
734
	}
735
 
736
	ret = ring->emit_bb_start(ringbuf, exec_start, flags);
737
	if (ret)
738
		return ret;
739
 
740
	i915_gem_execbuffer_move_to_active(vmas, ring);
741
	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
742
 
743
	return 0;
744
}
745
 
746
void intel_execlists_retire_requests(struct intel_engine_cs *ring)
747
{
748
	struct intel_ctx_submit_request *req, *tmp;
749
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
750
	unsigned long flags;
751
	struct list_head retired_list;
752
 
753
	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
754
	if (list_empty(&ring->execlist_retired_req_list))
755
		return;
756
 
757
	INIT_LIST_HEAD(&retired_list);
758
	spin_lock_irqsave(&ring->execlist_lock, flags);
759
	list_replace_init(&ring->execlist_retired_req_list, &retired_list);
760
	spin_unlock_irqrestore(&ring->execlist_lock, flags);
761
 
762
	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
763
		struct intel_context *ctx = req->ctx;
764
		struct drm_i915_gem_object *ctx_obj =
765
				ctx->engine[ring->id].state;
766
 
767
		if (ctx_obj && (ctx != ring->default_context))
768
			intel_lr_context_unpin(ring, ctx);
769
		intel_runtime_pm_put(dev_priv);
770
		i915_gem_context_unreference(req->ctx);
771
		list_del(&req->execlist_link);
772
		kfree(req);
773
	}
774
}
775
 
776
void intel_logical_ring_stop(struct intel_engine_cs *ring)
777
{
778
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
779
	int ret;
780
 
781
	if (!intel_ring_initialized(ring))
782
		return;
783
 
784
	ret = intel_ring_idle(ring);
785
	if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
786
		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
787
			  ring->name, ret);
788
 
789
	/* TODO: Is this correct with Execlists enabled? */
790
	I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
791
	if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
792
		DRM_ERROR("%s :timed out trying to stop ring\n", ring->name);
793
		return;
794
	}
795
	I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
796
}
797
 
798
int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf)
799
{
800
	struct intel_engine_cs *ring = ringbuf->ring;
801
	int ret;
802
 
803
	if (!ring->gpu_caches_dirty)
804
		return 0;
805
 
806
	ret = ring->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS);
807
	if (ret)
808
		return ret;
809
 
810
	ring->gpu_caches_dirty = false;
811
	return 0;
812
}
813
 
814
/**
815
 * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload
816
 * @ringbuf: Logical Ringbuffer to advance.
817
 *
818
 * The tail is updated in our logical ringbuffer struct, not in the actual context. What
819
 * really happens during submission is that the context and current tail will be placed
820
 * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
821
 * point, the tail *inside* the context is updated and the ELSP written to.
822
 */
823
void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
824
{
825
	struct intel_engine_cs *ring = ringbuf->ring;
826
	struct intel_context *ctx = ringbuf->FIXME_lrc_ctx;
827
 
828
	intel_logical_ring_advance(ringbuf);
829
 
830
	if (intel_ring_stopped(ring))
831
		return;
832
 
833
	execlists_context_queue(ring, ctx, ringbuf->tail);
834
}
835
 
836
static int intel_lr_context_pin(struct intel_engine_cs *ring,
837
		struct intel_context *ctx)
838
{
839
	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
840
	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
841
	int ret = 0;
842
 
843
	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
844
	if (ctx->engine[ring->id].unpin_count++ == 0) {
845
		ret = i915_gem_obj_ggtt_pin(ctx_obj,
846
				GEN8_LR_CONTEXT_ALIGN, 0);
847
		if (ret)
848
			goto reset_unpin_count;
849
 
850
		ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
851
		if (ret)
852
			goto unpin_ctx_obj;
853
	}
854
 
855
	return ret;
856
 
857
unpin_ctx_obj:
858
	i915_gem_object_ggtt_unpin(ctx_obj);
859
reset_unpin_count:
860
	ctx->engine[ring->id].unpin_count = 0;
861
 
862
	return ret;
863
}
864
 
865
void intel_lr_context_unpin(struct intel_engine_cs *ring,
866
		struct intel_context *ctx)
867
{
868
	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
869
	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
870
 
871
	if (ctx_obj) {
872
		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
873
		if (--ctx->engine[ring->id].unpin_count == 0) {
874
			intel_unpin_ringbuffer_obj(ringbuf);
875
			i915_gem_object_ggtt_unpin(ctx_obj);
876
		}
877
	}
878
}
879
 
880
static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
881
				    struct intel_context *ctx)
882
{
883
	int ret;
884
 
885
	if (ring->outstanding_lazy_seqno)
886
		return 0;
887
 
888
	if (ring->preallocated_lazy_request == NULL) {
889
		struct drm_i915_gem_request *request;
890
 
891
		request = kmalloc(sizeof(*request), GFP_KERNEL);
892
		if (request == NULL)
893
			return -ENOMEM;
894
 
895
		if (ctx != ring->default_context) {
896
			ret = intel_lr_context_pin(ring, ctx);
897
			if (ret) {
898
				kfree(request);
899
				return ret;
900
			}
901
		}
902
 
903
		/* Hold a reference to the context this request belongs to
904
		 * (we will need it when the time comes to emit/retire the
905
		 * request).
906
		 */
907
		request->ctx = ctx;
908
		i915_gem_context_reference(request->ctx);
909
 
910
		ring->preallocated_lazy_request = request;
911
	}
912
 
913
	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
914
}
915
 
916
static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
917
				     int bytes)
918
{
919
	struct intel_engine_cs *ring = ringbuf->ring;
920
	struct drm_i915_gem_request *request;
921
	u32 seqno = 0;
922
	int ret;
923
 
924
	if (ringbuf->last_retired_head != -1) {
925
		ringbuf->head = ringbuf->last_retired_head;
926
		ringbuf->last_retired_head = -1;
927
 
928
		ringbuf->space = intel_ring_space(ringbuf);
929
		if (ringbuf->space >= bytes)
930
			return 0;
931
	}
932
 
933
	list_for_each_entry(request, &ring->request_list, list) {
934
		if (__intel_ring_space(request->tail, ringbuf->tail,
935
				       ringbuf->size) >= bytes) {
936
			seqno = request->seqno;
937
			break;
938
		}
939
	}
940
 
941
	if (seqno == 0)
942
		return -ENOSPC;
943
 
944
	ret = i915_wait_seqno(ring, seqno);
945
	if (ret)
946
		return ret;
947
 
948
	i915_gem_retire_requests_ring(ring);
949
	ringbuf->head = ringbuf->last_retired_head;
950
	ringbuf->last_retired_head = -1;
951
 
952
	ringbuf->space = intel_ring_space(ringbuf);
953
	return 0;
954
}
955
 
956
static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
957
				       int bytes)
958
{
959
	struct intel_engine_cs *ring = ringbuf->ring;
960
	struct drm_device *dev = ring->dev;
961
	struct drm_i915_private *dev_priv = dev->dev_private;
962
	unsigned long end;
963
	int ret;
964
 
965
	ret = logical_ring_wait_request(ringbuf, bytes);
966
	if (ret != -ENOSPC)
967
		return ret;
968
 
969
	/* Force the context submission in case we have been skipping it */
970
	intel_logical_ring_advance_and_submit(ringbuf);
971
 
972
	/* With GEM the hangcheck timer should kick us out of the loop,
973
	 * leaving it early runs the risk of corrupting GEM state (due
974
	 * to running on almost untested codepaths). But on resume
975
	 * timers don't work yet, so prevent a complete hang in that
976
	 * case by choosing an insanely large timeout. */
977
	end = jiffies + 60 * HZ;
978
 
979
	do {
980
		ringbuf->head = I915_READ_HEAD(ring);
981
		ringbuf->space = intel_ring_space(ringbuf);
982
		if (ringbuf->space >= bytes) {
983
			ret = 0;
984
			break;
985
		}
986
 
987
		msleep(1);
988
 
989
		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
990
					   dev_priv->mm.interruptible);
991
		if (ret)
992
			break;
993
 
994
		if (time_after(jiffies, end)) {
995
			ret = -EBUSY;
996
			break;
997
		}
998
	} while (1);
999
 
1000
	return ret;
1001
}
1002
 
1003
static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf)
1004
{
1005
	uint32_t __iomem *virt;
1006
	int rem = ringbuf->size - ringbuf->tail;
1007
 
1008
	if (ringbuf->space < rem) {
1009
		int ret = logical_ring_wait_for_space(ringbuf, rem);
1010
 
1011
		if (ret)
1012
			return ret;
1013
	}
1014
 
1015
	virt = ringbuf->virtual_start + ringbuf->tail;
1016
	rem /= 4;
1017
	while (rem--)
1018
		iowrite32(MI_NOOP, virt++);
1019
 
1020
	ringbuf->tail = 0;
1021
	ringbuf->space = intel_ring_space(ringbuf);
1022
 
1023
	return 0;
1024
}
1025
 
1026
static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
1027
{
1028
	int ret;
1029
 
1030
	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
1031
		ret = logical_ring_wrap_buffer(ringbuf);
1032
		if (unlikely(ret))
1033
			return ret;
1034
	}
1035
 
1036
	if (unlikely(ringbuf->space < bytes)) {
1037
		ret = logical_ring_wait_for_space(ringbuf, bytes);
1038
		if (unlikely(ret))
1039
			return ret;
1040
	}
1041
 
1042
	return 0;
1043
}
1044
 
1045
/**
1046
 * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands
1047
 *
1048
 * @ringbuf: Logical ringbuffer.
1049
 * @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
1050
 *
1051
 * The ringbuffer might not be ready to accept the commands right away (maybe it needs to
1052
 * be wrapped, or wait a bit for the tail to be updated). This function takes care of that
1053
 * and also preallocates a request (every workload submission is still mediated through
1054
 * requests, same as it did with legacy ringbuffer submission).
1055
 *
1056
 * Return: non-zero if the ringbuffer is not ready to be written to.
1057
 */
1058
int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
1059
{
1060
	struct intel_engine_cs *ring = ringbuf->ring;
1061
	struct drm_device *dev = ring->dev;
1062
	struct drm_i915_private *dev_priv = dev->dev_private;
1063
	int ret;
1064
 
1065
	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
1066
				   dev_priv->mm.interruptible);
1067
	if (ret)
1068
		return ret;
1069
 
1070
	ret = logical_ring_prepare(ringbuf, num_dwords * sizeof(uint32_t));
1071
	if (ret)
1072
		return ret;
1073
 
1074
	/* Preallocate the olr before touching the ring */
1075
	ret = logical_ring_alloc_seqno(ring, ringbuf->FIXME_lrc_ctx);
1076
	if (ret)
1077
		return ret;
1078
 
1079
	ringbuf->space -= num_dwords * sizeof(uint32_t);
1080
	return 0;
1081
}
1082
 
1083
static int intel_logical_ring_workarounds_emit(struct intel_engine_cs *ring,
1084
					       struct intel_context *ctx)
1085
{
1086
	int ret, i;
1087
	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
1088
	struct drm_device *dev = ring->dev;
1089
	struct drm_i915_private *dev_priv = dev->dev_private;
1090
	struct i915_workarounds *w = &dev_priv->workarounds;
1091
 
1092
	if (WARN_ON(w->count == 0))
1093
		return 0;
1094
 
1095
	ring->gpu_caches_dirty = true;
1096
	ret = logical_ring_flush_all_caches(ringbuf);
1097
	if (ret)
1098
		return ret;
1099
 
1100
	ret = intel_logical_ring_begin(ringbuf, w->count * 2 + 2);
1101
	if (ret)
1102
		return ret;
1103
 
1104
	intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(w->count));
1105
	for (i = 0; i < w->count; i++) {
1106
		intel_logical_ring_emit(ringbuf, w->reg[i].addr);
1107
		intel_logical_ring_emit(ringbuf, w->reg[i].value);
1108
	}
1109
	intel_logical_ring_emit(ringbuf, MI_NOOP);
1110
 
1111
	intel_logical_ring_advance(ringbuf);
1112
 
1113
	ring->gpu_caches_dirty = true;
1114
	ret = logical_ring_flush_all_caches(ringbuf);
1115
	if (ret)
1116
		return ret;
1117
 
1118
	return 0;
1119
}
1120
 
1121
static int gen8_init_common_ring(struct intel_engine_cs *ring)
1122
{
1123
	struct drm_device *dev = ring->dev;
1124
	struct drm_i915_private *dev_priv = dev->dev_private;
1125
 
1126
	I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
1127
	I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff);
1128
 
1129
	I915_WRITE(RING_MODE_GEN7(ring),
1130
		   _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
1131
		   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
1132
	POSTING_READ(RING_MODE_GEN7(ring));
1133
	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", ring->name);
1134
 
1135
	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
1136
 
1137
	return 0;
1138
}
1139
 
1140
static int gen8_init_render_ring(struct intel_engine_cs *ring)
1141
{
1142
	struct drm_device *dev = ring->dev;
1143
	struct drm_i915_private *dev_priv = dev->dev_private;
1144
	int ret;
1145
 
1146
	ret = gen8_init_common_ring(ring);
1147
	if (ret)
1148
		return ret;
1149
 
1150
	/* We need to disable the AsyncFlip performance optimisations in order
1151
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
1152
	 * programmed to '1' on all products.
1153
	 *
1154
	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
1155
	 */
1156
	I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
1157
 
1158
	ret = intel_init_pipe_control(ring);
1159
	if (ret)
1160
		return ret;
1161
 
1162
	I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
1163
 
1164
	return init_workarounds_ring(ring);
1165
}
1166
 
1167
static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf,
1168
			      u64 offset, unsigned flags)
1169
{
1170
	bool ppgtt = !(flags & I915_DISPATCH_SECURE);
1171
	int ret;
1172
 
1173
	ret = intel_logical_ring_begin(ringbuf, 4);
1174
	if (ret)
1175
		return ret;
1176
 
1177
	/* FIXME(BDW): Address space and security selectors. */
1178
	intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
1179
	intel_logical_ring_emit(ringbuf, lower_32_bits(offset));
1180
	intel_logical_ring_emit(ringbuf, upper_32_bits(offset));
1181
	intel_logical_ring_emit(ringbuf, MI_NOOP);
1182
	intel_logical_ring_advance(ringbuf);
1183
 
1184
	return 0;
1185
}
1186
 
1187
static bool gen8_logical_ring_get_irq(struct intel_engine_cs *ring)
1188
{
1189
	struct drm_device *dev = ring->dev;
1190
	struct drm_i915_private *dev_priv = dev->dev_private;
1191
	unsigned long flags;
1192
 
1193
	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
1194
		return false;
1195
 
1196
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1197
	if (ring->irq_refcount++ == 0) {
1198
		I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
1199
		POSTING_READ(RING_IMR(ring->mmio_base));
1200
	}
1201
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1202
 
1203
	return true;
1204
}
1205
 
1206
static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring)
1207
{
1208
	struct drm_device *dev = ring->dev;
1209
	struct drm_i915_private *dev_priv = dev->dev_private;
1210
	unsigned long flags;
1211
 
1212
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1213
	if (--ring->irq_refcount == 0) {
1214
		I915_WRITE_IMR(ring, ~ring->irq_keep_mask);
1215
		POSTING_READ(RING_IMR(ring->mmio_base));
1216
	}
1217
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1218
}
1219
 
1220
static int gen8_emit_flush(struct intel_ringbuffer *ringbuf,
1221
			   u32 invalidate_domains,
1222
			   u32 unused)
1223
{
1224
	struct intel_engine_cs *ring = ringbuf->ring;
1225
	struct drm_device *dev = ring->dev;
1226
	struct drm_i915_private *dev_priv = dev->dev_private;
1227
	uint32_t cmd;
1228
	int ret;
1229
 
1230
	ret = intel_logical_ring_begin(ringbuf, 4);
1231
	if (ret)
1232
		return ret;
1233
 
1234
	cmd = MI_FLUSH_DW + 1;
1235
 
1236
	if (ring == &dev_priv->ring[VCS]) {
1237
		if (invalidate_domains & I915_GEM_GPU_DOMAINS)
1238
			cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
1239
				MI_FLUSH_DW_STORE_INDEX |
1240
				MI_FLUSH_DW_OP_STOREDW;
1241
	} else {
1242
		if (invalidate_domains & I915_GEM_DOMAIN_RENDER)
1243
			cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
1244
				MI_FLUSH_DW_OP_STOREDW;
1245
	}
1246
 
1247
	intel_logical_ring_emit(ringbuf, cmd);
1248
	intel_logical_ring_emit(ringbuf,
1249
				I915_GEM_HWS_SCRATCH_ADDR |
1250
				MI_FLUSH_DW_USE_GTT);
1251
	intel_logical_ring_emit(ringbuf, 0); /* upper addr */
1252
	intel_logical_ring_emit(ringbuf, 0); /* value */
1253
	intel_logical_ring_advance(ringbuf);
1254
 
1255
	return 0;
1256
}
1257
 
1258
static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf,
1259
				  u32 invalidate_domains,
1260
				  u32 flush_domains)
1261
{
1262
	struct intel_engine_cs *ring = ringbuf->ring;
1263
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
1264
	u32 flags = 0;
1265
	int ret;
1266
 
1267
	flags |= PIPE_CONTROL_CS_STALL;
1268
 
1269
	if (flush_domains) {
1270
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
1271
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
1272
	}
1273
 
1274
	if (invalidate_domains) {
1275
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
1276
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
1277
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
1278
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
1279
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
1280
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
1281
		flags |= PIPE_CONTROL_QW_WRITE;
1282
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
1283
	}
1284
 
1285
	ret = intel_logical_ring_begin(ringbuf, 6);
1286
	if (ret)
1287
		return ret;
1288
 
1289
	intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
1290
	intel_logical_ring_emit(ringbuf, flags);
1291
	intel_logical_ring_emit(ringbuf, scratch_addr);
1292
	intel_logical_ring_emit(ringbuf, 0);
1293
	intel_logical_ring_emit(ringbuf, 0);
1294
	intel_logical_ring_emit(ringbuf, 0);
1295
	intel_logical_ring_advance(ringbuf);
1296
 
1297
	return 0;
1298
}
1299
 
1300
/*
 * Read the current seqno from the I915_GEM_HWS_INDEX slot of the engine's
 * status page. The lazy_coherency argument is not consulted.
 */
static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
{
	const u32 seqno = intel_read_status_page(ring, I915_GEM_HWS_INDEX);

	return seqno;
}
1304
 
1305
/*
 * Store @seqno into the I915_GEM_HWS_INDEX slot of the engine's status
 * page (the same slot gen8_get_seqno() reads).
 */
static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
{
	const int slot = I915_GEM_HWS_INDEX;

	intel_write_status_page(ring, slot, seqno);
}
1309
 
1310
static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
1311
{
1312
	struct intel_engine_cs *ring = ringbuf->ring;
1313
	u32 cmd;
1314
	int ret;
1315
 
1316
	ret = intel_logical_ring_begin(ringbuf, 6);
1317
	if (ret)
1318
		return ret;
1319
 
1320
	cmd = MI_STORE_DWORD_IMM_GEN8;
1321
	cmd |= MI_GLOBAL_GTT;
1322
 
1323
	intel_logical_ring_emit(ringbuf, cmd);
1324
	intel_logical_ring_emit(ringbuf,
1325
				(ring->status_page.gfx_addr +
1326
				(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
1327
	intel_logical_ring_emit(ringbuf, 0);
1328
	intel_logical_ring_emit(ringbuf, ring->outstanding_lazy_seqno);
1329
	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
1330
	intel_logical_ring_emit(ringbuf, MI_NOOP);
1331
	intel_logical_ring_advance_and_submit(ringbuf);
1332
 
1333
	return 0;
1334
}
1335
 
1336
/**
1337
 * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
1338
 *
1339
 * @ring: Engine Command Streamer.
1340
 *
1341
 */
1342
void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
1343
{
1344
	struct drm_i915_private *dev_priv;
1345
 
1346
	if (!intel_ring_initialized(ring))
1347
		return;
1348
 
1349
	dev_priv = ring->dev->dev_private;
1350
 
1351
	intel_logical_ring_stop(ring);
1352
	WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
1353
	ring->preallocated_lazy_request = NULL;
1354
	ring->outstanding_lazy_seqno = 0;
1355
 
1356
	if (ring->cleanup)
1357
		ring->cleanup(ring);
1358
 
1359
	i915_cmd_parser_fini_ring(ring);
1360
 
1361
	if (ring->status_page.obj) {
1362
		kunmap(sg_page(ring->status_page.obj->pages->sgl));
1363
		ring->status_page.obj = NULL;
1364
	}
1365
}
1366
 
1367
static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring)
1368
{
1369
	int ret;
1370
 
1371
	/* Intentionally left blank. */
1372
	ring->buffer = NULL;
1373
 
1374
	ring->dev = dev;
1375
	INIT_LIST_HEAD(&ring->active_list);
1376
	INIT_LIST_HEAD(&ring->request_list);
1377
	init_waitqueue_head(&ring->irq_queue);
1378
 
1379
	INIT_LIST_HEAD(&ring->execlist_queue);
1380
	INIT_LIST_HEAD(&ring->execlist_retired_req_list);
1381
	spin_lock_init(&ring->execlist_lock);
1382
	ring->next_context_status_buffer = 0;
1383
 
1384
	ret = i915_cmd_parser_init_ring(ring);
1385
	if (ret)
1386
		return ret;
1387
 
1388
	if (ring->init) {
1389
		ret = ring->init(ring);
1390
		if (ret)
1391
			return ret;
1392
	}
1393
 
1394
	ret = intel_lr_context_deferred_create(ring->default_context, ring);
1395
 
1396
	return ret;
1397
}
1398
 
1399
static int logical_render_ring_init(struct drm_device *dev)
1400
{
1401
	struct drm_i915_private *dev_priv = dev->dev_private;
1402
	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
1403
 
1404
	ring->name = "render ring";
1405
	ring->id = RCS;
1406
	ring->mmio_base = RENDER_RING_BASE;
1407
	ring->irq_enable_mask =
1408
		GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
1409
	ring->irq_keep_mask =
1410
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
1411
	if (HAS_L3_DPF(dev))
1412
		ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
1413
 
1414
	ring->init = gen8_init_render_ring;
1415
	ring->init_context = intel_logical_ring_workarounds_emit;
1416
	ring->cleanup = intel_fini_pipe_control;
1417
	ring->get_seqno = gen8_get_seqno;
1418
	ring->set_seqno = gen8_set_seqno;
1419
	ring->emit_request = gen8_emit_request;
1420
	ring->emit_flush = gen8_emit_flush_render;
1421
	ring->irq_get = gen8_logical_ring_get_irq;
1422
	ring->irq_put = gen8_logical_ring_put_irq;
1423
	ring->emit_bb_start = gen8_emit_bb_start;
1424
 
1425
	return logical_ring_init(dev, ring);
1426
}
1427
 
1428
static int logical_bsd_ring_init(struct drm_device *dev)
1429
{
1430
	struct drm_i915_private *dev_priv = dev->dev_private;
1431
	struct intel_engine_cs *ring = &dev_priv->ring[VCS];
1432
 
1433
	ring->name = "bsd ring";
1434
	ring->id = VCS;
1435
	ring->mmio_base = GEN6_BSD_RING_BASE;
1436
	ring->irq_enable_mask =
1437
		GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
1438
	ring->irq_keep_mask =
1439
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
1440
 
1441
	ring->init = gen8_init_common_ring;
1442
	ring->get_seqno = gen8_get_seqno;
1443
	ring->set_seqno = gen8_set_seqno;
1444
	ring->emit_request = gen8_emit_request;
1445
	ring->emit_flush = gen8_emit_flush;
1446
	ring->irq_get = gen8_logical_ring_get_irq;
1447
	ring->irq_put = gen8_logical_ring_put_irq;
1448
	ring->emit_bb_start = gen8_emit_bb_start;
1449
 
1450
	return logical_ring_init(dev, ring);
1451
}
1452
 
1453
static int logical_bsd2_ring_init(struct drm_device *dev)
1454
{
1455
	struct drm_i915_private *dev_priv = dev->dev_private;
1456
	struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
1457
 
1458
	ring->name = "bds2 ring";
1459
	ring->id = VCS2;
1460
	ring->mmio_base = GEN8_BSD2_RING_BASE;
1461
	ring->irq_enable_mask =
1462
		GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
1463
	ring->irq_keep_mask =
1464
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
1465
 
1466
	ring->init = gen8_init_common_ring;
1467
	ring->get_seqno = gen8_get_seqno;
1468
	ring->set_seqno = gen8_set_seqno;
1469
	ring->emit_request = gen8_emit_request;
1470
	ring->emit_flush = gen8_emit_flush;
1471
	ring->irq_get = gen8_logical_ring_get_irq;
1472
	ring->irq_put = gen8_logical_ring_put_irq;
1473
	ring->emit_bb_start = gen8_emit_bb_start;
1474
 
1475
	return logical_ring_init(dev, ring);
1476
}
1477
 
1478
static int logical_blt_ring_init(struct drm_device *dev)
1479
{
1480
	struct drm_i915_private *dev_priv = dev->dev_private;
1481
	struct intel_engine_cs *ring = &dev_priv->ring[BCS];
1482
 
1483
	ring->name = "blitter ring";
1484
	ring->id = BCS;
1485
	ring->mmio_base = BLT_RING_BASE;
1486
	ring->irq_enable_mask =
1487
		GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
1488
	ring->irq_keep_mask =
1489
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
1490
 
1491
	ring->init = gen8_init_common_ring;
1492
	ring->get_seqno = gen8_get_seqno;
1493
	ring->set_seqno = gen8_set_seqno;
1494
	ring->emit_request = gen8_emit_request;
1495
	ring->emit_flush = gen8_emit_flush;
1496
	ring->irq_get = gen8_logical_ring_get_irq;
1497
	ring->irq_put = gen8_logical_ring_put_irq;
1498
	ring->emit_bb_start = gen8_emit_bb_start;
1499
 
1500
	return logical_ring_init(dev, ring);
1501
}
1502
 
1503
static int logical_vebox_ring_init(struct drm_device *dev)
1504
{
1505
	struct drm_i915_private *dev_priv = dev->dev_private;
1506
	struct intel_engine_cs *ring = &dev_priv->ring[VECS];
1507
 
1508
	ring->name = "video enhancement ring";
1509
	ring->id = VECS;
1510
	ring->mmio_base = VEBOX_RING_BASE;
1511
	ring->irq_enable_mask =
1512
		GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
1513
	ring->irq_keep_mask =
1514
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
1515
 
1516
	ring->init = gen8_init_common_ring;
1517
	ring->get_seqno = gen8_get_seqno;
1518
	ring->set_seqno = gen8_set_seqno;
1519
	ring->emit_request = gen8_emit_request;
1520
	ring->emit_flush = gen8_emit_flush;
1521
	ring->irq_get = gen8_logical_ring_get_irq;
1522
	ring->irq_put = gen8_logical_ring_put_irq;
1523
	ring->emit_bb_start = gen8_emit_bb_start;
1524
 
1525
	return logical_ring_init(dev, ring);
1526
}
1527
 
1528
/**
1529
 * intel_logical_rings_init() - allocate, populate and init the Engine Command Streamers
1530
 * @dev: DRM device.
1531
 *
1532
 * This function inits the engines for an Execlists submission style (the equivalent in the
1533
 * legacy ringbuffer submission world would be i915_gem_init_rings). It does it only for
1534
 * those engines that are present in the hardware.
1535
 *
1536
 * Return: non-zero if the initialization failed.
1537
 */
1538
int intel_logical_rings_init(struct drm_device *dev)
1539
{
1540
	struct drm_i915_private *dev_priv = dev->dev_private;
1541
	int ret;
1542
 
1543
	ret = logical_render_ring_init(dev);
1544
	if (ret)
1545
		return ret;
1546
 
1547
	if (HAS_BSD(dev)) {
1548
		ret = logical_bsd_ring_init(dev);
1549
		if (ret)
1550
			goto cleanup_render_ring;
1551
	}
1552
 
1553
	if (HAS_BLT(dev)) {
1554
		ret = logical_blt_ring_init(dev);
1555
		if (ret)
1556
			goto cleanup_bsd_ring;
1557
	}
1558
 
1559
	if (HAS_VEBOX(dev)) {
1560
		ret = logical_vebox_ring_init(dev);
1561
		if (ret)
1562
			goto cleanup_blt_ring;
1563
	}
1564
 
1565
	if (HAS_BSD2(dev)) {
1566
		ret = logical_bsd2_ring_init(dev);
1567
		if (ret)
1568
			goto cleanup_vebox_ring;
1569
	}
1570
 
1571
	ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
1572
	if (ret)
1573
		goto cleanup_bsd2_ring;
1574
 
1575
	return 0;
1576
 
1577
cleanup_bsd2_ring:
1578
	intel_logical_ring_cleanup(&dev_priv->ring[VCS2]);
1579
cleanup_vebox_ring:
1580
	intel_logical_ring_cleanup(&dev_priv->ring[VECS]);
1581
cleanup_blt_ring:
1582
	intel_logical_ring_cleanup(&dev_priv->ring[BCS]);
1583
cleanup_bsd_ring:
1584
	intel_logical_ring_cleanup(&dev_priv->ring[VCS]);
1585
cleanup_render_ring:
1586
	intel_logical_ring_cleanup(&dev_priv->ring[RCS]);
1587
 
1588
	return ret;
1589
}
1590
 
1591
int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
1592
				       struct intel_context *ctx)
1593
{
1594
	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
1595
	struct render_state so;
1596
	struct drm_i915_file_private *file_priv = ctx->file_priv;
1597
	struct drm_file *file = file_priv ? file_priv->file : NULL;
1598
	int ret;
1599
 
1600
	ret = i915_gem_render_state_prepare(ring, &so);
1601
	if (ret)
1602
		return ret;
1603
 
1604
	if (so.rodata == NULL)
1605
		return 0;
1606
 
1607
	ret = ring->emit_bb_start(ringbuf,
1608
			so.ggtt_offset,
1609
			I915_DISPATCH_SECURE);
1610
	if (ret)
1611
		goto out;
1612
 
1613
	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
1614
 
1615
	ret = __i915_add_request(ring, file, so.obj, NULL);
1616
	/* intel_logical_ring_add_request moves object to inactive if it
1617
	 * fails */
1618
out:
1619
	i915_gem_render_state_fini(&so);
1620
	return ret;
1621
}
1622
 
1623
static int
1624
populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj,
1625
		    struct intel_engine_cs *ring, struct intel_ringbuffer *ringbuf)
1626
{
1627
	struct drm_device *dev = ring->dev;
1628
	struct drm_i915_private *dev_priv = dev->dev_private;
1629
	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
1630
	struct page *page;
1631
	uint32_t *reg_state;
1632
	int ret;
1633
 
1634
	if (!ppgtt)
1635
		ppgtt = dev_priv->mm.aliasing_ppgtt;
1636
 
1637
	ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
1638
	if (ret) {
1639
		DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
1640
		return ret;
1641
	}
1642
 
1643
	ret = i915_gem_object_get_pages(ctx_obj);
1644
	if (ret) {
1645
		DRM_DEBUG_DRIVER("Could not get object pages\n");
1646
		return ret;
1647
	}
1648
 
1649
	i915_gem_object_pin_pages(ctx_obj);
1650
 
1651
	/* The second page of the context object contains some fields which must
1652
	 * be set up prior to the first execution. */
1653
	page = i915_gem_object_get_page(ctx_obj, 1);
1654
	reg_state = kmap_atomic(page);
1655
 
1656
	/* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM
1657
	 * commands followed by (reg, value) pairs. The values we are setting here are
1658
	 * only for the first context restore: on a subsequent save, the GPU will
1659
	 * recreate this batchbuffer with new values (including all the missing
1660
	 * MI_LOAD_REGISTER_IMM commands that we are not initializing here). */
1661
	if (ring->id == RCS)
1662
		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(14);
1663
	else
1664
		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(11);
1665
	reg_state[CTX_LRI_HEADER_0] |= MI_LRI_FORCE_POSTED;
1666
	reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring);
1667
	reg_state[CTX_CONTEXT_CONTROL+1] =
1668
			_MASKED_BIT_ENABLE((1<<3) | MI_RESTORE_INHIBIT);
1669
	reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base);
1670
	reg_state[CTX_RING_HEAD+1] = 0;
1671
	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
1672
	reg_state[CTX_RING_TAIL+1] = 0;
1673
	reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
1674
	/* Ring buffer start address is not known until the buffer is pinned.
1675
	 * It is written to the context image in execlists_update_context()
1676
	 */
1677
	reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base);
1678
	reg_state[CTX_RING_BUFFER_CONTROL+1] =
1679
			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID;
1680
	reg_state[CTX_BB_HEAD_U] = ring->mmio_base + 0x168;
1681
	reg_state[CTX_BB_HEAD_U+1] = 0;
1682
	reg_state[CTX_BB_HEAD_L] = ring->mmio_base + 0x140;
1683
	reg_state[CTX_BB_HEAD_L+1] = 0;
1684
	reg_state[CTX_BB_STATE] = ring->mmio_base + 0x110;
1685
	reg_state[CTX_BB_STATE+1] = (1<<5);
1686
	reg_state[CTX_SECOND_BB_HEAD_U] = ring->mmio_base + 0x11c;
1687
	reg_state[CTX_SECOND_BB_HEAD_U+1] = 0;
1688
	reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114;
1689
	reg_state[CTX_SECOND_BB_HEAD_L+1] = 0;
1690
	reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
1691
	reg_state[CTX_SECOND_BB_STATE+1] = 0;
1692
	if (ring->id == RCS) {
1693
		/* TODO: according to BSpec, the register state context
1694
		 * for CHV does not have these. OTOH, these registers do
1695
		 * exist in CHV. I'm waiting for a clarification */
1696
		reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0;
1697
		reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
1698
		reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4;
1699
		reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
1700
		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8;
1701
		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
1702
	}
1703
	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
1704
	reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;
1705
	reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8;
1706
	reg_state[CTX_CTX_TIMESTAMP+1] = 0;
1707
	reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3);
1708
	reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3);
1709
	reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2);
1710
	reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2);
1711
	reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1);
1712
	reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
1713
	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
1714
	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
1715
	reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[3]);
1716
	reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[3]);
1717
	reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[2]);
1718
	reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[2]);
1719
	reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[1]);
1720
	reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[1]);
1721
	reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[0]);
1722
	reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[0]);
1723
	if (ring->id == RCS) {
1724
		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
1725
		reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8;
1726
		reg_state[CTX_R_PWR_CLK_STATE+1] = 0;
1727
	}
1728
 
1729
	kunmap_atomic(reg_state);
1730
 
1731
	ctx_obj->dirty = 1;
1732
//   set_page_dirty(page);
1733
	i915_gem_object_unpin_pages(ctx_obj);
1734
 
1735
	return 0;
1736
}
1737
 
1738
/**
1739
 * intel_lr_context_free() - free the LRC specific bits of a context
1740
 * @ctx: the LR context to free.
1741
 *
1742
 * The real context freeing is done in i915_gem_context_free: this only
1743
 * takes care of the bits that are LRC related: the per-engine backing
1744
 * objects and the logical ringbuffer.
1745
 */
1746
void intel_lr_context_free(struct intel_context *ctx)
1747
{
1748
	int i;
1749
 
1750
	for (i = 0; i < I915_NUM_RINGS; i++) {
1751
		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
1752
 
1753
		if (ctx_obj) {
1754
			struct intel_ringbuffer *ringbuf =
1755
					ctx->engine[i].ringbuf;
1756
			struct intel_engine_cs *ring = ringbuf->ring;
1757
 
1758
			if (ctx == ring->default_context) {
1759
				intel_unpin_ringbuffer_obj(ringbuf);
1760
				i915_gem_object_ggtt_unpin(ctx_obj);
1761
			}
1762
			intel_destroy_ringbuffer_obj(ringbuf);
1763
			kfree(ringbuf);
1764
			drm_gem_object_unreference(&ctx_obj->base);
1765
		}
1766
	}
1767
}
1768
 
1769
static uint32_t get_lr_context_size(struct intel_engine_cs *ring)
1770
{
1771
	int ret = 0;
1772
 
1773
	WARN_ON(INTEL_INFO(ring->dev)->gen < 8);
1774
 
1775
	switch (ring->id) {
1776
	case RCS:
1777
		if (INTEL_INFO(ring->dev)->gen >= 9)
1778
			ret = GEN9_LR_CONTEXT_RENDER_SIZE;
1779
		else
1780
			ret = GEN8_LR_CONTEXT_RENDER_SIZE;
1781
		break;
1782
	case VCS:
1783
	case BCS:
1784
	case VECS:
1785
	case VCS2:
1786
		ret = GEN8_LR_CONTEXT_OTHER_SIZE;
1787
		break;
1788
	}
1789
 
1790
	return ret;
1791
}
1792
 
1793
static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
1794
		struct drm_i915_gem_object *default_ctx_obj)
1795
{
1796
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
1797
 
1798
	/* The status page is offset 0 from the default context object
1799
	 * in LRC mode. */
1800
	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj);
1801
	ring->status_page.page_addr =
1802
			kmap(sg_page(default_ctx_obj->pages->sgl));
1803
	ring->status_page.obj = default_ctx_obj;
1804
 
1805
	I915_WRITE(RING_HWS_PGA(ring->mmio_base),
1806
			(u32)ring->status_page.gfx_addr);
1807
	POSTING_READ(RING_HWS_PGA(ring->mmio_base));
1808
}
1809
 
1810
/**
1811
 * intel_lr_context_deferred_create() - create the LRC specific bits of a context
1812
 * @ctx: LR context to create.
1813
 * @ring: engine to be used with the context.
1814
 *
1815
 * This function can be called more than once, with different engines, if we plan
1816
 * to use the context with them. The context backing objects and the ringbuffers
1817
 * (especially the ringbuffer backing objects) suck a lot of memory up, and that's why
1818
 * the creation is a deferred call: it's better to make sure first that we need to use
1819
 * a given ring with the context.
1820
 *
1821
 * Return: non-zero on error.
1822
 */
1823
int intel_lr_context_deferred_create(struct intel_context *ctx,
1824
				     struct intel_engine_cs *ring)
1825
{
1826
	const bool is_global_default_ctx = (ctx == ring->default_context);
1827
	struct drm_device *dev = ring->dev;
1828
	struct drm_i915_gem_object *ctx_obj;
1829
	uint32_t context_size;
1830
	struct intel_ringbuffer *ringbuf;
1831
	int ret;
1832
 
1833
	WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL);
1834
	if (ctx->engine[ring->id].state)
1835
		return 0;
1836
 
1837
	context_size = round_up(get_lr_context_size(ring), 4096);
1838
 
1839
	ctx_obj = i915_gem_alloc_context_obj(dev, context_size);
1840
	if (IS_ERR(ctx_obj)) {
1841
		ret = PTR_ERR(ctx_obj);
1842
		DRM_DEBUG_DRIVER("Alloc LRC backing obj failed: %d\n", ret);
1843
		return ret;
1844
	}
1845
 
1846
	if (is_global_default_ctx) {
1847
		ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
1848
		if (ret) {
1849
			DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n",
1850
					ret);
1851
			drm_gem_object_unreference(&ctx_obj->base);
1852
			return ret;
1853
		}
1854
	}
1855
 
1856
	ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
1857
	if (!ringbuf) {
1858
		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
1859
				ring->name);
1860
		ret = -ENOMEM;
1861
		goto error_unpin_ctx;
1862
	}
1863
 
1864
	ringbuf->ring = ring;
1865
	ringbuf->FIXME_lrc_ctx = ctx;
1866
 
1867
	ringbuf->size = 32 * PAGE_SIZE;
1868
	ringbuf->effective_size = ringbuf->size;
1869
	ringbuf->head = 0;
1870
	ringbuf->tail = 0;
1871
	ringbuf->space = ringbuf->size;
1872
	ringbuf->last_retired_head = -1;
1873
 
1874
	if (ringbuf->obj == NULL) {
1875
		ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
1876
		if (ret) {
1877
			DRM_DEBUG_DRIVER(
1878
				"Failed to allocate ringbuffer obj %s: %d\n",
1879
				ring->name, ret);
1880
			goto error_free_rbuf;
1881
		}
1882
 
1883
		if (is_global_default_ctx) {
1884
			ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
1885
			if (ret) {
1886
				DRM_ERROR(
1887
					"Failed to pin and map ringbuffer %s: %d\n",
1888
					ring->name, ret);
1889
				goto error_destroy_rbuf;
1890
			}
1891
		}
1892
 
1893
	}
1894
 
1895
	ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf);
1896
	if (ret) {
1897
		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
1898
		goto error;
1899
	}
1900
 
1901
	ctx->engine[ring->id].ringbuf = ringbuf;
1902
	ctx->engine[ring->id].state = ctx_obj;
1903
 
1904
	if (ctx == ring->default_context)
1905
		lrc_setup_hardware_status_page(ring, ctx_obj);
1906
 
1907
	if (ring->id == RCS && !ctx->rcs_initialized) {
1908
		if (ring->init_context) {
1909
			ret = ring->init_context(ring, ctx);
1910
			if (ret)
1911
				DRM_ERROR("ring init context: %d\n", ret);
1912
		}
1913
 
1914
		ret = intel_lr_context_render_state_init(ring, ctx);
1915
		if (ret) {
1916
			DRM_ERROR("Init render state failed: %d\n", ret);
1917
			ctx->engine[ring->id].ringbuf = NULL;
1918
			ctx->engine[ring->id].state = NULL;
1919
			goto error;
1920
		}
1921
		ctx->rcs_initialized = true;
1922
	}
1923
 
1924
	return 0;
1925
 
1926
error:
1927
	if (is_global_default_ctx)
1928
		intel_unpin_ringbuffer_obj(ringbuf);
1929
error_destroy_rbuf:
1930
	intel_destroy_ringbuffer_obj(ringbuf);
1931
error_free_rbuf:
1932
	kfree(ringbuf);
1933
error_unpin_ctx:
1934
	if (is_global_default_ctx)
1935
		i915_gem_object_ggtt_unpin(ctx_obj);
1936
	drm_gem_object_unreference(&ctx_obj->base);
1937
	return ret;
1938
}