Subversion Repositories Kolibri OS

Rev

Rev 5354 | Rev 6088 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
5354 serge 1
/*
2
 * Copyright © 2014 Intel Corporation
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 * IN THE SOFTWARE.
22
 *
23
 * Authors:
24
 *    Ben Widawsky 
25
 *    Michel Thierry 
26
 *    Thomas Daniel 
27
 *    Oscar Mateo 
28
 *
29
 */
30
 
31
/**
32
 * DOC: Logical Rings, Logical Ring Contexts and Execlists
33
 *
34
 * Motivation:
35
 * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
36
 * These expanded contexts enable a number of new abilities, especially
37
 * "Execlists" (also implemented in this file).
38
 *
39
 * One of the main differences with the legacy HW contexts is that logical
40
 * ring contexts incorporate many more things to the context's state, like
41
 * PDPs or ringbuffer control registers:
42
 *
43
 * The reason why PDPs are included in the context is straightforward: as
44
 * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
45
 * contained there means you don't need to do a ppgtt->switch_mm yourself,
46
 * instead, the GPU will do it for you on the context switch.
47
 *
48
 * But, what about the ringbuffer control registers (head, tail, etc..)?
49
 * shouldn't we just need a set of those per engine command streamer? This is
50
 * where the name "Logical Rings" starts to make sense: by virtualizing the
51
 * rings, the engine cs shifts to a new "ring buffer" with every context
52
 * switch. When you want to submit a workload to the GPU you: A) choose your
53
 * context, B) find its appropriate virtualized ring, C) write commands to it
54
 * and then, finally, D) tell the GPU to switch to that context.
55
 *
56
 * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
57
 * to a context is via a context execution list, ergo "Execlists".
58
 *
59
 * LRC implementation:
60
 * Regarding the creation of contexts, we have:
61
 *
62
 * - One global default context.
63
 * - One local default context for each opened fd.
64
 * - One local extra context for each context create ioctl call.
65
 *
66
 * Now that ringbuffers belong per-context (and not per-engine, like before)
67
 * and that contexts are uniquely tied to a given engine (and not reusable,
68
 * like before) we need:
69
 *
70
 * - One ringbuffer per-engine inside each context.
71
 * - One backing object per-engine inside each context.
72
 *
73
 * The global default context starts its life with these new objects fully
74
 * allocated and populated. The local default context for each opened fd is
75
 * more complex, because we don't know at creation time which engine is going
76
 * to use them. To handle this, we have implemented a deferred creation of LR
77
 * contexts:
78
 *
79
 * The local context starts its life as a hollow or blank holder, that only
80
 * gets populated for a given engine once we receive an execbuffer. If later
81
 * on we receive another execbuffer ioctl for the same context but a different
82
 * engine, we allocate/populate a new ringbuffer and context backing object and
83
 * so on.
84
 *
85
 * Finally, regarding local contexts created using the ioctl call: as they are
86
 * only allowed with the render ring, we can allocate & populate them right
87
 * away (no need to defer anything, at least for now).
88
 *
89
 * Execlists implementation:
90
 * Execlists are the new method by which, on gen8+ hardware, workloads are
91
 * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
92
 * This method works as follows:
93
 *
94
 * When a request is committed, its commands (the BB start and any leading or
95
 * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
96
 * for the appropriate context. The tail pointer in the hardware context is not
97
 * updated at this time, but instead, kept by the driver in the ringbuffer
98
 * structure. A structure representing this request is added to a request queue
99
 * for the appropriate engine: this structure contains a copy of the context's
100
 * tail after the request was written to the ring buffer and a pointer to the
101
 * context itself.
102
 *
103
 * If the engine's request queue was empty before the request was added, the
104
 * queue is processed immediately. Otherwise the queue will be processed during
105
 * a context switch interrupt. In any case, elements on the queue will get sent
106
 * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
107
 * globally unique 20-bits submission ID.
108
 *
109
 * When execution of a request completes, the GPU updates the context status
110
 * buffer with a context complete event and generates a context switch interrupt.
111
 * During the interrupt handling, the driver examines the events in the buffer:
112
 * for each context complete event, if the announced ID matches that on the head
113
 * of the request queue, then that request is retired and removed from the queue.
114
 *
115
 * After processing, if any requests were retired and the queue is not empty
116
 * then a new execution list can be submitted. The two requests at the front of
117
 * the queue are next to be submitted but since a context may not occur twice in
118
 * an execution list, if subsequent requests have the same ID as the first then
119
 * the two requests must be combined. This is done simply by discarding requests
120
 * at the head of the queue until either only one request is left (in which case
121
 * we use a NULL second context) or the first two requests have unique IDs.
122
 *
123
 * By always executing the first two requests in the queue the driver ensures
124
 * that the GPU is kept as busy as possible. In the case where a single context
125
 * completes but a second context is still executing, the request for this second
126
 * context will be at the head of the queue when we remove the first one. This
127
 * request will then be resubmitted along with a new request for a different context,
128
 * which will cause the hardware to continue executing the second request and queue
129
 * the new request (the GPU detects the condition of a context getting preempted
130
 * with the same context and optimizes the context switch flow by not doing
131
 * preemption, but just sampling the new tail pointer).
132
 *
133
 */
134
 
135
#include 
136
#include 
137
#include "intel_drv.h"
6084 serge 138
#include "intel_mocs.h"
5354 serge 139
 
140
/* Logical ring context sizes, expressed as a number of pages. */
#define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
#define GEN8_LR_CONTEXT_OTHER_SIZE	(2 * PAGE_SIZE)

/* RING_EXECLIST_* status bits. */
#define RING_EXECLIST_QFULL		(1 << 2)
#define RING_EXECLIST1_VALID		(1 << 3)
#define RING_EXECLIST0_VALID		(1 << 4)
#define RING_EXECLIST_ACTIVE_STATUS	(3 << 14)
#define RING_EXECLIST1_ACTIVE		(1 << 17)
#define RING_EXECLIST0_ACTIVE		(1 << 18)

/* Event bits tested in each context status buffer entry. */
#define GEN8_CTX_STATUS_IDLE_ACTIVE	(1 << 0)
#define GEN8_CTX_STATUS_PREEMPTED	(1 << 1)
#define GEN8_CTX_STATUS_ELEMENT_SWITCH	(1 << 2)
#define GEN8_CTX_STATUS_ACTIVE_IDLE	(1 << 3)
#define GEN8_CTX_STATUS_COMPLETE	(1 << 4)
#define GEN8_CTX_STATUS_LITE_RESTORE	(1 << 15)

/*
 * Dword indices into the context register state page. The code in this
 * file stores a register's value at "index + 1".
 */
#define CTX_LRI_HEADER_0		0x01
#define CTX_CONTEXT_CONTROL		0x02
#define CTX_RING_HEAD			0x04
#define CTX_RING_TAIL			0x06
#define CTX_RING_BUFFER_START		0x08
#define CTX_RING_BUFFER_CONTROL		0x0a
#define CTX_BB_HEAD_U			0x0c
#define CTX_BB_HEAD_L			0x0e
#define CTX_BB_STATE			0x10
#define CTX_SECOND_BB_HEAD_U		0x12
#define CTX_SECOND_BB_HEAD_L		0x14
#define CTX_SECOND_BB_STATE		0x16
#define CTX_BB_PER_CTX_PTR		0x18
#define CTX_RCS_INDIRECT_CTX		0x1a
#define CTX_RCS_INDIRECT_CTX_OFFSET	0x1c
#define CTX_LRI_HEADER_1		0x21
#define CTX_CTX_TIMESTAMP		0x22
#define CTX_PDP3_UDW			0x24
#define CTX_PDP3_LDW			0x26
#define CTX_PDP2_UDW			0x28
#define CTX_PDP2_LDW			0x2a
#define CTX_PDP1_UDW			0x2c
#define CTX_PDP1_LDW			0x2e
#define CTX_PDP0_UDW			0x30
#define CTX_PDP0_LDW			0x32
#define CTX_LRI_HEADER_2		0x41
#define CTX_R_PWR_CLK_STATE		0x42
#define CTX_GPGPU_CSR_BASE_ADDRESS	0x44

/* Flag bits OR-ed into the low dword of a context descriptor. */
#define GEN8_CTX_VALID			(1 << 0)
#define GEN8_CTX_FORCE_PD_RESTORE	(1 << 1)
#define GEN8_CTX_FORCE_RESTORE		(1 << 2)
#define GEN8_CTX_L3LLC_COHERENT		(1 << 5)
#define GEN8_CTX_PRIVILEGE		(1 << 8)
6084 serge 192
 
193
/*
 * ASSIGN_CTX_PDP() - store page directory pointer @n in a context image.
 * @ppgtt: passed through to i915_page_dir_dma_addr().
 * @reg_state: array of 32-bit context register state dwords.
 * @n: literal PDP number; it is token-pasted into CTX_PDP<n>_UDW/_LDW.
 *
 * The 64-bit value returned by i915_page_dir_dma_addr(ppgtt, n) is split
 * into its upper and lower halves, written at CTX_PDP<n>_UDW + 1 and
 * CTX_PDP<n>_LDW + 1 respectively.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement,
 * including inside an unbraced if/else.
 */
#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do { \
	const u64 _addr = i915_page_dir_dma_addr((ppgtt), (n)); \
	(reg_state)[CTX_PDP ## n ## _UDW + 1] = upper_32_bits(_addr); \
	(reg_state)[CTX_PDP ## n ## _LDW + 1] = lower_32_bits(_addr); \
} while (0)
198
 
199
/*
 * ASSIGN_CTX_PML4() - store the PML4 address in the PDP0 slots of a context
 * image.
 * @ppgtt: pointer to an object with a pml4 member accepted by px_dma().
 * @reg_state: array of 32-bit context register state dwords.
 *
 * px_dma(&ppgtt->pml4) is evaluated once; its upper and lower 32 bits are
 * written at CTX_PDP0_UDW + 1 and CTX_PDP0_LDW + 1.
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement,
 * including inside an unbraced if/else.
 */
#define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \
	const u64 _addr = px_dma(&(ppgtt)->pml4); \
	(reg_state)[CTX_PDP0_UDW + 1] = upper_32_bits(_addr); \
	(reg_state)[CTX_PDP0_LDW + 1] = lower_32_bits(_addr); \
} while (0)
203
 
5354 serge 204
/* Addressing mode values placed in a context descriptor. */
enum {
	ADVANCED_CONTEXT	= 0,
	LEGACY_32B_CONTEXT	= 1,
	ADVANCED_AD_CONTEXT	= 2,
	LEGACY_64B_CONTEXT	= 3,
};

/* Bit position of the addressing mode inside the descriptor. */
#define GEN8_CTX_ADDRESSING_MODE_SHIFT	3

/* 64b legacy mode with full 48-bit PPGTT, 32b legacy mode otherwise. */
#define GEN8_CTX_ADDRESSING_MODE(dev) \
	(USES_FULL_48BIT_PPGTT(dev) ? LEGACY_64B_CONTEXT : LEGACY_32B_CONTEXT)
5354 serge 214
/* Fault handling models, numbered consecutively from zero. */
enum {
	FAULT_AND_HANG		= 0,
	FAULT_AND_HALT		= 1,	/* Debug only */
	FAULT_AND_STREAM	= 2,
	FAULT_AND_CONTINUE	= 3,	/* Unsupported */
};

/* The execlists context ID lives in the upper dword of the descriptor. */
#define GEN8_CTX_ID_SHIFT			32
#define CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT	0x17
5354 serge 222
 
6084 serge 223
/* Prototypes for file-local helpers declared ahead of their use. */
static int intel_lr_context_pin(struct drm_i915_gem_request *rq);
static void
lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
			       struct drm_i915_gem_object *default_ctx_obj);
5354 serge 226
 
6084 serge 227
 
5354 serge 228
/**
229
 * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
230
 * @dev: DRM device.
231
 * @enable_execlists: value of i915.enable_execlists module parameter.
232
 *
233
 * Only certain platforms support Execlists (the prerequisites being
6084 serge 234
 * support for Logical Ring Contexts and Aliasing PPGTT or better).
5354 serge 235
 *
236
 * Return: 1 if Execlists is supported and has to be enabled.
237
 */
238
int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists)
239
{
240
	WARN_ON(i915.enable_ppgtt == -1);
241
 
6084 serge 242
	/* On platforms with execlist available, vGPU will only
243
	 * support execlist mode, no ring buffer mode.
244
	 */
245
	if (HAS_LOGICAL_RING_CONTEXTS(dev) && intel_vgpu_active(dev))
246
		return 1;
247
 
5354 serge 248
	if (INTEL_INFO(dev)->gen >= 9)
249
		return 1;
250
 
251
	if (enable_execlists == 0)
252
		return 0;
253
 
254
	if (HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev) &&
255
	    i915.use_mmio_flip >= 0)
256
		return 1;
257
 
258
	return 0;
259
}
260
 
261
/**
262
 * intel_execlists_ctx_id() - get the Execlists Context ID
263
 * @ctx_obj: Logical Ring Context backing object.
264
 *
265
 * Do not confuse with ctx->id! Unfortunately we have a name overload
266
 * here: the old context ID we pass to userspace as a handler so that
267
 * they can refer to a context, and the new context ID we pass to the
268
 * ELSP so that the GPU can inform us of the context status via
269
 * interrupts.
270
 *
271
 * Return: 20-bits globally unique context ID.
272
 */
273
u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
274
{
6084 serge 275
	u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
276
			LRC_PPHWSP_PN * PAGE_SIZE;
5354 serge 277
 
278
	/* LRCA is required to be 4K aligned so the more significant 20 bits
279
	 * are globally unique */
280
	return lrca >> 12;
281
}
282
 
6084 serge 283
static bool disable_lite_restore_wa(struct intel_engine_cs *ring)
5354 serge 284
{
6084 serge 285
	struct drm_device *dev = ring->dev;
286
 
287
	return ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) ||
288
		(IS_BROXTON(dev) && INTEL_REVID(dev) == BXT_REVID_A0)) &&
289
	       (ring->id == VCS || ring->id == VCS2);
290
}
291
 
292
uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
293
				     struct intel_engine_cs *ring)
294
{
295
	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
5354 serge 296
	uint64_t desc;
6084 serge 297
	uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
298
			LRC_PPHWSP_PN * PAGE_SIZE;
5354 serge 299
 
300
	WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
301
 
302
	desc = GEN8_CTX_VALID;
6084 serge 303
	desc |= GEN8_CTX_ADDRESSING_MODE(dev) << GEN8_CTX_ADDRESSING_MODE_SHIFT;
304
	if (IS_GEN8(ctx_obj->base.dev))
305
		desc |= GEN8_CTX_L3LLC_COHERENT;
5354 serge 306
	desc |= GEN8_CTX_PRIVILEGE;
307
	desc |= lrca;
308
	desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
309
 
310
	/* TODO: WaDisableLiteRestore when we start using semaphore
311
	 * signalling between Command Streamers */
312
	/* desc |= GEN8_CTX_FORCE_RESTORE; */
313
 
6084 serge 314
	/* WaEnableForceRestoreInCtxtDescForVCS:skl */
315
	/* WaEnableForceRestoreInCtxtDescForVCS:bxt */
316
	if (disable_lite_restore_wa(ring))
317
		desc |= GEN8_CTX_FORCE_RESTORE;
318
 
5354 serge 319
	return desc;
320
}
321
 
6084 serge 322
static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
323
				 struct drm_i915_gem_request *rq1)
5354 serge 324
{
6084 serge 325
 
326
	struct intel_engine_cs *ring = rq0->ring;
5354 serge 327
	struct drm_device *dev = ring->dev;
328
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 329
	uint64_t desc[2];
5354 serge 330
 
6084 serge 331
	if (rq1) {
332
		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->ring);
333
		rq1->elsp_submitted++;
5354 serge 334
	} else {
6084 serge 335
		desc[1] = 0;
5354 serge 336
	}
337
 
6084 serge 338
	desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->ring);
339
	rq0->elsp_submitted++;
5354 serge 340
 
6084 serge 341
	/* You must always write both descriptors in the order below. */
342
	spin_lock(&dev_priv->uncore.lock);
343
	intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);
344
	I915_WRITE_FW(RING_ELSP(ring), upper_32_bits(desc[1]));
345
	I915_WRITE_FW(RING_ELSP(ring), lower_32_bits(desc[1]));
5354 serge 346
 
6084 serge 347
	I915_WRITE_FW(RING_ELSP(ring), upper_32_bits(desc[0]));
348
	/* The context is automatically loaded after the following */
349
	I915_WRITE_FW(RING_ELSP(ring), lower_32_bits(desc[0]));
5354 serge 350
 
6084 serge 351
	/* ELSP is a wo register, use another nearby reg for posting */
352
	POSTING_READ_FW(RING_EXECLIST_STATUS_LO(ring));
353
	intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
354
	spin_unlock(&dev_priv->uncore.lock);
5354 serge 355
}
356
 
6084 serge 357
static int execlists_update_context(struct drm_i915_gem_request *rq)
5354 serge 358
{
6084 serge 359
	struct intel_engine_cs *ring = rq->ring;
360
	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
361
	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
362
	struct drm_i915_gem_object *rb_obj = rq->ringbuf->obj;
5354 serge 363
	struct page *page;
364
	uint32_t *reg_state;
365
 
6084 serge 366
	BUG_ON(!ctx_obj);
367
	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj));
368
	WARN_ON(!i915_gem_obj_is_pinned(rb_obj));
369
 
370
	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
5354 serge 371
	reg_state = kmap_atomic(page);
372
 
6084 serge 373
	reg_state[CTX_RING_TAIL+1] = rq->tail;
374
	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj);
5354 serge 375
 
6084 serge 376
	if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
377
		/* True 32b PPGTT with dynamic page allocation: update PDP
378
		 * registers and point the unallocated PDPs to scratch page.
379
		 * PML4 is allocated during ppgtt init, so this is not needed
380
		 * in 48-bit mode.
381
		 */
382
		ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
383
		ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
384
		ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
385
		ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
386
	}
387
 
5354 serge 388
	kunmap_atomic(reg_state);
389
 
390
	return 0;
391
}
392
 
6084 serge 393
static void execlists_submit_requests(struct drm_i915_gem_request *rq0,
394
				      struct drm_i915_gem_request *rq1)
5354 serge 395
{
6084 serge 396
	execlists_update_context(rq0);
5354 serge 397
 
6084 serge 398
	if (rq1)
399
		execlists_update_context(rq1);
5354 serge 400
 
6084 serge 401
	execlists_elsp_write(rq0, rq1);
5354 serge 402
}
403
 
404
static void execlists_context_unqueue(struct intel_engine_cs *ring)
405
{
6084 serge 406
	struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
407
	struct drm_i915_gem_request *cursor = NULL, *tmp = NULL;
5354 serge 408
 
409
	assert_spin_locked(&ring->execlist_lock);
410
 
6084 serge 411
	/*
412
	 * If irqs are not active generate a warning as batches that finish
413
	 * without the irqs may get lost and a GPU Hang may occur.
414
	 */
415
	WARN_ON(!intel_irqs_enabled(ring->dev->dev_private));
416
 
5354 serge 417
	if (list_empty(&ring->execlist_queue))
418
		return;
419
 
420
	/* Try to read in pairs */
421
	list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue,
422
				 execlist_link) {
423
		if (!req0) {
424
			req0 = cursor;
425
		} else if (req0->ctx == cursor->ctx) {
426
			/* Same ctx: ignore first request, as second request
427
			 * will update tail past first request's workload */
428
			cursor->elsp_submitted = req0->elsp_submitted;
429
			list_del(&req0->execlist_link);
430
			list_add_tail(&req0->execlist_link,
431
				&ring->execlist_retired_req_list);
432
			req0 = cursor;
433
		} else {
434
			req1 = cursor;
435
			break;
436
		}
437
	}
438
 
6084 serge 439
	if (IS_GEN8(ring->dev) || IS_GEN9(ring->dev)) {
440
		/*
441
		 * WaIdleLiteRestore: make sure we never cause a lite
442
		 * restore with HEAD==TAIL
443
		 */
444
		if (req0->elsp_submitted) {
445
			/*
446
			 * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL
447
			 * as we resubmit the request. See gen8_emit_request()
448
			 * for where we prepare the padding after the end of the
449
			 * request.
450
			 */
451
			struct intel_ringbuffer *ringbuf;
452
 
453
			ringbuf = req0->ctx->engine[ring->id].ringbuf;
454
			req0->tail += 8;
455
			req0->tail &= ringbuf->size - 1;
456
		}
457
	}
458
 
5354 serge 459
	WARN_ON(req1 && req1->elsp_submitted);
460
 
6084 serge 461
	execlists_submit_requests(req0, req1);
5354 serge 462
}
463
 
464
static bool execlists_check_remove_request(struct intel_engine_cs *ring,
465
					   u32 request_id)
466
{
6084 serge 467
	struct drm_i915_gem_request *head_req;
5354 serge 468
 
469
	assert_spin_locked(&ring->execlist_lock);
470
 
471
	head_req = list_first_entry_or_null(&ring->execlist_queue,
6084 serge 472
					    struct drm_i915_gem_request,
5354 serge 473
					    execlist_link);
474
 
475
	if (head_req != NULL) {
476
		struct drm_i915_gem_object *ctx_obj =
477
				head_req->ctx->engine[ring->id].state;
478
		if (intel_execlists_ctx_id(ctx_obj) == request_id) {
479
			WARN(head_req->elsp_submitted == 0,
480
			     "Never submitted head request\n");
481
 
482
			if (--head_req->elsp_submitted <= 0) {
483
				list_del(&head_req->execlist_link);
484
				list_add_tail(&head_req->execlist_link,
485
					&ring->execlist_retired_req_list);
486
				return true;
487
			}
488
		}
489
	}
490
 
491
	return false;
492
}
493
 
494
/**
6084 serge 495
 * intel_lrc_irq_handler() - handle Context Switch interrupts
5354 serge 496
 * @ring: Engine Command Streamer to handle.
497
 *
498
 * Check the unread Context Status Buffers and manage the submission of new
499
 * contexts to the ELSP accordingly.
500
 */
6084 serge 501
void intel_lrc_irq_handler(struct intel_engine_cs *ring)
5354 serge 502
{
503
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
504
	u32 status_pointer;
505
	u8 read_pointer;
506
	u8 write_pointer;
6084 serge 507
	u32 status = 0;
5354 serge 508
	u32 status_id;
509
	u32 submit_contexts = 0;
510
 
511
	status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
512
 
513
	read_pointer = ring->next_context_status_buffer;
6084 serge 514
	write_pointer = status_pointer & GEN8_CSB_PTR_MASK;
5354 serge 515
	if (read_pointer > write_pointer)
6084 serge 516
		write_pointer += GEN8_CSB_ENTRIES;
5354 serge 517
 
518
	spin_lock(&ring->execlist_lock);
519
 
520
	while (read_pointer < write_pointer) {
521
		read_pointer++;
6084 serge 522
		status = I915_READ(RING_CONTEXT_STATUS_BUF_LO(ring, read_pointer % GEN8_CSB_ENTRIES));
523
		status_id = I915_READ(RING_CONTEXT_STATUS_BUF_HI(ring, read_pointer % GEN8_CSB_ENTRIES));
5354 serge 524
 
6084 serge 525
		if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
526
			continue;
527
 
5354 serge 528
		if (status & GEN8_CTX_STATUS_PREEMPTED) {
529
			if (status & GEN8_CTX_STATUS_LITE_RESTORE) {
530
				if (execlists_check_remove_request(ring, status_id))
531
					WARN(1, "Lite Restored request removed from queue\n");
532
			} else
533
				WARN(1, "Preemption without Lite Restore\n");
534
		}
535
 
536
		 if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
537
		     (status & GEN8_CTX_STATUS_ELEMENT_SWITCH)) {
538
			if (execlists_check_remove_request(ring, status_id))
539
				submit_contexts++;
540
		}
541
	}
542
 
6084 serge 543
	if (disable_lite_restore_wa(ring)) {
544
		/* Prevent a ctx to preempt itself */
545
		if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) &&
546
		    (submit_contexts != 0))
547
			execlists_context_unqueue(ring);
548
	} else if (submit_contexts != 0) {
5354 serge 549
		execlists_context_unqueue(ring);
6084 serge 550
	}
5354 serge 551
 
552
	spin_unlock(&ring->execlist_lock);
553
 
554
	WARN(submit_contexts > 2, "More than two context complete events?\n");
6084 serge 555
	ring->next_context_status_buffer = write_pointer % GEN8_CSB_ENTRIES;
5354 serge 556
 
557
	I915_WRITE(RING_CONTEXT_STATUS_PTR(ring),
6084 serge 558
		   _MASKED_FIELD(GEN8_CSB_PTR_MASK << 8,
559
				 ((u32)ring->next_context_status_buffer &
560
				  GEN8_CSB_PTR_MASK) << 8));
5354 serge 561
}
562
 
6084 serge 563
static int execlists_context_queue(struct drm_i915_gem_request *request)
5354 serge 564
{
6084 serge 565
	struct intel_engine_cs *ring = request->ring;
566
	struct drm_i915_gem_request *cursor;
5354 serge 567
	int num_elements = 0;
568
 
6084 serge 569
	if (request->ctx != ring->default_context)
570
		intel_lr_context_pin(request);
5354 serge 571
 
6084 serge 572
	i915_gem_request_reference(request);
5354 serge 573
 
6084 serge 574
	spin_lock_irq(&ring->execlist_lock);
5354 serge 575
 
576
	list_for_each_entry(cursor, &ring->execlist_queue, execlist_link)
577
		if (++num_elements > 2)
578
			break;
579
 
580
	if (num_elements > 2) {
6084 serge 581
		struct drm_i915_gem_request *tail_req;
5354 serge 582
 
583
		tail_req = list_last_entry(&ring->execlist_queue,
6084 serge 584
					   struct drm_i915_gem_request,
5354 serge 585
					   execlist_link);
586
 
6084 serge 587
		if (request->ctx == tail_req->ctx) {
5354 serge 588
			WARN(tail_req->elsp_submitted != 0,
589
				"More than 2 already-submitted reqs queued\n");
590
			list_del(&tail_req->execlist_link);
591
			list_add_tail(&tail_req->execlist_link,
592
				&ring->execlist_retired_req_list);
593
		}
594
	}
595
 
6084 serge 596
	list_add_tail(&request->execlist_link, &ring->execlist_queue);
5354 serge 597
	if (num_elements == 0)
598
		execlists_context_unqueue(ring);
599
 
6084 serge 600
	spin_unlock_irq(&ring->execlist_lock);
5354 serge 601
 
602
	return 0;
603
}
604
 
6084 serge 605
static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
5354 serge 606
{
6084 serge 607
	struct intel_engine_cs *ring = req->ring;
5354 serge 608
	uint32_t flush_domains;
609
	int ret;
610
 
611
	flush_domains = 0;
612
	if (ring->gpu_caches_dirty)
613
		flush_domains = I915_GEM_GPU_DOMAINS;
614
 
6084 serge 615
	ret = ring->emit_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
5354 serge 616
	if (ret)
617
		return ret;
618
 
619
	ring->gpu_caches_dirty = false;
620
	return 0;
621
}
622
 
6084 serge 623
static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
5354 serge 624
				 struct list_head *vmas)
625
{
6084 serge 626
	const unsigned other_rings = ~intel_ring_flag(req->ring);
5354 serge 627
	struct i915_vma *vma;
628
	uint32_t flush_domains = 0;
629
	bool flush_chipset = false;
630
	int ret;
631
 
632
	list_for_each_entry(vma, vmas, exec_list) {
633
		struct drm_i915_gem_object *obj = vma->obj;
634
 
6084 serge 635
		if (obj->active & other_rings) {
636
			ret = i915_gem_object_sync(obj, req->ring, &req);
637
			if (ret)
638
				return ret;
639
		}
5354 serge 640
 
641
		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
642
			flush_chipset |= i915_gem_clflush_object(obj, false);
643
 
644
		flush_domains |= obj->base.write_domain;
645
	}
646
 
647
	if (flush_domains & I915_GEM_DOMAIN_GTT)
648
		wmb();
649
 
650
	/* Unconditionally invalidate gpu caches and ensure that we do flush
651
	 * any residual writes from the previous batch.
652
	 */
6084 serge 653
	return logical_ring_invalidate_all_caches(req);
5354 serge 654
}
655
 
6084 serge 656
int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
657
{
658
	int ret;
659
 
660
	request->ringbuf = request->ctx->engine[request->ring->id].ringbuf;
661
 
662
	if (request->ctx != request->ring->default_context) {
663
		ret = intel_lr_context_pin(request);
664
		if (ret)
665
			return ret;
666
	}
667
 
668
	return 0;
669
}
670
 
671
static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
672
				       int bytes)
673
{
674
	struct intel_ringbuffer *ringbuf = req->ringbuf;
675
	struct intel_engine_cs *ring = req->ring;
676
	struct drm_i915_gem_request *target;
677
	unsigned space;
678
	int ret;
679
 
680
	if (intel_ring_space(ringbuf) >= bytes)
681
		return 0;
682
 
683
	/* The whole point of reserving space is to not wait! */
684
	WARN_ON(ringbuf->reserved_in_use);
685
 
686
	list_for_each_entry(target, &ring->request_list, list) {
687
		/*
688
		 * The request queue is per-engine, so can contain requests
689
		 * from multiple ringbuffers. Here, we must ignore any that
690
		 * aren't from the ringbuffer we're considering.
691
		 */
692
		if (target->ringbuf != ringbuf)
693
			continue;
694
 
695
		/* Would completion of this request free enough space? */
696
		space = __intel_ring_space(target->postfix, ringbuf->tail,
697
					   ringbuf->size);
698
		if (space >= bytes)
699
			break;
700
	}
701
 
702
	if (WARN_ON(&target->list == &ring->request_list))
703
		return -ENOSPC;
704
 
705
	ret = i915_wait_request(target);
706
	if (ret)
707
		return ret;
708
 
709
	ringbuf->space = space;
710
	return 0;
711
}
712
 
713
/*
714
 * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload
715
 * @request: Request to advance the logical ringbuffer of.
716
 *
717
 * The tail is updated in our logical ringbuffer struct, not in the actual context. What
718
 * really happens during submission is that the context and current tail will be placed
719
 * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
720
 * point, the tail *inside* the context is updated and the ELSP written to.
721
 */
722
static void
723
intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
724
{
725
	struct intel_engine_cs *ring = request->ring;
726
	struct drm_i915_private *dev_priv = request->i915;
727
 
728
	intel_logical_ring_advance(request->ringbuf);
729
 
730
	request->tail = request->ringbuf->tail;
731
 
732
	if (intel_ring_stopped(ring))
733
		return;
734
 
735
	if (dev_priv->guc.execbuf_client)
736
		i915_guc_submit(dev_priv->guc.execbuf_client, request);
737
	else
738
		execlists_context_queue(request);
739
}
740
 
741
static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
742
{
743
	uint32_t __iomem *virt;
744
	int rem = ringbuf->size - ringbuf->tail;
745
 
746
	virt = ringbuf->virtual_start + ringbuf->tail;
747
	rem /= 4;
748
	while (rem--)
749
		iowrite32(MI_NOOP, virt++);
750
 
751
	ringbuf->tail = 0;
752
	intel_ring_update_space(ringbuf);
753
}
754
 
755
static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes)
756
{
757
	struct intel_ringbuffer *ringbuf = req->ringbuf;
758
	int remain_usable = ringbuf->effective_size - ringbuf->tail;
759
	int remain_actual = ringbuf->size - ringbuf->tail;
760
	int ret, total_bytes, wait_bytes = 0;
761
	bool need_wrap = false;
762
 
763
	if (ringbuf->reserved_in_use)
764
		total_bytes = bytes;
765
	else
766
		total_bytes = bytes + ringbuf->reserved_size;
767
 
768
	if (unlikely(bytes > remain_usable)) {
769
		/*
770
		 * Not enough space for the basic request. So need to flush
771
		 * out the remainder and then wait for base + reserved.
772
		 */
773
		wait_bytes = remain_actual + total_bytes;
774
		need_wrap = true;
775
	} else {
776
		if (unlikely(total_bytes > remain_usable)) {
777
			/*
778
			 * The base request will fit but the reserved space
779
			 * falls off the end. So only need to to wait for the
780
			 * reserved size after flushing out the remainder.
781
			 */
782
			wait_bytes = remain_actual + ringbuf->reserved_size;
783
			need_wrap = true;
784
		} else if (total_bytes > ringbuf->space) {
785
			/* No wrapping required, just waiting. */
786
			wait_bytes = total_bytes;
787
		}
788
	}
789
 
790
	if (wait_bytes) {
791
		ret = logical_ring_wait_for_space(req, wait_bytes);
792
		if (unlikely(ret))
793
			return ret;
794
 
795
		if (need_wrap)
796
			__wrap_ring_buffer(ringbuf);
797
	}
798
 
799
	return 0;
800
}
801
 
5354 serge 802
/**
6084 serge 803
 * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands
804
 *
805
 * @req: The request to start some new work for
806
 * @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
807
 *
808
 * The ringbuffer might not be ready to accept the commands right away (maybe it needs to
809
 * be wrapped, or wait a bit for the tail to be updated). This function takes care of that
810
 * and also preallocates a request (every workload submission is still mediated through
811
 * requests, same as it did with legacy ringbuffer submission).
812
 *
813
 * Return: non-zero if the ringbuffer is not ready to be written to.
814
 */
815
int intel_logical_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
816
{
817
	struct drm_i915_private *dev_priv;
818
	int ret;
819
 
820
	WARN_ON(req == NULL);
821
	dev_priv = req->ring->dev->dev_private;
822
 
823
	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
824
				   dev_priv->mm.interruptible);
825
	if (ret)
826
		return ret;
827
 
828
	ret = logical_ring_prepare(req, num_dwords * sizeof(uint32_t));
829
	if (ret)
830
		return ret;
831
 
832
	req->ringbuf->space -= num_dwords * sizeof(uint32_t);
833
	return 0;
834
}
835
 
836
int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request)
837
{
838
	/*
839
	 * The first call merely notes the reserve request and is common for
840
	 * all back ends. The subsequent localised _begin() call actually
841
	 * ensures that the reservation is available. Without the begin, if
842
	 * the request creator immediately submitted the request without
843
	 * adding any commands to it then there might not actually be
844
	 * sufficient room for the submission commands.
845
	 */
846
	intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
847
 
848
	return intel_logical_ring_begin(request, 0);
849
}
850
 
851
/**
5354 serge 852
 * execlists_submission() - submit a batchbuffer for execution, Execlists style
853
 * @dev: DRM device.
854
 * @file: DRM file.
855
 * @ring: Engine Command Streamer to submit to.
856
 * @ctx: Context to employ for this submission.
857
 * @args: execbuffer call arguments.
858
 * @vmas: list of vmas.
859
 * @batch_obj: the batchbuffer to submit.
860
 * @exec_start: batchbuffer start virtual address pointer.
6084 serge 861
 * @dispatch_flags: translated execbuffer call flags.
5354 serge 862
 *
863
 * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts
864
 * away the submission details of the execbuffer ioctl call.
865
 *
866
 * Return: non-zero if the submission fails.
867
 */
6084 serge 868
int intel_execlists_submission(struct i915_execbuffer_params *params,
5354 serge 869
			       struct drm_i915_gem_execbuffer2 *args,
6084 serge 870
			       struct list_head *vmas)
5354 serge 871
{
6084 serge 872
	struct drm_device       *dev = params->dev;
873
	struct intel_engine_cs  *ring = params->ring;
5354 serge 874
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 875
	struct intel_ringbuffer *ringbuf = params->ctx->engine[ring->id].ringbuf;
876
	u64 exec_start;
5354 serge 877
	int instp_mode;
878
	u32 instp_mask;
879
	int ret;
880
 
881
	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
882
	instp_mask = I915_EXEC_CONSTANTS_MASK;
883
	switch (instp_mode) {
884
	case I915_EXEC_CONSTANTS_REL_GENERAL:
885
	case I915_EXEC_CONSTANTS_ABSOLUTE:
886
	case I915_EXEC_CONSTANTS_REL_SURFACE:
887
		if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
888
			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
889
			return -EINVAL;
890
		}
891
 
892
		if (instp_mode != dev_priv->relative_constants_mode) {
893
			if (instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
894
				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
895
				return -EINVAL;
896
			}
897
 
898
			/* The HW changed the meaning on this bit on gen6 */
899
			instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
900
		}
901
		break;
902
	default:
903
		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
904
		return -EINVAL;
905
	}
906
 
907
	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
908
		DRM_DEBUG("sol reset is gen7 only\n");
909
		return -EINVAL;
910
	}
911
 
6084 serge 912
	ret = execlists_move_to_gpu(params->request, vmas);
5354 serge 913
	if (ret)
914
		return ret;
915
 
916
	if (ring == &dev_priv->ring[RCS] &&
917
	    instp_mode != dev_priv->relative_constants_mode) {
6084 serge 918
		ret = intel_logical_ring_begin(params->request, 4);
5354 serge 919
		if (ret)
920
			return ret;
921
 
922
		intel_logical_ring_emit(ringbuf, MI_NOOP);
923
		intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
924
		intel_logical_ring_emit(ringbuf, INSTPM);
925
		intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode);
926
		intel_logical_ring_advance(ringbuf);
927
 
928
		dev_priv->relative_constants_mode = instp_mode;
929
	}
930
 
6084 serge 931
	exec_start = params->batch_obj_vm_offset +
932
		     args->batch_start_offset;
933
 
934
	ret = ring->emit_bb_start(params->request, exec_start, params->dispatch_flags);
5354 serge 935
	if (ret)
936
		return ret;
937
 
6084 serge 938
	trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
5354 serge 939
 
6084 serge 940
	i915_gem_execbuffer_move_to_active(vmas, params->request);
941
	i915_gem_execbuffer_retire_commands(params);
942
 
5354 serge 943
	return 0;
944
}
945
 
946
void intel_execlists_retire_requests(struct intel_engine_cs *ring)
947
{
6084 serge 948
	struct drm_i915_gem_request *req, *tmp;
5354 serge 949
	struct list_head retired_list;
950
 
951
	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
952
	if (list_empty(&ring->execlist_retired_req_list))
953
		return;
954
 
955
	INIT_LIST_HEAD(&retired_list);
6084 serge 956
	spin_lock_irq(&ring->execlist_lock);
5354 serge 957
	list_replace_init(&ring->execlist_retired_req_list, &retired_list);
6084 serge 958
	spin_unlock_irq(&ring->execlist_lock);
5354 serge 959
 
960
	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
961
		struct intel_context *ctx = req->ctx;
962
		struct drm_i915_gem_object *ctx_obj =
963
				ctx->engine[ring->id].state;
964
 
965
		if (ctx_obj && (ctx != ring->default_context))
6084 serge 966
			intel_lr_context_unpin(req);
5354 serge 967
		list_del(&req->execlist_link);
6084 serge 968
		i915_gem_request_unreference(req);
5354 serge 969
	}
970
}
971
 
972
void intel_logical_ring_stop(struct intel_engine_cs *ring)
973
{
974
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
975
	int ret;
976
 
977
	if (!intel_ring_initialized(ring))
978
		return;
979
 
980
	ret = intel_ring_idle(ring);
981
	if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
982
		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
983
			  ring->name, ret);
984
 
985
	/* TODO: Is this correct with Execlists enabled? */
986
	I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
987
	if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
988
		DRM_ERROR("%s :timed out trying to stop ring\n", ring->name);
989
		return;
990
	}
991
	I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
992
}
993
 
6084 serge 994
int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
5354 serge 995
{
6084 serge 996
	struct intel_engine_cs *ring = req->ring;
5354 serge 997
	int ret;
998
 
999
	if (!ring->gpu_caches_dirty)
1000
		return 0;
1001
 
6084 serge 1002
	ret = ring->emit_flush(req, 0, I915_GEM_GPU_DOMAINS);
5354 serge 1003
	if (ret)
1004
		return ret;
1005
 
1006
	ring->gpu_caches_dirty = false;
1007
	return 0;
1008
}
1009
 
6084 serge 1010
static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
1011
		struct drm_i915_gem_object *ctx_obj,
1012
		struct intel_ringbuffer *ringbuf)
5354 serge 1013
{
6084 serge 1014
	struct drm_device *dev = ring->dev;
1015
	struct drm_i915_private *dev_priv = dev->dev_private;
1016
	int ret = 0;
5354 serge 1017
 
6084 serge 1018
	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
1019
	ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
1020
			PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
1021
	if (ret)
1022
		return ret;
5354 serge 1023
 
6084 serge 1024
	ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
1025
	if (ret)
1026
		goto unpin_ctx_obj;
5354 serge 1027
 
6084 serge 1028
	ctx_obj->dirty = true;
1029
 
1030
	/* Invalidate GuC TLB. */
1031
	if (i915.enable_guc_submission)
1032
		I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
1033
 
1034
	return ret;
1035
 
1036
unpin_ctx_obj:
1037
	i915_gem_object_ggtt_unpin(ctx_obj);
1038
 
1039
	return ret;
5354 serge 1040
}
1041
 
6084 serge 1042
static int intel_lr_context_pin(struct drm_i915_gem_request *rq)
5354 serge 1043
{
1044
	int ret = 0;
6084 serge 1045
	struct intel_engine_cs *ring = rq->ring;
1046
	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
1047
	struct intel_ringbuffer *ringbuf = rq->ringbuf;
5354 serge 1048
 
6084 serge 1049
	if (rq->ctx->engine[ring->id].pin_count++ == 0) {
1050
		ret = intel_lr_context_do_pin(ring, ctx_obj, ringbuf);
5354 serge 1051
		if (ret)
6084 serge 1052
			goto reset_pin_count;
5354 serge 1053
	}
1054
	return ret;
1055
 
6084 serge 1056
reset_pin_count:
1057
	rq->ctx->engine[ring->id].pin_count = 0;
5354 serge 1058
	return ret;
1059
}
1060
 
6084 serge 1061
void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
5354 serge 1062
{
6084 serge 1063
	struct intel_engine_cs *ring = rq->ring;
1064
	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
1065
	struct intel_ringbuffer *ringbuf = rq->ringbuf;
5354 serge 1066
 
1067
	if (ctx_obj) {
1068
		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
6084 serge 1069
		if (--rq->ctx->engine[ring->id].pin_count == 0) {
5354 serge 1070
			intel_unpin_ringbuffer_obj(ringbuf);
1071
			i915_gem_object_ggtt_unpin(ctx_obj);
1072
		}
1073
	}
1074
}
1075
 
6084 serge 1076
static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
5354 serge 1077
{
6084 serge 1078
	int ret, i;
1079
	struct intel_engine_cs *ring = req->ring;
1080
	struct intel_ringbuffer *ringbuf = req->ringbuf;
1081
	struct drm_device *dev = ring->dev;
1082
	struct drm_i915_private *dev_priv = dev->dev_private;
1083
	struct i915_workarounds *w = &dev_priv->workarounds;
5354 serge 1084
 
6084 serge 1085
	if (WARN_ON_ONCE(w->count == 0))
5354 serge 1086
		return 0;
1087
 
6084 serge 1088
	ring->gpu_caches_dirty = true;
1089
	ret = logical_ring_flush_all_caches(req);
1090
	if (ret)
1091
		return ret;
5354 serge 1092
 
6084 serge 1093
	ret = intel_logical_ring_begin(req, w->count * 2 + 2);
1094
	if (ret)
1095
		return ret;
5354 serge 1096
 
6084 serge 1097
	intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(w->count));
1098
	for (i = 0; i < w->count; i++) {
1099
		intel_logical_ring_emit(ringbuf, w->reg[i].addr);
1100
		intel_logical_ring_emit(ringbuf, w->reg[i].value);
1101
	}
1102
	intel_logical_ring_emit(ringbuf, MI_NOOP);
5354 serge 1103
 
6084 serge 1104
	intel_logical_ring_advance(ringbuf);
5354 serge 1105
 
6084 serge 1106
	ring->gpu_caches_dirty = true;
1107
	ret = logical_ring_flush_all_caches(req);
1108
	if (ret)
1109
		return ret;
5354 serge 1110
 
6084 serge 1111
	return 0;
5354 serge 1112
}
1113
 
6084 serge 1114
/*
 * wa_ctx_emit() - store one command dword into a workaround batch page.
 *
 * @batch: base of the dword array being filled
 * @index: lvalue holding the current dword index; it is post-incremented
 * @cmd:   value to store
 *
 * NOTE: when the index is at or beyond PAGE_SIZE / sizeof(uint32_t) this
 * macro warns and executes "return -ENOSPC" in the *calling* function, so it
 * may only be used inside functions that return int. @index has already been
 * incremented by then.
 */
#define wa_ctx_emit(batch, index, cmd)					\
	do {								\
		int __index = (index)++;				\
		if (WARN_ON(__index >= (PAGE_SIZE / sizeof(uint32_t)))) { \
			return -ENOSPC;					\
		}							\
		(batch)[__index] = (cmd);				\
	} while (0)
1122
 
1123
 
1124
/*
1125
 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
1126
 * PIPE_CONTROL instruction. This is required for the flush to happen correctly
1127
 * but there is a slight complication as this is applied in WA batch where the
1128
 * values are only initialized once so we cannot take register value at the
1129
 * beginning and reuse it further; hence we save its value to memory, upload a
1130
 * constant value with bit21 set and then we restore it back with the saved value.
1131
 * To simplify the WA, a constant value is formed by using the default value
1132
 * of this register. This shouldn't be a problem because we are only modifying
1133
 * it for a short period and this batch in non-premptible. We can ofcourse
1134
 * use additional instructions that read the actual value of the register
1135
 * at that time and set our bit of interest but it makes the WA complicated.
1136
 *
1137
 * This WA is also required for Gen9 so extracting as a function avoids
1138
 * code duplication.
1139
 */
1140
static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *ring,
1141
						uint32_t *const batch,
1142
						uint32_t index)
5354 serge 1143
{
6084 serge 1144
	uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES);
5354 serge 1145
 
6084 serge 1146
	/*
1147
	 * WaDisableLSQCROPERFforOCL:skl
1148
	 * This WA is implemented in skl_init_clock_gating() but since
1149
	 * this batch updates GEN8_L3SQCREG4 with default value we need to
1150
	 * set this bit here to retain the WA during flush.
1151
	 */
1152
	if (IS_SKYLAKE(ring->dev) && INTEL_REVID(ring->dev) <= SKL_REVID_E0)
1153
		l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS;
5354 serge 1154
 
6084 serge 1155
	wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
1156
				   MI_SRM_LRM_GLOBAL_GTT));
1157
	wa_ctx_emit(batch, index, GEN8_L3SQCREG4);
1158
	wa_ctx_emit(batch, index, ring->scratch.gtt_offset + 256);
1159
	wa_ctx_emit(batch, index, 0);
5354 serge 1160
 
6084 serge 1161
	wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
1162
	wa_ctx_emit(batch, index, GEN8_L3SQCREG4);
1163
	wa_ctx_emit(batch, index, l3sqc4_flush);
5354 serge 1164
 
6084 serge 1165
	wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
1166
	wa_ctx_emit(batch, index, (PIPE_CONTROL_CS_STALL |
1167
				   PIPE_CONTROL_DC_FLUSH_ENABLE));
1168
	wa_ctx_emit(batch, index, 0);
1169
	wa_ctx_emit(batch, index, 0);
1170
	wa_ctx_emit(batch, index, 0);
1171
	wa_ctx_emit(batch, index, 0);
5354 serge 1172
 
6084 serge 1173
	wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 |
1174
				   MI_SRM_LRM_GLOBAL_GTT));
1175
	wa_ctx_emit(batch, index, GEN8_L3SQCREG4);
1176
	wa_ctx_emit(batch, index, ring->scratch.gtt_offset + 256);
1177
	wa_ctx_emit(batch, index, 0);
5354 serge 1178
 
6084 serge 1179
	return index;
1180
}
5354 serge 1181
 
6084 serge 1182
/*
 * Open a workaround batch section: round @offset up to @start_alignment,
 * record the result as the section's start offset and return it.
 */
static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx,
				    uint32_t offset,
				    uint32_t start_alignment)
{
	wa_ctx->offset = ALIGN(offset, start_alignment);

	return wa_ctx->offset;
}
1188
 
1189
static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx,
1190
			     uint32_t offset,
1191
			     uint32_t size_alignment)
1192
{
1193
	wa_ctx->size = offset - wa_ctx->offset;
1194
 
1195
	WARN(wa_ctx->size % size_alignment,
1196
	     "wa_ctx_bb failed sanity checks: size %d is not aligned to %d\n",
1197
	     wa_ctx->size, size_alignment);
5354 serge 1198
	return 0;
1199
}
1200
 
6084 serge 1201
/**
1202
 * gen8_init_indirectctx_bb() - initialize indirect ctx batch with WA
1203
 *
1204
 * @ring: only applicable for RCS
1205
 * @wa_ctx: structure representing wa_ctx
1206
 *  offset: specifies start of the batch, should be cache-aligned. This is updated
1207
 *    with the offset value received as input.
1208
 *  size: size of the batch in DWORDS but HW expects in terms of cachelines
1209
 * @batch: page in which WA are loaded
1210
 * @offset: This field specifies the start of the batch, it should be
1211
 *  cache-aligned otherwise it is adjusted accordingly.
1212
 *  Typically we only have one indirect_ctx and per_ctx batch buffer which are
1213
 *  initialized at the beginning and shared across all contexts but this field
1214
 *  helps us to have multiple batches at different offsets and select them based
1215
 *  on a criteria. At the moment this batch always start at the beginning of the page
1216
 *  and at this point we don't have multiple wa_ctx batch buffers.
1217
 *
1218
 *  The number of WA applied are not known at the beginning; we use this field
1219
 *  to return the no of DWORDS written.
1220
 *
1221
 *  It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
1222
 *  so it adds NOOPs as padding to make it cacheline aligned.
1223
 *  MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
1224
 *  makes a complete batch buffer.
1225
 *
1226
 * Return: non-zero if we exceed the PAGE_SIZE limit.
1227
 */
1228
 
1229
static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
1230
				    struct i915_wa_ctx_bb *wa_ctx,
1231
				    uint32_t *const batch,
1232
				    uint32_t *offset)
5354 serge 1233
{
6084 serge 1234
	uint32_t scratch_addr;
1235
	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
5354 serge 1236
 
6084 serge 1237
	/* WaDisableCtxRestoreArbitration:bdw,chv */
1238
	wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
5354 serge 1239
 
6084 serge 1240
	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
1241
	if (IS_BROADWELL(ring->dev)) {
1242
		int rc = gen8_emit_flush_coherentl3_wa(ring, batch, index);
1243
		if (rc < 0)
1244
			return rc;
1245
		index = rc;
1246
	}
5354 serge 1247
 
6084 serge 1248
	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
1249
	/* Actual scratch location is at 128 bytes offset */
1250
	scratch_addr = ring->scratch.gtt_offset + 2*CACHELINE_BYTES;
5354 serge 1251
 
6084 serge 1252
	wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
1253
	wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
1254
				   PIPE_CONTROL_GLOBAL_GTT_IVB |
1255
				   PIPE_CONTROL_CS_STALL |
1256
				   PIPE_CONTROL_QW_WRITE));
1257
	wa_ctx_emit(batch, index, scratch_addr);
1258
	wa_ctx_emit(batch, index, 0);
1259
	wa_ctx_emit(batch, index, 0);
1260
	wa_ctx_emit(batch, index, 0);
5354 serge 1261
 
6084 serge 1262
	/* Pad to end of cacheline */
1263
	while (index % CACHELINE_DWORDS)
1264
		wa_ctx_emit(batch, index, MI_NOOP);
5354 serge 1265
 
6084 serge 1266
	/*
1267
	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
1268
	 * execution depends on the length specified in terms of cache lines
1269
	 * in the register CTX_RCS_INDIRECT_CTX
1270
	 */
5354 serge 1271
 
6084 serge 1272
	return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
1273
}
5354 serge 1274
 
6084 serge 1275
/**
1276
 * gen8_init_perctx_bb() - initialize per ctx batch with WA
1277
 *
1278
 * @ring: only applicable for RCS
1279
 * @wa_ctx: structure representing wa_ctx
1280
 *  offset: specifies start of the batch, should be cache-aligned.
1281
 *  size: size of the batch in DWORDS but HW expects in terms of cachelines
1282
 * @batch: page in which WA are loaded
1283
 * @offset: This field specifies the start of this batch.
1284
 *   This batch is started immediately after indirect_ctx batch. Since we ensure
1285
 *   that indirect_ctx ends on a cacheline this batch is aligned automatically.
1286
 *
1287
 *   The number of DWORDS written are returned using this field.
1288
 *
1289
 *  This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding
1290
 *  to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant.
1291
 */
1292
static int gen8_init_perctx_bb(struct intel_engine_cs *ring,
1293
			       struct i915_wa_ctx_bb *wa_ctx,
1294
			       uint32_t *const batch,
1295
			       uint32_t *offset)
1296
{
1297
	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
1298
 
1299
	/* WaDisableCtxRestoreArbitration:bdw,chv */
1300
	wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);
1301
 
1302
	wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
1303
 
1304
	return wa_ctx_end(wa_ctx, *offset = index, 1);
5354 serge 1305
}
1306
 
6084 serge 1307
static int gen9_init_indirectctx_bb(struct intel_engine_cs *ring,
1308
				    struct i915_wa_ctx_bb *wa_ctx,
1309
				    uint32_t *const batch,
1310
				    uint32_t *offset)
5354 serge 1311
{
6084 serge 1312
	int ret;
1313
	struct drm_device *dev = ring->dev;
1314
	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
5354 serge 1315
 
6084 serge 1316
	/* WaDisableCtxRestoreArbitration:skl,bxt */
1317
	if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) <= SKL_REVID_D0)) ||
1318
	    (IS_BROXTON(dev) && (INTEL_REVID(dev) == BXT_REVID_A0)))
1319
		wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
5354 serge 1320
 
6084 serge 1321
	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt */
1322
	ret = gen8_emit_flush_coherentl3_wa(ring, batch, index);
1323
	if (ret < 0)
1324
		return ret;
1325
	index = ret;
5354 serge 1326
 
6084 serge 1327
	/* Pad to end of cacheline */
1328
	while (index % CACHELINE_DWORDS)
1329
		wa_ctx_emit(batch, index, MI_NOOP);
5354 serge 1330
 
6084 serge 1331
	return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
5354 serge 1332
}
1333
 
6084 serge 1334
static int gen9_init_perctx_bb(struct intel_engine_cs *ring,
1335
			       struct i915_wa_ctx_bb *wa_ctx,
1336
			       uint32_t *const batch,
1337
			       uint32_t *offset)
5354 serge 1338
{
6084 serge 1339
	struct drm_device *dev = ring->dev;
1340
	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
5354 serge 1341
 
6084 serge 1342
	/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
1343
	if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) <= SKL_REVID_B0)) ||
1344
	    (IS_BROXTON(dev) && (INTEL_REVID(dev) == BXT_REVID_A0))) {
1345
		wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
1346
		wa_ctx_emit(batch, index, GEN9_SLICE_COMMON_ECO_CHICKEN0);
1347
		wa_ctx_emit(batch, index,
1348
			    _MASKED_BIT_ENABLE(DISABLE_PIXEL_MASK_CAMMING));
1349
		wa_ctx_emit(batch, index, MI_NOOP);
5354 serge 1350
	}
1351
 
6084 serge 1352
	/* WaDisableCtxRestoreArbitration:skl,bxt */
1353
	if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) <= SKL_REVID_D0)) ||
1354
	    (IS_BROXTON(dev) && (INTEL_REVID(dev) == BXT_REVID_A0)))
1355
		wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);
5354 serge 1356
 
6084 serge 1357
	wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
1358
 
1359
	return wa_ctx_end(wa_ctx, *offset = index, 1);
5354 serge 1360
}
1361
 
6084 serge 1362
/*
 * Allocate the GEM object backing the engine's context workaround batch and
 * pin it into the GGTT.
 *
 * @ring: engine that owns the workaround batch
 * @size: requested size in bytes; rounded up to a whole page
 *
 * Returns 0 on success, -ENOMEM if the allocation fails, or the error from
 * the GGTT pin. On any failure ring->wa_ctx.obj is left NULL.
 */
static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *ring, u32 size)
{
	int ret;

	ring->wa_ctx.obj = i915_gem_alloc_object(ring->dev, PAGE_ALIGN(size));
	if (!ring->wa_ctx.obj) {
		DRM_DEBUG_DRIVER("alloc LRC WA ctx backing obj failed.\n");
		return -ENOMEM;
	}

	ret = i915_gem_obj_ggtt_pin(ring->wa_ctx.obj, PAGE_SIZE, 0);
	if (ret) {
		DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
				 ret);
		drm_gem_object_unreference(&ring->wa_ctx.obj->base);
		/*
		 * Do not leave a stale pointer behind: lrc_destroy_wa_ctx_obj()
		 * treats a non-NULL obj as pinned and referenced, and would
		 * unpin and unreference it a second time.
		 */
		ring->wa_ctx.obj = NULL;
		return ret;
	}

	return 0;
}
1382
 
6084 serge 1383
static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *ring)
5354 serge 1384
{
6084 serge 1385
	if (ring->wa_ctx.obj) {
1386
		i915_gem_object_ggtt_unpin(ring->wa_ctx.obj);
1387
		drm_gem_object_unreference(&ring->wa_ctx.obj->base);
1388
		ring->wa_ctx.obj = NULL;
1389
	}
1390
}
5354 serge 1391
 
6084 serge 1392
static int intel_init_workaround_bb(struct intel_engine_cs *ring)
1393
{
1394
	int ret;
1395
	uint32_t *batch;
1396
	uint32_t offset;
1397
	struct page *page;
1398
	struct i915_ctx_workarounds *wa_ctx = &ring->wa_ctx;
1399
 
1400
	WARN_ON(ring->id != RCS);
1401
 
1402
	/* update this when WA for higher Gen are added */
1403
	if (INTEL_INFO(ring->dev)->gen > 9) {
1404
		DRM_ERROR("WA batch buffer is not initialized for Gen%d\n",
1405
			  INTEL_INFO(ring->dev)->gen);
5354 serge 1406
		return 0;
6084 serge 1407
	}
5354 serge 1408
 
6084 serge 1409
	/* some WA perform writes to scratch page, ensure it is valid */
1410
	if (ring->scratch.obj == NULL) {
1411
		DRM_ERROR("scratch page not allocated for %s\n", ring->name);
1412
		return -EINVAL;
1413
	}
5354 serge 1414
 
6084 serge 1415
	ret = lrc_setup_wa_ctx_obj(ring, PAGE_SIZE);
1416
	if (ret) {
1417
		DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
5354 serge 1418
		return ret;
6084 serge 1419
	}
5354 serge 1420
 
6084 serge 1421
	page = i915_gem_object_get_page(wa_ctx->obj, 0);
1422
	batch = kmap_atomic(page);
1423
	offset = 0;
1424
 
1425
	if (INTEL_INFO(ring->dev)->gen == 8) {
1426
		ret = gen8_init_indirectctx_bb(ring,
1427
					       &wa_ctx->indirect_ctx,
1428
					       batch,
1429
					       &offset);
1430
		if (ret)
1431
			goto out;
1432
 
1433
		ret = gen8_init_perctx_bb(ring,
1434
					  &wa_ctx->per_ctx,
1435
					  batch,
1436
					  &offset);
1437
		if (ret)
1438
			goto out;
1439
	} else if (INTEL_INFO(ring->dev)->gen == 9) {
1440
		ret = gen9_init_indirectctx_bb(ring,
1441
					       &wa_ctx->indirect_ctx,
1442
					       batch,
1443
					       &offset);
1444
		if (ret)
1445
			goto out;
1446
 
1447
		ret = gen9_init_perctx_bb(ring,
1448
					  &wa_ctx->per_ctx,
1449
					  batch,
1450
					  &offset);
1451
		if (ret)
1452
			goto out;
5354 serge 1453
	}
1454
 
6084 serge 1455
out:
1456
	kunmap_atomic(batch);
5354 serge 1457
	if (ret)
6084 serge 1458
		lrc_destroy_wa_ctx_obj(ring);
5354 serge 1459
 
6084 serge 1460
	return ret;
5354 serge 1461
}
1462
 
1463
static int gen8_init_common_ring(struct intel_engine_cs *ring)
1464
{
1465
	struct drm_device *dev = ring->dev;
1466
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 1467
	u8 next_context_status_buffer_hw;
5354 serge 1468
 
6084 serge 1469
	lrc_setup_hardware_status_page(ring,
1470
				ring->default_context->engine[ring->id].state);
1471
 
5354 serge 1472
	I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
1473
	I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff);
1474
 
6084 serge 1475
	if (ring->status_page.obj) {
1476
		I915_WRITE(RING_HWS_PGA(ring->mmio_base),
1477
			   (u32)ring->status_page.gfx_addr);
1478
		POSTING_READ(RING_HWS_PGA(ring->mmio_base));
1479
	}
1480
 
5354 serge 1481
	I915_WRITE(RING_MODE_GEN7(ring),
1482
		   _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
1483
		   _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
1484
	POSTING_READ(RING_MODE_GEN7(ring));
6084 serge 1485
 
1486
	/*
1487
	 * Instead of resetting the Context Status Buffer (CSB) read pointer to
1488
	 * zero, we need to read the write pointer from hardware and use its
1489
	 * value because "this register is power context save restored".
1490
	 * Effectively, these states have been observed:
1491
	 *
1492
	 *      | Suspend-to-idle (freeze) | Suspend-to-RAM (mem) |
1493
	 * BDW  | CSB regs not reset       | CSB regs reset       |
1494
	 * CHT  | CSB regs not reset       | CSB regs not reset   |
1495
	 */
1496
	next_context_status_buffer_hw = (I915_READ(RING_CONTEXT_STATUS_PTR(ring))
1497
						   & GEN8_CSB_PTR_MASK);
1498
 
1499
	/*
1500
	 * When the CSB registers are reset (also after power-up / gpu reset),
1501
	 * CSB write pointer is set to all 1's, which is not valid, use '5' in
1502
	 * this special case, so the first element read is CSB[0].
1503
	 */
1504
	if (next_context_status_buffer_hw == GEN8_CSB_PTR_MASK)
1505
		next_context_status_buffer_hw = (GEN8_CSB_ENTRIES - 1);
1506
 
1507
	ring->next_context_status_buffer = next_context_status_buffer_hw;
5354 serge 1508
	DRM_DEBUG_DRIVER("Execlists enabled for %s\n", ring->name);
1509
 
1510
	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
1511
 
1512
	return 0;
1513
}
1514
 
1515
static int gen8_init_render_ring(struct intel_engine_cs *ring)
1516
{
1517
	struct drm_device *dev = ring->dev;
1518
	struct drm_i915_private *dev_priv = dev->dev_private;
1519
	int ret;
1520
 
1521
	ret = gen8_init_common_ring(ring);
1522
	if (ret)
1523
		return ret;
1524
 
1525
	/* We need to disable the AsyncFlip performance optimisations in order
1526
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
1527
	 * programmed to '1' on all products.
1528
	 *
1529
	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
1530
	 */
1531
	I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
1532
 
6084 serge 1533
	I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
1534
 
1535
	return init_workarounds_ring(ring);
1536
}
1537
 
1538
/*
 * Gen9 render engine init: common ring setup followed by the ring
 * workarounds. Returns the first error encountered, or 0.
 */
static int gen9_init_render_ring(struct intel_engine_cs *ring)
{
	int ret = gen8_init_common_ring(ring);

	return ret ? ret : init_workarounds_ring(ring);
}
1548
 
6084 serge 1549
static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
5354 serge 1550
{
6084 serge 1551
	struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt;
1552
	struct intel_engine_cs *ring = req->ring;
1553
	struct intel_ringbuffer *ringbuf = req->ringbuf;
1554
	const int num_lri_cmds = GEN8_LEGACY_PDPES * 2;
1555
	int i, ret;
1556
 
1557
	ret = intel_logical_ring_begin(req, num_lri_cmds * 2 + 2);
1558
	if (ret)
1559
		return ret;
1560
 
1561
	intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(num_lri_cmds));
1562
	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
1563
		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
1564
 
1565
		intel_logical_ring_emit(ringbuf, GEN8_RING_PDP_UDW(ring, i));
1566
		intel_logical_ring_emit(ringbuf, upper_32_bits(pd_daddr));
1567
		intel_logical_ring_emit(ringbuf, GEN8_RING_PDP_LDW(ring, i));
1568
		intel_logical_ring_emit(ringbuf, lower_32_bits(pd_daddr));
1569
	}
1570
 
1571
	intel_logical_ring_emit(ringbuf, MI_NOOP);
1572
	intel_logical_ring_advance(ringbuf);
1573
 
1574
	return 0;
1575
}
1576
 
1577
static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
1578
			      u64 offset, unsigned dispatch_flags)
1579
{
1580
	struct intel_ringbuffer *ringbuf = req->ringbuf;
1581
	bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE);
5354 serge 1582
	int ret;
1583
 
6084 serge 1584
	/* Don't rely in hw updating PDPs, specially in lite-restore.
1585
	 * Ideally, we should set Force PD Restore in ctx descriptor,
1586
	 * but we can't. Force Restore would be a second option, but
1587
	 * it is unsafe in case of lite-restore (because the ctx is
1588
	 * not idle). PML4 is allocated during ppgtt init so this is
1589
	 * not needed in 48-bit.*/
1590
	if (req->ctx->ppgtt &&
1591
	    (intel_ring_flag(req->ring) & req->ctx->ppgtt->pd_dirty_rings)) {
1592
		if (!USES_FULL_48BIT_PPGTT(req->i915) &&
1593
		    !intel_vgpu_active(req->i915->dev)) {
1594
			ret = intel_logical_ring_emit_pdps(req);
1595
			if (ret)
1596
				return ret;
1597
		}
1598
 
1599
		req->ctx->ppgtt->pd_dirty_rings &= ~intel_ring_flag(req->ring);
1600
	}
1601
 
1602
	ret = intel_logical_ring_begin(req, 4);
5354 serge 1603
	if (ret)
1604
		return ret;
1605
 
1606
	/* FIXME(BDW): Address space and security selectors. */
6084 serge 1607
	intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 |
1608
				(ppgtt<<8) |
1609
				(dispatch_flags & I915_DISPATCH_RS ?
1610
				 MI_BATCH_RESOURCE_STREAMER : 0));
5354 serge 1611
	intel_logical_ring_emit(ringbuf, lower_32_bits(offset));
1612
	intel_logical_ring_emit(ringbuf, upper_32_bits(offset));
1613
	intel_logical_ring_emit(ringbuf, MI_NOOP);
1614
	intel_logical_ring_advance(ringbuf);
1615
 
1616
	return 0;
1617
}
1618
 
1619
static bool gen8_logical_ring_get_irq(struct intel_engine_cs *ring)
1620
{
1621
	struct drm_device *dev = ring->dev;
1622
	struct drm_i915_private *dev_priv = dev->dev_private;
1623
	unsigned long flags;
1624
 
1625
	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
1626
		return false;
1627
 
1628
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1629
	if (ring->irq_refcount++ == 0) {
1630
		I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
1631
		POSTING_READ(RING_IMR(ring->mmio_base));
1632
	}
1633
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1634
 
1635
	return true;
1636
}
1637
 
1638
static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring)
1639
{
1640
	struct drm_device *dev = ring->dev;
1641
	struct drm_i915_private *dev_priv = dev->dev_private;
1642
	unsigned long flags;
1643
 
1644
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1645
	if (--ring->irq_refcount == 0) {
1646
		I915_WRITE_IMR(ring, ~ring->irq_keep_mask);
1647
		POSTING_READ(RING_IMR(ring->mmio_base));
1648
	}
1649
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1650
}
1651
 
6084 serge 1652
static int gen8_emit_flush(struct drm_i915_gem_request *request,
5354 serge 1653
			   u32 invalidate_domains,
1654
			   u32 unused)
1655
{
6084 serge 1656
	struct intel_ringbuffer *ringbuf = request->ringbuf;
5354 serge 1657
	struct intel_engine_cs *ring = ringbuf->ring;
1658
	struct drm_device *dev = ring->dev;
1659
	struct drm_i915_private *dev_priv = dev->dev_private;
1660
	uint32_t cmd;
1661
	int ret;
1662
 
6084 serge 1663
	ret = intel_logical_ring_begin(request, 4);
5354 serge 1664
	if (ret)
1665
		return ret;
1666
 
1667
	cmd = MI_FLUSH_DW + 1;
1668
 
6084 serge 1669
	/* We always require a command barrier so that subsequent
1670
	 * commands, such as breadcrumb interrupts, are strictly ordered
1671
	 * wrt the contents of the write cache being flushed to memory
1672
	 * (and thus being coherent from the CPU).
1673
	 */
1674
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
1675
 
1676
	if (invalidate_domains & I915_GEM_GPU_DOMAINS) {
1677
		cmd |= MI_INVALIDATE_TLB;
1678
		if (ring == &dev_priv->ring[VCS])
1679
			cmd |= MI_INVALIDATE_BSD;
5354 serge 1680
	}
1681
 
1682
	intel_logical_ring_emit(ringbuf, cmd);
1683
	intel_logical_ring_emit(ringbuf,
1684
				I915_GEM_HWS_SCRATCH_ADDR |
1685
				MI_FLUSH_DW_USE_GTT);
1686
	intel_logical_ring_emit(ringbuf, 0); /* upper addr */
1687
	intel_logical_ring_emit(ringbuf, 0); /* value */
1688
	intel_logical_ring_advance(ringbuf);
1689
 
1690
	return 0;
1691
}
1692
 
6084 serge 1693
static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
5354 serge 1694
				  u32 invalidate_domains,
1695
				  u32 flush_domains)
1696
{
6084 serge 1697
	struct intel_ringbuffer *ringbuf = request->ringbuf;
5354 serge 1698
	struct intel_engine_cs *ring = ringbuf->ring;
1699
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
6084 serge 1700
	bool vf_flush_wa;
5354 serge 1701
	u32 flags = 0;
1702
	int ret;
1703
 
1704
	flags |= PIPE_CONTROL_CS_STALL;
1705
 
1706
	if (flush_domains) {
1707
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
1708
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
6084 serge 1709
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
5354 serge 1710
	}
1711
 
1712
	if (invalidate_domains) {
1713
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
1714
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
1715
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
1716
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
1717
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
1718
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
1719
		flags |= PIPE_CONTROL_QW_WRITE;
1720
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
1721
	}
1722
 
6084 serge 1723
	/*
1724
	 * On GEN9+ Before VF_CACHE_INVALIDATE we need to emit a NULL pipe
1725
	 * control.
1726
	 */
1727
	vf_flush_wa = INTEL_INFO(ring->dev)->gen >= 9 &&
1728
		      flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
1729
 
1730
	ret = intel_logical_ring_begin(request, vf_flush_wa ? 12 : 6);
5354 serge 1731
	if (ret)
1732
		return ret;
1733
 
6084 serge 1734
	if (vf_flush_wa) {
1735
		intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
1736
		intel_logical_ring_emit(ringbuf, 0);
1737
		intel_logical_ring_emit(ringbuf, 0);
1738
		intel_logical_ring_emit(ringbuf, 0);
1739
		intel_logical_ring_emit(ringbuf, 0);
1740
		intel_logical_ring_emit(ringbuf, 0);
1741
	}
1742
 
5354 serge 1743
	intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
1744
	intel_logical_ring_emit(ringbuf, flags);
1745
	intel_logical_ring_emit(ringbuf, scratch_addr);
1746
	intel_logical_ring_emit(ringbuf, 0);
1747
	intel_logical_ring_emit(ringbuf, 0);
1748
	intel_logical_ring_emit(ringbuf, 0);
1749
	intel_logical_ring_advance(ringbuf);
1750
 
1751
	return 0;
1752
}
1753
 
1754
/*
 * Read the seqno slot (I915_GEM_HWS_INDEX) of the engine's status page.
 * @lazy_coherency is not used by this variant.
 */
static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
{
	u32 seqno = intel_read_status_page(ring, I915_GEM_HWS_INDEX);

	return seqno;
}
1758
 
1759
/* Store @seqno into the seqno slot (I915_GEM_HWS_INDEX) of the status page. */
static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
{
	const u32 value = seqno;

	intel_write_status_page(ring, I915_GEM_HWS_INDEX, value);
}
1763
 
6084 serge 1764
/*
 * Seqno read for BXT A steppings.
 *
 * On these steppings there is a HW coherency issue: the
 * MI_STORE_DATA_IMM that stores the completed request's seqno
 * occasionally fails to invalidate the CPU cache. The workaround is to
 * clflush the cacheline whenever the caller asks for guaranteed
 * coherency (!lazy_coherency). The cacheline is known to be clean at
 * this point because the only CPU write to it is in bxt_a_set_seqno(),
 * which clflushes after writing, so in practice this clflush acts as an
 * invalidate.
 */
static u32 bxt_a_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
{
	/* Lazy callers accept a possibly stale value: plain read. */
	if (lazy_coherency)
		return intel_read_status_page(ring, I915_GEM_HWS_INDEX);

	intel_flush_status_page(ring, I915_GEM_HWS_INDEX);

	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}
1783
 
1784
/*
 * Seqno write for BXT A steppings: store the value, then clflush the
 * cacheline. bxt_a_get_seqno() explains why the flush is needed.
 */
static void bxt_a_set_seqno(struct intel_engine_cs *ring, u32 seqno)
{
	const u32 value = seqno;

	intel_write_status_page(ring, I915_GEM_HWS_INDEX, value);
	intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
}
1791
 
1792
static int gen8_emit_request(struct drm_i915_gem_request *request)
1793
{
1794
	struct intel_ringbuffer *ringbuf = request->ringbuf;
5354 serge 1795
	struct intel_engine_cs *ring = ringbuf->ring;
1796
	u32 cmd;
1797
	int ret;
1798
 
6084 serge 1799
	/*
1800
	 * Reserve space for 2 NOOPs at the end of each request to be
1801
	 * used as a workaround for not being allowed to do lite
1802
	 * restore with HEAD==TAIL (WaIdleLiteRestore).
1803
	 */
1804
	ret = intel_logical_ring_begin(request, 8);
5354 serge 1805
	if (ret)
1806
		return ret;
1807
 
6084 serge 1808
	cmd = MI_STORE_DWORD_IMM_GEN4;
5354 serge 1809
	cmd |= MI_GLOBAL_GTT;
1810
 
1811
	intel_logical_ring_emit(ringbuf, cmd);
1812
	intel_logical_ring_emit(ringbuf,
1813
				(ring->status_page.gfx_addr +
1814
				(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
1815
	intel_logical_ring_emit(ringbuf, 0);
6084 serge 1816
	intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request));
5354 serge 1817
	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
1818
	intel_logical_ring_emit(ringbuf, MI_NOOP);
6084 serge 1819
	intel_logical_ring_advance_and_submit(request);
5354 serge 1820
 
6084 serge 1821
	/*
1822
	 * Here we add two extra NOOPs as padding to avoid
1823
	 * lite restore of a context with HEAD==TAIL.
1824
	 */
1825
	intel_logical_ring_emit(ringbuf, MI_NOOP);
1826
	intel_logical_ring_emit(ringbuf, MI_NOOP);
1827
	intel_logical_ring_advance(ringbuf);
1828
 
5354 serge 1829
	return 0;
1830
}
1831
 
6084 serge 1832
static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req)
1833
{
1834
	struct render_state so;
1835
	int ret;
1836
 
1837
	ret = i915_gem_render_state_prepare(req->ring, &so);
1838
	if (ret)
1839
		return ret;
1840
 
1841
	if (so.rodata == NULL)
1842
		return 0;
1843
 
1844
	ret = req->ring->emit_bb_start(req, so.ggtt_offset,
1845
				       I915_DISPATCH_SECURE);
1846
	if (ret)
1847
		goto out;
1848
 
1849
	ret = req->ring->emit_bb_start(req,
1850
				       (so.ggtt_offset + so.aux_batch_offset),
1851
				       I915_DISPATCH_SECURE);
1852
	if (ret)
1853
		goto out;
1854
 
1855
	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
1856
 
1857
out:
1858
	i915_gem_render_state_fini(&so);
1859
	return ret;
1860
}
1861
 
1862
/*
 * Per-context initialisation for the render engine: emit workarounds,
 * program MOCS, then emit the render state.
 *
 * Return: 0 on success; a workaround or render-state error otherwise.
 * A MOCS failure is logged but does not fail the call.
 */
static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
{
	int err;

	err = intel_logical_ring_workarounds_emit(req);
	if (err)
		return err;

	/*
	 * Failing to program the MOCS is non-fatal: the system just will
	 * not run at peak performance. Report it and carry on.
	 */
	if (intel_rcs_context_init_mocs(req))
		DRM_ERROR("MOCS failed to program: expect performance issues.\n");

	return intel_lr_context_render_state_init(req);
}
1880
 
5354 serge 1881
/**
1882
 * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
1883
 *
1884
 * @ring: Engine Command Streamer.
1885
 *
1886
 */
1887
void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
1888
{
1889
	struct drm_i915_private *dev_priv;
1890
 
1891
	if (!intel_ring_initialized(ring))
1892
		return;
1893
 
1894
	dev_priv = ring->dev->dev_private;
1895
 
1896
	intel_logical_ring_stop(ring);
1897
	WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
1898
 
1899
	if (ring->cleanup)
1900
		ring->cleanup(ring);
1901
 
1902
	i915_cmd_parser_fini_ring(ring);
6084 serge 1903
	i915_gem_batch_pool_fini(&ring->batch_pool);
5354 serge 1904
 
1905
	if (ring->status_page.obj) {
1906
		kunmap(sg_page(ring->status_page.obj->pages->sgl));
1907
		ring->status_page.obj = NULL;
1908
	}
6084 serge 1909
 
1910
	lrc_destroy_wa_ctx_obj(ring);
5354 serge 1911
}
1912
 
1913
static int logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring)
1914
{
1915
	int ret;
1916
 
1917
	/* Intentionally left blank. */
1918
	ring->buffer = NULL;
1919
 
1920
	ring->dev = dev;
1921
	INIT_LIST_HEAD(&ring->active_list);
1922
	INIT_LIST_HEAD(&ring->request_list);
6084 serge 1923
	i915_gem_batch_pool_init(dev, &ring->batch_pool);
5354 serge 1924
	init_waitqueue_head(&ring->irq_queue);
1925
 
1926
	INIT_LIST_HEAD(&ring->execlist_queue);
1927
	INIT_LIST_HEAD(&ring->execlist_retired_req_list);
1928
	spin_lock_init(&ring->execlist_lock);
1929
 
1930
	ret = i915_cmd_parser_init_ring(ring);
1931
	if (ret)
1932
		return ret;
1933
 
6084 serge 1934
	ret = intel_lr_context_deferred_alloc(ring->default_context, ring);
1935
	if (ret)
1936
		return ret;
1937
 
1938
	/* As this is the default context, always pin it */
1939
	ret = intel_lr_context_do_pin(
1940
			ring,
1941
			ring->default_context->engine[ring->id].state,
1942
			ring->default_context->engine[ring->id].ringbuf);
1943
	if (ret) {
1944
		DRM_ERROR(
1945
			"Failed to pin and map ringbuffer %s: %d\n",
1946
			ring->name, ret);
1947
		return ret;
5354 serge 1948
	}
1949
 
1950
	return ret;
1951
}
1952
 
1953
static int logical_render_ring_init(struct drm_device *dev)
1954
{
1955
	struct drm_i915_private *dev_priv = dev->dev_private;
1956
	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
6084 serge 1957
	int ret;
5354 serge 1958
 
1959
	ring->name = "render ring";
1960
	ring->id = RCS;
1961
	ring->mmio_base = RENDER_RING_BASE;
1962
	ring->irq_enable_mask =
1963
		GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
1964
	ring->irq_keep_mask =
1965
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
1966
	if (HAS_L3_DPF(dev))
1967
		ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
1968
 
6084 serge 1969
	if (INTEL_INFO(dev)->gen >= 9)
1970
		ring->init_hw = gen9_init_render_ring;
1971
	else
1972
		ring->init_hw = gen8_init_render_ring;
1973
	ring->init_context = gen8_init_rcs_context;
5354 serge 1974
	ring->cleanup = intel_fini_pipe_control;
6084 serge 1975
	if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) {
1976
		ring->get_seqno = bxt_a_get_seqno;
1977
		ring->set_seqno = bxt_a_set_seqno;
1978
	} else {
1979
		ring->get_seqno = gen8_get_seqno;
1980
		ring->set_seqno = gen8_set_seqno;
1981
	}
5354 serge 1982
	ring->emit_request = gen8_emit_request;
1983
	ring->emit_flush = gen8_emit_flush_render;
1984
	ring->irq_get = gen8_logical_ring_get_irq;
1985
	ring->irq_put = gen8_logical_ring_put_irq;
1986
	ring->emit_bb_start = gen8_emit_bb_start;
1987
 
6084 serge 1988
	ring->dev = dev;
1989
 
1990
	ret = intel_init_pipe_control(ring);
1991
	if (ret)
1992
		return ret;
1993
 
1994
	ret = intel_init_workaround_bb(ring);
1995
	if (ret) {
1996
		/*
1997
		 * We continue even if we fail to initialize WA batch
1998
		 * because we only expect rare glitches but nothing
1999
		 * critical to prevent us from using GPU
2000
		 */
2001
		DRM_ERROR("WA batch buffer initialization failed: %d\n",
2002
			  ret);
2003
	}
2004
 
2005
	ret = logical_ring_init(dev, ring);
2006
	if (ret) {
2007
		lrc_destroy_wa_ctx_obj(ring);
2008
	}
2009
 
2010
	return ret;
5354 serge 2011
}
2012
 
2013
static int logical_bsd_ring_init(struct drm_device *dev)
2014
{
2015
	struct drm_i915_private *dev_priv = dev->dev_private;
2016
	struct intel_engine_cs *ring = &dev_priv->ring[VCS];
2017
 
2018
	ring->name = "bsd ring";
2019
	ring->id = VCS;
2020
	ring->mmio_base = GEN6_BSD_RING_BASE;
2021
	ring->irq_enable_mask =
2022
		GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
2023
	ring->irq_keep_mask =
2024
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
2025
 
6084 serge 2026
	ring->init_hw = gen8_init_common_ring;
2027
	if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) {
2028
		ring->get_seqno = bxt_a_get_seqno;
2029
		ring->set_seqno = bxt_a_set_seqno;
2030
	} else {
2031
		ring->get_seqno = gen8_get_seqno;
2032
		ring->set_seqno = gen8_set_seqno;
2033
	}
5354 serge 2034
	ring->emit_request = gen8_emit_request;
2035
	ring->emit_flush = gen8_emit_flush;
2036
	ring->irq_get = gen8_logical_ring_get_irq;
2037
	ring->irq_put = gen8_logical_ring_put_irq;
2038
	ring->emit_bb_start = gen8_emit_bb_start;
2039
 
2040
	return logical_ring_init(dev, ring);
2041
}
2042
 
2043
static int logical_bsd2_ring_init(struct drm_device *dev)
2044
{
2045
	struct drm_i915_private *dev_priv = dev->dev_private;
2046
	struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
2047
 
2048
	ring->name = "bds2 ring";
2049
	ring->id = VCS2;
2050
	ring->mmio_base = GEN8_BSD2_RING_BASE;
2051
	ring->irq_enable_mask =
2052
		GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
2053
	ring->irq_keep_mask =
2054
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
2055
 
6084 serge 2056
	ring->init_hw = gen8_init_common_ring;
5354 serge 2057
	ring->get_seqno = gen8_get_seqno;
2058
	ring->set_seqno = gen8_set_seqno;
2059
	ring->emit_request = gen8_emit_request;
2060
	ring->emit_flush = gen8_emit_flush;
2061
	ring->irq_get = gen8_logical_ring_get_irq;
2062
	ring->irq_put = gen8_logical_ring_put_irq;
2063
	ring->emit_bb_start = gen8_emit_bb_start;
2064
 
2065
	return logical_ring_init(dev, ring);
2066
}
2067
 
2068
static int logical_blt_ring_init(struct drm_device *dev)
2069
{
2070
	struct drm_i915_private *dev_priv = dev->dev_private;
2071
	struct intel_engine_cs *ring = &dev_priv->ring[BCS];
2072
 
2073
	ring->name = "blitter ring";
2074
	ring->id = BCS;
2075
	ring->mmio_base = BLT_RING_BASE;
2076
	ring->irq_enable_mask =
2077
		GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
2078
	ring->irq_keep_mask =
2079
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
2080
 
6084 serge 2081
	ring->init_hw = gen8_init_common_ring;
2082
	if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) {
2083
		ring->get_seqno = bxt_a_get_seqno;
2084
		ring->set_seqno = bxt_a_set_seqno;
2085
	} else {
2086
		ring->get_seqno = gen8_get_seqno;
2087
		ring->set_seqno = gen8_set_seqno;
2088
	}
5354 serge 2089
	ring->emit_request = gen8_emit_request;
2090
	ring->emit_flush = gen8_emit_flush;
2091
	ring->irq_get = gen8_logical_ring_get_irq;
2092
	ring->irq_put = gen8_logical_ring_put_irq;
2093
	ring->emit_bb_start = gen8_emit_bb_start;
2094
 
2095
	return logical_ring_init(dev, ring);
2096
}
2097
 
2098
static int logical_vebox_ring_init(struct drm_device *dev)
2099
{
2100
	struct drm_i915_private *dev_priv = dev->dev_private;
2101
	struct intel_engine_cs *ring = &dev_priv->ring[VECS];
2102
 
2103
	ring->name = "video enhancement ring";
2104
	ring->id = VECS;
2105
	ring->mmio_base = VEBOX_RING_BASE;
2106
	ring->irq_enable_mask =
2107
		GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
2108
	ring->irq_keep_mask =
2109
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
2110
 
6084 serge 2111
	ring->init_hw = gen8_init_common_ring;
2112
	if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) {
2113
		ring->get_seqno = bxt_a_get_seqno;
2114
		ring->set_seqno = bxt_a_set_seqno;
2115
	} else {
2116
		ring->get_seqno = gen8_get_seqno;
2117
		ring->set_seqno = gen8_set_seqno;
2118
	}
5354 serge 2119
	ring->emit_request = gen8_emit_request;
2120
	ring->emit_flush = gen8_emit_flush;
2121
	ring->irq_get = gen8_logical_ring_get_irq;
2122
	ring->irq_put = gen8_logical_ring_put_irq;
2123
	ring->emit_bb_start = gen8_emit_bb_start;
2124
 
2125
	return logical_ring_init(dev, ring);
2126
}
2127
 
2128
/**
2129
 * intel_logical_rings_init() - allocate, populate and init the Engine Command Streamers
2130
 * @dev: DRM device.
2131
 *
2132
 * This function inits the engines for an Execlists submission style (the equivalent in the
2133
 * legacy ringbuffer submission world would be i915_gem_init_rings). It does it only for
2134
 * those engines that are present in the hardware.
2135
 *
2136
 * Return: non-zero if the initialization failed.
2137
 */
2138
int intel_logical_rings_init(struct drm_device *dev)
2139
{
2140
	struct drm_i915_private *dev_priv = dev->dev_private;
2141
	int ret;
2142
 
2143
	ret = logical_render_ring_init(dev);
2144
	if (ret)
2145
		return ret;
2146
 
2147
	if (HAS_BSD(dev)) {
2148
		ret = logical_bsd_ring_init(dev);
2149
		if (ret)
2150
			goto cleanup_render_ring;
2151
	}
2152
 
2153
	if (HAS_BLT(dev)) {
2154
		ret = logical_blt_ring_init(dev);
2155
		if (ret)
2156
			goto cleanup_bsd_ring;
2157
	}
2158
 
2159
	if (HAS_VEBOX(dev)) {
2160
		ret = logical_vebox_ring_init(dev);
2161
		if (ret)
2162
			goto cleanup_blt_ring;
2163
	}
2164
 
2165
	if (HAS_BSD2(dev)) {
2166
		ret = logical_bsd2_ring_init(dev);
2167
		if (ret)
2168
			goto cleanup_vebox_ring;
2169
	}
2170
 
2171
	return 0;
2172
 
2173
cleanup_vebox_ring:
2174
	intel_logical_ring_cleanup(&dev_priv->ring[VECS]);
2175
cleanup_blt_ring:
2176
	intel_logical_ring_cleanup(&dev_priv->ring[BCS]);
2177
cleanup_bsd_ring:
2178
	intel_logical_ring_cleanup(&dev_priv->ring[VCS]);
2179
cleanup_render_ring:
2180
	intel_logical_ring_cleanup(&dev_priv->ring[RCS]);
2181
 
2182
	return ret;
2183
}
2184
 
6084 serge 2185
static u32
2186
make_rpcs(struct drm_device *dev)
5354 serge 2187
{
6084 serge 2188
	u32 rpcs = 0;
5354 serge 2189
 
6084 serge 2190
	/*
2191
	 * No explicit RPCS request is needed to ensure full
2192
	 * slice/subslice/EU enablement prior to Gen9.
2193
	*/
2194
	if (INTEL_INFO(dev)->gen < 9)
5354 serge 2195
		return 0;
2196
 
6084 serge 2197
	/*
2198
	 * Starting in Gen9, render power gating can leave
2199
	 * slice/subslice/EU in a partially enabled state. We
2200
	 * must make an explicit request through RPCS for full
2201
	 * enablement.
2202
	*/
2203
	if (INTEL_INFO(dev)->has_slice_pg) {
2204
		rpcs |= GEN8_RPCS_S_CNT_ENABLE;
2205
		rpcs |= INTEL_INFO(dev)->slice_total <<
2206
			GEN8_RPCS_S_CNT_SHIFT;
2207
		rpcs |= GEN8_RPCS_ENABLE;
2208
	}
5354 serge 2209
 
6084 serge 2210
	if (INTEL_INFO(dev)->has_subslice_pg) {
2211
		rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
2212
		rpcs |= INTEL_INFO(dev)->subslice_per_slice <<
2213
			GEN8_RPCS_SS_CNT_SHIFT;
2214
		rpcs |= GEN8_RPCS_ENABLE;
2215
	}
5354 serge 2216
 
6084 serge 2217
	if (INTEL_INFO(dev)->has_eu_pg) {
2218
		rpcs |= INTEL_INFO(dev)->eu_per_subslice <<
2219
			GEN8_RPCS_EU_MIN_SHIFT;
2220
		rpcs |= INTEL_INFO(dev)->eu_per_subslice <<
2221
			GEN8_RPCS_EU_MAX_SHIFT;
2222
		rpcs |= GEN8_RPCS_ENABLE;
2223
	}
2224
 
2225
	return rpcs;
5354 serge 2226
}
2227
 
2228
static int
2229
populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj,
2230
		    struct intel_engine_cs *ring, struct intel_ringbuffer *ringbuf)
2231
{
2232
	struct drm_device *dev = ring->dev;
2233
	struct drm_i915_private *dev_priv = dev->dev_private;
2234
	struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
2235
	struct page *page;
2236
	uint32_t *reg_state;
2237
	int ret;
2238
 
2239
	if (!ppgtt)
2240
		ppgtt = dev_priv->mm.aliasing_ppgtt;
2241
 
2242
	ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
2243
	if (ret) {
2244
		DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
2245
		return ret;
2246
	}
2247
 
2248
	ret = i915_gem_object_get_pages(ctx_obj);
2249
	if (ret) {
2250
		DRM_DEBUG_DRIVER("Could not get object pages\n");
2251
		return ret;
2252
	}
2253
 
2254
	i915_gem_object_pin_pages(ctx_obj);
2255
 
2256
	/* The second page of the context object contains some fields which must
2257
	 * be set up prior to the first execution. */
6084 serge 2258
	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
5354 serge 2259
	reg_state = kmap_atomic(page);
2260
 
2261
	/* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM
2262
	 * commands followed by (reg, value) pairs. The values we are setting here are
2263
	 * only for the first context restore: on a subsequent save, the GPU will
2264
	 * recreate this batchbuffer with new values (including all the missing
2265
	 * MI_LOAD_REGISTER_IMM commands that we are not initializing here). */
2266
	if (ring->id == RCS)
2267
		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(14);
2268
	else
2269
		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(11);
2270
	reg_state[CTX_LRI_HEADER_0] |= MI_LRI_FORCE_POSTED;
2271
	reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring);
2272
	reg_state[CTX_CONTEXT_CONTROL+1] =
6084 serge 2273
		_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
2274
				   CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
2275
				   CTX_CTRL_RS_CTX_ENABLE);
5354 serge 2276
	reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base);
2277
	reg_state[CTX_RING_HEAD+1] = 0;
2278
	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
2279
	reg_state[CTX_RING_TAIL+1] = 0;
2280
	reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
2281
	/* Ring buffer start address is not known until the buffer is pinned.
2282
	 * It is written to the context image in execlists_update_context()
2283
	 */
2284
	reg_state[CTX_RING_BUFFER_CONTROL] = RING_CTL(ring->mmio_base);
2285
	reg_state[CTX_RING_BUFFER_CONTROL+1] =
2286
			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID;
2287
	reg_state[CTX_BB_HEAD_U] = ring->mmio_base + 0x168;
2288
	reg_state[CTX_BB_HEAD_U+1] = 0;
2289
	reg_state[CTX_BB_HEAD_L] = ring->mmio_base + 0x140;
2290
	reg_state[CTX_BB_HEAD_L+1] = 0;
2291
	reg_state[CTX_BB_STATE] = ring->mmio_base + 0x110;
2292
	reg_state[CTX_BB_STATE+1] = (1<<5);
2293
	reg_state[CTX_SECOND_BB_HEAD_U] = ring->mmio_base + 0x11c;
2294
	reg_state[CTX_SECOND_BB_HEAD_U+1] = 0;
2295
	reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114;
2296
	reg_state[CTX_SECOND_BB_HEAD_L+1] = 0;
2297
	reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
2298
	reg_state[CTX_SECOND_BB_STATE+1] = 0;
2299
	if (ring->id == RCS) {
2300
		reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0;
2301
		reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
2302
		reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4;
2303
		reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
2304
		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8;
2305
		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
6084 serge 2306
		if (ring->wa_ctx.obj) {
2307
			struct i915_ctx_workarounds *wa_ctx = &ring->wa_ctx;
2308
			uint32_t ggtt_offset = i915_gem_obj_ggtt_offset(wa_ctx->obj);
2309
 
2310
			reg_state[CTX_RCS_INDIRECT_CTX+1] =
2311
				(ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) |
2312
				(wa_ctx->indirect_ctx.size / CACHELINE_DWORDS);
2313
 
2314
			reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] =
2315
				CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT << 6;
2316
 
2317
			reg_state[CTX_BB_PER_CTX_PTR+1] =
2318
				(ggtt_offset + wa_ctx->per_ctx.offset * sizeof(uint32_t)) |
2319
				0x01;
2320
		}
5354 serge 2321
	}
2322
	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
2323
	reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;
2324
	reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8;
2325
	reg_state[CTX_CTX_TIMESTAMP+1] = 0;
2326
	reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3);
2327
	reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3);
2328
	reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2);
2329
	reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2);
2330
	reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1);
2331
	reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
2332
	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
2333
	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
6084 serge 2334
 
2335
	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
2336
		/* 64b PPGTT (48bit canonical)
2337
		 * PDP0_DESCRIPTOR contains the base address to PML4 and
2338
		 * other PDP Descriptors are ignored.
2339
		 */
2340
		ASSIGN_CTX_PML4(ppgtt, reg_state);
2341
	} else {
2342
		/* 32b PPGTT
2343
		 * PDP*_DESCRIPTOR contains the base address of space supported.
2344
		 * With dynamic page allocation, PDPs may not be allocated at
2345
		 * this point. Point the unallocated PDPs to the scratch page
2346
		 */
2347
		ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
2348
		ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
2349
		ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
2350
		ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
2351
	}
2352
 
5354 serge 2353
	if (ring->id == RCS) {
2354
		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
6084 serge 2355
		reg_state[CTX_R_PWR_CLK_STATE] = GEN8_R_PWR_CLK_STATE;
2356
		reg_state[CTX_R_PWR_CLK_STATE+1] = make_rpcs(dev);
5354 serge 2357
	}
2358
 
2359
	kunmap_atomic(reg_state);
2360
 
2361
	ctx_obj->dirty = 1;
6084 serge 2362
    i915_gem_object_unpin_pages(ctx_obj);
5354 serge 2363
 
2364
	return 0;
2365
}
2366
 
2367
/**
2368
 * intel_lr_context_free() - free the LRC specific bits of a context
2369
 * @ctx: the LR context to free.
2370
 *
2371
 * The real context freeing is done in i915_gem_context_free: this only
2372
 * takes care of the bits that are LRC related: the per-engine backing
2373
 * objects and the logical ringbuffer.
2374
 */
2375
void intel_lr_context_free(struct intel_context *ctx)
2376
{
2377
	int i;
2378
 
2379
	for (i = 0; i < I915_NUM_RINGS; i++) {
2380
		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
2381
 
2382
		if (ctx_obj) {
2383
			struct intel_ringbuffer *ringbuf =
2384
					ctx->engine[i].ringbuf;
2385
			struct intel_engine_cs *ring = ringbuf->ring;
2386
 
2387
			if (ctx == ring->default_context) {
2388
				intel_unpin_ringbuffer_obj(ringbuf);
2389
				i915_gem_object_ggtt_unpin(ctx_obj);
2390
			}
6084 serge 2391
			WARN_ON(ctx->engine[ring->id].pin_count);
2392
			intel_ringbuffer_free(ringbuf);
5354 serge 2393
			drm_gem_object_unreference(&ctx_obj->base);
2394
		}
2395
	}
2396
}
2397
 
2398
static uint32_t get_lr_context_size(struct intel_engine_cs *ring)
2399
{
2400
	int ret = 0;
2401
 
2402
	WARN_ON(INTEL_INFO(ring->dev)->gen < 8);
2403
 
2404
	switch (ring->id) {
2405
	case RCS:
2406
		if (INTEL_INFO(ring->dev)->gen >= 9)
2407
			ret = GEN9_LR_CONTEXT_RENDER_SIZE;
2408
		else
2409
			ret = GEN8_LR_CONTEXT_RENDER_SIZE;
2410
		break;
2411
	case VCS:
2412
	case BCS:
2413
	case VECS:
2414
	case VCS2:
2415
		ret = GEN8_LR_CONTEXT_OTHER_SIZE;
2416
		break;
2417
	}
2418
 
2419
	return ret;
2420
}
2421
 
2422
static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
2423
		struct drm_i915_gem_object *default_ctx_obj)
2424
{
2425
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
6084 serge 2426
	struct page *page;
5354 serge 2427
 
6084 serge 2428
	/* The HWSP is part of the default context object in LRC mode. */
2429
	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj)
2430
			+ LRC_PPHWSP_PN * PAGE_SIZE;
2431
	page = i915_gem_object_get_page(default_ctx_obj, LRC_PPHWSP_PN);
2432
	ring->status_page.page_addr = kmap(page);
5354 serge 2433
	ring->status_page.obj = default_ctx_obj;
2434
 
2435
	I915_WRITE(RING_HWS_PGA(ring->mmio_base),
2436
			(u32)ring->status_page.gfx_addr);
2437
	POSTING_READ(RING_HWS_PGA(ring->mmio_base));
2438
}
2439
 
2440
/**
6084 serge 2441
 * intel_lr_context_deferred_alloc() - create the LRC specific bits of a context
5354 serge 2442
 * @ctx: LR context to create.
2443
 * @ring: engine to be used with the context.
2444
 *
2445
 * This function can be called more than once, with different engines, if we plan
2446
 * to use the context with them. The context backing objects and the ringbuffers
2447
 * (specially the ringbuffer backing objects) suck a lot of memory up, and that's why
2448
 * the creation is a deferred call: it's better to make sure first that we need to use
2449
 * a given ring with the context.
2450
 *
2451
 * Return: non-zero on error.
2452
 */
6084 serge 2453
 
2454
int intel_lr_context_deferred_alloc(struct intel_context *ctx,
5354 serge 2455
				     struct intel_engine_cs *ring)
2456
{
2457
	struct drm_device *dev = ring->dev;
2458
	struct drm_i915_gem_object *ctx_obj;
2459
	uint32_t context_size;
2460
	struct intel_ringbuffer *ringbuf;
2461
	int ret;
2462
 
2463
	WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL);
6084 serge 2464
	WARN_ON(ctx->engine[ring->id].state);
5354 serge 2465
 
2466
	context_size = round_up(get_lr_context_size(ring), 4096);
2467
 
6084 serge 2468
	/* One extra page as the sharing data between driver and GuC */
2469
	context_size += PAGE_SIZE * LRC_PPHWSP_PN;
5354 serge 2470
 
6084 serge 2471
	ctx_obj = i915_gem_alloc_object(dev, context_size);
2472
	if (!ctx_obj) {
2473
		DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n");
2474
		return -ENOMEM;
5354 serge 2475
	}
2476
 
6084 serge 2477
	ringbuf = intel_engine_create_ringbuffer(ring, 4 * PAGE_SIZE);
2478
	if (IS_ERR(ringbuf)) {
2479
		ret = PTR_ERR(ringbuf);
2480
		goto error_deref_obj;
5354 serge 2481
	}
2482
 
2483
	ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf);
2484
	if (ret) {
2485
		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
6084 serge 2486
		goto error_ringbuf;
5354 serge 2487
	}
2488
 
2489
	ctx->engine[ring->id].ringbuf = ringbuf;
2490
	ctx->engine[ring->id].state = ctx_obj;
2491
 
6084 serge 2492
	if (ctx != ring->default_context && ring->init_context) {
2493
		struct drm_i915_gem_request *req;
5354 serge 2494
 
6084 serge 2495
		ret = i915_gem_request_alloc(ring,
2496
			ctx, &req);
2497
		if (ret) {
2498
			DRM_ERROR("ring create req: %d\n",
2499
				ret);
2500
			goto error_ringbuf;
5354 serge 2501
		}
2502
 
6084 serge 2503
		ret = ring->init_context(req);
5354 serge 2504
		if (ret) {
6084 serge 2505
			DRM_ERROR("ring init context: %d\n",
2506
				ret);
2507
			i915_gem_request_cancel(req);
2508
			goto error_ringbuf;
5354 serge 2509
		}
6084 serge 2510
		i915_add_request_no_flush(req);
5354 serge 2511
	}
2512
	return 0;
2513
 
6084 serge 2514
error_ringbuf:
2515
	intel_ringbuffer_free(ringbuf);
2516
error_deref_obj:
5354 serge 2517
	drm_gem_object_unreference(&ctx_obj->base);
6084 serge 2518
	ctx->engine[ring->id].ringbuf = NULL;
2519
	ctx->engine[ring->id].state = NULL;
5354 serge 2520
	return ret;
2521
}
6084 serge 2522
 
2523
void intel_lr_context_reset(struct drm_device *dev,
2524
			struct intel_context *ctx)
2525
{
2526
	struct drm_i915_private *dev_priv = dev->dev_private;
2527
	struct intel_engine_cs *ring;
2528
	int i;
2529
 
2530
	for_each_ring(ring, dev_priv, i) {
2531
		struct drm_i915_gem_object *ctx_obj =
2532
				ctx->engine[ring->id].state;
2533
		struct intel_ringbuffer *ringbuf =
2534
				ctx->engine[ring->id].ringbuf;
2535
		uint32_t *reg_state;
2536
		struct page *page;
2537
 
2538
		if (!ctx_obj)
2539
			continue;
2540
 
2541
		if (i915_gem_object_get_pages(ctx_obj)) {
2542
			WARN(1, "Failed get_pages for context obj\n");
2543
			continue;
2544
		}
2545
		page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
2546
		reg_state = kmap_atomic(page);
2547
 
2548
		reg_state[CTX_RING_HEAD+1] = 0;
2549
		reg_state[CTX_RING_TAIL+1] = 0;
2550
 
2551
		kunmap_atomic(reg_state);
2552
 
2553
		ringbuf->head = 0;
2554
		ringbuf->tail = 0;
2555
	}
2556
}