Subversion Repositories Kolibri OS

Rev

Rev 5354 | Rev 6088 | Go to most recent revision | Show entire file | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 5354 Rev 6084
Line 133... Line 133...
133
 */
133
 */
Line 134... Line 134...
134
 
134
 
135
#include 
135
#include 
136
#include 
136
#include 
137
#include "intel_drv.h"
137
#include "intel_drv.h"
Line 138... Line 138...
138
#include "i915_drv.h"
138
#include "intel_mocs.h"
139
 
139
 
140
#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
140
#define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
Line 187... Line 187...
187
#define GEN8_CTX_VALID (1<<0)
187
#define GEN8_CTX_VALID (1<<0)
188
#define GEN8_CTX_FORCE_PD_RESTORE (1<<1)
188
#define GEN8_CTX_FORCE_PD_RESTORE (1<<1)
189
#define GEN8_CTX_FORCE_RESTORE (1<<2)
189
#define GEN8_CTX_FORCE_RESTORE (1<<2)
190
#define GEN8_CTX_L3LLC_COHERENT (1<<5)
190
#define GEN8_CTX_L3LLC_COHERENT (1<<5)
191
#define GEN8_CTX_PRIVILEGE (1<<8)
191
#define GEN8_CTX_PRIVILEGE (1<<8)
-
 
192
 
-
 
193
#define ASSIGN_CTX_PDP(ppgtt, reg_state, n) { \
-
 
194
	const u64 _addr = i915_page_dir_dma_addr((ppgtt), (n));	\
-
 
195
	reg_state[CTX_PDP ## n ## _UDW+1] = upper_32_bits(_addr); \
-
 
196
	reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \
-
 
197
}
-
 
198
 
-
 
199
#define ASSIGN_CTX_PML4(ppgtt, reg_state) { \
-
 
200
	reg_state[CTX_PDP0_UDW + 1] = upper_32_bits(px_dma(&ppgtt->pml4)); \
-
 
201
	reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \
-
 
202
}
-
 
203
 
192
enum {
204
enum {
193
	ADVANCED_CONTEXT = 0,
205
	ADVANCED_CONTEXT = 0,
194
	LEGACY_CONTEXT,
206
	LEGACY_32B_CONTEXT,
195
	ADVANCED_AD_CONTEXT,
207
	ADVANCED_AD_CONTEXT,
196
	LEGACY_64B_CONTEXT
208
	LEGACY_64B_CONTEXT
197
};
209
};
198
#define GEN8_CTX_MODE_SHIFT 3
210
#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3
-
 
211
#define GEN8_CTX_ADDRESSING_MODE(dev)  (USES_FULL_48BIT_PPGTT(dev) ?\
-
 
212
		LEGACY_64B_CONTEXT :\
-
 
213
		LEGACY_32B_CONTEXT)
199
enum {
214
enum {
200
	FAULT_AND_HANG = 0,
215
	FAULT_AND_HANG = 0,
201
	FAULT_AND_HALT, /* Debug only */
216
	FAULT_AND_HALT, /* Debug only */
202
	FAULT_AND_STREAM,
217
	FAULT_AND_STREAM,
203
	FAULT_AND_CONTINUE /* Unsupported */
218
	FAULT_AND_CONTINUE /* Unsupported */
204
};
219
};
205
#define GEN8_CTX_ID_SHIFT 32
220
#define GEN8_CTX_ID_SHIFT 32
-
 
221
#define CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT  0x17
-
 
222
 
-
 
223
static int intel_lr_context_pin(struct drm_i915_gem_request *rq);
-
 
224
static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
-
 
225
		struct drm_i915_gem_object *default_ctx_obj);
Line 206... Line -...
206
 
-
 
207
static int intel_lr_context_pin(struct intel_engine_cs *ring,
-
 
Line 208... Line 226...
208
		struct intel_context *ctx);
226
 
209
 
227
 
210
/**
228
/**
211
 * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
229
 * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
212
 * @dev: DRM device.
230
 * @dev: DRM device.
213
 * @enable_execlists: value of i915.enable_execlists module parameter.
231
 * @enable_execlists: value of i915.enable_execlists module parameter.
214
 *
232
 *
215
 * Only certain platforms support Execlists (the prerequisites being
-
 
216
 * support for Logical Ring Contexts and Aliasing PPGTT or better),
233
 * Only certain platforms support Execlists (the prerequisites being
217
 * and only when enabled via module parameter.
234
 * support for Logical Ring Contexts and Aliasing PPGTT or better).
218
 *
235
 *
219
 * Return: 1 if Execlists is supported and has to be enabled.
236
 * Return: 1 if Execlists is supported and has to be enabled.
220
 */
237
 */
221
int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists)
238
int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists)
Line -... Line 239...
-
 
239
{
-
 
240
	WARN_ON(i915.enable_ppgtt == -1);
-
 
241
 
-
 
242
	/* On platforms with execlist available, vGPU will only
-
 
243
	 * support execlist mode, no ring buffer mode.
-
 
244
	 */
222
{
245
	if (HAS_LOGICAL_RING_CONTEXTS(dev) && intel_vgpu_active(dev))
223
	WARN_ON(i915.enable_ppgtt == -1);
246
		return 1;
Line 224... Line 247...
224
 
247
 
225
	if (INTEL_INFO(dev)->gen >= 9)
248
	if (INTEL_INFO(dev)->gen >= 9)
Line 247... Line 270...
247
 *
270
 *
248
 * Return: 20-bits globally unique context ID.
271
 * Return: 20-bits globally unique context ID.
249
 */
272
 */
250
u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
273
u32 intel_execlists_ctx_id(struct drm_i915_gem_object *ctx_obj)
251
{
274
{
252
	u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj);
275
	u32 lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
-
 
276
			LRC_PPHWSP_PN * PAGE_SIZE;
Line 253... Line 277...
253
 
277
 
254
	/* LRCA is required to be 4K aligned so the more significant 20 bits
278
	/* LRCA is required to be 4K aligned so the more significant 20 bits
255
	 * are globally unique */
279
	 * are globally unique */
256
	return lrca >> 12;
280
	return lrca >> 12;
Line 257... Line 281...
257
}
281
}
258
 
282
 
-
 
283
static bool disable_lite_restore_wa(struct intel_engine_cs *ring)
-
 
284
{
-
 
285
	struct drm_device *dev = ring->dev;
-
 
286
 
-
 
287
	return ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) ||
-
 
288
		(IS_BROXTON(dev) && INTEL_REVID(dev) == BXT_REVID_A0)) &&
-
 
289
	       (ring->id == VCS || ring->id == VCS2);
-
 
290
}
-
 
291
 
-
 
292
uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
-
 
293
				     struct intel_engine_cs *ring)
259
static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_object *ctx_obj)
294
{
260
{
295
	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
-
 
296
	uint64_t desc;
Line 261... Line 297...
261
	uint64_t desc;
297
	uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj) +
Line 262... Line 298...
262
	uint64_t lrca = i915_gem_obj_ggtt_offset(ctx_obj);
298
			LRC_PPHWSP_PN * PAGE_SIZE;
263
 
299
 
-
 
300
	WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
264
	WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
301
 
265
 
302
	desc = GEN8_CTX_VALID;
266
	desc = GEN8_CTX_VALID;
303
	desc |= GEN8_CTX_ADDRESSING_MODE(dev) << GEN8_CTX_ADDRESSING_MODE_SHIFT;
267
	desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT;
304
	if (IS_GEN8(ctx_obj->base.dev))
Line 268... Line 305...
268
	desc |= GEN8_CTX_L3LLC_COHERENT;
305
		desc |= GEN8_CTX_L3LLC_COHERENT;
269
	desc |= GEN8_CTX_PRIVILEGE;
306
	desc |= GEN8_CTX_PRIVILEGE;
270
	desc |= lrca;
307
	desc |= lrca;
Line -... Line 308...
-
 
308
	desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
-
 
309
 
-
 
310
	/* TODO: WaDisableLiteRestore when we start using semaphore
-
 
311
	 * signalling between Command Streamers */
-
 
312
	/* desc |= GEN8_CTX_FORCE_RESTORE; */
271
	desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
313
 
272
 
314
	/* WaEnableForceRestoreInCtxtDescForVCS:skl */
Line 273... Line 315...
273
	/* TODO: WaDisableLiteRestore when we start using semaphore
315
	/* WaEnableForceRestoreInCtxtDescForVCS:bxt */
274
	 * signalling between Command Streamers */
-
 
275
	/* desc |= GEN8_CTX_FORCE_RESTORE; */
316
	if (disable_lite_restore_wa(ring))
276
 
317
		desc |= GEN8_CTX_FORCE_RESTORE;
-
 
318
 
-
 
319
	return desc;
277
	return desc;
320
}
278
}
321
 
279
 
-
 
280
static void execlists_elsp_write(struct intel_engine_cs *ring,
322
static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
281
				 struct drm_i915_gem_object *ctx_obj0,
-
 
Line 282... Line -...
282
				 struct drm_i915_gem_object *ctx_obj1)
-
 
283
{
323
				 struct drm_i915_gem_request *rq1)
284
	struct drm_device *dev = ring->dev;
324
{
285
	struct drm_i915_private *dev_priv = dev->dev_private;
-
 
286
	uint64_t temp = 0;
-
 
287
	uint32_t desc[4];
-
 
288
	unsigned long flags;
-
 
289
 
-
 
290
	/* XXX: You must always write both descriptors in the order below. */
-
 
291
	if (ctx_obj1)
-
 
292
		temp = execlists_ctx_descriptor(ctx_obj1);
325
 
293
	else
-
 
294
		temp = 0;
-
 
295
	desc[1] = (u32)(temp >> 32);
-
 
296
	desc[0] = (u32)temp;
-
 
297
 
-
 
298
	temp = execlists_ctx_descriptor(ctx_obj0);
-
 
299
	desc[3] = (u32)(temp >> 32);
-
 
300
	desc[2] = (u32)temp;
-
 
301
 
-
 
302
	/* Set Force Wakeup bit to prevent GT from entering C6 while ELSP writes
-
 
303
	 * are in progress.
-
 
304
	 *
-
 
305
	 * The other problem is that we can't just call gen6_gt_force_wake_get()
-
 
306
	 * because that function calls intel_runtime_pm_get(), which might sleep.
-
 
307
	 * Instead, we do the runtime_pm_get/put when creating/destroying requests.
-
 
308
	 */
-
 
309
	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
-
 
310
	if (IS_CHERRYVIEW(dev) || INTEL_INFO(dev)->gen >= 9) {
-
 
311
		if (dev_priv->uncore.fw_rendercount++ == 0)
-
 
312
			dev_priv->uncore.funcs.force_wake_get(dev_priv,
-
 
313
							      FORCEWAKE_RENDER);
-
 
314
		if (dev_priv->uncore.fw_mediacount++ == 0)
326
	struct intel_engine_cs *ring = rq0->ring;
315
			dev_priv->uncore.funcs.force_wake_get(dev_priv,
-
 
316
							      FORCEWAKE_MEDIA);
-
 
317
		if (INTEL_INFO(dev)->gen >= 9) {
327
	struct drm_device *dev = ring->dev;
318
			if (dev_priv->uncore.fw_blittercount++ == 0)
328
	struct drm_i915_private *dev_priv = dev->dev_private;
319
				dev_priv->uncore.funcs.force_wake_get(dev_priv,
-
 
320
							FORCEWAKE_BLITTER);
-
 
321
		}
-
 
322
	} else {
-
 
323
		if (dev_priv->uncore.forcewake_count++ == 0)
-
 
324
			dev_priv->uncore.funcs.force_wake_get(dev_priv,
-
 
325
							      FORCEWAKE_ALL);
-
 
Line 326... Line 329...
326
	}
329
	uint64_t desc[2];
327
	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
330
 
Line 328... Line 331...
328
 
331
	if (rq1) {
329
	I915_WRITE(RING_ELSP(ring), desc[1]);
332
		desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->ring);
330
	I915_WRITE(RING_ELSP(ring), desc[0]);
-
 
331
	I915_WRITE(RING_ELSP(ring), desc[3]);
-
 
332
	/* The context is automatically loaded after the following */
-
 
333
	I915_WRITE(RING_ELSP(ring), desc[2]);
-
 
334
 
-
 
335
	/* ELSP is a wo register, so use another nearby reg for posting instead */
333
		rq1->elsp_submitted++;
336
	POSTING_READ(RING_EXECLIST_STATUS(ring));
-
 
337
 
-
 
338
	/* Release Force Wakeup (see the big comment above). */
-
 
339
	spin_lock_irqsave(&dev_priv->uncore.lock, flags);
334
	} else {
340
	if (IS_CHERRYVIEW(dev) || INTEL_INFO(dev)->gen >= 9) {
-
 
341
		if (--dev_priv->uncore.fw_rendercount == 0)
-
 
342
			dev_priv->uncore.funcs.force_wake_put(dev_priv,
-
 
343
							      FORCEWAKE_RENDER);
-
 
344
		if (--dev_priv->uncore.fw_mediacount == 0)
335
		desc[1] = 0;
345
			dev_priv->uncore.funcs.force_wake_put(dev_priv,
-
 
346
							      FORCEWAKE_MEDIA);
-
 
Line -... Line 336...
-
 
336
	}
-
 
337
 
-
 
338
	desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->ring);
-
 
339
	rq0->elsp_submitted++;
-
 
340
 
-
 
341
	/* You must always write both descriptors in the order below. */
-
 
342
	spin_lock(&dev_priv->uncore.lock);
347
		if (INTEL_INFO(dev)->gen >= 9) {
343
	intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);
348
			if (--dev_priv->uncore.fw_blittercount == 0)
344
	I915_WRITE_FW(RING_ELSP(ring), upper_32_bits(desc[1]));
Line 349... Line 345...
349
				dev_priv->uncore.funcs.force_wake_put(dev_priv,
345
	I915_WRITE_FW(RING_ELSP(ring), lower_32_bits(desc[1]));
350
							FORCEWAKE_BLITTER);
-
 
351
		}
-
 
352
	} else {
346
 
-
 
347
	I915_WRITE_FW(RING_ELSP(ring), upper_32_bits(desc[0]));
-
 
348
	/* The context is automatically loaded after the following */
-
 
349
	I915_WRITE_FW(RING_ELSP(ring), lower_32_bits(desc[0]));
-
 
350
 
353
		if (--dev_priv->uncore.forcewake_count == 0)
351
	/* ELSP is a wo register, use another nearby reg for posting */
354
			dev_priv->uncore.funcs.force_wake_put(dev_priv,
352
	POSTING_READ_FW(RING_EXECLIST_STATUS_LO(ring));
Line -... Line 353...
-
 
353
	intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
-
 
354
	spin_unlock(&dev_priv->uncore.lock);
-
 
355
}
-
 
356
 
355
							      FORCEWAKE_ALL);
357
static int execlists_update_context(struct drm_i915_gem_request *rq)
356
	}
358
{
Line 357... Line 359...
357
 
359
	struct intel_engine_cs *ring = rq->ring;
358
	spin_unlock_irqrestore(&dev_priv->uncore.lock, flags);
360
	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
-
 
361
	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
-
 
362
	struct drm_i915_gem_object *rb_obj = rq->ringbuf->obj;
-
 
363
	struct page *page;
-
 
364
	uint32_t *reg_state;
-
 
365
 
-
 
366
	BUG_ON(!ctx_obj);
-
 
367
	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj));
-
 
368
	WARN_ON(!i915_gem_obj_is_pinned(rb_obj));
-
 
369
 
-
 
370
	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
-
 
371
	reg_state = kmap_atomic(page);
-
 
372
 
Line 359... Line 373...
359
}
373
	reg_state[CTX_RING_TAIL+1] = rq->tail;
Line 360... Line 374...
360
 
374
	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj);
361
static int execlists_update_context(struct drm_i915_gem_object *ctx_obj,
375
 
Line 362... Line 376...
362
				    struct drm_i915_gem_object *ring_obj,
376
	if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
363
				    u32 tail)
-
 
364
{
377
		/* True 32b PPGTT with dynamic page allocation: update PDP
365
	struct page *page;
378
		 * registers and point the unallocated PDPs to scratch page.
366
	uint32_t *reg_state;
-
 
367
 
-
 
368
	page = i915_gem_object_get_page(ctx_obj, 1);
-
 
369
	reg_state = kmap_atomic(page);
-
 
370
 
-
 
371
	reg_state[CTX_RING_TAIL+1] = tail;
-
 
372
	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(ring_obj);
-
 
373
 
-
 
374
	kunmap_atomic(reg_state);
-
 
375
 
379
		 * PML4 is allocated during ppgtt init, so this is not needed
Line 376... Line 380...
376
	return 0;
380
		 * in 48-bit mode.
377
}
-
 
378
 
-
 
379
static void execlists_submit_contexts(struct intel_engine_cs *ring,
381
		 */
380
				      struct intel_context *to0, u32 tail0,
-
 
381
				      struct intel_context *to1, u32 tail1)
-
 
Line 382... Line -...
382
{
-
 
383
	struct drm_i915_gem_object *ctx_obj0 = to0->engine[ring->id].state;
-
 
384
	struct intel_ringbuffer *ringbuf0 = to0->engine[ring->id].ringbuf;
-
 
385
	struct drm_i915_gem_object *ctx_obj1 = NULL;
382
		ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
386
	struct intel_ringbuffer *ringbuf1 = NULL;
383
		ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
Line 387... Line 384...
387
 
384
		ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
388
	BUG_ON(!ctx_obj0);
385
		ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
389
	WARN_ON(!i915_gem_obj_is_pinned(ctx_obj0));
386
	}
390
	WARN_ON(!i915_gem_obj_is_pinned(ringbuf0->obj));
387
 
Line 391... Line 388...
391
 
388
	kunmap_atomic(reg_state);
Line -... Line 389...
-
 
389
 
-
 
390
	return 0;
-
 
391
}
-
 
392
 
-
 
393
static void execlists_submit_requests(struct drm_i915_gem_request *rq0,
-
 
394
				      struct drm_i915_gem_request *rq1)
392
	execlists_update_context(ctx_obj0, ringbuf0->obj, tail0);
395
{
393
 
396
	execlists_update_context(rq0);
Line 394... Line 397...
394
	if (to1) {
397
 
395
		ringbuf1 = to1->engine[ring->id].ringbuf;
398
	if (rq1)
Line 431... Line 434...
431
			req1 = cursor;
434
			req1 = cursor;
432
			break;
435
			break;
433
		}
436
		}
434
	}
437
	}
Line -... Line 438...
-
 
438
 
-
 
439
	if (IS_GEN8(ring->dev) || IS_GEN9(ring->dev)) {
-
 
440
		/*
-
 
441
		 * WaIdleLiteRestore: make sure we never cause a lite
-
 
442
		 * restore with HEAD==TAIL
-
 
443
		 */
-
 
444
		if (req0->elsp_submitted) {
-
 
445
			/*
-
 
446
			 * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL
-
 
447
			 * as we resubmit the request. See gen8_emit_request()
-
 
448
			 * for where we prepare the padding after the end of the
-
 
449
			 * request.
-
 
450
			 */
-
 
451
			struct intel_ringbuffer *ringbuf;
-
 
452
 
-
 
453
			ringbuf = req0->ctx->engine[ring->id].ringbuf;
-
 
454
			req0->tail += 8;
-
 
455
			req0->tail &= ringbuf->size - 1;
-
 
456
		}
-
 
457
	}
435
 
458
 
Line 436... Line 459...
436
	WARN_ON(req1 && req1->elsp_submitted);
459
	WARN_ON(req1 && req1->elsp_submitted);
437
 
-
 
438
	execlists_submit_contexts(ring, req0->ctx, req0->tail,
-
 
439
				  req1 ? req1->ctx : NULL,
-
 
440
				  req1 ? req1->tail : 0);
-
 
441
 
-
 
442
	req0->elsp_submitted++;
-
 
443
	if (req1)
460
 
Line 444... Line 461...
444
		req1->elsp_submitted++;
461
	execlists_submit_requests(req0, req1);
445
}
462
}
446
 
463
 
447
static bool execlists_check_remove_request(struct intel_engine_cs *ring,
464
static bool execlists_check_remove_request(struct intel_engine_cs *ring,
Line 448... Line 465...
448
					   u32 request_id)
465
					   u32 request_id)
Line 449... Line 466...
449
{
466
{
450
	struct intel_ctx_submit_request *head_req;
467
	struct drm_i915_gem_request *head_req;
451
 
468
 
Line 452... Line 469...
452
	assert_spin_locked(&ring->execlist_lock);
469
	assert_spin_locked(&ring->execlist_lock);
453
 
470
 
454
	head_req = list_first_entry_or_null(&ring->execlist_queue,
471
	head_req = list_first_entry_or_null(&ring->execlist_queue,
Line 473... Line 490...
473
 
490
 
474
	return false;
491
	return false;
Line 475... Line 492...
475
}
492
}
476
 
493
 
477
/**
494
/**
478
 * intel_execlists_handle_ctx_events() - handle Context Switch interrupts
495
 * intel_lrc_irq_handler() - handle Context Switch interrupts
479
 * @ring: Engine Command Streamer to handle.
496
 * @ring: Engine Command Streamer to handle.
480
 *
497
 *
481
 * Check the unread Context Status Buffers and manage the submission of new
498
 * Check the unread Context Status Buffers and manage the submission of new
482
 * contexts to the ELSP accordingly.
499
 * contexts to the ELSP accordingly.
483
 */
500
 */
484
void intel_execlists_handle_ctx_events(struct intel_engine_cs *ring)
501
void intel_lrc_irq_handler(struct intel_engine_cs *ring)
485
{
502
{
486
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
503
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
487
	u32 status_pointer;
504
	u32 status_pointer;
488
	u8 read_pointer;
505
	u8 read_pointer;
489
	u8 write_pointer;
506
	u8 write_pointer;
490
	u32 status;
507
	u32 status = 0;
Line 491... Line 508...
491
	u32 status_id;
508
	u32 status_id;
Line 492... Line 509...
492
	u32 submit_contexts = 0;
509
	u32 submit_contexts = 0;
493
 
510
 
494
	status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
511
	status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
495
 
512
 
Line 496... Line 513...
496
	read_pointer = ring->next_context_status_buffer;
513
	read_pointer = ring->next_context_status_buffer;
Line 497... Line 514...
497
	write_pointer = status_pointer & 0x07;
514
	write_pointer = status_pointer & GEN8_CSB_PTR_MASK;
498
	if (read_pointer > write_pointer)
515
	if (read_pointer > write_pointer)
499
		write_pointer += 6;
516
		write_pointer += GEN8_CSB_ENTRIES;
500
 
517
 
-
 
518
	spin_lock(&ring->execlist_lock);
501
	spin_lock(&ring->execlist_lock);
519
 
502
 
520
	while (read_pointer < write_pointer) {
Line 503... Line 521...
503
	while (read_pointer < write_pointer) {
521
		read_pointer++;
504
		read_pointer++;
522
		status = I915_READ(RING_CONTEXT_STATUS_BUF_LO(ring, read_pointer % GEN8_CSB_ENTRIES));
505
		status = I915_READ(RING_CONTEXT_STATUS_BUF(ring) +
523
		status_id = I915_READ(RING_CONTEXT_STATUS_BUF_HI(ring, read_pointer % GEN8_CSB_ENTRIES));
506
				(read_pointer % 6) * 8);
524
 
Line 520... Line 538...
520
			if (execlists_check_remove_request(ring, status_id))
538
			if (execlists_check_remove_request(ring, status_id))
521
				submit_contexts++;
539
				submit_contexts++;
522
		}
540
		}
523
	}
541
	}
Line -... Line 542...
-
 
542
 
-
 
543
	if (disable_lite_restore_wa(ring)) {
-
 
544
		/* Prevent a ctx to preempt itself */
524
 
545
		if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) &&
525
	if (submit_contexts != 0)
546
		    (submit_contexts != 0))
-
 
547
			execlists_context_unqueue(ring);
-
 
548
	} else if (submit_contexts != 0) {
-
 
549
		execlists_context_unqueue(ring);
Line 526... Line 550...
526
		execlists_context_unqueue(ring);
550
	}
Line 527... Line 551...
527
 
551
 
528
	spin_unlock(&ring->execlist_lock);
552
	spin_unlock(&ring->execlist_lock);
Line 529... Line 553...
529
 
553
 
-
 
554
	WARN(submit_contexts > 2, "More than two context complete events?\n");
530
	WARN(submit_contexts > 2, "More than two context complete events?\n");
555
	ring->next_context_status_buffer = write_pointer % GEN8_CSB_ENTRIES;
-
 
556
 
531
	ring->next_context_status_buffer = write_pointer % 6;
557
	I915_WRITE(RING_CONTEXT_STATUS_PTR(ring),
Line 532... Line 558...
532
 
558
		   _MASKED_FIELD(GEN8_CSB_PTR_MASK << 8,
533
	I915_WRITE(RING_CONTEXT_STATUS_PTR(ring),
-
 
534
		   ((u32)ring->next_context_status_buffer & 0x07) << 8);
-
 
535
}
559
				 ((u32)ring->next_context_status_buffer &
536
 
560
				  GEN8_CSB_PTR_MASK) << 8));
537
static int execlists_context_queue(struct intel_engine_cs *ring,
561
}
538
				   struct intel_context *to,
-
 
539
				   u32 tail)
562
 
Line 540... Line -...
540
{
-
 
541
	struct intel_ctx_submit_request *req = NULL, *cursor;
-
 
542
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-
 
543
	unsigned long flags;
-
 
544
	int num_elements = 0;
-
 
545
 
-
 
546
	req = kzalloc(sizeof(*req), GFP_KERNEL);
563
static int execlists_context_queue(struct drm_i915_gem_request *request)
547
	if (req == NULL)
564
{
548
		return -ENOMEM;
-
 
549
	req->ctx = to;
-
 
550
	i915_gem_context_reference(req->ctx);
-
 
Line 551... Line 565...
551
 
565
	struct intel_engine_cs *ring = request->ring;
Line 552... Line 566...
552
	if (to != ring->default_context)
566
	struct drm_i915_gem_request *cursor;
Line 553... Line 567...
553
		intel_lr_context_pin(ring, to);
567
	int num_elements = 0;
554
 
568
 
555
	req->ring = ring;
569
	if (request->ctx != ring->default_context)
Line 556... Line 570...
556
	req->tail = tail;
570
		intel_lr_context_pin(request);
557
 
571
 
Line 558... Line 572...
558
	intel_runtime_pm_get(dev_priv);
572
	i915_gem_request_reference(request);
559
 
573
 
560
	spin_lock_irqsave(&ring->execlist_lock, flags);
574
	spin_lock_irq(&ring->execlist_lock);
Line 561... Line 575...
561
 
575
 
562
	list_for_each_entry(cursor, &ring->execlist_queue, execlist_link)
576
	list_for_each_entry(cursor, &ring->execlist_queue, execlist_link)
563
		if (++num_elements > 2)
577
		if (++num_elements > 2)
564
			break;
578
			break;
565
 
579
 
566
	if (num_elements > 2) {
580
	if (num_elements > 2) {
567
		struct intel_ctx_submit_request *tail_req;
581
		struct drm_i915_gem_request *tail_req;
568
 
582
 
Line 569... Line 583...
569
		tail_req = list_last_entry(&ring->execlist_queue,
583
		tail_req = list_last_entry(&ring->execlist_queue,
570
					   struct intel_ctx_submit_request,
584
					   struct drm_i915_gem_request,
571
					   execlist_link);
585
					   execlist_link);
Line 572... Line 586...
572
 
586
 
Line 573... Line 587...
573
		if (to == tail_req->ctx) {
587
		if (request->ctx == tail_req->ctx) {
574
			WARN(tail_req->elsp_submitted != 0,
588
			WARN(tail_req->elsp_submitted != 0,
Line 575... Line 589...
575
				"More than 2 already-submitted reqs queued\n");
589
				"More than 2 already-submitted reqs queued\n");
576
			list_del(&tail_req->execlist_link);
590
			list_del(&tail_req->execlist_link);
577
			list_add_tail(&tail_req->execlist_link,
591
			list_add_tail(&tail_req->execlist_link,
578
				&ring->execlist_retired_req_list);
592
				&ring->execlist_retired_req_list);
579
		}
593
		}
Line 580... Line 594...
580
	}
594
	}
581
 
595
 
582
	list_add_tail(&req->execlist_link, &ring->execlist_queue);
596
	list_add_tail(&request->execlist_link, &ring->execlist_queue);
Line 583... Line 597...
583
	if (num_elements == 0)
597
	if (num_elements == 0)
584
		execlists_context_unqueue(ring);
598
		execlists_context_unqueue(ring);
585
 
599
 
Line 586... Line 600...
586
	spin_unlock_irqrestore(&ring->execlist_lock, flags);
600
	spin_unlock_irq(&ring->execlist_lock);
587
 
601
 
588
	return 0;
602
	return 0;
Line 589... Line 603...
589
}
603
}
590
 
604
 
591
static int logical_ring_invalidate_all_caches(struct intel_ringbuffer *ringbuf)
605
static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
592
{
606
{
593
	struct intel_engine_cs *ring = ringbuf->ring;
607
	struct intel_engine_cs *ring = req->ring;
594
	uint32_t flush_domains;
608
	uint32_t flush_domains;
595
	int ret;
609
	int ret;
596
 
610
 
Line 597... Line 611...
597
	flush_domains = 0;
611
	flush_domains = 0;
598
	if (ring->gpu_caches_dirty)
612
	if (ring->gpu_caches_dirty)
Line -... Line 613...
-
 
613
		flush_domains = I915_GEM_GPU_DOMAINS;
599
		flush_domains = I915_GEM_GPU_DOMAINS;
614
 
600
 
615
	ret = ring->emit_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
601
	ret = ring->emit_flush(ringbuf, I915_GEM_GPU_DOMAINS, flush_domains);
616
	if (ret)
-
 
617
		return ret;
Line 602... Line 618...
602
	if (ret)
618
 
603
		return ret;
619
	ring->gpu_caches_dirty = false;
Line 604... Line 620...
604
 
620
	return 0;
Line 632... Line 648...
632
		wmb();
648
		wmb();
Line 633... Line 649...
633
 
649
 
634
	/* Unconditionally invalidate gpu caches and ensure that we do flush
650
	/* Unconditionally invalidate gpu caches and ensure that we do flush
635
	 * any residual writes from the previous batch.
651
	 * any residual writes from the previous batch.
636
	 */
652
	 */
-
 
653
	return logical_ring_invalidate_all_caches(req);
-
 
654
}
-
 
655
 
-
 
656
int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
-
 
657
{
-
 
658
	int ret;
-
 
659
 
-
 
660
	request->ringbuf = request->ctx->engine[request->ring->id].ringbuf;
-
 
661
 
-
 
662
	if (request->ctx != request->ring->default_context) {
-
 
663
		ret = intel_lr_context_pin(request);
-
 
664
		if (ret)
-
 
665
			return ret;
-
 
666
	}
-
 
667
 
-
 
668
	return 0;
-
 
669
}
-
 
670
 
-
 
671
static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
-
 
672
				       int bytes)
-
 
673
{
-
 
674
	struct intel_ringbuffer *ringbuf = req->ringbuf;
-
 
675
	struct intel_engine_cs *ring = req->ring;
-
 
676
	struct drm_i915_gem_request *target;
-
 
677
	unsigned space;
-
 
678
	int ret;
-
 
679
 
-
 
680
	if (intel_ring_space(ringbuf) >= bytes)
-
 
681
		return 0;
-
 
682
 
-
 
683
	/* The whole point of reserving space is to not wait! */
-
 
684
	WARN_ON(ringbuf->reserved_in_use);
-
 
685
 
-
 
686
	list_for_each_entry(target, &ring->request_list, list) {
-
 
687
		/*
-
 
688
		 * The request queue is per-engine, so can contain requests
-
 
689
		 * from multiple ringbuffers. Here, we must ignore any that
-
 
690
		 * aren't from the ringbuffer we're considering.
-
 
691
		 */
-
 
692
		if (target->ringbuf != ringbuf)
-
 
693
			continue;
-
 
694
 
-
 
695
		/* Would completion of this request free enough space? */
-
 
696
		space = __intel_ring_space(target->postfix, ringbuf->tail,
-
 
697
					   ringbuf->size);
-
 
698
		if (space >= bytes)
-
 
699
			break;
-
 
700
	}
-
 
701
 
-
 
702
	if (WARN_ON(&target->list == &ring->request_list))
-
 
703
		return -ENOSPC;
-
 
704
 
-
 
705
	ret = i915_wait_request(target);
-
 
706
	if (ret)
-
 
707
		return ret;
-
 
708
 
-
 
709
	ringbuf->space = space;
-
 
710
	return 0;
-
 
711
}
-
 
712
 
-
 
713
/*
-
 
714
 * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload
-
 
715
 * @request: Request to advance the logical ringbuffer of.
-
 
716
 *
-
 
717
 * The tail is updated in our logical ringbuffer struct, not in the actual context. What
-
 
718
 * really happens during submission is that the context and current tail will be placed
-
 
719
 * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
-
 
720
 * point, the tail *inside* the context is updated and the ELSP written to.
-
 
721
 */
-
 
722
static void
-
 
723
intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
-
 
724
{
-
 
725
	struct intel_engine_cs *ring = request->ring;
-
 
726
	struct drm_i915_private *dev_priv = request->i915;
-
 
727
 
-
 
728
	intel_logical_ring_advance(request->ringbuf);
-
 
729
 
-
 
730
	request->tail = request->ringbuf->tail;
-
 
731
 
-
 
732
	if (intel_ring_stopped(ring))
-
 
733
		return;
-
 
734
 
-
 
735
	if (dev_priv->guc.execbuf_client)
-
 
736
		i915_guc_submit(dev_priv->guc.execbuf_client, request);
-
 
737
	else
-
 
738
		execlists_context_queue(request);
-
 
739
}
-
 
740
 
-
 
741
static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
-
 
742
{
-
 
743
	uint32_t __iomem *virt;
-
 
744
	int rem = ringbuf->size - ringbuf->tail;
-
 
745
 
-
 
746
	virt = ringbuf->virtual_start + ringbuf->tail;
-
 
747
	rem /= 4;
-
 
748
	while (rem--)
-
 
749
		iowrite32(MI_NOOP, virt++);
-
 
750
 
-
 
751
	ringbuf->tail = 0;
-
 
752
	intel_ring_update_space(ringbuf);
-
 
753
}
-
 
754
 
-
 
755
static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes)
-
 
756
{
-
 
757
	struct intel_ringbuffer *ringbuf = req->ringbuf;
-
 
758
	int remain_usable = ringbuf->effective_size - ringbuf->tail;
-
 
759
	int remain_actual = ringbuf->size - ringbuf->tail;
-
 
760
	int ret, total_bytes, wait_bytes = 0;
-
 
761
	bool need_wrap = false;
-
 
762
 
-
 
763
	if (ringbuf->reserved_in_use)
-
 
764
		total_bytes = bytes;
-
 
765
	else
-
 
766
		total_bytes = bytes + ringbuf->reserved_size;
-
 
767
 
-
 
768
	if (unlikely(bytes > remain_usable)) {
-
 
769
		/*
-
 
770
		 * Not enough space for the basic request. So need to flush
-
 
771
		 * out the remainder and then wait for base + reserved.
-
 
772
		 */
-
 
773
		wait_bytes = remain_actual + total_bytes;
-
 
774
		need_wrap = true;
-
 
775
	} else {
-
 
776
		if (unlikely(total_bytes > remain_usable)) {
-
 
777
			/*
-
 
778
			 * The base request will fit but the reserved space
-
 
779
			 * falls off the end. So only need to to wait for the
-
 
780
			 * reserved size after flushing out the remainder.
-
 
781
			 */
-
 
782
			wait_bytes = remain_actual + ringbuf->reserved_size;
-
 
783
			need_wrap = true;
-
 
784
		} else if (total_bytes > ringbuf->space) {
-
 
785
			/* No wrapping required, just waiting. */
-
 
786
			wait_bytes = total_bytes;
-
 
787
		}
-
 
788
	}
-
 
789
 
-
 
790
	if (wait_bytes) {
-
 
791
		ret = logical_ring_wait_for_space(req, wait_bytes);
-
 
792
		if (unlikely(ret))
-
 
793
			return ret;
-
 
794
 
-
 
795
		if (need_wrap)
-
 
796
			__wrap_ring_buffer(ringbuf);
-
 
797
	}
-
 
798
 
-
 
799
	return 0;
-
 
800
}
-
 
801
 
-
 
802
/**
-
 
803
 * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands
-
 
804
 *
-
 
805
 * @req: The request to start some new work for
-
 
806
 * @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
-
 
807
 *
-
 
808
 * The ringbuffer might not be ready to accept the commands right away (maybe it needs to
-
 
809
 * be wrapped, or wait a bit for the tail to be updated). This function takes care of that
-
 
810
 * and also preallocates a request (every workload submission is still mediated through
-
 
811
 * requests, same as it did with legacy ringbuffer submission).
-
 
812
 *
-
 
813
 * Return: non-zero if the ringbuffer is not ready to be written to.
-
 
814
 */
-
 
815
int intel_logical_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
-
 
816
{
-
 
817
	struct drm_i915_private *dev_priv;
-
 
818
	int ret;
-
 
819
 
-
 
820
	WARN_ON(req == NULL);
-
 
821
	dev_priv = req->ring->dev->dev_private;
-
 
822
 
-
 
823
	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-
 
824
				   dev_priv->mm.interruptible);
-
 
825
	if (ret)
-
 
826
		return ret;
-
 
827
 
-
 
828
	ret = logical_ring_prepare(req, num_dwords * sizeof(uint32_t));
-
 
829
	if (ret)
-
 
830
		return ret;
-
 
831
 
-
 
832
	req->ringbuf->space -= num_dwords * sizeof(uint32_t);
-
 
833
	return 0;
-
 
834
}
-
 
835
 
-
 
836
int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request)
-
 
837
{
-
 
838
	/*
-
 
839
	 * The first call merely notes the reserve request and is common for
-
 
840
	 * all back ends. The subsequent localised _begin() call actually
-
 
841
	 * ensures that the reservation is available. Without the begin, if
-
 
842
	 * the request creator immediately submitted the request without
-
 
843
	 * adding any commands to it then there might not actually be
-
 
844
	 * sufficient room for the submission commands.
-
 
845
	 */
-
 
846
	intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
-
 
847
 
637
	return logical_ring_invalidate_all_caches(ringbuf);
848
	return intel_logical_ring_begin(request, 0);
Line 638... Line 849...
638
}
849
}
639
 
850
 
640
/**
851
/**
Line 645... Line 856...
645
 * @ctx: Context to employ for this submission.
856
 * @ctx: Context to employ for this submission.
646
 * @args: execbuffer call arguments.
857
 * @args: execbuffer call arguments.
647
 * @vmas: list of vmas.
858
 * @vmas: list of vmas.
648
 * @batch_obj: the batchbuffer to submit.
859
 * @batch_obj: the batchbuffer to submit.
649
 * @exec_start: batchbuffer start virtual address pointer.
860
 * @exec_start: batchbuffer start virtual address pointer.
650
 * @flags: translated execbuffer call flags.
861
 * @dispatch_flags: translated execbuffer call flags.
651
 *
862
 *
652
 * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts
863
 * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts
653
 * away the submission details of the execbuffer ioctl call.
864
 * away the submission details of the execbuffer ioctl call.
654
 *
865
 *
655
 * Return: non-zero if the submission fails.
866
 * Return: non-zero if the submission fails.
656
 */
867
 */
657
int intel_execlists_submission(struct drm_device *dev, struct drm_file *file,
868
int intel_execlists_submission(struct i915_execbuffer_params *params,
658
			       struct intel_engine_cs *ring,
-
 
659
			       struct intel_context *ctx,
-
 
660
			       struct drm_i915_gem_execbuffer2 *args,
869
			       struct drm_i915_gem_execbuffer2 *args,
661
			       struct list_head *vmas,
870
			       struct list_head *vmas)
662
			       struct drm_i915_gem_object *batch_obj,
-
 
663
			       u64 exec_start, u32 flags)
-
 
664
{
871
{
-
 
872
	struct drm_device       *dev = params->dev;
-
 
873
	struct intel_engine_cs  *ring = params->ring;
665
	struct drm_i915_private *dev_priv = dev->dev_private;
874
	struct drm_i915_private *dev_priv = dev->dev_private;
666
	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
875
	struct intel_ringbuffer *ringbuf = params->ctx->engine[ring->id].ringbuf;
-
 
876
	u64 exec_start;
667
	int instp_mode;
877
	int instp_mode;
668
	u32 instp_mask;
878
	u32 instp_mask;
669
	int ret;
879
	int ret;
Line 670... Line 880...
670
 
880
 
Line 692... Line 902...
692
	default:
902
	default:
693
		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
903
		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
694
		return -EINVAL;
904
		return -EINVAL;
695
	}
905
	}
Line 696... Line -...
696
 
-
 
697
	if (args->num_cliprects != 0) {
-
 
698
		DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
-
 
699
		return -EINVAL;
-
 
700
	} else {
-
 
701
		if (args->DR4 == 0xffffffff) {
-
 
702
			DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
-
 
703
			args->DR4 = 0;
-
 
704
		}
-
 
705
 
-
 
706
		if (args->DR1 || args->DR4 || args->cliprects_ptr) {
-
 
707
			DRM_DEBUG("0 cliprects but dirt in cliprects fields\n");
-
 
708
			return -EINVAL;
-
 
709
		}
-
 
710
	}
-
 
711
 
906
 
712
	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
907
	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
713
		DRM_DEBUG("sol reset is gen7 only\n");
908
		DRM_DEBUG("sol reset is gen7 only\n");
714
		return -EINVAL;
909
		return -EINVAL;
Line 715... Line 910...
715
	}
910
	}
716
 
911
 
717
	ret = execlists_move_to_gpu(ringbuf, vmas);
912
	ret = execlists_move_to_gpu(params->request, vmas);
Line 718... Line 913...
718
	if (ret)
913
	if (ret)
719
		return ret;
914
		return ret;
720
 
915
 
721
	if (ring == &dev_priv->ring[RCS] &&
916
	if (ring == &dev_priv->ring[RCS] &&
722
	    instp_mode != dev_priv->relative_constants_mode) {
917
	    instp_mode != dev_priv->relative_constants_mode) {
Line 723... Line 918...
723
		ret = intel_logical_ring_begin(ringbuf, 4);
918
		ret = intel_logical_ring_begin(params->request, 4);
724
		if (ret)
919
		if (ret)
Line 731... Line 926...
731
		intel_logical_ring_advance(ringbuf);
926
		intel_logical_ring_advance(ringbuf);
Line 732... Line 927...
732
 
927
 
733
		dev_priv->relative_constants_mode = instp_mode;
928
		dev_priv->relative_constants_mode = instp_mode;
Line -... Line 929...
-
 
929
	}
-
 
930
 
-
 
931
	exec_start = params->batch_obj_vm_offset +
734
	}
932
		     args->batch_start_offset;
735
 
933
 
736
	ret = ring->emit_bb_start(ringbuf, exec_start, flags);
934
	ret = ring->emit_bb_start(params->request, exec_start, params->dispatch_flags);
Line -... Line 935...
-
 
935
	if (ret)
-
 
936
		return ret;
737
	if (ret)
937
 
738
		return ret;
938
	trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
Line 739... Line 939...
739
 
939
 
740
	i915_gem_execbuffer_move_to_active(vmas, ring);
940
	i915_gem_execbuffer_move_to_active(vmas, params->request);
Line 741... Line 941...
741
	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
941
	i915_gem_execbuffer_retire_commands(params);
742
 
942
 
743
	return 0;
943
	return 0;
744
}
-
 
745
 
-
 
746
void intel_execlists_retire_requests(struct intel_engine_cs *ring)
944
}
Line 747... Line 945...
747
{
945
 
748
	struct intel_ctx_submit_request *req, *tmp;
946
void intel_execlists_retire_requests(struct intel_engine_cs *ring)
749
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
947
{
Line 750... Line 948...
750
	unsigned long flags;
948
	struct drm_i915_gem_request *req, *tmp;
751
	struct list_head retired_list;
949
	struct list_head retired_list;
752
 
950
 
753
	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
951
	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
Line 754... Line 952...
754
	if (list_empty(&ring->execlist_retired_req_list))
952
	if (list_empty(&ring->execlist_retired_req_list))
755
		return;
953
		return;
756
 
954
 
757
	INIT_LIST_HEAD(&retired_list);
955
	INIT_LIST_HEAD(&retired_list);
Line 758... Line 956...
758
	spin_lock_irqsave(&ring->execlist_lock, flags);
956
	spin_lock_irq(&ring->execlist_lock);
759
	list_replace_init(&ring->execlist_retired_req_list, &retired_list);
957
	list_replace_init(&ring->execlist_retired_req_list, &retired_list);
760
	spin_unlock_irqrestore(&ring->execlist_lock, flags);
-
 
761
 
-
 
762
	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
958
	spin_unlock_irq(&ring->execlist_lock);
763
		struct intel_context *ctx = req->ctx;
959
 
764
		struct drm_i915_gem_object *ctx_obj =
960
	list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
765
				ctx->engine[ring->id].state;
961
		struct intel_context *ctx = req->ctx;
Line 766... Line 962...
766
 
962
		struct drm_i915_gem_object *ctx_obj =
767
		if (ctx_obj && (ctx != ring->default_context))
963
				ctx->engine[ring->id].state;
Line 793... Line 989...
793
		return;
989
		return;
794
	}
990
	}
795
	I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
991
	I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
796
}
992
}
Line 797... Line 993...
797
 
993
 
798
int logical_ring_flush_all_caches(struct intel_ringbuffer *ringbuf)
994
int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
799
{
995
{
800
	struct intel_engine_cs *ring = ringbuf->ring;
996
	struct intel_engine_cs *ring = req->ring;
Line 801... Line 997...
801
	int ret;
997
	int ret;
802
 
998
 
Line 803... Line 999...
803
	if (!ring->gpu_caches_dirty)
999
	if (!ring->gpu_caches_dirty)
804
		return 0;
1000
		return 0;
805
 
1001
 
Line 806... Line 1002...
806
	ret = ring->emit_flush(ringbuf, 0, I915_GEM_GPU_DOMAINS);
1002
	ret = ring->emit_flush(req, 0, I915_GEM_GPU_DOMAINS);
807
	if (ret)
1003
	if (ret)
808
		return ret;
1004
		return ret;
Line 809... Line -...
809
 
-
 
810
	ring->gpu_caches_dirty = false;
-
 
811
	return 0;
-
 
812
}
-
 
813
 
-
 
814
/**
-
 
815
 * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload
-
 
816
 * @ringbuf: Logical Ringbuffer to advance.
-
 
817
 *
-
 
818
 * The tail is updated in our logical ringbuffer struct, not in the actual context. What
1005
 
819
 * really happens during submission is that the context and current tail will be placed
-
 
820
 * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
1006
	ring->gpu_caches_dirty = false;
821
 * point, the tail *inside* the context is updated and the ELSP written to.
-
 
822
 */
-
 
823
void intel_logical_ring_advance_and_submit(struct intel_ringbuffer *ringbuf)
-
 
824
{
-
 
825
	struct intel_engine_cs *ring = ringbuf->ring;
-
 
826
	struct intel_context *ctx = ringbuf->FIXME_lrc_ctx;
-
 
827
 
-
 
828
	intel_logical_ring_advance(ringbuf);
-
 
829
 
-
 
830
	if (intel_ring_stopped(ring))
-
 
831
		return;
-
 
832
 
1007
	return 0;
833
	execlists_context_queue(ring, ctx, ringbuf->tail);
1008
}
834
}
1009
 
835
 
1010
static int intel_lr_context_do_pin(struct intel_engine_cs *ring,
836
static int intel_lr_context_pin(struct intel_engine_cs *ring,
1011
		struct drm_i915_gem_object *ctx_obj,
Line 837... Line 1012...
837
		struct intel_context *ctx)
1012
		struct intel_ringbuffer *ringbuf)
838
{
-
 
839
	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
1013
{
840
	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
1014
	struct drm_device *dev = ring->dev;
841
	int ret = 0;
1015
	struct drm_i915_private *dev_priv = dev->dev_private;
842
 
1016
	int ret = 0;
Line 843... Line 1017...
843
	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
1017
 
844
	if (ctx->engine[ring->id].unpin_count++ == 0) {
1018
	WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
845
		ret = i915_gem_obj_ggtt_pin(ctx_obj,
1019
	ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
-
 
1020
			PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
-
 
1021
	if (ret)
846
				GEN8_LR_CONTEXT_ALIGN, 0);
1022
		return ret;
-
 
1023
 
-
 
1024
	ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
-
 
1025
	if (ret)
Line 847... Line 1026...
847
		if (ret)
1026
		goto unpin_ctx_obj;
Line 848... Line 1027...
848
			goto reset_unpin_count;
1027
 
849
 
1028
	ctx_obj->dirty = true;
850
		ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
-
 
851
		if (ret)
-
 
Line 852... Line 1029...
852
			goto unpin_ctx_obj;
1029
 
853
	}
1030
	/* Invalidate GuC TLB. */
Line 854... Line 1031...
854
 
1031
	if (i915.enable_guc_submission)
855
	return ret;
-
 
856
 
1032
		I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
-
 
1033
 
-
 
1034
	return ret;
857
unpin_ctx_obj:
1035
 
858
	i915_gem_object_ggtt_unpin(ctx_obj);
1036
unpin_ctx_obj:
-
 
1037
	i915_gem_object_ggtt_unpin(ctx_obj);
-
 
1038
 
-
 
1039
	return ret;
-
 
1040
}
-
 
1041
 
-
 
1042
static int intel_lr_context_pin(struct drm_i915_gem_request *rq)
-
 
1043
{
-
 
1044
	int ret = 0;
-
 
1045
	struct intel_engine_cs *ring = rq->ring;
-
 
1046
	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
-
 
1047
	struct intel_ringbuffer *ringbuf = rq->ringbuf;
-
 
1048
 
-
 
1049
	if (rq->ctx->engine[ring->id].pin_count++ == 0) {
-
 
1050
		ret = intel_lr_context_do_pin(ring, ctx_obj, ringbuf);
-
 
1051
		if (ret)
-
 
1052
			goto reset_pin_count;
-
 
1053
	}
-
 
1054
	return ret;
Line 859... Line 1055...
859
reset_unpin_count:
1055
 
860
	ctx->engine[ring->id].unpin_count = 0;
1056
reset_pin_count:
861
 
1057
	rq->ctx->engine[ring->id].pin_count = 0;
862
	return ret;
1058
	return ret;
863
}
1059
}
864
 
1060
 
865
void intel_lr_context_unpin(struct intel_engine_cs *ring,
1061
void intel_lr_context_unpin(struct drm_i915_gem_request *rq)
866
		struct intel_context *ctx)
1062
{
Line 867... Line 1063...
867
{
1063
	struct intel_engine_cs *ring = rq->ring;
868
	struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
-
 
869
	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
1064
	struct drm_i915_gem_object *ctx_obj = rq->ctx->engine[ring->id].state;
870
 
1065
	struct intel_ringbuffer *ringbuf = rq->ringbuf;
-
 
1066
 
-
 
1067
	if (ctx_obj) {
-
 
1068
		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
-
 
1069
		if (--rq->ctx->engine[ring->id].pin_count == 0) {
-
 
1070
			intel_unpin_ringbuffer_obj(ringbuf);
Line 871... Line 1071...
871
	if (ctx_obj) {
1071
			i915_gem_object_ggtt_unpin(ctx_obj);
872
		WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
1072
		}
Line 873... Line 1073...
873
		if (--ctx->engine[ring->id].unpin_count == 0) {
1073
	}
874
			intel_unpin_ringbuffer_obj(ringbuf);
1074
}
875
			i915_gem_object_ggtt_unpin(ctx_obj);
-
 
876
		}
-
 
877
	}
1075
 
878
}
1076
static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
Line 879... Line -...
879
 
-
 
880
static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
1077
{
881
				    struct intel_context *ctx)
1078
	int ret, i;
882
{
-
 
883
	int ret;
1079
	struct intel_engine_cs *ring = req->ring;
-
 
1080
	struct intel_ringbuffer *ringbuf = req->ringbuf;
-
 
1081
	struct drm_device *dev = ring->dev;
-
 
1082
	struct drm_i915_private *dev_priv = dev->dev_private;
-
 
1083
	struct i915_workarounds *w = &dev_priv->workarounds;
-
 
1084
 
884
 
1085
	if (WARN_ON_ONCE(w->count == 0))
885
	if (ring->outstanding_lazy_seqno)
-
 
-
 
1086
		return 0;
Line 886... Line -...
886
		return 0;
-
 
887
 
-
 
888
	if (ring->preallocated_lazy_request == NULL) {
-
 
889
		struct drm_i915_gem_request *request;
-
 
890
 
-
 
891
		request = kmalloc(sizeof(*request), GFP_KERNEL);
1087
 
Line 892... Line 1088...
892
		if (request == NULL)
1088
	ring->gpu_caches_dirty = true;
-
 
1089
	ret = logical_ring_flush_all_caches(req);
893
			return -ENOMEM;
1090
	if (ret)
-
 
1091
		return ret;
Line 894... Line 1092...
894
 
1092
 
895
		if (ctx != ring->default_context) {
1093
	ret = intel_logical_ring_begin(req, w->count * 2 + 2);
Line 896... Line 1094...
896
			ret = intel_lr_context_pin(ring, ctx);
1094
	if (ret)
897
			if (ret) {
1095
		return ret;
898
				kfree(request);
-
 
899
				return ret;
1096
 
-
 
1097
	intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(w->count));
900
			}
1098
	for (i = 0; i < w->count; i++) {
901
		}
1099
		intel_logical_ring_emit(ringbuf, w->reg[i].addr);
-
 
1100
		intel_logical_ring_emit(ringbuf, w->reg[i].value);
902
 
1101
	}
Line 903... Line -...
903
		/* Hold a reference to the context this request belongs to
-
 
904
		 * (we will need it when the time comes to emit/retire the
-
 
905
		 * request).
-
 
Line -... Line 1102...
-
 
1102
	intel_logical_ring_emit(ringbuf, MI_NOOP);
-
 
1103
 
-
 
1104
	intel_logical_ring_advance(ringbuf);
-
 
1105
 
-
 
1106
	ring->gpu_caches_dirty = true;
-
 
1107
	ret = logical_ring_flush_all_caches(req);
-
 
1108
	if (ret)
-
 
1109
		return ret;
-
 
1110
 
-
 
1111
	return 0;
-
 
1112
}
-
 
1113
 
-
 
1114
#define wa_ctx_emit(batch, index, cmd)					\
906
		 */
1115
	do {								\
-
 
1116
		int __index = (index)++;				\
-
 
1117
		if (WARN_ON(__index >= (PAGE_SIZE / sizeof(uint32_t)))) { \
-
 
1118
			return -ENOSPC;					\
907
		request->ctx = ctx;
1119
		}							\
908
		i915_gem_context_reference(request->ctx);
1120
		batch[__index] = (cmd);					\
-
 
1121
	} while (0)
-
 
1122
 
-
 
1123
 
909
 
1124
/*
-
 
1125
 * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
-
 
1126
 * PIPE_CONTROL instruction. This is required for the flush to happen correctly
-
 
1127
 * but there is a slight complication as this is applied in WA batch where the
-
 
1128
 * values are only initialized once so we cannot take register value at the
-
 
1129
 * beginning and reuse it further; hence we save its value to memory, upload a
-
 
1130
 * constant value with bit21 set and then we restore it back with the saved value.
-
 
1131
 * To simplify the WA, a constant value is formed by using the default value
Line 910... Line 1132...
910
		ring->preallocated_lazy_request = request;
1132
 * of this register. This shouldn't be a problem because we are only modifying
911
	}
1133
 * it for a short period and this batch is non-preemptible. We can of course
912
 
1134
 * use additional instructions that read the actual value of the register
913
	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
1135
 * at that time and set our bit of interest but it makes the WA complicated.
914
}
1136
 *
915
 
-
 
916
static int logical_ring_wait_request(struct intel_ringbuffer *ringbuf,
-
 
Line -... Line 1137...
-
 
1137
 * This WA is also required for Gen9 so extracting as a function avoids
917
				     int bytes)
1138
 * code duplication.
918
{
1139
 */
Line -... Line 1140...
-
 
1140
static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *ring,
-
 
1141
						uint32_t *const batch,
-
 
1142
						uint32_t index)
919
	struct intel_engine_cs *ring = ringbuf->ring;
1143
{
920
	struct drm_i915_gem_request *request;
1144
	uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES);
-
 
1145
 
921
	u32 seqno = 0;
1146
	/*
Line -... Line 1147...
-
 
1147
	 * WaDisableLSQCROPERFforOCL:skl
-
 
1148
	 * This WA is implemented in skl_init_clock_gating() but since
922
	int ret;
1149
	 * this batch updates GEN8_L3SQCREG4 with default value we need to
923
 
1150
	 * set this bit here to retain the WA during flush.
924
	if (ringbuf->last_retired_head != -1) {
1151
	 */
Line 925... Line -...
925
		ringbuf->head = ringbuf->last_retired_head;
-
 
926
		ringbuf->last_retired_head = -1;
1152
	if (IS_SKYLAKE(ring->dev) && INTEL_REVID(ring->dev) <= SKL_REVID_E0)
927
 
1153
		l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS;
Line 928... Line 1154...
928
		ringbuf->space = intel_ring_space(ringbuf);
1154
 
929
		if (ringbuf->space >= bytes)
1155
	wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
-
 
1156
				   MI_SRM_LRM_GLOBAL_GTT));
930
			return 0;
1157
	wa_ctx_emit(batch, index, GEN8_L3SQCREG4);
931
	}
-
 
932
 
-
 
933
	list_for_each_entry(request, &ring->request_list, list) {
1158
	wa_ctx_emit(batch, index, ring->scratch.gtt_offset + 256);
934
		if (__intel_ring_space(request->tail, ringbuf->tail,
-
 
935
				       ringbuf->size) >= bytes) {
1159
	wa_ctx_emit(batch, index, 0);
Line 936... Line 1160...
936
			seqno = request->seqno;
1160
 
937
			break;
1161
	wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
938
		}
1162
	wa_ctx_emit(batch, index, GEN8_L3SQCREG4);
-
 
1163
	wa_ctx_emit(batch, index, l3sqc4_flush);
-
 
1164
 
Line -... Line 1165...
-
 
1165
	wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
-
 
1166
	wa_ctx_emit(batch, index, (PIPE_CONTROL_CS_STALL |
-
 
1167
				   PIPE_CONTROL_DC_FLUSH_ENABLE));
-
 
1168
	wa_ctx_emit(batch, index, 0);
-
 
1169
	wa_ctx_emit(batch, index, 0);
-
 
1170
	wa_ctx_emit(batch, index, 0);
-
 
1171
	wa_ctx_emit(batch, index, 0);
-
 
1172
 
-
 
1173
	wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 |
-
 
1174
				   MI_SRM_LRM_GLOBAL_GTT));
-
 
1175
	wa_ctx_emit(batch, index, GEN8_L3SQCREG4);
-
 
1176
	wa_ctx_emit(batch, index, ring->scratch.gtt_offset + 256);
-
 
1177
	wa_ctx_emit(batch, index, 0);
-
 
1178
 
-
 
1179
	return index;
939
	}
1180
}
940
 
1181
 
-
 
1182
static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx,
-
 
1183
				    uint32_t offset,
-
 
1184
				    uint32_t start_alignment)
-
 
1185
{
-
 
1186
	return wa_ctx->offset = ALIGN(offset, start_alignment);
-
 
1187
}
-
 
1188
 
-
 
1189
static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx,
-
 
1190
			     uint32_t offset,
-
 
1191
			     uint32_t size_alignment)
-
 
1192
{
-
 
1193
	wa_ctx->size = offset - wa_ctx->offset;
-
 
1194
 
-
 
1195
	WARN(wa_ctx->size % size_alignment,
-
 
1196
	     "wa_ctx_bb failed sanity checks: size %d is not aligned to %d\n",
-
 
1197
	     wa_ctx->size, size_alignment);
Line 941... Line 1198...
941
	if (seqno == 0)
1198
	return 0;
-
 
1199
}
-
 
1200
 
-
 
1201
/**
-
 
1202
 * gen8_init_indirectctx_bb() - initialize indirect ctx batch with WA
-
 
1203
 *
942
		return -ENOSPC;
1204
 * @ring: only applicable for RCS
-
 
1205
 * @wa_ctx: structure representing wa_ctx
-
 
1206
 *  offset: specifies start of the batch, should be cache-aligned. This is updated
-
 
1207
 *    with the offset value received as input.
-
 
1208
 *  size: size of the batch in DWORDS but HW expects in terms of cachelines
943
 
1209
 * @batch: page in which WA are loaded
-
 
1210
 * @offset: This field specifies the start of the batch, it should be
944
	ret = i915_wait_seqno(ring, seqno);
1211
 *  cache-aligned otherwise it is adjusted accordingly.
-
 
1212
 *  Typically we only have one indirect_ctx and per_ctx batch buffer which are
-
 
1213
 *  initialized at the beginning and shared across all contexts but this field
-
 
1214
 *  helps us to have multiple batches at different offsets and select them based
-
 
1215
 *  on some criteria. At the moment this batch always starts at the beginning of the page
-
 
1216
 *  and at this point we don't have multiple wa_ctx batch buffers.
-
 
1217
 *
945
	if (ret)
1218
 *  The number of WA applied is not known at the beginning; we use this field
-
 
1219
 *  to return the number of DWORDS written.
-
 
1220
 *
-
 
1221
 *  It is to be noted that this batch does not contain MI_BATCH_BUFFER_END
-
 
1222
 *  so it adds NOOPs as padding to make it cacheline aligned.
-
 
1223
 *  MI_BATCH_BUFFER_END will be added to perctx batch and both of them together
-
 
1224
 *  make a complete batch buffer.
-
 
1225
 *
-
 
1226
 * Return: non-zero if we exceed the PAGE_SIZE limit.
-
 
1227
 */
-
 
1228
 
-
 
1229
static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
-
 
1230
				    struct i915_wa_ctx_bb *wa_ctx,
946
		return ret;
1231
				    uint32_t *const batch,
-
 
1232
				    uint32_t *offset)
-
 
1233
{
-
 
1234
	uint32_t scratch_addr;
-
 
1235
	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
-
 
1236
 
-
 
1237
	/* WaDisableCtxRestoreArbitration:bdw,chv */
-
 
1238
	wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
-
 
1239
 
Line 947... Line -...
947
 
-
 
948
	i915_gem_retire_requests_ring(ring);
-
 
949
	ringbuf->head = ringbuf->last_retired_head;
1240
	/* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
950
	ringbuf->last_retired_head = -1;
-
 
951
 
-
 
952
	ringbuf->space = intel_ring_space(ringbuf);
-
 
953
	return 0;
1241
	if (IS_BROADWELL(ring->dev)) {
Line -... Line 1242...
-
 
1242
		int rc = gen8_emit_flush_coherentl3_wa(ring, batch, index);
-
 
1243
		if (rc < 0)
-
 
1244
			return rc;
-
 
1245
		index = rc;
-
 
1246
	}
-
 
1247
 
-
 
1248
	/* WaClearSlmSpaceAtContextSwitch:bdw,chv */
-
 
1249
	/* Actual scratch location is at 128 bytes offset */
-
 
1250
	scratch_addr = ring->scratch.gtt_offset + 2*CACHELINE_BYTES;
-
 
1251
 
-
 
1252
	wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
-
 
1253
	wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
-
 
1254
				   PIPE_CONTROL_GLOBAL_GTT_IVB |
-
 
1255
				   PIPE_CONTROL_CS_STALL |
-
 
1256
				   PIPE_CONTROL_QW_WRITE));
-
 
1257
	wa_ctx_emit(batch, index, scratch_addr);
-
 
1258
	wa_ctx_emit(batch, index, 0);
-
 
1259
	wa_ctx_emit(batch, index, 0);
-
 
1260
	wa_ctx_emit(batch, index, 0);
-
 
1261
 
954
}
1262
	/* Pad to end of cacheline */
-
 
1263
	while (index % CACHELINE_DWORDS)
-
 
1264
		wa_ctx_emit(batch, index, MI_NOOP);
Line 955... Line 1265...
955
 
1265
 
956
static int logical_ring_wait_for_space(struct intel_ringbuffer *ringbuf,
1266
	/*
957
				       int bytes)
-
 
958
{
-
 
Line 959... Line 1267...
959
	struct intel_engine_cs *ring = ringbuf->ring;
1267
	 * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
960
	struct drm_device *dev = ring->dev;
-
 
961
	struct drm_i915_private *dev_priv = dev->dev_private;
-
 
962
	unsigned long end;
-
 
963
	int ret;
-
 
Line 964... Line 1268...
964
 
1268
	 * execution depends on the length specified in terms of cache lines
965
	ret = logical_ring_wait_request(ringbuf, bytes);
1269
	 * in the register CTX_RCS_INDIRECT_CTX
Line 966... Line 1270...
966
	if (ret != -ENOSPC)
1270
	 */
-
 
1271
 
-
 
1272
	return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
-
 
1273
}
967
		return ret;
1274
 
968
 
1275
/**
969
	/* Force the context submission in case we have been skipping it */
1276
 * gen8_init_perctx_bb() - initialize per ctx batch with WA
-
 
1277
 *
Line 970... Line 1278...
970
	intel_logical_ring_advance_and_submit(ringbuf);
1278
 * @ring: only applicable for RCS
-
 
1279
 * @wa_ctx: structure representing wa_ctx
-
 
1280
 *  offset: specifies start of the batch, should be cache-aligned.
971
 
1281
 *  size: size of the batch in DWORDS but HW expects in terms of cachelines
Line -... Line 1282...
-
 
1282
 * @batch: page in which WA are loaded
-
 
1283
 * @offset: This field specifies the start of this batch.
972
	/* With GEM the hangcheck timer should kick us out of the loop,
1284
 *   This batch is started immediately after indirect_ctx batch. Since we ensure
973
	 * leaving it early runs the risk of corrupting GEM state (due
1285
 *   that indirect_ctx ends on a cacheline this batch is aligned automatically.
-
 
1286
 *
-
 
1287
 *   The number of DWORDS written is returned using this field.
-
 
1288
 *
-
 
1289
 *  This batch is terminated with MI_BATCH_BUFFER_END and so we need not add padding
-
 
1290
 *  to align it with cacheline as padding after MI_BATCH_BUFFER_END is redundant.
-
 
1291
 */
-
 
1292
static int gen8_init_perctx_bb(struct intel_engine_cs *ring,
974
	 * to running on almost untested codepaths). But on resume
1293
			       struct i915_wa_ctx_bb *wa_ctx,
Line 975... Line 1294...
975
	 * timers don't work yet, so prevent a complete hang in that
1294
			       uint32_t *const batch,
-
 
1295
			       uint32_t *offset)
976
	 * case by choosing an insanely large timeout. */
1296
{
977
	end = jiffies + 60 * HZ;
1297
	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
-
 
1298
 
978
 
1299
	/* WaDisableCtxRestoreArbitration:bdw,chv */
-
 
1300
	wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);
Line -... Line 1301...
-
 
1301
 
-
 
1302
	wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
-
 
1303
 
-
 
1304
	return wa_ctx_end(wa_ctx, *offset = index, 1);
-
 
1305
}
979
	do {
1306
 
-
 
1307
static int gen9_init_indirectctx_bb(struct intel_engine_cs *ring,
980
		ringbuf->head = I915_READ_HEAD(ring);
1308
				    struct i915_wa_ctx_bb *wa_ctx,
-
 
1309
				    uint32_t *const batch,
Line -... Line 1310...
-
 
1310
				    uint32_t *offset)
-
 
1311
{
-
 
1312
	int ret;
-
 
1313
	struct drm_device *dev = ring->dev;
-
 
1314
	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
-
 
1315
 
-
 
1316
	/* WaDisableCtxRestoreArbitration:skl,bxt */
981
		ringbuf->space = intel_ring_space(ringbuf);
1317
	if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) <= SKL_REVID_D0)) ||
982
		if (ringbuf->space >= bytes) {
1318
	    (IS_BROXTON(dev) && (INTEL_REVID(dev) == BXT_REVID_A0)))
Line 983... Line 1319...
983
			ret = 0;
1319
		wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
984
			break;
1320
 
985
		}
1321
	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt */
Line 986... Line 1322...
986
 
1322
	ret = gen8_emit_flush_coherentl3_wa(ring, batch, index);
987
		msleep(1);
1323
	if (ret < 0)
988
 
1324
		return ret;
989
		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
1325
	index = ret;
990
					   dev_priv->mm.interruptible);
1326
 
Line 991... Line 1327...
991
		if (ret)
1327
	/* Pad to end of cacheline */
-
 
1328
	while (index % CACHELINE_DWORDS)
992
			break;
1329
		wa_ctx_emit(batch, index, MI_NOOP);
993
 
1330
 
-
 
1331
	return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
994
		if (time_after(jiffies, end)) {
1332
}
995
			ret = -EBUSY;
1333
 
Line 996... Line 1334...
996
			break;
1334
static int gen9_init_perctx_bb(struct intel_engine_cs *ring,
997
		}
1335
			       struct i915_wa_ctx_bb *wa_ctx,
Line 998... Line -...
998
	} while (1);
-
 
999
 
-
 
1000
	return ret;
-
 
1001
}
-
 
1002
 
-
 
1003
static int logical_ring_wrap_buffer(struct intel_ringbuffer *ringbuf)
-
 
1004
{
-
 
1005
	uint32_t __iomem *virt;
-
 
1006
	int rem = ringbuf->size - ringbuf->tail;
-
 
1007
 
-
 
1008
	if (ringbuf->space < rem) {
-
 
1009
		int ret = logical_ring_wait_for_space(ringbuf, rem);
-
 
1010
 
-
 
1011
		if (ret)
1336
			       uint32_t *const batch,
1012
			return ret;
1337
			       uint32_t *offset)
1013
	}
1338
{
1014
 
1339
	struct drm_device *dev = ring->dev;
1015
	virt = ringbuf->virtual_start + ringbuf->tail;
1340
	uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
1016
	rem /= 4;
1341
 
1017
	while (rem--)
1342
	/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
1018
		iowrite32(MI_NOOP, virt++);
-
 
1019
 
-
 
1020
	ringbuf->tail = 0;
1343
	if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) <= SKL_REVID_B0)) ||
1021
	ringbuf->space = intel_ring_space(ringbuf);
-
 
Line 1022... Line 1344...
1022
 
1344
	    (IS_BROXTON(dev) && (INTEL_REVID(dev) == BXT_REVID_A0))) {
-
 
1345
		wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
1023
	return 0;
1346
		wa_ctx_emit(batch, index, GEN9_SLICE_COMMON_ECO_CHICKEN0);
-
 
1347
		wa_ctx_emit(batch, index,
1024
}
1348
			    _MASKED_BIT_ENABLE(DISABLE_PIXEL_MASK_CAMMING));
-
 
1349
		wa_ctx_emit(batch, index, MI_NOOP);
-
 
1350
	}
Line 1025... Line -...
1025
 
-
 
1026
static int logical_ring_prepare(struct intel_ringbuffer *ringbuf, int bytes)
-
 
1027
{
-
 
1028
	int ret;
1351
 
Line 1029... Line 1352...
1029
 
1352
	/* WaDisableCtxRestoreArbitration:skl,bxt */
-
 
1353
	if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) <= SKL_REVID_D0)) ||
-
 
1354
	    (IS_BROXTON(dev) && (INTEL_REVID(dev) == BXT_REVID_A0)))
-
 
1355
		wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);
1030
	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
1356
 
1031
		ret = logical_ring_wrap_buffer(ringbuf);
1357
	wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
Line 1032... Line 1358...
1032
		if (unlikely(ret))
1358
 
1033
			return ret;
1359
	return wa_ctx_end(wa_ctx, *offset = index, 1);
1034
	}
-
 
1035
 
-
 
1036
	if (unlikely(ringbuf->space < bytes)) {
1360
}
1037
		ret = logical_ring_wait_for_space(ringbuf, bytes);
1361
 
1038
		if (unlikely(ret))
-
 
1039
			return ret;
-
 
-
 
1362
static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *ring, u32 size)
Line 1040... Line 1363...
1040
	}
1363
{
-
 
1364
	int ret;
-
 
1365
 
1041
 
1366
	ring->wa_ctx.obj = i915_gem_alloc_object(ring->dev, PAGE_ALIGN(size));
-
 
1367
	if (!ring->wa_ctx.obj) {
Line -... Line 1368...
-
 
1368
		DRM_DEBUG_DRIVER("alloc LRC WA ctx backing obj failed.\n");
1042
	return 0;
1369
		return -ENOMEM;
-
 
1370
	}
-
 
1371
 
-
 
1372
	ret = i915_gem_obj_ggtt_pin(ring->wa_ctx.obj, PAGE_SIZE, 0);
1043
}
1373
	if (ret) {
-
 
1374
		DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
-
 
1375
				 ret);
-
 
1376
		drm_gem_object_unreference(&ring->wa_ctx.obj->base);
1044
 
1377
		return ret;
1045
/**
1378
	}
Line 1046... Line 1379...
1046
 * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands
1379
 
-
 
1380
	return 0;
-
 
1381
}
-
 
1382
 
1047
 *
1383
static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *ring)
1048
 * @ringbuf: Logical ringbuffer.
1384
{
-
 
1385
	if (ring->wa_ctx.obj) {
-
 
1386
		i915_gem_object_ggtt_unpin(ring->wa_ctx.obj);
-
 
1387
		drm_gem_object_unreference(&ring->wa_ctx.obj->base);
-
 
1388
		ring->wa_ctx.obj = NULL;
-
 
1389
	}
-
 
1390
}
-
 
1391
 
Line 1049... Line 1392...
1049
 * @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
1392
static int intel_init_workaround_bb(struct intel_engine_cs *ring)
1050
 *
1393
{
-
 
1394
	int ret;
1051
 * The ringbuffer might not be ready to accept the commands right away (maybe it needs to
1395
	uint32_t *batch;
-
 
1396
	uint32_t offset;
1052
 * be wrapped, or wait a bit for the tail to be updated). This function takes care of that
1397
	struct page *page;
1053
 * and also preallocates a request (every workload submission is still mediated through
1398
	struct i915_ctx_workarounds *wa_ctx = &ring->wa_ctx;
1054
 * requests, same as it did with legacy ringbuffer submission).
-
 
Line 1055... Line -...
1055
 *
-
 
1056
 * Return: non-zero if the ringbuffer is not ready to be written to.
1399
 
1057
 */
1400
	WARN_ON(ring->id != RCS);
1058
int intel_logical_ring_begin(struct intel_ringbuffer *ringbuf, int num_dwords)
-
 
1059
{
1401
 
1060
	struct intel_engine_cs *ring = ringbuf->ring;
1402
	/* update this when WA for higher Gen are added */
Line 1061... Line 1403...
1061
	struct drm_device *dev = ring->dev;
1403
	if (INTEL_INFO(ring->dev)->gen > 9) {
1062
	struct drm_i915_private *dev_priv = dev->dev_private;
1404
		DRM_ERROR("WA batch buffer is not initialized for Gen%d\n",
Line 1063... Line 1405...
1063
	int ret;
1405
			  INTEL_INFO(ring->dev)->gen);
1064
 
1406
		return 0;
1065
	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
1407
	}
1066
				   dev_priv->mm.interruptible);
1408
 
-
 
1409
	/* some WA perform writes to scratch page, ensure it is valid */
-
 
1410
	if (ring->scratch.obj == NULL) {
-
 
1411
		DRM_ERROR("scratch page not allocated for %s\n", ring->name);
-
 
1412
		return -EINVAL;
Line 1067... Line 1413...
1067
	if (ret)
1413
	}
1068
		return ret;
1414
 
Line -... Line 1415...
-
 
1415
	ret = lrc_setup_wa_ctx_obj(ring, PAGE_SIZE);
-
 
1416
	if (ret) {
-
 
1417
		DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
-
 
1418
		return ret;
-
 
1419
	}
-
 
1420
 
1069
 
1421
	page = i915_gem_object_get_page(wa_ctx->obj, 0);
1070
	ret = logical_ring_prepare(ringbuf, num_dwords * sizeof(uint32_t));
1422
	batch = kmap_atomic(page);
1071
	if (ret)
1423
	offset = 0;
1072
		return ret;
1424
 
-
 
1425
	if (INTEL_INFO(ring->dev)->gen == 8) {
-
 
1426
		ret = gen8_init_indirectctx_bb(ring,
-
 
1427
					       &wa_ctx->indirect_ctx,
-
 
1428
					       batch,
-
 
1429
					       &offset);
-
 
1430
		if (ret)
-
 
1431
			goto out;
-
 
1432
 
-
 
1433
		ret = gen8_init_perctx_bb(ring,
-
 
1434
					  &wa_ctx->per_ctx,
-
 
1435
					  batch,
-
 
1436
					  &offset);
-
 
1437
		if (ret)
-
 
1438
			goto out;
-
 
1439
	} else if (INTEL_INFO(ring->dev)->gen == 9) {
-
 
1440
		ret = gen9_init_indirectctx_bb(ring,
-
 
1441
					       &wa_ctx->indirect_ctx,
-
 
1442
					       batch,
-
 
1443
					       &offset);
-
 
1444
		if (ret)
-
 
1445
			goto out;
-
 
1446
 
-
 
1447
		ret = gen9_init_perctx_bb(ring,
1073
 
1448
					  &wa_ctx->per_ctx,
Line 1074... Line 1449...
1074
	/* Preallocate the olr before touching the ring */
1449
					  batch,
Line 1075... Line 1450...
1075
	ret = logical_ring_alloc_seqno(ring, ringbuf->FIXME_lrc_ctx);
1450
					  &offset);
Line 1153... Line 1528...
1153
	 *
1528
	 *
1154
	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
1529
	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
1155
	 */
1530
	 */
1156
	I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
1531
	I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
Line -... Line 1532...
-
 
1532
 
-
 
1533
	I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
-
 
1534
 
-
 
1535
	return init_workarounds_ring(ring);
-
 
1536
}
-
 
1537
 
-
 
1538
static int gen9_init_render_ring(struct intel_engine_cs *ring)
-
 
1539
{
-
 
1540
	int ret;
1157
 
1541
 
1158
	ret = intel_init_pipe_control(ring);
1542
	ret = gen8_init_common_ring(ring);
1159
	if (ret)
1543
	if (ret)
Line 1160... Line -...
1160
		return ret;
-
 
1161
 
-
 
1162
	I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
1544
		return ret;
1163
 
1545
 
Line 1164... Line 1546...
1164
	return init_workarounds_ring(ring);
1546
	return init_workarounds_ring(ring);
1165
}
-
 
1166
 
1547
}
-
 
1548
 
-
 
1549
static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
-
 
1550
{
-
 
1551
	struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt;
-
 
1552
	struct intel_engine_cs *ring = req->ring;
-
 
1553
	struct intel_ringbuffer *ringbuf = req->ringbuf;
-
 
1554
	const int num_lri_cmds = GEN8_LEGACY_PDPES * 2;
-
 
1555
	int i, ret;
-
 
1556
 
-
 
1557
	ret = intel_logical_ring_begin(req, num_lri_cmds * 2 + 2);
-
 
1558
	if (ret)
-
 
1559
		return ret;
-
 
1560
 
-
 
1561
	intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(num_lri_cmds));
-
 
1562
	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
-
 
1563
		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
-
 
1564
 
-
 
1565
		intel_logical_ring_emit(ringbuf, GEN8_RING_PDP_UDW(ring, i));
-
 
1566
		intel_logical_ring_emit(ringbuf, upper_32_bits(pd_daddr));
-
 
1567
		intel_logical_ring_emit(ringbuf, GEN8_RING_PDP_LDW(ring, i));
-
 
1568
		intel_logical_ring_emit(ringbuf, lower_32_bits(pd_daddr));
-
 
1569
	}
-
 
1570
 
-
 
1571
	intel_logical_ring_emit(ringbuf, MI_NOOP);
-
 
1572
	intel_logical_ring_advance(ringbuf);
-
 
1573
 
-
 
1574
	return 0;
-
 
1575
}
-
 
1576
 
-
 
1577
static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
1167
static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf,
1578
			      u64 offset, unsigned dispatch_flags)
1168
			      u64 offset, unsigned flags)
1579
{
Line -... Line 1580...
-
 
1580
	struct intel_ringbuffer *ringbuf = req->ringbuf;
-
 
1581
	bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE);
-
 
1582
	int ret;
-
 
1583
 
-
 
1584
	/* Don't rely in hw updating PDPs, specially in lite-restore.
-
 
1585
	 * Ideally, we should set Force PD Restore in ctx descriptor,
-
 
1586
	 * but we can't. Force Restore would be a second option, but
-
 
1587
	 * it is unsafe in case of lite-restore (because the ctx is
-
 
1588
	 * not idle). PML4 is allocated during ppgtt init so this is
-
 
1589
	 * not needed in 48-bit.*/
-
 
1590
	if (req->ctx->ppgtt &&
-
 
1591
	    (intel_ring_flag(req->ring) & req->ctx->ppgtt->pd_dirty_rings)) {
-
 
1592
		if (!USES_FULL_48BIT_PPGTT(req->i915) &&
-
 
1593
		    !intel_vgpu_active(req->i915->dev)) {
-
 
1594
			ret = intel_logical_ring_emit_pdps(req);
-
 
1595
			if (ret)
-
 
1596
				return ret;
-
 
1597
		}
1169
{
1598
 
1170
	bool ppgtt = !(flags & I915_DISPATCH_SECURE);
1599
		req->ctx->ppgtt->pd_dirty_rings &= ~intel_ring_flag(req->ring);
1171
	int ret;
1600
	}
Line 1172... Line 1601...
1172
 
1601
 
1173
	ret = intel_logical_ring_begin(ringbuf, 4);
1602
	ret = intel_logical_ring_begin(req, 4);
-
 
1603
	if (ret)
-
 
1604
		return ret;
-
 
1605
 
1174
	if (ret)
1606
	/* FIXME(BDW): Address space and security selectors. */
1175
		return ret;
1607
	intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 |
1176
 
1608
				(ppgtt<<8) |
1177
	/* FIXME(BDW): Address space and security selectors. */
1609
				(dispatch_flags & I915_DISPATCH_RS ?
Line 1215... Line 1647...
1215
		POSTING_READ(RING_IMR(ring->mmio_base));
1647
		POSTING_READ(RING_IMR(ring->mmio_base));
1216
	}
1648
	}
1217
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1649
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1218
}
1650
}
Line 1219... Line 1651...
1219
 
1651
 
1220
static int gen8_emit_flush(struct intel_ringbuffer *ringbuf,
1652
static int gen8_emit_flush(struct drm_i915_gem_request *request,
1221
			   u32 invalidate_domains,
1653
			   u32 invalidate_domains,
1222
			   u32 unused)
1654
			   u32 unused)
-
 
1655
{
1223
{
1656
	struct intel_ringbuffer *ringbuf = request->ringbuf;
1224
	struct intel_engine_cs *ring = ringbuf->ring;
1657
	struct intel_engine_cs *ring = ringbuf->ring;
1225
	struct drm_device *dev = ring->dev;
1658
	struct drm_device *dev = ring->dev;
1226
	struct drm_i915_private *dev_priv = dev->dev_private;
1659
	struct drm_i915_private *dev_priv = dev->dev_private;
1227
	uint32_t cmd;
1660
	uint32_t cmd;
Line 1228... Line 1661...
1228
	int ret;
1661
	int ret;
1229
 
1662
 
1230
	ret = intel_logical_ring_begin(ringbuf, 4);
1663
	ret = intel_logical_ring_begin(request, 4);
Line 1231... Line 1664...
1231
	if (ret)
1664
	if (ret)
Line 1232... Line 1665...
1232
		return ret;
1665
		return ret;
1233
 
1666
 
1234
	cmd = MI_FLUSH_DW + 1;
1667
	cmd = MI_FLUSH_DW + 1;
1235
 
1668
 
-
 
1669
	/* We always require a command barrier so that subsequent
1236
	if (ring == &dev_priv->ring[VCS]) {
1670
	 * commands, such as breadcrumb interrupts, are strictly ordered
1237
		if (invalidate_domains & I915_GEM_GPU_DOMAINS)
1671
	 * wrt the contents of the write cache being flushed to memory
1238
			cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
1672
	 * (and thus being coherent from the CPU).
1239
				MI_FLUSH_DW_STORE_INDEX |
1673
	 */
-
 
1674
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
1240
				MI_FLUSH_DW_OP_STOREDW;
1675
 
1241
	} else {
1676
	if (invalidate_domains & I915_GEM_GPU_DOMAINS) {
Line 1242... Line 1677...
1242
		if (invalidate_domains & I915_GEM_DOMAIN_RENDER)
1677
		cmd |= MI_INVALIDATE_TLB;
1243
			cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
1678
		if (ring == &dev_priv->ring[VCS])
1244
				MI_FLUSH_DW_OP_STOREDW;
1679
			cmd |= MI_INVALIDATE_BSD;
Line 1253... Line 1688...
1253
	intel_logical_ring_advance(ringbuf);
1688
	intel_logical_ring_advance(ringbuf);
Line 1254... Line 1689...
1254
 
1689
 
1255
	return 0;
1690
	return 0;
Line 1256... Line 1691...
1256
}
1691
}
1257
 
1692
 
1258
static int gen8_emit_flush_render(struct intel_ringbuffer *ringbuf,
1693
static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
1259
				  u32 invalidate_domains,
1694
				  u32 invalidate_domains,
-
 
1695
				  u32 flush_domains)
1260
				  u32 flush_domains)
1696
{
1261
{
1697
	struct intel_ringbuffer *ringbuf = request->ringbuf;
-
 
1698
	struct intel_engine_cs *ring = ringbuf->ring;
1262
	struct intel_engine_cs *ring = ringbuf->ring;
1699
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
1263
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
1700
	bool vf_flush_wa;
Line 1264... Line 1701...
1264
	u32 flags = 0;
1701
	u32 flags = 0;
Line 1265... Line 1702...
1265
	int ret;
1702
	int ret;
1266
 
1703
 
1267
	flags |= PIPE_CONTROL_CS_STALL;
1704
	flags |= PIPE_CONTROL_CS_STALL;
-
 
1705
 
1268
 
1706
	if (flush_domains) {
Line 1269... Line 1707...
1269
	if (flush_domains) {
1707
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
1270
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
1708
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
1271
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
1709
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
Line 1280... Line 1718...
1280
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
1718
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
1281
		flags |= PIPE_CONTROL_QW_WRITE;
1719
		flags |= PIPE_CONTROL_QW_WRITE;
1282
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
1720
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
1283
	}
1721
	}
Line -... Line 1722...
-
 
1722
 
-
 
1723
	/*
-
 
1724
	 * On GEN9+ Before VF_CACHE_INVALIDATE we need to emit a NULL pipe
-
 
1725
	 * control.
-
 
1726
	 */
-
 
1727
	vf_flush_wa = INTEL_INFO(ring->dev)->gen >= 9 &&
-
 
1728
		      flags & PIPE_CONTROL_VF_CACHE_INVALIDATE;
1284
 
1729
 
1285
	ret = intel_logical_ring_begin(ringbuf, 6);
1730
	ret = intel_logical_ring_begin(request, vf_flush_wa ? 12 : 6);
1286
	if (ret)
1731
	if (ret)
Line -... Line 1732...
-
 
1732
		return ret;
-
 
1733
 
-
 
1734
	if (vf_flush_wa) {
-
 
1735
		intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
-
 
1736
		intel_logical_ring_emit(ringbuf, 0);
-
 
1737
		intel_logical_ring_emit(ringbuf, 0);
-
 
1738
		intel_logical_ring_emit(ringbuf, 0);
-
 
1739
		intel_logical_ring_emit(ringbuf, 0);
-
 
1740
		intel_logical_ring_emit(ringbuf, 0);
1287
		return ret;
1741
	}
1288
 
1742
 
1289
	intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
1743
	intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
1290
	intel_logical_ring_emit(ringbuf, flags);
1744
	intel_logical_ring_emit(ringbuf, flags);
1291
	intel_logical_ring_emit(ringbuf, scratch_addr);
1745
	intel_logical_ring_emit(ringbuf, scratch_addr);
Line 1305... Line 1759...
1305
static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
1759
static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
1306
{
1760
{
1307
	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
1761
	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
1308
}
1762
}
Line -... Line 1763...
-
 
1763
 
-
 
1764
static u32 bxt_a_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
-
 
1765
{
-
 
1766
 
-
 
1767
	/*
-
 
1768
	 * On BXT A steppings there is a HW coherency issue whereby the
-
 
1769
	 * MI_STORE_DATA_IMM storing the completed request's seqno
-
 
1770
	 * occasionally doesn't invalidate the CPU cache. Work around this by
-
 
1771
	 * clflushing the corresponding cacheline whenever the caller wants
-
 
1772
	 * the coherency to be guaranteed. Note that this cacheline is known
-
 
1773
	 * to be clean at this point, since we only write it in
-
 
1774
	 * bxt_a_set_seqno(), where we also do a clflush after the write. So
-
 
1775
	 * this clflush in practice becomes an invalidate operation.
-
 
1776
	 */
-
 
1777
 
-
 
1778
	if (!lazy_coherency)
-
 
1779
		intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
-
 
1780
 
-
 
1781
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
-
 
1782
}
-
 
1783
 
-
 
1784
static void bxt_a_set_seqno(struct intel_engine_cs *ring, u32 seqno)
-
 
1785
{
-
 
1786
	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
-
 
1787
 
-
 
1788
	/* See bxt_a_get_seqno() explaining the reason for the clflush. */
-
 
1789
	intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
-
 
1790
}
1309
 
1791
 
1310
static int gen8_emit_request(struct intel_ringbuffer *ringbuf)
1792
static int gen8_emit_request(struct drm_i915_gem_request *request)
-
 
1793
{
1311
{
1794
	struct intel_ringbuffer *ringbuf = request->ringbuf;
1312
	struct intel_engine_cs *ring = ringbuf->ring;
1795
	struct intel_engine_cs *ring = ringbuf->ring;
1313
	u32 cmd;
1796
	u32 cmd;
Line -... Line 1797...
-
 
1797
	int ret;
-
 
1798
 
-
 
1799
	/*
-
 
1800
	 * Reserve space for 2 NOOPs at the end of each request to be
-
 
1801
	 * used as a workaround for not being allowed to do lite
1314
	int ret;
1802
	 * restore with HEAD==TAIL (WaIdleLiteRestore).
1315
 
1803
	 */
1316
	ret = intel_logical_ring_begin(ringbuf, 6);
1804
	ret = intel_logical_ring_begin(request, 8);
Line 1317... Line 1805...
1317
	if (ret)
1805
	if (ret)
1318
		return ret;
1806
		return ret;
Line 1319... Line 1807...
1319
 
1807
 
1320
	cmd = MI_STORE_DWORD_IMM_GEN8;
1808
	cmd = MI_STORE_DWORD_IMM_GEN4;
1321
	cmd |= MI_GLOBAL_GTT;
1809
	cmd |= MI_GLOBAL_GTT;
1322
 
1810
 
1323
	intel_logical_ring_emit(ringbuf, cmd);
1811
	intel_logical_ring_emit(ringbuf, cmd);
1324
	intel_logical_ring_emit(ringbuf,
1812
	intel_logical_ring_emit(ringbuf,
1325
				(ring->status_page.gfx_addr +
1813
				(ring->status_page.gfx_addr +
1326
				(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
1814
				(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)));
1327
	intel_logical_ring_emit(ringbuf, 0);
1815
	intel_logical_ring_emit(ringbuf, 0);
-
 
1816
	intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request));
-
 
1817
	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
-
 
1818
	intel_logical_ring_emit(ringbuf, MI_NOOP);
-
 
1819
	intel_logical_ring_advance_and_submit(request);
-
 
1820
 
-
 
1821
	/*
-
 
1822
	 * Here we add two extra NOOPs as padding to avoid
-
 
1823
	 * lite restore of a context with HEAD==TAIL.
-
 
1824
	 */
-
 
1825
	intel_logical_ring_emit(ringbuf, MI_NOOP);
-
 
1826
	intel_logical_ring_emit(ringbuf, MI_NOOP);
-
 
1827
	intel_logical_ring_advance(ringbuf);
-
 
1828
 
-
 
1829
	return 0;
-
 
1830
}
-
 
1831
 
-
 
1832
static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req)
-
 
1833
{
-
 
1834
	struct render_state so;
-
 
1835
	int ret;
Line -... Line 1836...
-
 
1836
 
1328
	intel_logical_ring_emit(ringbuf, ring->outstanding_lazy_seqno);
1837
	ret = i915_gem_render_state_prepare(req->ring, &so);
-
 
1838
	if (ret)
-
 
1839
		return ret;
-
 
1840
 
-
 
1841
	if (so.rodata == NULL)
-
 
1842
		return 0;
-
 
1843
 
-
 
1844
	ret = req->ring->emit_bb_start(req, so.ggtt_offset,
-
 
1845
				       I915_DISPATCH_SECURE);
-
 
1846
	if (ret)
-
 
1847
		goto out;
-
 
1848
 
-
 
1849
	ret = req->ring->emit_bb_start(req,
-
 
1850
				       (so.ggtt_offset + so.aux_batch_offset),
-
 
1851
				       I915_DISPATCH_SECURE);
-
 
1852
	if (ret)
-
 
1853
		goto out;
-
 
1854
 
-
 
1855
	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
-
 
1856
 
-
 
1857
out:
-
 
1858
	i915_gem_render_state_fini(&so);
-
 
1859
	return ret;
-
 
1860
}
-
 
1861
 
-
 
1862
static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
-
 
1863
{
-
 
1864
	int ret;
-
 
1865
 
-
 
1866
	ret = intel_logical_ring_workarounds_emit(req);
-
 
1867
	if (ret)
-
 
1868
		return ret;
-
 
1869
 
-
 
1870
	ret = intel_rcs_context_init_mocs(req);
-
 
1871
	/*
-
 
1872
	 * Failing to program the MOCS is non-fatal.The system will not
-
 
1873
	 * run at peak performance. So generate an error and carry on.
1329
	intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
1874
	 */
Line 1330... Line 1875...
1330
	intel_logical_ring_emit(ringbuf, MI_NOOP);
1875
	if (ret)
1331
	intel_logical_ring_advance_and_submit(ringbuf);
1876
		DRM_ERROR("MOCS failed to program: expect performance issues.\n");
1332
 
1877
 
Line 1348... Line 1893...
1348
 
1893
 
Line 1349... Line 1894...
1349
	dev_priv = ring->dev->dev_private;
1894
	dev_priv = ring->dev->dev_private;
1350
 
1895
 
1351
	intel_logical_ring_stop(ring);
-
 
1352
	WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
-
 
Line 1353... Line 1896...
1353
	ring->preallocated_lazy_request = NULL;
1896
	intel_logical_ring_stop(ring);
1354
	ring->outstanding_lazy_seqno = 0;
1897
	WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
Line 1355... Line 1898...
1355
 
1898
 
-
 
1899
	if (ring->cleanup)
Line 1356... Line 1900...
1356
	if (ring->cleanup)
1900
		ring->cleanup(ring);
1357
		ring->cleanup(ring);
1901
 
1358
 
1902
	i915_cmd_parser_fini_ring(ring);
1359
	i915_cmd_parser_fini_ring(ring);
1903
	i915_gem_batch_pool_fini(&ring->batch_pool);
-
 
1904
 
-
 
1905
	if (ring->status_page.obj) {
1360
 
1906
		kunmap(sg_page(ring->status_page.obj->pages->sgl));
Line 1361... Line 1907...
1361
	if (ring->status_page.obj) {
1907
		ring->status_page.obj = NULL;
1362
		kunmap(sg_page(ring->status_page.obj->pages->sgl));
1908
	}
1363
		ring->status_page.obj = NULL;
1909
 
Line 1372... Line 1918...
1372
	ring->buffer = NULL;
1918
	ring->buffer = NULL;
Line 1373... Line 1919...
1373
 
1919
 
1374
	ring->dev = dev;
1920
	ring->dev = dev;
1375
	INIT_LIST_HEAD(&ring->active_list);
1921
	INIT_LIST_HEAD(&ring->active_list);
-
 
1922
	INIT_LIST_HEAD(&ring->request_list);
1376
	INIT_LIST_HEAD(&ring->request_list);
1923
	i915_gem_batch_pool_init(dev, &ring->batch_pool);
Line 1377... Line 1924...
1377
	init_waitqueue_head(&ring->irq_queue);
1924
	init_waitqueue_head(&ring->irq_queue);
1378
 
1925
 
1379
	INIT_LIST_HEAD(&ring->execlist_queue);
1926
	INIT_LIST_HEAD(&ring->execlist_queue);
1380
	INIT_LIST_HEAD(&ring->execlist_retired_req_list);
-
 
Line 1381... Line 1927...
1381
	spin_lock_init(&ring->execlist_lock);
1927
	INIT_LIST_HEAD(&ring->execlist_retired_req_list);
1382
	ring->next_context_status_buffer = 0;
1928
	spin_lock_init(&ring->execlist_lock);
1383
 
1929
 
Line 1384... Line -...
1384
	ret = i915_cmd_parser_init_ring(ring);
-
 
1385
	if (ret)
1930
	ret = i915_cmd_parser_init_ring(ring);
1386
		return ret;
1931
	if (ret)
1387
 
1932
		return ret;
1388
	if (ring->init) {
-
 
Line -... Line 1933...
-
 
1933
 
-
 
1934
	ret = intel_lr_context_deferred_alloc(ring->default_context, ring);
-
 
1935
	if (ret)
-
 
1936
		return ret;
1389
		ret = ring->init(ring);
1937
 
-
 
1938
	/* As this is the default context, always pin it */
-
 
1939
	ret = intel_lr_context_do_pin(
-
 
1940
			ring,
-
 
1941
			ring->default_context->engine[ring->id].state,
-
 
1942
			ring->default_context->engine[ring->id].ringbuf);
-
 
1943
	if (ret) {
Line 1390... Line 1944...
1390
		if (ret)
1944
		DRM_ERROR(
1391
			return ret;
1945
			"Failed to pin and map ringbuffer %s: %d\n",
Line 1392... Line 1946...
1392
	}
1946
			ring->name, ret);
1393
 
1947
		return ret;
1394
	ret = intel_lr_context_deferred_create(ring->default_context, ring);
1948
	}
1395
 
1949
 
-
 
1950
	return ret;
Line 1396... Line 1951...
1396
	return ret;
1951
}
1397
}
1952
 
1398
 
1953
static int logical_render_ring_init(struct drm_device *dev)
1399
static int logical_render_ring_init(struct drm_device *dev)
1954
{
Line 1409... Line 1964...
1409
	ring->irq_keep_mask =
1964
	ring->irq_keep_mask =
1410
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
1965
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_RCS_IRQ_SHIFT;
1411
	if (HAS_L3_DPF(dev))
1966
	if (HAS_L3_DPF(dev))
1412
		ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
1967
		ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
Line -... Line 1968...
-
 
1968
 
-
 
1969
	if (INTEL_INFO(dev)->gen >= 9)
-
 
1970
		ring->init_hw = gen9_init_render_ring;
1413
 
1971
	else
1414
	ring->init = gen8_init_render_ring;
1972
		ring->init_hw = gen8_init_render_ring;
1415
	ring->init_context = intel_logical_ring_workarounds_emit;
1973
	ring->init_context = gen8_init_rcs_context;
-
 
1974
	ring->cleanup = intel_fini_pipe_control;
-
 
1975
	if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) {
-
 
1976
		ring->get_seqno = bxt_a_get_seqno;
-
 
1977
		ring->set_seqno = bxt_a_set_seqno;
1416
	ring->cleanup = intel_fini_pipe_control;
1978
	} else {
1417
	ring->get_seqno = gen8_get_seqno;
1979
		ring->get_seqno = gen8_get_seqno;
-
 
1980
		ring->set_seqno = gen8_set_seqno;
1418
	ring->set_seqno = gen8_set_seqno;
1981
	}
1419
	ring->emit_request = gen8_emit_request;
1982
	ring->emit_request = gen8_emit_request;
1420
	ring->emit_flush = gen8_emit_flush_render;
1983
	ring->emit_flush = gen8_emit_flush_render;
1421
	ring->irq_get = gen8_logical_ring_get_irq;
1984
	ring->irq_get = gen8_logical_ring_get_irq;
1422
	ring->irq_put = gen8_logical_ring_put_irq;
1985
	ring->irq_put = gen8_logical_ring_put_irq;
Line -... Line 1986...
-
 
1986
	ring->emit_bb_start = gen8_emit_bb_start;
-
 
1987
 
-
 
1988
	ring->dev = dev;
-
 
1989
 
-
 
1990
	ret = intel_init_pipe_control(ring);
-
 
1991
	if (ret)
-
 
1992
		return ret;
-
 
1993
 
-
 
1994
	ret = intel_init_workaround_bb(ring);
-
 
1995
	if (ret) {
-
 
1996
		/*
-
 
1997
		 * We continue even if we fail to initialize WA batch
-
 
1998
		 * because we only expect rare glitches but nothing
-
 
1999
		 * critical to prevent us from using GPU
-
 
2000
		 */
-
 
2001
		DRM_ERROR("WA batch buffer initialization failed: %d\n",
-
 
2002
			  ret);
1423
	ring->emit_bb_start = gen8_emit_bb_start;
2003
	}
-
 
2004
 
-
 
2005
	ret = logical_ring_init(dev, ring);
-
 
2006
	if (ret) {
-
 
2007
		lrc_destroy_wa_ctx_obj(ring);
-
 
2008
	}
1424
 
2009
 
Line 1425... Line 2010...
1425
	return logical_ring_init(dev, ring);
2010
	return ret;
1426
}
2011
}
1427
 
2012
 
Line 1436... Line 2021...
1436
	ring->irq_enable_mask =
2021
	ring->irq_enable_mask =
1437
		GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
2022
		GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
1438
	ring->irq_keep_mask =
2023
	ring->irq_keep_mask =
1439
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
2024
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
Line 1440... Line 2025...
1440
 
2025
 
-
 
2026
	ring->init_hw = gen8_init_common_ring;
-
 
2027
	if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) {
-
 
2028
		ring->get_seqno = bxt_a_get_seqno;
-
 
2029
		ring->set_seqno = bxt_a_set_seqno;
1441
	ring->init = gen8_init_common_ring;
2030
	} else {
1442
	ring->get_seqno = gen8_get_seqno;
2031
		ring->get_seqno = gen8_get_seqno;
-
 
2032
		ring->set_seqno = gen8_set_seqno;
1443
	ring->set_seqno = gen8_set_seqno;
2033
	}
1444
	ring->emit_request = gen8_emit_request;
2034
	ring->emit_request = gen8_emit_request;
1445
	ring->emit_flush = gen8_emit_flush;
2035
	ring->emit_flush = gen8_emit_flush;
1446
	ring->irq_get = gen8_logical_ring_get_irq;
2036
	ring->irq_get = gen8_logical_ring_get_irq;
1447
	ring->irq_put = gen8_logical_ring_put_irq;
2037
	ring->irq_put = gen8_logical_ring_put_irq;
Line 1461... Line 2051...
1461
	ring->irq_enable_mask =
2051
	ring->irq_enable_mask =
1462
		GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
2052
		GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
1463
	ring->irq_keep_mask =
2053
	ring->irq_keep_mask =
1464
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
2054
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
Line 1465... Line 2055...
1465
 
2055
 
1466
	ring->init = gen8_init_common_ring;
2056
	ring->init_hw = gen8_init_common_ring;
1467
	ring->get_seqno = gen8_get_seqno;
2057
	ring->get_seqno = gen8_get_seqno;
1468
	ring->set_seqno = gen8_set_seqno;
2058
	ring->set_seqno = gen8_set_seqno;
1469
	ring->emit_request = gen8_emit_request;
2059
	ring->emit_request = gen8_emit_request;
1470
	ring->emit_flush = gen8_emit_flush;
2060
	ring->emit_flush = gen8_emit_flush;
Line 1486... Line 2076...
1486
	ring->irq_enable_mask =
2076
	ring->irq_enable_mask =
1487
		GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
2077
		GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
1488
	ring->irq_keep_mask =
2078
	ring->irq_keep_mask =
1489
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
2079
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
Line 1490... Line 2080...
1490
 
2080
 
-
 
2081
	ring->init_hw = gen8_init_common_ring;
-
 
2082
	if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) {
-
 
2083
		ring->get_seqno = bxt_a_get_seqno;
-
 
2084
		ring->set_seqno = bxt_a_set_seqno;
1491
	ring->init = gen8_init_common_ring;
2085
	} else {
1492
	ring->get_seqno = gen8_get_seqno;
2086
		ring->get_seqno = gen8_get_seqno;
-
 
2087
		ring->set_seqno = gen8_set_seqno;
1493
	ring->set_seqno = gen8_set_seqno;
2088
	}
1494
	ring->emit_request = gen8_emit_request;
2089
	ring->emit_request = gen8_emit_request;
1495
	ring->emit_flush = gen8_emit_flush;
2090
	ring->emit_flush = gen8_emit_flush;
1496
	ring->irq_get = gen8_logical_ring_get_irq;
2091
	ring->irq_get = gen8_logical_ring_get_irq;
1497
	ring->irq_put = gen8_logical_ring_put_irq;
2092
	ring->irq_put = gen8_logical_ring_put_irq;
Line 1511... Line 2106...
1511
	ring->irq_enable_mask =
2106
	ring->irq_enable_mask =
1512
		GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
2107
		GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
1513
	ring->irq_keep_mask =
2108
	ring->irq_keep_mask =
1514
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
2109
		GT_CONTEXT_SWITCH_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
Line 1515... Line 2110...
1515
 
2110
 
-
 
2111
	ring->init_hw = gen8_init_common_ring;
-
 
2112
	if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0) {
-
 
2113
		ring->get_seqno = bxt_a_get_seqno;
-
 
2114
		ring->set_seqno = bxt_a_set_seqno;
1516
	ring->init = gen8_init_common_ring;
2115
	} else {
1517
	ring->get_seqno = gen8_get_seqno;
2116
		ring->get_seqno = gen8_get_seqno;
-
 
2117
		ring->set_seqno = gen8_set_seqno;
1518
	ring->set_seqno = gen8_set_seqno;
2118
	}
1519
	ring->emit_request = gen8_emit_request;
2119
	ring->emit_request = gen8_emit_request;
1520
	ring->emit_flush = gen8_emit_flush;
2120
	ring->emit_flush = gen8_emit_flush;
1521
	ring->irq_get = gen8_logical_ring_get_irq;
2121
	ring->irq_get = gen8_logical_ring_get_irq;
1522
	ring->irq_put = gen8_logical_ring_put_irq;
2122
	ring->irq_put = gen8_logical_ring_put_irq;
Line 1566... Line 2166...
1566
		ret = logical_bsd2_ring_init(dev);
2166
		ret = logical_bsd2_ring_init(dev);
1567
		if (ret)
2167
		if (ret)
1568
			goto cleanup_vebox_ring;
2168
			goto cleanup_vebox_ring;
1569
	}
2169
	}
Line 1570... Line -...
1570
 
-
 
1571
	ret = i915_gem_set_seqno(dev, ((u32)~0 - 0x1000));
-
 
1572
	if (ret)
-
 
1573
		goto cleanup_bsd2_ring;
-
 
1574
 
2170
 
Line 1575... Line -...
1575
	return 0;
-
 
1576
 
-
 
1577
cleanup_bsd2_ring:
2171
	return 0;
1578
	intel_logical_ring_cleanup(&dev_priv->ring[VCS2]);
2172
 
1579
cleanup_vebox_ring:
2173
cleanup_vebox_ring:
1580
	intel_logical_ring_cleanup(&dev_priv->ring[VECS]);
2174
	intel_logical_ring_cleanup(&dev_priv->ring[VECS]);
1581
cleanup_blt_ring:
2175
cleanup_blt_ring:
Line 1586... Line 2180...
1586
	intel_logical_ring_cleanup(&dev_priv->ring[RCS]);
2180
	intel_logical_ring_cleanup(&dev_priv->ring[RCS]);
Line 1587... Line 2181...
1587
 
2181
 
1588
	return ret;
2182
	return ret;
Line 1589... Line 2183...
1589
}
2183
}
1590
 
2184
 
1591
int intel_lr_context_render_state_init(struct intel_engine_cs *ring,
2185
static u32
1592
				       struct intel_context *ctx)
-
 
1593
{
-
 
1594
	struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
-
 
1595
	struct render_state so;
-
 
1596
	struct drm_i915_file_private *file_priv = ctx->file_priv;
2186
make_rpcs(struct drm_device *dev)
Line -... Line 2187...
-
 
2187
{
1597
	struct drm_file *file = file_priv ? file_priv->file : NULL;
2188
	u32 rpcs = 0;
1598
	int ret;
-
 
1599
 
2189
 
1600
	ret = i915_gem_render_state_prepare(ring, &so);
2190
	/*
1601
	if (ret)
2191
	 * No explicit RPCS request is needed to ensure full
1602
		return ret;
2192
	 * slice/subslice/EU enablement prior to Gen9.
Line -... Line 2193...
-
 
2193
	*/
1603
 
2194
	if (INTEL_INFO(dev)->gen < 9)
-
 
2195
		return 0;
-
 
2196
 
1604
	if (so.rodata == NULL)
2197
	/*
-
 
2198
	 * Starting in Gen9, render power gating can leave
-
 
2199
	 * slice/subslice/EU in a partially enabled state. We
-
 
2200
	 * must make an explicit request through RPCS for full
-
 
2201
	 * enablement.
1605
		return 0;
2202
	*/
1606
 
2203
	if (INTEL_INFO(dev)->has_slice_pg) {
1607
	ret = ring->emit_bb_start(ringbuf,
2204
		rpcs |= GEN8_RPCS_S_CNT_ENABLE;
Line -... Line 2205...
-
 
2205
		rpcs |= INTEL_INFO(dev)->slice_total <<
-
 
2206
			GEN8_RPCS_S_CNT_SHIFT;
1608
			so.ggtt_offset,
2207
		rpcs |= GEN8_RPCS_ENABLE;
-
 
2208
	}
-
 
2209
 
-
 
2210
	if (INTEL_INFO(dev)->has_subslice_pg) {
Line -... Line 2211...
-
 
2211
		rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
1609
			I915_DISPATCH_SECURE);
2212
		rpcs |= INTEL_INFO(dev)->subslice_per_slice <<
-
 
2213
			GEN8_RPCS_SS_CNT_SHIFT;
1610
	if (ret)
2214
		rpcs |= GEN8_RPCS_ENABLE;
-
 
2215
	}
1611
		goto out;
2216
 
1612
 
2217
	if (INTEL_INFO(dev)->has_eu_pg) {
1613
	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), ring);
-
 
-
 
2218
		rpcs |= INTEL_INFO(dev)->eu_per_subslice <<
1614
 
2219
			GEN8_RPCS_EU_MIN_SHIFT;
1615
	ret = __i915_add_request(ring, file, so.obj, NULL);
2220
		rpcs |= INTEL_INFO(dev)->eu_per_subslice <<
Line 1616... Line 2221...
1616
	/* intel_logical_ring_add_request moves object to inactive if it
2221
			GEN8_RPCS_EU_MAX_SHIFT;
1617
	 * fails */
2222
		rpcs |= GEN8_RPCS_ENABLE;
1618
out:
2223
	}
Line 1648... Line 2253...
1648
 
2253
 
Line 1649... Line 2254...
1649
	i915_gem_object_pin_pages(ctx_obj);
2254
	i915_gem_object_pin_pages(ctx_obj);
1650
 
2255
 
1651
	/* The second page of the context object contains some fields which must
2256
	/* The second page of the context object contains some fields which must
1652
	 * be set up prior to the first execution. */
2257
	 * be set up prior to the first execution. */
Line 1653... Line 2258...
1653
	page = i915_gem_object_get_page(ctx_obj, 1);
2258
	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
1654
	reg_state = kmap_atomic(page);
2259
	reg_state = kmap_atomic(page);
1655
 
2260
 
Line 1663... Line 2268...
1663
	else
2268
	else
1664
		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(11);
2269
		reg_state[CTX_LRI_HEADER_0] = MI_LOAD_REGISTER_IMM(11);
1665
	reg_state[CTX_LRI_HEADER_0] |= MI_LRI_FORCE_POSTED;
2270
	reg_state[CTX_LRI_HEADER_0] |= MI_LRI_FORCE_POSTED;
1666
	reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring);
2271
	reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring);
1667
	reg_state[CTX_CONTEXT_CONTROL+1] =
2272
	reg_state[CTX_CONTEXT_CONTROL+1] =
1668
			_MASKED_BIT_ENABLE((1<<3) | MI_RESTORE_INHIBIT);
2273
		_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
-
 
2274
				   CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
-
 
2275
				   CTX_CTRL_RS_CTX_ENABLE);
1669
	reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base);
2276
	reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base);
1670
	reg_state[CTX_RING_HEAD+1] = 0;
2277
	reg_state[CTX_RING_HEAD+1] = 0;
1671
	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
2278
	reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
1672
	reg_state[CTX_RING_TAIL+1] = 0;
2279
	reg_state[CTX_RING_TAIL+1] = 0;
1673
	reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
2280
	reg_state[CTX_RING_BUFFER_START] = RING_START(ring->mmio_base);
Line 1688... Line 2295...
1688
	reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114;
2295
	reg_state[CTX_SECOND_BB_HEAD_L] = ring->mmio_base + 0x114;
1689
	reg_state[CTX_SECOND_BB_HEAD_L+1] = 0;
2296
	reg_state[CTX_SECOND_BB_HEAD_L+1] = 0;
1690
	reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
2297
	reg_state[CTX_SECOND_BB_STATE] = ring->mmio_base + 0x118;
1691
	reg_state[CTX_SECOND_BB_STATE+1] = 0;
2298
	reg_state[CTX_SECOND_BB_STATE+1] = 0;
1692
	if (ring->id == RCS) {
2299
	if (ring->id == RCS) {
1693
		/* TODO: according to BSpec, the register state context
-
 
1694
		 * for CHV does not have these. OTOH, these registers do
-
 
1695
		 * exist in CHV. I'm waiting for a clarification */
-
 
1696
		reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0;
2300
		reg_state[CTX_BB_PER_CTX_PTR] = ring->mmio_base + 0x1c0;
1697
		reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
2301
		reg_state[CTX_BB_PER_CTX_PTR+1] = 0;
1698
		reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4;
2302
		reg_state[CTX_RCS_INDIRECT_CTX] = ring->mmio_base + 0x1c4;
1699
		reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
2303
		reg_state[CTX_RCS_INDIRECT_CTX+1] = 0;
1700
		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8;
2304
		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET] = ring->mmio_base + 0x1c8;
1701
		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
2305
		reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] = 0;
-
 
2306
		if (ring->wa_ctx.obj) {
-
 
2307
			struct i915_ctx_workarounds *wa_ctx = &ring->wa_ctx;
-
 
2308
			uint32_t ggtt_offset = i915_gem_obj_ggtt_offset(wa_ctx->obj);
-
 
2309
 
-
 
2310
			reg_state[CTX_RCS_INDIRECT_CTX+1] =
-
 
2311
				(ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) |
-
 
2312
				(wa_ctx->indirect_ctx.size / CACHELINE_DWORDS);
-
 
2313
 
-
 
2314
			reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] =
-
 
2315
				CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT << 6;
-
 
2316
 
-
 
2317
			reg_state[CTX_BB_PER_CTX_PTR+1] =
-
 
2318
				(ggtt_offset + wa_ctx->per_ctx.offset * sizeof(uint32_t)) |
-
 
2319
				0x01;
-
 
2320
		}
1702
	}
2321
	}
1703
	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
2322
	reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9);
1704
	reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;
2323
	reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;
1705
	reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8;
2324
	reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8;
1706
	reg_state[CTX_CTX_TIMESTAMP+1] = 0;
2325
	reg_state[CTX_CTX_TIMESTAMP+1] = 0;
Line 1710... Line 2329...
1710
	reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2);
2329
	reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2);
1711
	reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1);
2330
	reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1);
1712
	reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
2331
	reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
1713
	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
2332
	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
1714
	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
2333
	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
-
 
2334
 
1715
	reg_state[CTX_PDP3_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[3]);
2335
	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
-
 
2336
		/* 64b PPGTT (48bit canonical)
-
 
2337
		 * PDP0_DESCRIPTOR contains the base address to PML4 and
-
 
2338
		 * other PDP Descriptors are ignored.
-
 
2339
		 */
-
 
2340
		ASSIGN_CTX_PML4(ppgtt, reg_state);
-
 
2341
	} else {
-
 
2342
		/* 32b PPGTT
1716
	reg_state[CTX_PDP3_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[3]);
2343
		 * PDP*_DESCRIPTOR contains the base address of space supported.
1717
	reg_state[CTX_PDP2_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[2]);
2344
		 * With dynamic page allocation, PDPs may not be allocated at
1718
	reg_state[CTX_PDP2_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[2]);
2345
		 * this point. Point the unallocated PDPs to the scratch page
-
 
2346
		 */
1719
	reg_state[CTX_PDP1_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[1]);
2347
		ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
1720
	reg_state[CTX_PDP1_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[1]);
2348
		ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
1721
	reg_state[CTX_PDP0_UDW+1] = upper_32_bits(ppgtt->pd_dma_addr[0]);
2349
		ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
1722
	reg_state[CTX_PDP0_LDW+1] = lower_32_bits(ppgtt->pd_dma_addr[0]);
2350
		ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
-
 
2351
	}
-
 
2352
 
1723
	if (ring->id == RCS) {
2353
	if (ring->id == RCS) {
1724
		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
2354
		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
1725
		reg_state[CTX_R_PWR_CLK_STATE] = 0x20c8;
2355
		reg_state[CTX_R_PWR_CLK_STATE] = GEN8_R_PWR_CLK_STATE;
1726
		reg_state[CTX_R_PWR_CLK_STATE+1] = 0;
2356
		reg_state[CTX_R_PWR_CLK_STATE+1] = make_rpcs(dev);
1727
	}
2357
	}
Line 1728... Line 2358...
1728
 
2358
 
Line 1729... Line 2359...
1729
	kunmap_atomic(reg_state);
2359
	kunmap_atomic(reg_state);
1730
 
-
 
1731
	ctx_obj->dirty = 1;
2360
 
Line 1732... Line 2361...
1732
//   set_page_dirty(page);
2361
	ctx_obj->dirty = 1;
1733
	i915_gem_object_unpin_pages(ctx_obj);
2362
    i915_gem_object_unpin_pages(ctx_obj);
Line 1757... Line 2386...
1757
 
2386
 
1758
			if (ctx == ring->default_context) {
2387
			if (ctx == ring->default_context) {
1759
				intel_unpin_ringbuffer_obj(ringbuf);
2388
				intel_unpin_ringbuffer_obj(ringbuf);
1760
				i915_gem_object_ggtt_unpin(ctx_obj);
2389
				i915_gem_object_ggtt_unpin(ctx_obj);
1761
			}
2390
			}
1762
			intel_destroy_ringbuffer_obj(ringbuf);
2391
			WARN_ON(ctx->engine[ring->id].pin_count);
1763
			kfree(ringbuf);
2392
			intel_ringbuffer_free(ringbuf);
1764
			drm_gem_object_unreference(&ctx_obj->base);
2393
			drm_gem_object_unreference(&ctx_obj->base);
1765
		}
2394
		}
1766
	}
2395
	}
Line 1792... Line 2421...
1792
 
2421
 
1793
static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
2422
static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
1794
		struct drm_i915_gem_object *default_ctx_obj)
2423
		struct drm_i915_gem_object *default_ctx_obj)
1795
{
2424
{
-
 
2425
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
Line 1796... Line 2426...
1796
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
2426
	struct page *page;
1797
 
-
 
1798
	/* The status page is offset 0 from the default context object
2427
 
1799
	 * in LRC mode. */
2428
	/* The HWSP is part of the default context object in LRC mode. */
1800
	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj);
2429
	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj)
-
 
2430
			+ LRC_PPHWSP_PN * PAGE_SIZE;
1801
	ring->status_page.page_addr =
2431
	page = i915_gem_object_get_page(default_ctx_obj, LRC_PPHWSP_PN);
Line 1802... Line 2432...
1802
			kmap(sg_page(default_ctx_obj->pages->sgl));
2432
	ring->status_page.page_addr = kmap(page);
1803
	ring->status_page.obj = default_ctx_obj;
2433
	ring->status_page.obj = default_ctx_obj;
1804
 
2434
 
1805
	I915_WRITE(RING_HWS_PGA(ring->mmio_base),
2435
	I915_WRITE(RING_HWS_PGA(ring->mmio_base),
Line 1806... Line 2436...
1806
			(u32)ring->status_page.gfx_addr);
2436
			(u32)ring->status_page.gfx_addr);
1807
	POSTING_READ(RING_HWS_PGA(ring->mmio_base));
2437
	POSTING_READ(RING_HWS_PGA(ring->mmio_base));
1808
}
2438
}
1809
 
2439
 
1810
/**
2440
/**
1811
 * intel_lr_context_deferred_create() - create the LRC specific bits of a context
2441
 * intel_lr_context_deferred_alloc() - create the LRC specific bits of a context
1812
 * @ctx: LR context to create.
2442
 * @ctx: LR context to create.
Line 1818... Line 2448...
1818
 * the creation is a deferred call: it's better to make sure first that we need to use
2448
 * the creation is a deferred call: it's better to make sure first that we need to use
1819
 * a given ring with the context.
2449
 * a given ring with the context.
1820
 *
2450
 *
1821
 * Return: non-zero on error.
2451
 * Return: non-zero on error.
1822
 */
2452
 */
-
 
2453
 
1823
int intel_lr_context_deferred_create(struct intel_context *ctx,
2454
int intel_lr_context_deferred_alloc(struct intel_context *ctx,
1824
				     struct intel_engine_cs *ring)
2455
				     struct intel_engine_cs *ring)
1825
{
2456
{
1826
	const bool is_global_default_ctx = (ctx == ring->default_context);
-
 
1827
	struct drm_device *dev = ring->dev;
2457
	struct drm_device *dev = ring->dev;
1828
	struct drm_i915_gem_object *ctx_obj;
2458
	struct drm_i915_gem_object *ctx_obj;
1829
	uint32_t context_size;
2459
	uint32_t context_size;
1830
	struct intel_ringbuffer *ringbuf;
2460
	struct intel_ringbuffer *ringbuf;
1831
	int ret;
2461
	int ret;
Line 1832... Line 2462...
1832
 
2462
 
1833
	WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL);
2463
	WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL);
1834
	if (ctx->engine[ring->id].state)
-
 
Line 1835... Line 2464...
1835
		return 0;
2464
	WARN_ON(ctx->engine[ring->id].state);
Line 1836... Line 2465...
1836
 
2465
 
1837
	context_size = round_up(get_lr_context_size(ring), 4096);
-
 
1838
 
2466
	context_size = round_up(get_lr_context_size(ring), 4096);
1839
	ctx_obj = i915_gem_alloc_context_obj(dev, context_size);
-
 
1840
	if (IS_ERR(ctx_obj)) {
-
 
1841
		ret = PTR_ERR(ctx_obj);
-
 
Line 1842... Line -...
1842
		DRM_DEBUG_DRIVER("Alloc LRC backing obj failed: %d\n", ret);
-
 
1843
		return ret;
2467
 
1844
	}
2468
	/* One extra page as the sharing data between driver and GuC */
1845
 
2469
	context_size += PAGE_SIZE * LRC_PPHWSP_PN;
1846
	if (is_global_default_ctx) {
-
 
1847
		ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN, 0);
-
 
1848
		if (ret) {
2470
 
1849
			DRM_DEBUG_DRIVER("Pin LRC backing obj failed: %d\n",
2471
	ctx_obj = i915_gem_alloc_object(dev, context_size);
-
 
2472
	if (!ctx_obj) {
-
 
2473
		DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n");
-
 
2474
		return -ENOMEM;
-
 
2475
	}
-
 
2476
 
1850
					ret);
2477
	ringbuf = intel_engine_create_ringbuffer(ring, 4 * PAGE_SIZE);
Line 1851... Line 2478...
1851
			drm_gem_object_unreference(&ctx_obj->base);
2478
	if (IS_ERR(ringbuf)) {
1852
			return ret;
2479
		ret = PTR_ERR(ringbuf);
1853
		}
2480
		goto error_deref_obj;
1854
	}
-
 
1855
 
-
 
1856
	ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
2481
	}
1857
	if (!ringbuf) {
2482
 
Line 1858... Line 2483...
1858
		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
2483
	ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf);
1859
				ring->name);
2484
	if (ret) {
Line 1860... Line -...
1860
		ret = -ENOMEM;
-
 
1861
		goto error_unpin_ctx;
2485
		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
1862
	}
-
 
1863
 
-
 
1864
	ringbuf->ring = ring;
-
 
1865
	ringbuf->FIXME_lrc_ctx = ctx;
2486
		goto error_ringbuf;
Line 1866... Line 2487...
1866
 
2487
	}
1867
	ringbuf->size = 32 * PAGE_SIZE;
2488
 
1868
	ringbuf->effective_size = ringbuf->size;
2489
	ctx->engine[ring->id].ringbuf = ringbuf;
1869
	ringbuf->head = 0;
-
 
1870
	ringbuf->tail = 0;
2490
	ctx->engine[ring->id].state = ctx_obj;
1871
	ringbuf->space = ringbuf->size;
2491
 
1872
	ringbuf->last_retired_head = -1;
2492
	if (ctx != ring->default_context && ring->init_context) {
1873
 
2493
		struct drm_i915_gem_request *req;
Line 1874... Line 2494...
1874
	if (ringbuf->obj == NULL) {
2494
 
1875
		ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
-
 
1876
		if (ret) {
2495
		ret = i915_gem_request_alloc(ring,
1877
			DRM_DEBUG_DRIVER(
-
 
1878
				"Failed to allocate ringbuffer obj %s: %d\n",
2496
			ctx, &req);
1879
				ring->name, ret);
2497
		if (ret) {
-
 
2498
			DRM_ERROR("ring create req: %d\n",
1880
			goto error_free_rbuf;
2499
				ret);
1881
		}
2500
			goto error_ringbuf;
-
 
2501
		}
1882
 
2502
 
-
 
2503
		ret = ring->init_context(req);
Line -... Line 2504...
-
 
2504
		if (ret) {
-
 
2505
			DRM_ERROR("ring init context: %d\n",
-
 
2506
				ret);
-
 
2507
			i915_gem_request_cancel(req);
-
 
2508
			goto error_ringbuf;
-
 
2509
		}
-
 
2510
		i915_add_request_no_flush(req);
1883
		if (is_global_default_ctx) {
2511
	}
Line 1884... Line 2512...
1884
			ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
2512
	return 0;
1885
			if (ret) {
2513
 
-
 
2514
error_ringbuf:
1886
				DRM_ERROR(
2515
	intel_ringbuffer_free(ringbuf);
1887
					"Failed to pin and map ringbuffer %s: %d\n",
2516
error_deref_obj:
1888
					ring->name, ret);
2517
	drm_gem_object_unreference(&ctx_obj->base);
Line -... Line 2518...
-
 
2518
	ctx->engine[ring->id].ringbuf = NULL;
-
 
2519
	ctx->engine[ring->id].state = NULL;
1889
				goto error_destroy_rbuf;
2520
	return ret;
-
 
2521
}
1890
			}
2522
 
-
 
2523
void intel_lr_context_reset(struct drm_device *dev,
-
 
2524
			struct intel_context *ctx)
Line 1891... Line 2525...
1891
		}
2525
{
1892
 
2526
	struct drm_i915_private *dev_priv = dev->dev_private;
Line 1893... Line 2527...
1893
	}
2527
	struct intel_engine_cs *ring;
1894
 
-
 
1895
	ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf);
2528
	int i;
1896
	if (ret) {
2529
 
1897
		DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
-
 
1898
		goto error;
2530
	for_each_ring(ring, dev_priv, i) {
-
 
2531
		struct drm_i915_gem_object *ctx_obj =
-
 
2532
				ctx->engine[ring->id].state;
Line 1899... Line -...
1899
	}
-
 
1900
 
-
 
1901
	ctx->engine[ring->id].ringbuf = ringbuf;
-
 
1902
	ctx->engine[ring->id].state = ctx_obj;
2533
		struct intel_ringbuffer *ringbuf =
1903
 
2534
				ctx->engine[ring->id].ringbuf;
1904
	if (ctx == ring->default_context)
-
 
1905
		lrc_setup_hardware_status_page(ring, ctx_obj);
-
 
1906
 
-
 
1907
	if (ring->id == RCS && !ctx->rcs_initialized) {
-
 
Line 1908... Line 2535...
1908
		if (ring->init_context) {
2535
		uint32_t *reg_state;
Line 1909... Line -...
1909
			ret = ring->init_context(ring, ctx);
-
 
1910
			if (ret)
-
 
1911
				DRM_ERROR("ring init context: %d\n", ret);
-
 
1912
		}
-
 
1913
 
-
 
1914
		ret = intel_lr_context_render_state_init(ring, ctx);
-
 
1915
		if (ret) {
2536
		struct page *page;
1916
			DRM_ERROR("Init render state failed: %d\n", ret);
-
 
1917
			ctx->engine[ring->id].ringbuf = NULL;
2537
 
1918
			ctx->engine[ring->id].state = NULL;
-
 
1919
			goto error;
-
 
1920
		}
2538
		if (!ctx_obj)
1921
		ctx->rcs_initialized = true;
2539
			continue;