Subversion Repositories Kolibri OS


Diff between Rev 5354 and Rev 6084 of the i915 ring-buffer code. In the hunks below, lines marked '-' exist only in Rev 5354, lines marked '+' only in Rev 6084, unmarked lines are common to both, and each "Line N... Line M..." marker gives the hunk's starting line in Rev 5354 and Rev 6084 respectively.
Line 50... Line 50...
 		return ring->buffer && ring->buffer->obj;
 }
 
 int __intel_ring_space(int head, int tail, int size)
 {
-	int space = head - (tail + I915_RING_FREE_SPACE);
+	int space = head - tail;
-	if (space < 0)
+	if (space <= 0)
 		space += size;
-	return space;
+	return space - I915_RING_FREE_SPACE;
 }
 
-int intel_ring_space(struct intel_ringbuffer *ringbuf)
-{
-	return __intel_ring_space(ringbuf->head & HEAD_ADDR,
-				  ringbuf->tail, ringbuf->size);
-}
+void intel_ring_update_space(struct intel_ringbuffer *ringbuf)
+{
+	if (ringbuf->last_retired_head != -1) {
+		ringbuf->head = ringbuf->last_retired_head;
+		ringbuf->last_retired_head = -1;
+	}
+
+	ringbuf->space = __intel_ring_space(ringbuf->head & HEAD_ADDR,
+					    ringbuf->tail, ringbuf->size);
+}
+
+int intel_ring_space(struct intel_ringbuffer *ringbuf)
+{
+	intel_ring_update_space(ringbuf);
+	return ringbuf->space;
+}
 
 bool intel_ring_stopped(struct intel_engine_cs *ring)
 {
 	struct drm_i915_private *dev_priv = ring->dev->dev_private;
 	return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
 }
 
-void __intel_ring_advance(struct intel_engine_cs *ring)
+static void __intel_ring_advance(struct intel_engine_cs *ring)
 {
 	struct intel_ringbuffer *ringbuf = ring->buffer;
 	ringbuf->tail &= ringbuf->size - 1;
 	if (intel_ring_stopped(ring))
 		return;
 	ring->write_tail(ring, ringbuf->tail);
 }
 
 static int
-gen2_render_ring_flush(struct intel_engine_cs *ring,
+gen2_render_ring_flush(struct drm_i915_gem_request *req,
 		       u32	invalidate_domains,
 		       u32	flush_domains)
 {
+	struct intel_engine_cs *ring = req->ring;
 	u32 cmd;
 	int ret;
 
 	cmd = MI_FLUSH;
 	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
 		cmd |= MI_NO_WRITE_FLUSH;
 
Line 104... Line 116...
 
 	return 0;
 }
 
 static int
-gen4_render_ring_flush(struct intel_engine_cs *ring,
-		  u32	invalidate_domains,
-		  u32	flush_domains)
+gen4_render_ring_flush(struct drm_i915_gem_request *req,
+		       u32	invalidate_domains,
+		       u32	flush_domains)
 {
+	struct intel_engine_cs *ring = req->ring;
 	struct drm_device *dev = ring->dev;
 	u32 cmd;
Line 150... Line 163...
 
 	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
 	    (IS_G4X(dev) || IS_GEN5(dev)))
 		cmd |= MI_INVALIDATE_ISP;
 
-	ret = intel_ring_begin(ring, 2);
+	ret = intel_ring_begin(req, 2);
 	if (ret)
 		return ret;
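
The __intel_ring_space() change above moves the reserved headroom out of the wrap test: Rev 5354 folded I915_RING_FREE_SPACE into the tail before wrapping, while Rev 6084 wraps first (treating head == tail as an empty ring via the "<=" test) and subtracts the reservation from the result. A standalone sketch of the new arithmetic; RING_FREE_SPACE below is only an illustrative stand-in for the driver's I915_RING_FREE_SPACE constant:

    #include <stdio.h>

    #define RING_FREE_SPACE 64   /* stand-in for I915_RING_FREE_SPACE */

    /* Same shape as the Rev 6084 __intel_ring_space(): wrap first,
     * then subtract the reserved headroom. */
    static int ring_space(int head, int tail, int size)
    {
        int space = head - tail;
        if (space <= 0)          /* head == tail means empty, so wrap */
            space += size;
        return space - RING_FREE_SPACE;
    }

    int main(void)
    {
        printf("%d\n", ring_space(0, 0, 4096));      /* empty ring: 4096 - 64 = 4032 */
        printf("%d\n", ring_space(256, 4032, 4096)); /* wrapped:     320 - 64 =  256 */
        return 0;
    }
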
Line 199... Line 212...
  * Post-sync nonzero is what triggered this second workaround, so we
  * can't use that one either.  Notify enable is IRQs, which aren't
  * really our business.  That leaves only stall at scoreboard.
  */
 static int
-intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
+intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
 {
+	struct intel_engine_cs *ring = req->ring;
 	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
 	int ret;
-
 
-	ret = intel_ring_begin(ring, 6);
+	ret = intel_ring_begin(req, 6);
 	if (ret)
 		return ret;
 
Line 218... Line 231...
 	intel_ring_emit(ring, 0); /* low dword */
 	intel_ring_emit(ring, 0); /* high dword */
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_advance(ring);
 
-	ret = intel_ring_begin(ring, 6);
+	ret = intel_ring_begin(req, 6);
 	if (ret)
 		return ret;
 
Line 234... Line 247...
 
 	return 0;
 }
 
 static int
-gen6_render_ring_flush(struct intel_engine_cs *ring,
-                         u32 invalidate_domains, u32 flush_domains)
+gen6_render_ring_flush(struct drm_i915_gem_request *req,
+		       u32 invalidate_domains, u32 flush_domains)
 {
+	struct intel_engine_cs *ring = req->ring;
 	u32 flags = 0;
 	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
 	int ret;
 
 	/* Force SNB workarounds for PIPE_CONTROL flushes */
-	ret = intel_emit_post_sync_nonzero_flush(ring);
+	ret = intel_emit_post_sync_nonzero_flush(req);
 	if (ret)
Line 272... Line 286...
 		 * TLB invalidate requires a post-sync write.
 		 */
 		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
 	}
 
-	ret = intel_ring_begin(ring, 4);
+	ret = intel_ring_begin(req, 4);
 	if (ret)
 		return ret;
 
Line 286... Line 300...
 
 	return 0;
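
A pattern that repeats through the rest of this revision: the low-level emission helpers no longer take the engine (intel_engine_cs) directly but the request (drm_i915_gem_request), recovering the engine through req->ring, and intel_ring_begin() is likewise called with the request. A schematic before/after sketch of that convention, not actual driver code, and it only compiles against the i915 declarations these functions come from:

    /* Rev 5354 convention: the helper is handed the engine. */
    static int emit_flush_old(struct intel_engine_cs *ring)
    {
        int ret = intel_ring_begin(ring, 2);   /* reserve 2 dwords */
        if (ret)
            return ret;
        intel_ring_emit(ring, MI_FLUSH);
        intel_ring_emit(ring, MI_NOOP);
        intel_ring_advance(ring);
        return 0;
    }

    /* Rev 6084 convention: the helper is handed the request and derives
     * the engine from it, so space accounting can follow the request. */
    static int emit_flush_new(struct drm_i915_gem_request *req)
    {
        struct intel_engine_cs *ring = req->ring;
        int ret = intel_ring_begin(req, 2);
        if (ret)
            return ret;
        intel_ring_emit(ring, MI_FLUSH);
        intel_ring_emit(ring, MI_NOOP);
        intel_ring_advance(ring);
        return 0;
    }
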
Line 288... Line 302...
 }
 
 static int
-gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
+gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
 {
+	struct intel_engine_cs *ring = req->ring;
 	int ret;
 
-	ret = intel_ring_begin(ring, 4);
+	ret = intel_ring_begin(req, 4);
 	if (ret)
Line 304... Line 319...
 	intel_ring_advance(ring);
 
 	return 0;
-}
-
-static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)
-{
-	int ret;
-
-	if (!ring->fbc_dirty)
-		return 0;
-
-	ret = intel_ring_begin(ring, 6);
-	if (ret)
-		return ret;
-	/* WaFbcNukeOn3DBlt:ivb/hsw */
-	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-	intel_ring_emit(ring, MSG_FBC_REND_STATE);
-	intel_ring_emit(ring, value);
-	intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) | MI_SRM_LRM_GLOBAL_GTT);
-	intel_ring_emit(ring, MSG_FBC_REND_STATE);
-	intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
-	intel_ring_advance(ring);
-
-	ring->fbc_dirty = false;
-	return 0;
 }
 
 static int
-gen7_render_ring_flush(struct intel_engine_cs *ring,
+gen7_render_ring_flush(struct drm_i915_gem_request *req,
 		       u32 invalidate_domains, u32 flush_domains)
 {
+	struct intel_engine_cs *ring = req->ring;
 	u32 flags = 0;
 	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
Line 352... Line 345...
 	 * impact.
 	 */
 	if (flush_domains) {
-	flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_FLUSH_ENABLE;
 	}
 	if (invalidate_domains) {
 		flags |= PIPE_CONTROL_TLB_INVALIDATE;
-	flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
-	flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
Line 372... Line 366...
 		flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
 
 		/* Workaround: we must issue a pipe_control with CS-stall bit
 		 * set before a pipe_control command that has the state cache
 		 * invalidate bit set. */
-		gen7_render_ring_cs_stall_wa(ring);
+		gen7_render_ring_cs_stall_wa(req);
 	}
 
-	ret = intel_ring_begin(ring, 4);
+	ret = intel_ring_begin(req, 4);
 	if (ret)
 		return ret;
 
 	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
 	intel_ring_emit(ring, flags);
 	intel_ring_emit(ring, scratch_addr);
 	intel_ring_emit(ring, 0);
 	intel_ring_advance(ring);
 
-	if (!invalidate_domains && flush_domains)
-		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
-
 	return 0;
 }
 
 static int
-gen8_emit_pipe_control(struct intel_engine_cs *ring,
+gen8_emit_pipe_control(struct drm_i915_gem_request *req,
 		       u32 flags, u32 scratch_addr)
 {
+	struct intel_engine_cs *ring = req->ring;
 	int ret;
Line 413... Line 405...
 
 	return 0;
 }
 
 static int
-gen8_render_ring_flush(struct intel_engine_cs *ring,
+gen8_render_ring_flush(struct drm_i915_gem_request *req,
 		       u32 invalidate_domains, u32 flush_domains)
 {
 	u32 flags = 0;
-	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
+	u32 scratch_addr = req->ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
 	int ret;
 
 	flags |= PIPE_CONTROL_CS_STALL;
 
 	if (flush_domains) {
 		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
 		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+		flags |= PIPE_CONTROL_FLUSH_ENABLE;
 	}
Line 437... Line 430...
 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_QW_WRITE;
 		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
 
 		/* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
-		ret = gen8_emit_pipe_control(ring,
+		ret = gen8_emit_pipe_control(req,
 					     PIPE_CONTROL_CS_STALL |
 					     PIPE_CONTROL_STALL_AT_SCOREBOARD,
 					     0);
-	if (ret)
-		return ret;
+		if (ret)
+			return ret;
 	}
 
-	ret = gen8_emit_pipe_control(ring, flags, scratch_addr);
-	if (ret)
-		return ret;
-
-	if (!invalidate_domains && flush_domains)
-		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
-
-	return 0;
+	return gen8_emit_pipe_control(req, flags, scratch_addr);
 }
 
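For orientation, the gen8 path above composes its PIPE_CONTROL flags from the two domain masks before handing them to gen8_emit_pipe_control(). A condensed sketch of that selection, limited to the branches visible in this hunk (the invalidate branch also sets several cache-invalidate bits that the diff does not show):

    static u32 gen8_flush_flags(u32 invalidate_domains, u32 flush_domains)
    {
        u32 flags = PIPE_CONTROL_CS_STALL;          /* always requested */

        if (flush_domains) {                        /* write back render caches */
            flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
            flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
            flags |= PIPE_CONTROL_FLUSH_ENABLE;     /* added in Rev 6084 */
        }
        if (invalidate_domains) {                   /* invalidate read caches */
            /* ... cache-invalidate bits elided by the diff ... */
            flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
            flags |= PIPE_CONTROL_QW_WRITE;
            flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
        }
        return flags;
    }
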
489
	if (INTEL_INFO(ring->dev)->gen >= 4)
475
	if (INTEL_INFO(ring->dev)->gen >= 4)
490
		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
476
		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
491
	I915_WRITE(HWS_PGA, addr);
477
	I915_WRITE(HWS_PGA, addr);
492
}
478
}
Line -... Line 479...
-
 
479
 
-
 
480
static void intel_ring_setup_status_page(struct intel_engine_cs *ring)
-
 
481
{
-
 
482
	struct drm_device *dev = ring->dev;
-
 
483
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-
 
484
	u32 mmio = 0;
-
 
485
 
-
 
486
	/* The ring status page addresses are no longer next to the rest of
-
 
487
	 * the ring registers as of gen7.
-
 
488
	 */
-
 
489
	if (IS_GEN7(dev)) {
-
 
490
		switch (ring->id) {
-
 
491
		case RCS:
-
 
492
			mmio = RENDER_HWS_PGA_GEN7;
-
 
493
			break;
-
 
494
		case BCS:
-
 
495
			mmio = BLT_HWS_PGA_GEN7;
-
 
496
			break;
-
 
497
		/*
-
 
498
		 * VCS2 actually doesn't exist on Gen7. Only shut up
-
 
499
		 * gcc switch check warning
-
 
500
		 */
-
 
501
		case VCS2:
-
 
502
		case VCS:
-
 
503
			mmio = BSD_HWS_PGA_GEN7;
-
 
504
			break;
-
 
505
		case VECS:
-
 
506
			mmio = VEBOX_HWS_PGA_GEN7;
-
 
507
			break;
-
 
508
		}
-
 
509
	} else if (IS_GEN6(ring->dev)) {
-
 
510
		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
-
 
511
	} else {
-
 
512
		/* XXX: gen8 returns to sanity */
-
 
513
		mmio = RING_HWS_PGA(ring->mmio_base);
-
 
514
	}
-
 
515
 
-
 
516
	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
-
 
517
	POSTING_READ(mmio);
-
 
518
 
-
 
519
	/*
-
 
520
	 * Flush the TLB for this page
-
 
521
	 *
-
 
522
	 * FIXME: These two bits have disappeared on gen8, so a question
-
 
523
	 * arises: do we still need this and if so how should we go about
-
 
524
	 * invalidating the TLB?
-
 
525
	 */
-
 
526
	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
-
 
527
		u32 reg = RING_INSTPM(ring->mmio_base);
-
 
528
 
-
 
529
		/* ring should be idle before issuing a sync flush*/
-
 
530
		WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
-
 
531
 
-
 
532
		I915_WRITE(reg,
-
 
533
			   _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
-
 
534
					      INSTPM_SYNC_FLUSH));
-
 
535
		if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
-
 
536
			     1000))
-
 
537
			DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
-
 
538
				  ring->name);
-
 
539
	}
-
 
540
}
493
 
541
 
494
static bool stop_ring(struct intel_engine_cs *ring)
542
static bool stop_ring(struct intel_engine_cs *ring)
495
{
543
{
Line 496... Line 544...
496
	struct drm_i915_private *dev_priv = to_i915(ring->dev);
544
	struct drm_i915_private *dev_priv = to_i915(ring->dev);
Line 526... Line 574...
526
	struct drm_i915_private *dev_priv = dev->dev_private;
574
	struct drm_i915_private *dev_priv = dev->dev_private;
527
	struct intel_ringbuffer *ringbuf = ring->buffer;
575
	struct intel_ringbuffer *ringbuf = ring->buffer;
528
	struct drm_i915_gem_object *obj = ringbuf->obj;
576
	struct drm_i915_gem_object *obj = ringbuf->obj;
529
	int ret = 0;
577
	int ret = 0;
Line 530... Line 578...
530
 
578
 
Line 531... Line 579...
531
	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
579
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
532
 
580
 
533
	if (!stop_ring(ring)) {
581
	if (!stop_ring(ring)) {
534
		/* G45 ring initialization often fails to reset head to zero */
582
		/* G45 ring initialization often fails to reset head to zero */
Line 590... Line 638...
590
			  I915_READ_START(ring), (unsigned long)i915_gem_obj_ggtt_offset(obj));
638
			  I915_READ_START(ring), (unsigned long)i915_gem_obj_ggtt_offset(obj));
591
		ret = -EIO;
639
		ret = -EIO;
592
		goto out;
640
		goto out;
593
	}
641
	}
Line -... Line 642...
-
 
642
 
594
 
643
	ringbuf->last_retired_head = -1;
595
		ringbuf->head = I915_READ_HEAD(ring);
644
	ringbuf->head = I915_READ_HEAD(ring);
596
		ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
645
	ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
597
	ringbuf->space = intel_ring_space(ringbuf);
-
 
Line 598... Line 646...
598
		ringbuf->last_retired_head = -1;
646
	intel_ring_update_space(ringbuf);
Line 599... Line 647...
599
 
647
 
600
	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
648
	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
Line 601... Line 649...
601
 
649
 
602
out:
650
out:
Line 603... Line 651...
603
	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
651
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
Line 625... Line 673...
625
int
673
int
626
intel_init_pipe_control(struct intel_engine_cs *ring)
674
intel_init_pipe_control(struct intel_engine_cs *ring)
627
{
675
{
628
	int ret;
676
	int ret;
Line 629... Line 677...
629
 
677
 
630
	if (ring->scratch.obj)
-
 
Line 631... Line 678...
631
		return 0;
678
	WARN_ON(ring->scratch.obj);
632
 
679
 
633
	ring->scratch.obj = i915_gem_alloc_object(ring->dev, 4096);
680
	ring->scratch.obj = i915_gem_alloc_object(ring->dev, 4096);
634
	if (ring->scratch.obj == NULL) {
681
	if (ring->scratch.obj == NULL) {
Line 662... Line 709...
662
	drm_gem_object_unreference(&ring->scratch.obj->base);
709
	drm_gem_object_unreference(&ring->scratch.obj->base);
663
err:
710
err:
664
	return ret;
711
	return ret;
665
}
712
}
Line 666... Line 713...
666
 
713
 
667
static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,
-
 
668
				       struct intel_context *ctx)
714
static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
669
{
715
{
-
 
716
	int ret, i;
670
	int ret, i;
717
	struct intel_engine_cs *ring = req->ring;
671
	struct drm_device *dev = ring->dev;
718
	struct drm_device *dev = ring->dev;
672
	struct drm_i915_private *dev_priv = dev->dev_private;
719
	struct drm_i915_private *dev_priv = dev->dev_private;
Line 673... Line 720...
673
	struct i915_workarounds *w = &dev_priv->workarounds;
720
	struct i915_workarounds *w = &dev_priv->workarounds;
674
 
721
 
Line 675... Line 722...
675
	if (WARN_ON(w->count == 0))
722
	if (w->count == 0)
676
		return 0;
723
		return 0;
677
 
724
 
678
	ring->gpu_caches_dirty = true;
725
	ring->gpu_caches_dirty = true;
Line 679... Line 726...
679
	ret = intel_ring_flush_all_caches(ring);
726
	ret = intel_ring_flush_all_caches(req);
680
	if (ret)
727
	if (ret)
681
		return ret;
728
		return ret;
Line 682... Line 729...
682
 
729
 
683
	ret = intel_ring_begin(ring, (w->count * 2 + 2));
730
	ret = intel_ring_begin(req, (w->count * 2 + 2));
Line 692... Line 739...
692
	intel_ring_emit(ring, MI_NOOP);
739
	intel_ring_emit(ring, MI_NOOP);
Line 693... Line 740...
693
 
740
 
Line 694... Line 741...
694
	intel_ring_advance(ring);
741
	intel_ring_advance(ring);
695
 
742
 
696
	ring->gpu_caches_dirty = true;
743
	ring->gpu_caches_dirty = true;
697
	ret = intel_ring_flush_all_caches(ring);
744
	ret = intel_ring_flush_all_caches(req);
Line 698... Line 745...
698
	if (ret)
745
	if (ret)
Line 699... Line 746...
699
		return ret;
746
		return ret;
700
 
747
 
Line -... Line 748...
-
 
748
	DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);
-
 
749
 
-
 
750
	return 0;
-
 
751
}
-
 
752
 
-
 
753
static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
-
 
754
{
-
 
755
	int ret;
-
 
756
 
-
 
757
	ret = intel_ring_workarounds_emit(req);
-
 
758
	if (ret != 0)
-
 
759
		return ret;
-
 
760
 
-
 
761
	ret = i915_gem_render_state_init(req);
-
 
762
	if (ret)
701
	DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);
763
		DRM_ERROR("init render state: %d\n", ret);
702
 
764
 
703
	return 0;
765
	return ret;
704
}
766
}
Line 718... Line 780...
718
	dev_priv->workarounds.count++;
780
	dev_priv->workarounds.count++;
Line 719... Line 781...
719
 
781
 
720
	return 0;
782
	return 0;
Line 721... Line 783...
721
}
783
}
722
 
784
 
723
#define WA_REG(addr, mask, val) { \
785
#define WA_REG(addr, mask, val) do { \
724
		const int r = wa_add(dev_priv, (addr), (mask), (val)); \
786
		const int r = wa_add(dev_priv, (addr), (mask), (val)); \
725
		if (r) \
787
		if (r) \
Line 726... Line 788...
726
			return r; \
788
			return r; \
727
	}
789
	} while (0)
Line 728... Line 790...
728
 
790
 
Line 738... Line 800...
738
#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask))
800
#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask))
739
#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask))
801
#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask))
Line 740... Line 802...
740
 
802
 
Line 741... Line 803...
741
#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)
803
#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)
742
 
804
 
743
static int bdw_init_workarounds(struct intel_engine_cs *ring)
805
static int gen8_init_workarounds(struct intel_engine_cs *ring)
744
{
806
{
Line 745... Line -...
745
	struct drm_device *dev = ring->dev;
-
 
746
	struct drm_i915_private *dev_priv = dev->dev_private;
-
 
747
 
807
	struct drm_device *dev = ring->dev;
748
	/* WaDisablePartialInstShootdown:bdw */
-
 
749
	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
-
 
Line 750... Line 808...
750
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
808
	struct drm_i915_private *dev_priv = dev->dev_private;
751
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE |
809
 
752
			  STALL_DOP_GATING_DISABLE);
-
 
Line -... Line 810...
-
 
810
	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
753
 
811
 
754
	/* WaDisableDopClockGating:bdw */
812
	/* WaDisableAsyncFlipPerfMode:bdw,chv */
Line 755... Line 813...
755
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
813
	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
756
			  DOP_CLOCK_GATING_DISABLE);
814
 
757
 
815
	/* WaDisablePartialInstShootdown:bdw,chv */
758
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
816
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
-
 
817
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
759
			  GEN8_SAMPLER_POWER_BYPASS_DIS);
818
 
760
 
819
	/* Use Force Non-Coherent whenever executing a 3D context. This is a
-
 
820
	 * workaround for for a possible hang in the unlikely event a TLB
761
	/* Use Force Non-Coherent whenever executing a 3D context. This is a
821
	 * invalidation occurs during a PSD flush.
-
 
822
	 */
-
 
823
	/* WaForceEnableNonCoherent:bdw,chv */
-
 
824
	/* WaHdcDisableFetchWhenMasked:bdw,chv */
-
 
825
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
-
 
826
			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
-
 
827
			  HDC_FORCE_NON_COHERENT);
-
 
828
 
-
 
829
	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
-
 
830
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
762
	 * workaround for for a possible hang in the unlikely event a TLB
831
	 *  polygons in the same 8x4 pixel/sample area to be processed without
Line 763... Line 832...
763
	 * invalidation occurs during a PSD flush.
832
	 *  stalling waiting for the earlier ones to write to Hierarchical Z
764
	 */
-
 
765
	/* WaDisableFenceDestinationToSLM:bdw (GT3 pre-production) */
833
	 *  buffer."
Line 766... Line 834...
766
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
834
	 *
767
			  HDC_FORCE_NON_COHERENT |
835
	 * This optimization is off by default for BDW and CHV; turn it on.
768
			  (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
836
	 */
769
 
837
	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
Line 784... Line 852...
784
			    GEN6_WIZ_HASHING_16x4);
852
			    GEN6_WIZ_HASHING_16x4);
Line 785... Line 853...
785
 
853
 
786
	return 0;
854
	return 0;
Line -... Line 855...
-
 
855
}
-
 
856
 
-
 
857
static int bdw_init_workarounds(struct intel_engine_cs *ring)
-
 
858
{
-
 
859
	int ret;
-
 
860
	struct drm_device *dev = ring->dev;
-
 
861
	struct drm_i915_private *dev_priv = dev->dev_private;
-
 
862
 
-
 
863
	ret = gen8_init_workarounds(ring);
-
 
864
	if (ret)
-
 
865
		return ret;
-
 
866
 
-
 
867
	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
-
 
868
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
-
 
869
 
-
 
870
	/* WaDisableDopClockGating:bdw */
-
 
871
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
-
 
872
			  DOP_CLOCK_GATING_DISABLE);
-
 
873
 
-
 
874
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
-
 
875
			  GEN8_SAMPLER_POWER_BYPASS_DIS);
-
 
876
 
-
 
877
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
-
 
878
			  /* WaForceContextSaveRestoreNonCoherent:bdw */
-
 
879
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
-
 
880
			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
-
 
881
			  (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
-
 
882
 
-
 
883
	return 0;
787
}
884
}
788
 
885
 
-
 
886
static int chv_init_workarounds(struct intel_engine_cs *ring)
789
static int chv_init_workarounds(struct intel_engine_cs *ring)
887
{
790
{
888
	int ret;
Line 791... Line 889...
791
	struct drm_device *dev = ring->dev;
889
	struct drm_device *dev = ring->dev;
-
 
890
	struct drm_i915_private *dev_priv = dev->dev_private;
-
 
891
 
-
 
892
	ret = gen8_init_workarounds(ring);
792
	struct drm_i915_private *dev_priv = dev->dev_private;
893
	if (ret)
-
 
894
		return ret;
-
 
895
 
-
 
896
	/* WaDisableThreadStallDopClockGating:chv */
-
 
897
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
-
 
898
 
-
 
899
	/* Improve HiZ throughput on CHV. */
-
 
900
	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
-
 
901
 
-
 
902
	return 0;
-
 
903
}
-
 
904
 
-
 
905
static int gen9_init_workarounds(struct intel_engine_cs *ring)
-
 
906
{
-
 
907
	struct drm_device *dev = ring->dev;
-
 
908
	struct drm_i915_private *dev_priv = dev->dev_private;
-
 
909
	uint32_t tmp;
-
 
910
 
-
 
911
	/* WaEnableLbsSlaRetryTimerDecrement:skl */
-
 
912
	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
-
 
913
		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
-
 
914
 
-
 
915
	/* WaDisableKillLogic:bxt,skl */
-
 
916
	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
793
 
917
		   ECOCHK_DIS_TLB);
794
	/* WaDisablePartialInstShootdown:chv */
918
 
795
	/* WaDisableThreadStallDopClockGating:chv */
-
 
Line -... Line 919...
-
 
919
	/* WaDisablePartialInstShootdown:skl,bxt */
-
 
920
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
-
 
921
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
-
 
922
 
-
 
923
	/* Syncing dependencies between camera and graphics:skl,bxt */
-
 
924
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
-
 
925
			  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
-
 
926
 
-
 
927
	if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) == SKL_REVID_A0 ||
-
 
928
	    INTEL_REVID(dev) == SKL_REVID_B0)) ||
-
 
929
	    (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) {
-
 
930
		/* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */
-
 
931
		WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
-
 
932
				  GEN9_DG_MIRROR_FIX_ENABLE);
-
 
933
	}
-
 
934
 
-
 
935
	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) ||
-
 
936
	    (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) {
-
 
937
		/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
796
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
938
		WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
797
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE |
939
				  GEN9_RHWO_OPTIMIZATION_DISABLE);
-
 
940
		/*
-
 
941
		 * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set
-
 
942
		 * but we do that in per ctx batchbuffer as there is an issue
-
 
943
		 * with this register not getting restored on ctx restore
-
 
944
		 */
-
 
945
	}
-
 
946
 
-
 
947
	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) >= SKL_REVID_C0) ||
-
 
948
	    IS_BROXTON(dev)) {
-
 
949
		/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
-
 
950
		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
-
 
951
				  GEN9_ENABLE_YV12_BUGFIX);
-
 
952
	}
-
 
953
 
-
 
954
	/* Wa4x4STCOptimizationDisable:skl,bxt */
-
 
955
	/* WaDisablePartialResolveInVc:skl,bxt */
-
 
956
	WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
-
 
957
					 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
-
 
958
 
798
			  STALL_DOP_GATING_DISABLE);
959
	/* WaCcsTlbPrefetchDisable:skl,bxt */
-
 
960
	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
-
 
961
			  GEN9_CCS_TLB_PREFETCH_ENABLE);
-
 
962
 
-
 
963
	/* WaDisableMaskBasedCammingInRCC:skl,bxt */
-
 
964
	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_C0) ||
-
 
965
	    (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0))
-
 
966
		WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
-
 
967
				  PIXEL_MASK_CAMMING_DISABLE);
-
 
968
 
-
 
969
	/* WaForceContextSaveRestoreNonCoherent:skl,bxt */
-
 
970
	tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT;
-
 
971
	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_F0) ||
-
 
972
	    (IS_BROXTON(dev) && INTEL_REVID(dev) >= BXT_REVID_B0))
-
 
973
		tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE;
-
 
974
	WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp);
-
 
975
 
-
 
976
	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt */
-
 
977
	if (IS_SKYLAKE(dev) ||
-
 
978
	    (IS_BROXTON(dev) && INTEL_REVID(dev) <= BXT_REVID_B0)) {
-
 
979
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
-
 
980
				  GEN8_SAMPLER_POWER_BYPASS_DIS);
-
 
981
	}
-
 
982
 
-
 
983
	/* WaDisableSTUnitPowerOptimization:skl,bxt */
-
 
984
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
-
 
985
 
-
 
986
	return 0;
-
 
987
}
-
 
988
 
-
 
989
static int skl_tune_iz_hashing(struct intel_engine_cs *ring)
-
 
990
{
-
 
991
	struct drm_device *dev = ring->dev;
-
 
992
	struct drm_i915_private *dev_priv = dev->dev_private;
-
 
993
	u8 vals[3] = { 0, 0, 0 };
-
 
994
	unsigned int i;
-
 
995
 
-
 
996
	for (i = 0; i < 3; i++) {
-
 
997
		u8 ss;
-
 
998
 
-
 
999
		/*
-
 
1000
		 * Only consider slices where one, and only one, subslice has 7
-
 
1001
		 * EUs
-
 
1002
		 */
-
 
1003
		if (hweight8(dev_priv->info.subslice_7eu[i]) != 1)
-
 
1004
			continue;
-
 
1005
 
-
 
1006
		/*
-
 
1007
		 * subslice_7eu[i] != 0 (because of the check above) and
-
 
1008
		 * ss_max == 4 (maximum number of subslices possible per slice)
-
 
1009
		 *
-
 
1010
		 * ->    0 <= ss <= 3;
-
 
1011
		 */
-
 
1012
		ss = ffs(dev_priv->info.subslice_7eu[i]) - 1;
-
 
1013
		vals[i] = 3 - ss;
-
 
1014
	}
-
 
1015
 
-
 
1016
	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
-
 
1017
		return 0;
-
 
1018
 
-
 
1019
	/* Tune IZ hashing. See intel_device_info_runtime_init() */
-
 
1020
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
-
 
1021
			    GEN9_IZ_HASHING_MASK(2) |
-
 
1022
			    GEN9_IZ_HASHING_MASK(1) |
-
 
1023
			    GEN9_IZ_HASHING_MASK(0),
-
 
1024
			    GEN9_IZ_HASHING(2, vals[2]) |
-
 
1025
			    GEN9_IZ_HASHING(1, vals[1]) |
-
 
1026
			    GEN9_IZ_HASHING(0, vals[0]));
-
 
1027
 
-
 
1028
	return 0;
-
 
1029
}
-
 
1030
 
-
 
1031
static int skl_init_workarounds(struct intel_engine_cs *ring)
-
 
1032
{
-
 
1033
	int ret;
-
 
1034
	struct drm_device *dev = ring->dev;
-
 
1035
	struct drm_i915_private *dev_priv = dev->dev_private;
-
 
1036
 
-
 
1037
	ret = gen9_init_workarounds(ring);
-
 
1038
	if (ret)
-
 
1039
		return ret;
-
 
1040
 
-
 
1041
	if (INTEL_REVID(dev) <= SKL_REVID_D0) {
-
 
1042
		/* WaDisableHDCInvalidation:skl */
-
 
1043
		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
-
 
1044
			   BDW_DISABLE_HDC_INVALIDATION);
-
 
1045
 
-
 
1046
		/* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
-
 
1047
		I915_WRITE(FF_SLICE_CS_CHICKEN2,
-
 
1048
			   _MASKED_BIT_ENABLE(GEN9_TSG_BARRIER_ACK_DISABLE));
799
 
1049
	}
-
 
1050
 
-
 
1051
	/* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes
-
 
1052
	 * involving this register should also be added to WA batch as required.
-
 
1053
	 */
-
 
1054
	if (INTEL_REVID(dev) <= SKL_REVID_E0)
800
	/* Use Force Non-Coherent whenever executing a 3D context. This is a
1055
		/* WaDisableLSQCROPERFforOCL:skl */
-
 
1056
		I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
-
 
1057
			   GEN8_LQSC_RO_PERF_DIS);
-
 
1058
 
-
 
1059
	/* WaEnableGapsTsvCreditFix:skl */
-
 
1060
	if (IS_SKYLAKE(dev) && (INTEL_REVID(dev) >= SKL_REVID_C0)) {
-
 
1061
		I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
-
 
1062
					   GEN9_GAPS_TSV_CREDIT_DISABLE));
-
 
1063
	}
-
 
1064
 
-
 
1065
	/* WaDisablePowerCompilerClockGating:skl */
-
 
1066
	if (INTEL_REVID(dev) == SKL_REVID_B0)
-
 
1067
		WA_SET_BIT_MASKED(HIZ_CHICKEN,
-
 
1068
				  BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
-
 
1069
 
-
 
1070
	if (INTEL_REVID(dev) <= SKL_REVID_D0) {
-
 
1071
		/*
801
	 * workaround for a possible hang in the unlikely event a TLB
1072
		 *Use Force Non-Coherent whenever executing a 3D context. This
802
	 * invalidation occurs during a PSD flush.
1073
		 * is a workaround for a possible hang in the unlikely event
803
	 */
1074
		 * a TLB invalidation occurs during a PSD flush.
-
 
1075
		 */
-
 
1076
		/* WaForceEnableNonCoherent:skl */
-
 
1077
		WA_SET_BIT_MASKED(HDC_CHICKEN0,
-
 
1078
				  HDC_FORCE_NON_COHERENT);
-
 
1079
	}
-
 
1080
 
804
	/* WaForceEnableNonCoherent:chv */
1081
	if (INTEL_REVID(dev) == SKL_REVID_C0 ||
-
 
1082
	    INTEL_REVID(dev) == SKL_REVID_D0)
-
 
1083
		/* WaBarrierPerformanceFixDisable:skl */
-
 
1084
		WA_SET_BIT_MASKED(HDC_CHICKEN0,
-
 
1085
				  HDC_FENCE_DEST_SLM_DISABLE |
-
 
1086
				  HDC_BARRIER_PERFORMANCE_DISABLE);
-
 
1087
 
-
 
1088
	/* WaDisableSbeCacheDispatchPortSharing:skl */
-
 
1089
	if (INTEL_REVID(dev) <= SKL_REVID_F0) {
-
 
1090
		WA_SET_BIT_MASKED(
-
 
1091
			GEN7_HALF_SLICE_CHICKEN1,
-
 
1092
			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
-
 
1093
	}
-
 
1094
 
-
 
1095
	return skl_tune_iz_hashing(ring);
-
 
1096
}
-
 
1097
 
-
 
1098
static int bxt_init_workarounds(struct intel_engine_cs *ring)
-
 
1099
{
-
 
1100
	int ret;
-
 
1101
	struct drm_device *dev = ring->dev;
-
 
1102
	struct drm_i915_private *dev_priv = dev->dev_private;
-
 
1103
 
-
 
1104
	ret = gen9_init_workarounds(ring);
-
 
1105
	if (ret)
-
 
1106
		return ret;
-
 
1107
 
-
 
1108
	/* WaStoreMultiplePTEenable:bxt */
-
 
1109
	/* This is a requirement according to Hardware specification */
-
 
1110
	if (INTEL_REVID(dev) == BXT_REVID_A0)
-
 
1111
		I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
-
 
1112
 
-
 
1113
	/* WaSetClckGatingDisableMedia:bxt */
-
 
1114
	if (INTEL_REVID(dev) == BXT_REVID_A0) {
-
 
1115
		I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
-
 
1116
					    ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
-
 
1117
	}
-
 
1118
 
-
 
1119
	/* WaDisableThreadStallDopClockGating:bxt */
-
 
1120
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
-
 
1121
			  STALL_DOP_GATING_DISABLE);
-
 
1122
 
-
 
1123
	/* WaDisableSbeCacheDispatchPortSharing:bxt */
-
 
1124
	if (INTEL_REVID(dev) <= BXT_REVID_B0) {
Line 805... Line 1125...
805
	/* WaHdcDisableFetchWhenMasked:chv */
1125
		WA_SET_BIT_MASKED(
806
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
1126
			GEN7_HALF_SLICE_CHICKEN1,
Line 807... Line 1127...
807
			  HDC_FORCE_NON_COHERENT |
1127
			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
Line 823... Line 1143...
823
		return bdw_init_workarounds(ring);
1143
		return bdw_init_workarounds(ring);
Line 824... Line 1144...
824
 
1144
 
825
	if (IS_CHERRYVIEW(dev))
1145
	if (IS_CHERRYVIEW(dev))
Line -... Line 1146...
-
 
1146
		return chv_init_workarounds(ring);
-
 
1147
 
-
 
1148
	if (IS_SKYLAKE(dev))
-
 
1149
		return skl_init_workarounds(ring);
-
 
1150
 
-
 
1151
	if (IS_BROXTON(dev))
826
		return chv_init_workarounds(ring);
1152
		return bxt_init_workarounds(ring);
827
 
1153
 
Line 828... Line 1154...
828
	return 0;
1154
	return 0;
829
}
1155
}
Line 842... Line 1168...
842
 
1168
 
843
	/* We need to disable the AsyncFlip performance optimisations in order
1169
	/* We need to disable the AsyncFlip performance optimisations in order
844
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
1170
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
845
	 * programmed to '1' on all products.
1171
	 * programmed to '1' on all products.
846
	 *
1172
	 *
847
	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
1173
	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
848
	 */
1174
	 */
849
	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 9)
1175
	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
Line 850... Line 1176...
850
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
1176
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
851
 
1177
 
852
	/* Required for the hardware to program scanline values for waiting */
1178
	/* Required for the hardware to program scanline values for waiting */
Line 859... Line 1185...
859
		if (IS_GEN7(dev))
1185
	if (IS_GEN7(dev))
860
			I915_WRITE(GFX_MODE_GEN7,
1186
		I915_WRITE(GFX_MODE_GEN7,
861
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
1187
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
862
				   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
1188
			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
Line 863... Line -...
863
 
-
 
864
	if (INTEL_INFO(dev)->gen >= 5) {
-
 
865
		ret = intel_init_pipe_control(ring);
-
 
866
		if (ret)
-
 
867
			return ret;
-
 
868
	}
-
 
869
 
1189
 
870
	if (IS_GEN6(dev)) {
1190
	if (IS_GEN6(dev)) {
871
		/* From the Sandybridge PRM, volume 1 part 3, page 24:
1191
		/* From the Sandybridge PRM, volume 1 part 3, page 24:
872
		 * "If this bit is set, STCunit will have LRA as replacement
1192
		 * "If this bit is set, STCunit will have LRA as replacement
873
		 *  policy. [...] This bit must be reset.  LRA replacement
1193
		 *  policy. [...] This bit must be reset.  LRA replacement
874
		 *  policy is not supported."
1194
		 *  policy is not supported."
875
		 */
1195
		 */
876
		I915_WRITE(CACHE_MODE_0,
1196
		I915_WRITE(CACHE_MODE_0,
877
			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
1197
			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
Line 878... Line 1198...
878
	}
1198
	}
879
 
1199
 
Line 880... Line 1200...
880
	if (INTEL_INFO(dev)->gen >= 6)
1200
	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
881
		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
1201
		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
Line 898... Line 1218...
898
	}
1218
	}
Line 899... Line 1219...
899
 
1219
 
900
	intel_fini_pipe_control(ring);
1220
	intel_fini_pipe_control(ring);
Line 901... Line 1221...
901
}
1221
}
902
 
1222
 
903
static int gen8_rcs_signal(struct intel_engine_cs *signaller,
1223
static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
904
			   unsigned int num_dwords)
1224
			   unsigned int num_dwords)
-
 
1225
{
905
{
1226
#define MBOX_UPDATE_DWORDS 8
906
#define MBOX_UPDATE_DWORDS 8
1227
	struct intel_engine_cs *signaller = signaller_req->ring;
907
	struct drm_device *dev = signaller->dev;
1228
	struct drm_device *dev = signaller->dev;
908
	struct drm_i915_private *dev_priv = dev->dev_private;
1229
	struct drm_i915_private *dev_priv = dev->dev_private;
Line 909... Line 1230...
909
	struct intel_engine_cs *waiter;
1230
	struct intel_engine_cs *waiter;
910
	int i, ret, num_rings;
1231
	int i, ret, num_rings;
911
 
1232
 
Line 912... Line 1233...
912
	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
1233
	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
913
	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
1234
	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
914
#undef MBOX_UPDATE_DWORDS
1235
#undef MBOX_UPDATE_DWORDS
Line 915... Line 1236...
915
 
1236
 
-
 
1237
	ret = intel_ring_begin(signaller_req, num_dwords);
916
	ret = intel_ring_begin(signaller, num_dwords);
1238
	if (ret)
917
	if (ret)
1239
		return ret;
918
		return ret;
1240
 
Line -... Line 1241...
-
 
1241
	for_each_ring(waiter, dev_priv, i) {
919
 
1242
		u32 seqno;
920
	for_each_ring(waiter, dev_priv, i) {
1243
		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
921
		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
1244
		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
922
		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
1245
			continue;
923
			continue;
1246
 
924
 
1247
		seqno = i915_gem_request_get_seqno(signaller_req);
925
		intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
1248
		intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
926
		intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
1249
		intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
927
					   PIPE_CONTROL_QW_WRITE |
1250
					   PIPE_CONTROL_QW_WRITE |
928
					   PIPE_CONTROL_FLUSH_ENABLE);
1251
					   PIPE_CONTROL_FLUSH_ENABLE);
929
		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
1252
		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
930
		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
1253
		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
Line 931... Line 1254...
931
		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
1254
		intel_ring_emit(signaller, seqno);
932
		intel_ring_emit(signaller, 0);
1255
		intel_ring_emit(signaller, 0);
Line 933... Line 1256...
933
		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
1256
		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
934
					   MI_SEMAPHORE_TARGET(waiter->id));
1257
					   MI_SEMAPHORE_TARGET(waiter->id));
935
		intel_ring_emit(signaller, 0);
1258
		intel_ring_emit(signaller, 0);
936
	}
1259
	}
-
 
1260
 
937
 
1261
	return 0;
938
	return 0;
1262
}
939
}
1263
 
940
 
1264
static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
Line 941... Line 1265...
941
static int gen8_xcs_signal(struct intel_engine_cs *signaller,
1265
			   unsigned int num_dwords)
942
			   unsigned int num_dwords)
1266
{
943
{
1267
#define MBOX_UPDATE_DWORDS 6
Line 944... Line 1268...
944
#define MBOX_UPDATE_DWORDS 6
1268
	struct intel_engine_cs *signaller = signaller_req->ring;
945
	struct drm_device *dev = signaller->dev;
1269
	struct drm_device *dev = signaller->dev;
946
	struct drm_i915_private *dev_priv = dev->dev_private;
1270
	struct drm_i915_private *dev_priv = dev->dev_private;
Line 947... Line 1271...
947
	struct intel_engine_cs *waiter;
1271
	struct intel_engine_cs *waiter;
-
 
1272
	int i, ret, num_rings;
948
	int i, ret, num_rings;
1273
 
949
 
1274
	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
950
	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
1275
	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
Line -... Line 1276...
-
 
1276
#undef MBOX_UPDATE_DWORDS
951
	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
1277
 
952
#undef MBOX_UPDATE_DWORDS
1278
	ret = intel_ring_begin(signaller_req, num_dwords);
953
 
1279
	if (ret)
954
	ret = intel_ring_begin(signaller, num_dwords);
1280
		return ret;
955
	if (ret)
1281
 
956
		return ret;
1282
	for_each_ring(waiter, dev_priv, i) {
957
 
1283
		u32 seqno;
958
	for_each_ring(waiter, dev_priv, i) {
1284
		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
959
		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
1285
		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
960
		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
1286
			continue;
Line 961... Line 1287...
961
			continue;
1287
 
962
 
1288
		seqno = i915_gem_request_get_seqno(signaller_req);
Line 963... Line 1289...
963
		intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
1289
		intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
964
					   MI_FLUSH_DW_OP_STOREDW);
1290
					   MI_FLUSH_DW_OP_STOREDW);
965
		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
1291
		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
-
 
1292
					   MI_FLUSH_DW_USE_GTT);
966
					   MI_FLUSH_DW_USE_GTT);
1293
		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
967
		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
1294
		intel_ring_emit(signaller, seqno);
968
		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
1295
		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
969
		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
1296
					   MI_SEMAPHORE_TARGET(waiter->id));
Line 970... Line 1297...
970
					   MI_SEMAPHORE_TARGET(waiter->id));
1297
		intel_ring_emit(signaller, 0);
971
		intel_ring_emit(signaller, 0);
1298
	}
972
	}
1299
 
973
 
1300
	return 0;
Line 974... Line 1301...
974
	return 0;
1301
}
975
}
1302
 
976
 
1303
static int gen6_signal(struct drm_i915_gem_request *signaller_req,
Line 977... Line 1304...
977
static int gen6_signal(struct intel_engine_cs *signaller,
1304
		       unsigned int num_dwords)
978
		       unsigned int num_dwords)
1305
{
979
{
1306
	struct intel_engine_cs *signaller = signaller_req->ring;
-
 
1307
	struct drm_device *dev = signaller->dev;
980
	struct drm_device *dev = signaller->dev;
1308
	struct drm_i915_private *dev_priv = dev->dev_private;
981
	struct drm_i915_private *dev_priv = dev->dev_private;
1309
	struct intel_engine_cs *useless;
982
	struct intel_engine_cs *useless;
1310
	int i, ret, num_rings;
983
	int i, ret, num_rings;
1311
 
984
 
1312
#define MBOX_UPDATE_DWORDS 3
Line 985... Line 1313...
985
#define MBOX_UPDATE_DWORDS 3
1313
	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
986
	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
1314
	num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
Line 1008... Line 1336...
1008
}
1336
}
Line 1009... Line 1337...
1009
 
1337
 
1010
/**
1338
/**
1011
 * gen6_add_request - Update the semaphore mailbox registers
1339
 * gen6_add_request - Update the semaphore mailbox registers
1012
 *
-
 
1013
 * @ring - ring that is adding a request
1340
 *
1014
 * @seqno - return seqno stuck into the ring
1341
 * @request - request to write to the ring
1015
 *
1342
 *
1016
 * Update the mailbox registers in the *other* rings with the current seqno.
1343
 * Update the mailbox registers in the *other* rings with the current seqno.
1017
 * This acts like a signal in the canonical semaphore.
1344
 * This acts like a signal in the canonical semaphore.
1018
 */
1345
 */
1019
static int
1346
static int
1020
gen6_add_request(struct intel_engine_cs *ring)
1347
gen6_add_request(struct drm_i915_gem_request *req)
-
 
1348
{
1021
{
1349
	struct intel_engine_cs *ring = req->ring;
Line 1022... Line 1350...
1022
	int ret;
1350
	int ret;
1023
 
1351
 
1024
	if (ring->semaphore.signal)
1352
	if (ring->semaphore.signal)
1025
	ret = ring->semaphore.signal(ring, 4);
1353
		ret = ring->semaphore.signal(req, 4);
Line 1026... Line 1354...
1026
	else
1354
	else
1027
		ret = intel_ring_begin(ring, 4);
1355
		ret = intel_ring_begin(req, 4);
Line 1028... Line 1356...
1028
 
1356
 
1029
	if (ret)
1357
	if (ret)
1030
		return ret;
1358
		return ret;
1031
 
1359
 
1032
	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
1360
	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
Line 1033... Line 1361...
1033
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
1361
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
1034
	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
1362
	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
Line 1052... Line 1380...
1052
 * @signaller - ring which has, or will signal
1380
 * @signaller - ring which has, or will signal
1053
 * @seqno - seqno which the waiter will block on
1381
 * @seqno - seqno which the waiter will block on
1054
 */
1382
 */
Line 1055... Line 1383...
1055
 
1383
 
1056
static int
1384
static int
1057
gen8_ring_sync(struct intel_engine_cs *waiter,
1385
gen8_ring_sync(struct drm_i915_gem_request *waiter_req,
1058
	       struct intel_engine_cs *signaller,
1386
	       struct intel_engine_cs *signaller,
1059
	       u32 seqno)
1387
	       u32 seqno)
-
 
1388
{
1060
{
1389
	struct intel_engine_cs *waiter = waiter_req->ring;
1061
	struct drm_i915_private *dev_priv = waiter->dev->dev_private;
1390
	struct drm_i915_private *dev_priv = waiter->dev->dev_private;
Line 1062... Line 1391...
1062
	int ret;
1391
	int ret;
1063
 
1392
 
1064
	ret = intel_ring_begin(waiter, 4);
1393
	ret = intel_ring_begin(waiter_req, 4);
Line 1065... Line 1394...
1065
	if (ret)
1394
	if (ret)
1066
		return ret;
1395
		return ret;
Line 1077... Line 1406...
1077
	intel_ring_advance(waiter);
1406
	intel_ring_advance(waiter);
1078
	return 0;
1407
	return 0;
1079
}
1408
}
Line 1080... Line 1409...
1080
 
1409
 
1081
static int
1410
static int
1082
gen6_ring_sync(struct intel_engine_cs *waiter,
1411
gen6_ring_sync(struct drm_i915_gem_request *waiter_req,
1083
	       struct intel_engine_cs *signaller,
1412
	       struct intel_engine_cs *signaller,
1084
		u32 seqno)
1413
	       u32 seqno)
-
 
1414
{
1085
{
1415
	struct intel_engine_cs *waiter = waiter_req->ring;
1086
	u32 dw1 = MI_SEMAPHORE_MBOX |
1416
	u32 dw1 = MI_SEMAPHORE_MBOX |
1087
		  MI_SEMAPHORE_COMPARE |
1417
		  MI_SEMAPHORE_COMPARE |
1088
		  MI_SEMAPHORE_REGISTER;
1418
		  MI_SEMAPHORE_REGISTER;
1089
	u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id];
1419
	u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id];
Line 1095... Line 1425...
1095
	 */
1425
	 */
1096
	seqno -= 1;
1426
	seqno -= 1;
Line 1097... Line 1427...
1097
 
1427
 
Line 1098... Line 1428...
1098
	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
1428
	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
1099
 
1429
 
1100
	ret = intel_ring_begin(waiter, 4);
1430
	ret = intel_ring_begin(waiter_req, 4);
Line 1101... Line 1431...
1101
	if (ret)
1431
	if (ret)
1102
		return ret;
1432
		return ret;
Line 1126... Line 1456...
1126
	intel_ring_emit(ring__, 0);							\
1456
	intel_ring_emit(ring__, 0);							\
1127
	intel_ring_emit(ring__, 0);							\
1457
	intel_ring_emit(ring__, 0);							\
1128
} while (0)
1458
} while (0)
Line 1129... Line 1459...
1129
 
1459
 
1130
static int
1460
static int
1131
pc_render_add_request(struct intel_engine_cs *ring)
1461
pc_render_add_request(struct drm_i915_gem_request *req)
-
 
1462
{
1132
{
1463
	struct intel_engine_cs *ring = req->ring;
1133
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
1464
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
Line 1134... Line 1465...
1134
	int ret;
1465
	int ret;
1135
 
1466
 
Line 1139... Line 1470...
1139
	 *
1470
	 *
1140
	 * However, we also need to workaround the qword write
1471
	 * However, we also need to workaround the qword write
1141
	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
1472
	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
1142
	 * memory before requesting an interrupt.
1473
	 * memory before requesting an interrupt.
1143
	 */
1474
	 */
1144
	ret = intel_ring_begin(ring, 32);
1475
	ret = intel_ring_begin(req, 32);
1145
	if (ret)
1476
	if (ret)
1146
		return ret;
1477
		return ret;
Line 1147... Line 1478...
1147
 
1478
 
1148
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
1479
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
1149
			PIPE_CONTROL_WRITE_FLUSH |
1480
			PIPE_CONTROL_WRITE_FLUSH |
1150
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
1481
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
1151
	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
1482
	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
1152
	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
1483
	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
1153
	intel_ring_emit(ring, 0);
1484
	intel_ring_emit(ring, 0);
1154
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
1485
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
1155
	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
1486
	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
1156
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
1487
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
Line 1166... Line 1497...
1166
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
1497
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
1167
			PIPE_CONTROL_WRITE_FLUSH |
1498
			PIPE_CONTROL_WRITE_FLUSH |
1168
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
1499
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
1169
			PIPE_CONTROL_NOTIFY);
1500
			PIPE_CONTROL_NOTIFY);
1170
	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
1501
	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
1171
	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
1502
	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
1172
	intel_ring_emit(ring, 0);
1503
	intel_ring_emit(ring, 0);
1173
	__intel_ring_advance(ring);
1504
	__intel_ring_advance(ring);
Line 1174... Line 1505...
1174
 
1505
 
1175
	return 0;
1506
	return 0;
Line 1316... Line 1647...
1316
		POSTING_READ16(IMR);
1647
		POSTING_READ16(IMR);
1317
	}
1648
	}
1318
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1649
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1319
}
1650
}
Line 1320... Line -...
1320
 
-
 
1321
void intel_ring_setup_status_page(struct intel_engine_cs *ring)
-
 
1322
{
-
 
1323
	struct drm_device *dev = ring->dev;
-
 
1324
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
-
 
1325
	u32 mmio = 0;
-
 
1326
 
-
 
1327
	/* The ring status page addresses are no longer next to the rest of
-
 
1328
	 * the ring registers as of gen7.
-
 
1329
	 */
-
 
1330
	if (IS_GEN7(dev)) {
-
 
1331
		switch (ring->id) {
-
 
1332
		case RCS:
-
 
1333
			mmio = RENDER_HWS_PGA_GEN7;
-
 
1334
			break;
-
 
1335
		case BCS:
-
 
1336
			mmio = BLT_HWS_PGA_GEN7;
-
 
1337
			break;
-
 
1338
		/*
-
 
1339
		 * VCS2 actually doesn't exist on Gen7. Only shut up
-
 
1340
		 * gcc switch check warning
-
 
1341
		 */
-
 
1342
		case VCS2:
-
 
1343
		case VCS:
-
 
1344
			mmio = BSD_HWS_PGA_GEN7;
-
 
1345
			break;
-
 
1346
		case VECS:
-
 
1347
			mmio = VEBOX_HWS_PGA_GEN7;
-
 
1348
			break;
-
 
1349
		}
-
 
1350
	} else if (IS_GEN6(ring->dev)) {
-
 
1351
		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
-
 
1352
	} else {
-
 
1353
		/* XXX: gen8 returns to sanity */
-
 
1354
		mmio = RING_HWS_PGA(ring->mmio_base);
-
 
1355
	}
-
 
1356
 
-
 
1357
	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
-
 
1358
	POSTING_READ(mmio);
-
 
1359
 
-
 
1360
	/*
-
 
1361
	 * Flush the TLB for this page
-
 
1362
	 *
-
 
1363
	 * FIXME: These two bits have disappeared on gen8, so a question
-
 
1364
	 * arises: do we still need this and if so how should we go about
-
 
1365
	 * invalidating the TLB?
-
 
1366
	 */
-
 
1367
	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
-
 
1368
		u32 reg = RING_INSTPM(ring->mmio_base);
-
 
1369
 
-
 
1370
		/* ring should be idle before issuing a sync flush*/
-
 
1371
		WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
-
 
1372
 
-
 
1373
		I915_WRITE(reg,
-
 
1374
			   _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
-
 
1375
					      INSTPM_SYNC_FLUSH));
-
 
1376
		if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
-
 
1377
			     1000))
-
 
1378
			DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
-
 
1379
				  ring->name);
-
 
1380
	}
-
 
1381
}
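The removed intel_ring_setup_status_page() above (it moves elsewhere in the new revision) ends by writing a self-clearing sync-flush bit and polling the register until the hardware clears it, giving up after 1000 ms. The standalone C sketch below models only that poll-with-timeout idiom; fake_read_reg(), DEMO_SYNC_FLUSH and the timings are assumptions of the example, not the real INSTPM register layout.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define DEMO_SYNC_FLUSH  (1u << 5)   /* placeholder bit, not the real INSTPM layout */

static uint32_t fake_reg = DEMO_SYNC_FLUSH;

/* Stand-in for an MMIO read; pretends the hardware clears the bit
 * after a few polls. */
static uint32_t fake_read_reg(void)
{
	static int reads;

	if (++reads > 3)
		fake_reg &= ~DEMO_SYNC_FLUSH;
	return fake_reg;
}

static bool wait_for_clear(uint32_t mask, long timeout_ms)
{
	struct timespec start, now, delay = { 0, 1000000 };  /* poll every 1 ms */

	clock_gettime(CLOCK_MONOTONIC, &start);
	for (;;) {
		if ((fake_read_reg() & mask) == 0)
			return true;                 /* flush completed */
		clock_gettime(CLOCK_MONOTONIC, &now);
		if ((now.tv_sec - start.tv_sec) * 1000L +
		    (now.tv_nsec - start.tv_nsec) / 1000000L > timeout_ms)
			return false;                /* give up, as DRM_ERROR reports above */
		nanosleep(&delay, NULL);
	}
}

int main(void)
{
	printf("sync flush %s\n",
	       wait_for_clear(DEMO_SYNC_FLUSH, 1000) ? "completed" : "timed out");
	return 0;
}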
-
 
1382
 
1651
 
1383
static int
1652
static int
1384
bsd_ring_flush(struct intel_engine_cs *ring,
1653
bsd_ring_flush(struct drm_i915_gem_request *req,
1385
	       u32     invalidate_domains,
1654
	       u32     invalidate_domains,
1386
	       u32     flush_domains)
1655
	       u32     flush_domains)
-
 
1656
{
1387
{
1657
	struct intel_engine_cs *ring = req->ring;
Line 1388... Line 1658...
1388
	int ret;
1658
	int ret;
1389
 
1659
 
1390
	ret = intel_ring_begin(ring, 2);
1660
	ret = intel_ring_begin(req, 2);
Line 1391... Line 1661...
1391
	if (ret)
1661
	if (ret)
1392
		return ret;
1662
		return ret;
1393
 
1663
 
1394
	intel_ring_emit(ring, MI_FLUSH);
1664
	intel_ring_emit(ring, MI_FLUSH);
1395
	intel_ring_emit(ring, MI_NOOP);
1665
	intel_ring_emit(ring, MI_NOOP);
Line 1396... Line 1666...
1396
	intel_ring_advance(ring);
1666
	intel_ring_advance(ring);
1397
	return 0;
1667
	return 0;
1398
}
1668
}
-
 
1669
 
1399
 
1670
static int
Line 1400... Line 1671...
1400
static int
1671
i9xx_add_request(struct drm_i915_gem_request *req)
1401
i9xx_add_request(struct intel_engine_cs *ring)
1672
{
1402
{
1673
	struct intel_engine_cs *ring = req->ring;
Line 1403... Line 1674...
1403
	int ret;
1674
	int ret;
1404
 
1675
 
1405
	ret = intel_ring_begin(ring, 4);
1676
	ret = intel_ring_begin(req, 4);
1406
	if (ret)
1677
	if (ret)
1407
		return ret;
1678
		return ret;
Line 1408... Line 1679...
1408
 
1679
 
1409
	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
1680
	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
Line 1538... Line 1809...
1538
	}
1809
	}
1539
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1810
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1540
}
1811
}
Line 1541... Line 1812...
1541
 
1812
 
1542
static int
1813
static int
1543
i965_dispatch_execbuffer(struct intel_engine_cs *ring,
1814
i965_dispatch_execbuffer(struct drm_i915_gem_request *req,
1544
			 u64 offset, u32 length,
1815
			 u64 offset, u32 length,
1545
			 unsigned flags)
1816
			 unsigned dispatch_flags)
-
 
1817
{
1546
{
1818
	struct intel_engine_cs *ring = req->ring;
Line 1547... Line 1819...
1547
	int ret;
1819
	int ret;
1548
 
1820
 
1549
	ret = intel_ring_begin(ring, 2);
1821
	ret = intel_ring_begin(req, 2);
Line 1550... Line 1822...
1550
	if (ret)
1822
	if (ret)
1551
		return ret;
1823
		return ret;
1552
 
1824
 
-
 
1825
	intel_ring_emit(ring,
1553
	intel_ring_emit(ring,
1826
			MI_BATCH_BUFFER_START |
1554
			MI_BATCH_BUFFER_START |
1827
			MI_BATCH_GTT |
1555
			MI_BATCH_GTT |
1828
			(dispatch_flags & I915_DISPATCH_SECURE ?
Line 1556... Line 1829...
1556
			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
1829
			 0 : MI_BATCH_NON_SECURE_I965));
1557
	intel_ring_emit(ring, offset);
1830
	intel_ring_emit(ring, offset);
Line 1563... Line 1836...
1563
/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
1836
/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
1564
#define I830_BATCH_LIMIT (256*1024)
1837
#define I830_BATCH_LIMIT (256*1024)
1565
#define I830_TLB_ENTRIES (2)
1838
#define I830_TLB_ENTRIES (2)
1566
#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
1839
#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
1567
static int
1840
static int
1568
i830_dispatch_execbuffer(struct intel_engine_cs *ring,
1841
i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
1569
				u64 offset, u32 len,
1842
			 u64 offset, u32 len,
1570
				unsigned flags)
1843
			 unsigned dispatch_flags)
1571
{
1844
{
-
 
1845
	struct intel_engine_cs *ring = req->ring;
1572
	u32 cs_offset = ring->scratch.gtt_offset;
1846
	u32 cs_offset = ring->scratch.gtt_offset;
1573
	int ret;
1847
	int ret;
Line 1574... Line 1848...
1574
 
1848
 
1575
	ret = intel_ring_begin(ring, 6);
1849
	ret = intel_ring_begin(req, 6);
1576
		if (ret)
1850
	if (ret)
Line 1577... Line 1851...
1577
			return ret;
1851
		return ret;
1578
 
1852
 
Line 1583... Line 1857...
1583
	intel_ring_emit(ring, cs_offset);
1857
	intel_ring_emit(ring, cs_offset);
1584
	intel_ring_emit(ring, 0xdeadbeef);
1858
	intel_ring_emit(ring, 0xdeadbeef);
1585
		intel_ring_emit(ring, MI_NOOP);
1859
	intel_ring_emit(ring, MI_NOOP);
1586
		intel_ring_advance(ring);
1860
	intel_ring_advance(ring);
Line 1587... Line 1861...
1587
 
1861
 
1588
	if ((flags & I915_DISPATCH_PINNED) == 0) {
1862
	if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
1589
		if (len > I830_BATCH_LIMIT)
1863
		if (len > I830_BATCH_LIMIT)
Line 1590... Line 1864...
1590
			return -ENOSPC;
1864
			return -ENOSPC;
1591
 
1865
 
1592
		ret = intel_ring_begin(ring, 6 + 2);
1866
		ret = intel_ring_begin(req, 6 + 2);
Line 1593... Line 1867...
1593
		if (ret)
1867
		if (ret)
1594
			return ret;
1868
			return ret;
Line 1610... Line 1884...
1610
 
1884
 
1611
		/* ... and execute it. */
1885
		/* ... and execute it. */
1612
		offset = cs_offset;
1886
		offset = cs_offset;
Line 1613... Line 1887...
1613
	}
1887
	}
1614
 
1888
 
1615
	ret = intel_ring_begin(ring, 4);
1889
	ret = intel_ring_begin(req, 4);
Line 1616... Line 1890...
1616
	if (ret)
1890
	if (ret)
1617
		return ret;
1891
		return ret;
-
 
1892
 
1618
 
1893
	intel_ring_emit(ring, MI_BATCH_BUFFER);
1619
		intel_ring_emit(ring, MI_BATCH_BUFFER);
1894
	intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
1620
	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1895
					0 : MI_BATCH_NON_SECURE));
Line 1621... Line 1896...
1621
	intel_ring_emit(ring, offset + len - 8);
1896
	intel_ring_emit(ring, offset + len - 8);
1622
	intel_ring_emit(ring, MI_NOOP);
1897
	intel_ring_emit(ring, MI_NOOP);
Line 1623... Line 1898...
1623
	intel_ring_advance(ring);
1898
	intel_ring_advance(ring);
1624
 
1899
 
1625
	return 0;
1900
	return 0;
1626
}
1901
}
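i830_dispatch_execbuffer() above only accepts an unpinned batch if it fits the CS-TLB workaround area sized by I830_WA_SIZE; larger unpinned batches are rejected with -ENOSPC, while pinned batches run in place. Below is a tiny standalone check of that policy, reusing the constants visible in the listing; i830_batch_ok() itself is invented for the example.

#include <stdbool.h>
#include <stdio.h>

#define I830_BATCH_LIMIT (256 * 1024)
#define I830_TLB_ENTRIES (2)
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define I830_WA_SIZE MAX(I830_TLB_ENTRIES * 4096, I830_BATCH_LIMIT)

static bool i830_batch_ok(unsigned len, bool pinned)
{
	if (pinned)
		return true;            /* executed in place, no copy needed */
	return len <= I830_BATCH_LIMIT; /* otherwise it must fit the WA area */
}

int main(void)
{
	printf("workaround area: %d bytes\n", I830_WA_SIZE);
	printf("unpinned 300KiB batch ok? %d\n", i830_batch_ok(300 * 1024, false));
	printf("pinned   300KiB batch ok? %d\n", i830_batch_ok(300 * 1024, true));
	return 0;
}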
1627
 
1902
 
-
 
1903
static int
1628
static int
1904
i915_dispatch_execbuffer(struct drm_i915_gem_request *req,
Line 1629... Line 1905...
1629
i915_dispatch_execbuffer(struct intel_engine_cs *ring,
1905
			 u64 offset, u32 len,
1630
			 u64 offset, u32 len,
1906
			 unsigned dispatch_flags)
1631
			 unsigned flags)
1907
{
Line 1632... Line 1908...
1632
{
1908
	struct intel_engine_cs *ring = req->ring;
1633
	int ret;
1909
	int ret;
-
 
1910
 
1634
 
1911
	ret = intel_ring_begin(req, 2);
Line 1635... Line 1912...
1635
		ret = intel_ring_begin(ring, 2);
1912
	if (ret)
1636
		if (ret)
1913
		return ret;
Line 1757... Line 2034...
1757
	}
2034
	}
Line 1758... Line 2035...
1758
 
2035
 
1759
		return 0;
2036
	return 0;
Line 1760... Line 2037...
1760
}
2037
}
1761
 
2038
 
1762
void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
2039
static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
1763
{
2040
{
1764
	drm_gem_object_unreference(&ringbuf->obj->base);
2041
	drm_gem_object_unreference(&ringbuf->obj->base);
Line 1765... Line 2042...
1765
	ringbuf->obj = NULL;
2042
	ringbuf->obj = NULL;
1766
}
2043
}
1767
 
2044
 
1768
int intel_alloc_ringbuffer_obj(struct drm_device *dev,
2045
static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
Line 1769... Line 2046...
1769
			       struct intel_ringbuffer *ringbuf)
2046
				      struct intel_ringbuffer *ringbuf)
Line 1784... Line 2061...
1784
	ringbuf->obj = obj;
2061
	ringbuf->obj = obj;
Line 1785... Line 2062...
1785
 
2062
 
1786
	return 0;
2063
	return 0;
Line -... Line 2064...
-
 
2064
}
-
 
2065
 
-
 
2066
struct intel_ringbuffer *
-
 
2067
intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size)
-
 
2068
{
-
 
2069
	struct intel_ringbuffer *ring;
-
 
2070
	int ret;
-
 
2071
 
-
 
2072
	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
-
 
2073
	if (ring == NULL)
-
 
2074
		return ERR_PTR(-ENOMEM);
-
 
2075
 
-
 
2076
	ring->ring = engine;
-
 
2077
 
-
 
2078
	ring->size = size;
-
 
2079
	/* Workaround an erratum on the i830 which causes a hang if
-
 
2080
	 * the TAIL pointer points to within the last 2 cachelines
-
 
2081
	 * of the buffer.
-
 
2082
	 */
-
 
2083
	ring->effective_size = size;
-
 
2084
	if (IS_I830(engine->dev) || IS_845G(engine->dev))
-
 
2085
		ring->effective_size -= 2 * CACHELINE_BYTES;
-
 
2086
 
-
 
2087
	ring->last_retired_head = -1;
-
 
2088
	intel_ring_update_space(ring);
-
 
2089
 
-
 
2090
	ret = intel_alloc_ringbuffer_obj(engine->dev, ring);
-
 
2091
	if (ret) {
-
 
2092
		DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
-
 
2093
			  engine->name, ret);
-
 
2094
		kfree(ring);
-
 
2095
		return ERR_PTR(ret);
-
 
2096
	}
-
 
2097
 
-
 
2098
	return ring;
-
 
2099
}
-
 
2100
 
-
 
2101
void
-
 
2102
intel_ringbuffer_free(struct intel_ringbuffer *ring)
-
 
2103
{
-
 
2104
	intel_destroy_ringbuffer_obj(ring);
-
 
2105
	kfree(ring);
1787
}
2106
}
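The newly added intel_engine_create_ringbuffer() sizes the buffer and then shrinks the usable ("effective") size by two cachelines on i830/845G to dodge the hang when TAIL lands near the end of the buffer. A minimal sketch of that sizing rule, assuming the usual 64-byte cacheline and using an is_i830 flag in place of the IS_I830()/IS_845G() checks:

#include <stdio.h>

#define CACHELINE_BYTES 64
#define PAGE_SIZE 4096

static int effective_ring_size(int size, int is_i830)
{
	int effective = size;

	/* keep TAIL out of the last two cachelines on the affected parts */
	if (is_i830)
		effective -= 2 * CACHELINE_BYTES;
	return effective;
}

int main(void)
{
	int size = 32 * PAGE_SIZE;   /* same allocation size the driver requests */

	printf("normal: %d bytes usable\n", effective_ring_size(size, 0));
	printf("i830:   %d bytes usable\n", effective_ring_size(size, 1));
	return 0;
}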
1788
 
2107
 
1789
static int intel_init_ring_buffer(struct drm_device *dev,
2108
static int intel_init_ring_buffer(struct drm_device *dev,
1790
				  struct intel_engine_cs *ring)
2109
				  struct intel_engine_cs *ring)
1791
{
2110
{
Line 1792... Line -...
1792
	struct intel_ringbuffer *ringbuf = ring->buffer;
-
 
1793
	int ret;
-
 
1794
 
2111
	struct intel_ringbuffer *ringbuf;
1795
	if (ringbuf == NULL) {
-
 
1796
		ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
-
 
1797
		if (!ringbuf)
-
 
Line 1798... Line 2112...
1798
			return -ENOMEM;
2112
	int ret;
1799
		ring->buffer = ringbuf;
2113
 
1800
	}
2114
	WARN_ON(ring->buffer);
1801
 
2115
 
1802
	ring->dev = dev;
2116
	ring->dev = dev;
1803
	INIT_LIST_HEAD(&ring->active_list);
-
 
1804
	INIT_LIST_HEAD(&ring->request_list);
2117
	INIT_LIST_HEAD(&ring->active_list);
Line 1805... Line 2118...
1805
	INIT_LIST_HEAD(&ring->execlist_queue);
2118
	INIT_LIST_HEAD(&ring->request_list);
Line -... Line 2119...
-
 
2119
	INIT_LIST_HEAD(&ring->execlist_queue);
-
 
2120
	i915_gem_batch_pool_init(dev, &ring->batch_pool);
-
 
2121
	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
-
 
2122
 
-
 
2123
	init_waitqueue_head(&ring->irq_queue);
1806
	ringbuf->size = 32 * PAGE_SIZE;
2124
 
1807
	ringbuf->ring = ring;
2125
	ringbuf = intel_engine_create_ringbuffer(ring, 32 * PAGE_SIZE);
1808
	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
2126
	if (IS_ERR(ringbuf))
1809
 
2127
		return PTR_ERR(ringbuf);
1810
	init_waitqueue_head(&ring->irq_queue);
2128
	ring->buffer = ringbuf;
Line 1818... Line 2136...
1818
		ret = init_phys_status_page(ring);
2136
		ret = init_phys_status_page(ring);
1819
	if (ret)
2137
		if (ret)
1820
			goto error;
2138
			goto error;
1821
	}
2139
	}
Line 1822... Line -...
1822
 
-
 
1823
	if (ringbuf->obj == NULL) {
-
 
1824
	ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
-
 
1825
	if (ret) {
-
 
1826
			DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
-
 
1827
					ring->name, ret);
-
 
1828
			goto error;
-
 
1829
		}
-
 
1830
 
2140
 
1831
		ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
2141
	ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
1832
		if (ret) {
2142
	if (ret) {
1833
			DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
2143
		DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
1834
					ring->name, ret);
2144
				ring->name, ret);
1835
			intel_destroy_ringbuffer_obj(ringbuf);
2145
		intel_destroy_ringbuffer_obj(ringbuf);
1836
		goto error;
2146
		goto error;
1837
	}
-
 
1838
	}
-
 
1839
 
-
 
1840
	/* Workaround an erratum on the i830 which causes a hang if
-
 
1841
	 * the TAIL pointer points to within the last 2 cachelines
-
 
1842
	 * of the buffer.
-
 
1843
	 */
-
 
1844
	ringbuf->effective_size = ringbuf->size;
-
 
1845
	if (IS_I830(dev) || IS_845G(dev))
-
 
Line 1846... Line 2147...
1846
		ringbuf->effective_size -= 2 * CACHELINE_BYTES;
2147
	}
1847
 
2148
 
1848
	ret = i915_cmd_parser_init_ring(ring);
2149
	ret = i915_cmd_parser_init_ring(ring);
Line 1849... Line -...
1849
	if (ret)
-
 
1850
		goto error;
-
 
1851
 
-
 
1852
	ret = ring->init(ring);
-
 
1853
	if (ret)
2150
	if (ret)
Line 1854... Line 2151...
1854
		goto error;
2151
		goto error;
1855
 
2152
 
1856
	return 0;
2153
	return 0;
1857
 
2154
 
1858
error:
2155
error:
Line 1859... Line 2156...
1859
	kfree(ringbuf);
2156
	intel_ringbuffer_free(ringbuf);
1860
	ring->buffer = NULL;
2157
	ring->buffer = NULL;
1861
	return ret;
2158
	return ret;
1862
}
-
 
Line 1863... Line 2159...
1863
 
2159
}
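intel_init_ring_buffer() above acquires its resources in order (ring buffer object, status page, pin/map, command parser) and unwinds through a single error label on failure. The generic C sketch below shows the same goto-based unwind shape; engine_demo, setup_a and setup_b are invented placeholders, not driver functions.

#include <stdio.h>
#include <stdlib.h>

struct engine_demo {
	void *buffer;
	void *status_page;
};

static int setup_a(struct engine_demo *e) { e->status_page = malloc(16); return e->status_page ? 0 : -1; }
static int setup_b(struct engine_demo *e) { e->buffer = malloc(64); return e->buffer ? 0 : -1; }

static int init_engine_demo(struct engine_demo *e)
{
	int ret;

	ret = setup_a(e);
	if (ret)
		goto err;

	ret = setup_b(e);
	if (ret)
		goto err_status;

	return 0;

err_status:
	/* release in reverse order so a failure leaves nothing dangling */
	free(e->status_page);
	e->status_page = NULL;
err:
	return ret;
}

int main(void)
{
	struct engine_demo e = {0};

	printf("init: %d\n", init_engine_demo(&e));
	free(e.buffer);
	free(e.status_page);
	return 0;
}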
1864
void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
2160
 
Line 1865... Line 2161...
1865
{
2161
void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
1866
	struct drm_i915_private *dev_priv;
-
 
Line 1867... Line 2162...
1867
	struct intel_ringbuffer *ringbuf;
2162
{
1868
 
2163
	struct drm_i915_private *dev_priv;
Line 1869... Line 2164...
1869
	if (!intel_ring_initialized(ring))
2164
 
1870
		return;
2165
	if (!intel_ring_initialized(ring))
1871
 
2166
		return;
1872
	dev_priv = to_i915(ring->dev);
-
 
Line 1873... Line 2167...
1873
	ringbuf = ring->buffer;
2167
 
1874
 
2168
	dev_priv = to_i915(ring->dev);
Line 1875... Line 2169...
1875
	intel_stop_ring_buffer(ring);
2169
 
Line 1876... Line 2170...
1876
	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
2170
	intel_stop_ring_buffer(ring);
1877
 
-
 
1878
	intel_unpin_ringbuffer_obj(ringbuf);
2171
	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
1879
	intel_destroy_ringbuffer_obj(ringbuf);
-
 
1880
	ring->preallocated_lazy_request = NULL;
2172
 
Line 1881... Line 2173...
1881
	ring->outstanding_lazy_seqno = 0;
2173
	intel_unpin_ringbuffer_obj(ring->buffer);
1882
 
2174
	intel_ringbuffer_free(ring->buffer);
1883
	if (ring->cleanup)
2175
	ring->buffer = NULL;
1884
		ring->cleanup(ring);
2176
 
1885
 
2177
	if (ring->cleanup)
1886
//	cleanup_status_page(ring);
2178
		ring->cleanup(ring);
Line 1887... Line -...
1887
 
-
 
1888
	i915_cmd_parser_fini_ring(ring);
-
 
1889
 
-
 
1890
	kfree(ringbuf);
-
 
1891
	ring->buffer = NULL;
2179
 
1892
}
-
 
1893
 
2180
	cleanup_status_page(ring);
1894
static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
2181
 
-
 
2182
	i915_cmd_parser_fini_ring(ring);
-
 
2183
	i915_gem_batch_pool_fini(&ring->batch_pool);
Line 1895... Line 2184...
1895
{
2184
}
1896
	struct intel_ringbuffer *ringbuf = ring->buffer;
2185
 
1897
	struct drm_i915_gem_request *request;
2186
static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
1898
	u32 seqno = 0;
2187
{
1899
	int ret;
2188
	struct intel_ringbuffer *ringbuf = ring->buffer;
1900
 
2189
	struct drm_i915_gem_request *request;
1901
	if (ringbuf->last_retired_head != -1) {
-
 
Line 1902... Line 2190...
1902
		ringbuf->head = ringbuf->last_retired_head;
2190
	unsigned space;
1903
		ringbuf->last_retired_head = -1;
2191
	int ret;
Line 1904... Line 2192...
1904
 
2192
 
1905
		ringbuf->space = intel_ring_space(ringbuf);
2193
	if (intel_ring_space(ringbuf) >= n)
1906
		if (ringbuf->space >= n)
2194
		return 0;
Line 1907... Line -...
1907
			return 0;
-
 
1908
	}
-
 
1909
 
-
 
1910
	list_for_each_entry(request, &ring->request_list, list) {
-
 
1911
		if (__intel_ring_space(request->tail, ringbuf->tail,
2195
 
1912
				       ringbuf->size) >= n) {
2196
	/* The whole point of reserving space is to not wait! */
1913
			seqno = request->seqno;
2197
	WARN_ON(ringbuf->reserved_in_use);
Line 1914... Line -...
1914
			break;
-
 
1915
		}
-
 
1916
	}
-
 
1917
 
-
 
1918
	if (seqno == 0)
-
 
1919
		return -ENOSPC;
-
 
1920
 
-
 
1921
	ret = i915_wait_seqno(ring, seqno);
-
 
1922
	if (ret)
-
 
1923
		return ret;
-
 
1924
 
-
 
1925
	i915_gem_retire_requests_ring(ring);
-
 
1926
	ringbuf->head = ringbuf->last_retired_head;
-
 
1927
	ringbuf->last_retired_head = -1;
-
 
1928
 
-
 
1929
	ringbuf->space = intel_ring_space(ringbuf);
-
 
1930
	return 0;
-
 
1931
}
-
 
1932
 
-
 
1933
static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
-
 
1934
{
-
 
1935
	struct drm_device *dev = ring->dev;
-
 
1936
	struct drm_i915_private *dev_priv = dev->dev_private;
-
 
1937
	struct intel_ringbuffer *ringbuf = ring->buffer;
-
 
1938
	unsigned long end;
-
 
1939
	int ret;
-
 
1940
 
-
 
1941
	ret = intel_ring_wait_request(ring, n);
-
 
1942
	if (ret != -ENOSPC)
-
 
1943
		return ret;
-
 
1944
 
-
 
1945
	/* force the tail write in case we have been skipping them */
-
 
1946
	__intel_ring_advance(ring);
-
 
1947
 
-
 
1948
	/* With GEM the hangcheck timer should kick us out of the loop,
-
 
1949
	 * leaving it early runs the risk of corrupting GEM state (due
-
 
1950
	 * to running on almost untested codepaths). But on resume
-
 
1951
	 * timers don't work yet, so prevent a complete hang in that
-
 
1952
	 * case by choosing an insanely large timeout. */
-
 
1953
	end = jiffies + 60 * HZ;
-
 
1954
 
-
 
1955
	trace_i915_ring_wait_begin(ring);
-
 
1956
	do {
-
 
1957
		ringbuf->head = I915_READ_HEAD(ring);
-
 
1958
		ringbuf->space = intel_ring_space(ringbuf);
-
 
1959
		if (ringbuf->space >= n) {
-
 
1960
			ret = 0;
-
 
1961
			break;
2198
 
1962
		}
2199
	list_for_each_entry(request, &ring->request_list, list) {
1963
 
2200
		space = __intel_ring_space(request->postfix, ringbuf->tail,
1964
		msleep(1);
-
 
1965
 
2201
					   ringbuf->size);
Line 1966... Line -...
1966
		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
-
 
1967
					   dev_priv->mm.interruptible);
-
 
1968
		if (ret)
-
 
1969
			break;
-
 
1970
 
-
 
1971
		if (time_after(jiffies, end)) {
-
 
1972
			ret = -EBUSY;
2202
		if (space >= n)
1973
			break;
2203
			break;
1974
		}
2204
	}
1975
	} while (1);
2205
 
Line 1976... Line 2206...
1976
	trace_i915_ring_wait_end(ring);
2206
	if (WARN_ON(&request->list == &ring->request_list))
1977
			return ret;
2207
		return -ENOSPC;
1978
}
-
 
1979
 
-
 
1980
static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
2208
 
Line 1981... Line 2209...
1981
{
2209
	ret = i915_wait_request(request);
1982
	uint32_t __iomem *virt;
2210
	if (ret)
1983
	struct intel_ringbuffer *ringbuf = ring->buffer;
-
 
1984
	int rem = ringbuf->size - ringbuf->tail;
-
 
1985
 
-
 
1986
	if (ringbuf->space < rem) {
-
 
1987
		int ret = ring_wait_for_space(ring, rem);
-
 
1988
		if (ret)
2211
		return ret;
1989
			return ret;
-
 
1990
	}
-
 
1991
 
-
 
Line 1992... Line 2212...
1992
	virt = ringbuf->virtual_start + ringbuf->tail;
2212
 
1993
	rem /= 4;
2213
	ringbuf->space = space;
1994
	while (rem--)
2214
	return 0;
Line 1995... Line 2215...
1995
		iowrite32(MI_NOOP, virt++);
2215
}
1996
 
2216
 
1997
	ringbuf->tail = 0;
2217
static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
Line -... Line 2218...
-
 
2218
{
1998
	ringbuf->space = intel_ring_space(ringbuf);
2219
	uint32_t __iomem *virt;
-
 
2220
	int rem = ringbuf->size - ringbuf->tail;
-
 
2221
 
-
 
2222
	virt = ringbuf->virtual_start + ringbuf->tail;
1999
 
2223
	rem /= 4;
Line 2000... Line -...
2000
	return 0;
-
 
2001
}
2224
	while (rem--)
2002
 
2225
		iowrite32(MI_NOOP, virt++);
2003
int intel_ring_idle(struct intel_engine_cs *ring)
2226
 
2004
{
2227
	ringbuf->tail = 0;
-
 
2228
	intel_ring_update_space(ringbuf);
Line 2005... Line 2229...
2005
	u32 seqno;
2229
}
-
 
2230
 
-
 
2231
int intel_ring_idle(struct intel_engine_cs *ring)
-
 
2232
{
-
 
2233
	struct drm_i915_gem_request *req;
-
 
2234
 
2006
	int ret;
2235
	/* Wait upon the last request to be completed */
-
 
2236
	if (list_empty(&ring->request_list))
-
 
2237
		return 0;
-
 
2238
 
-
 
2239
	req = list_entry(ring->request_list.prev,
Line 2007... Line 2240...
2007
 
2240
			struct drm_i915_gem_request,
-
 
2241
			list);
-
 
2242
 
-
 
2243
	/* Make sure we do not trigger any retires */
-
 
2244
	return __i915_wait_request(req,
-
 
2245
				   atomic_read(&to_i915(ring->dev)->gpu_error.reset_counter),
-
 
2246
				   to_i915(ring->dev)->mm.interruptible,
-
 
2247
				   NULL, NULL);
2008
	/* We need to add any requests required to flush the objects and ring */
2248
}
-
 
2249
 
-
 
2250
int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
-
 
2251
{
-
 
2252
	request->ringbuf = request->ring->buffer;
2009
	if (ring->outstanding_lazy_seqno) {
2253
	return 0;
Line -... Line 2254...
-
 
2254
}
2010
		ret = i915_add_request(ring, NULL);
2255
 
2011
		if (ret)
2256
int intel_ring_reserve_space(struct drm_i915_gem_request *request)
Line 2012... Line 2257...
2012
			return ret;
2257
{
-
 
2258
	/*
-
 
2259
	 * The first call merely notes the reserve request and is common for
-
 
2260
	 * all back ends. The subsequent localised _begin() call actually
-
 
2261
	 * ensures that the reservation is available. Without the begin, if
-
 
2262
	 * the request creator immediately submitted the request without
2013
	}
2263
	 * adding any commands to it then there might not actually be
Line 2014... Line 2264...
2014
 
2264
	 * sufficient room for the submission commands.
-
 
2265
	 */
-
 
2266
	intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
-
 
2267
 
-
 
2268
	return intel_ring_begin(request, 0);
-
 
2269
}
-
 
2270
 
2015
	/* Wait upon the last request to be completed */
2271
void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size)
-
 
2272
{
-
 
2273
	WARN_ON(ringbuf->reserved_size);
-
 
2274
	WARN_ON(ringbuf->reserved_in_use);
-
 
2275
 
-
 
2276
	ringbuf->reserved_size = size;
-
 
2277
}
-
 
2278
 
-
 
2279
void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf)
-
 
2280
{
-
 
2281
	WARN_ON(ringbuf->reserved_in_use);
-
 
2282
 
-
 
2283
	ringbuf->reserved_size   = 0;
-
 
2284
	ringbuf->reserved_in_use = false;
-
 
2285
}
-
 
2286
 
-
 
2287
void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf)
2016
	if (list_empty(&ring->request_list))
2288
{
2017
		return 0;
2289
	WARN_ON(ringbuf->reserved_in_use);
-
 
2290
 
-
 
2291
	ringbuf->reserved_in_use = true;
-
 
2292
	ringbuf->reserved_tail   = ringbuf->tail;
2018
 
2293
}
Line -... Line 2294...
-
 
2294
 
-
 
2295
void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
-
 
2296
{
2019
	seqno = list_entry(ring->request_list.prev,
2297
	WARN_ON(!ringbuf->reserved_in_use);
-
 
2298
	if (ringbuf->tail > ringbuf->reserved_tail) {
-
 
2299
		WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
-
 
2300
		     "request reserved size too small: %d vs %d!\n",
-
 
2301
		     ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
-
 
2302
	} else {
-
 
2303
		/*
-
 
2304
		 * The ring was wrapped while the reserved space was in use.
2020
			   struct drm_i915_gem_request,
2305
		 * That means that some unknown amount of the ring tail was
-
 
2306
		 * no-op filled and skipped. Thus simply adding the ring size
2021
			   list)->seqno;
2307
		 * to the tail and doing the above space check will not work.
-
 
2308
		 * Rather than attempt to track how much tail was skipped,
-
 
2309
		 * it is much simpler to say that also skipping the sanity
-
 
2310
		 * check every once in a while is not a big issue.
-
 
2311
		 */
-
 
2312
	}
-
 
2313
 
2022
 
2314
	ringbuf->reserved_size   = 0;
-
 
2315
	ringbuf->reserved_in_use = false;
-
 
2316
}
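The reserve/use/end helpers above bracket the space set aside for the eventual request-emission commands: reserve() notes the size, use() records the tail where those commands start, and end() warns if they overran the reservation (skipping the check if the ring wrapped in between). Here is a standalone model of that bookkeeping, with the driver's WARN replaced by a printf and all names local to the example.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

struct demo_ring {
	int tail;
	int reserved_size;
	int reserved_tail;
	bool reserved_in_use;
};

static void space_reserve(struct demo_ring *r, int size)
{
	assert(!r->reserved_in_use && r->reserved_size == 0);
	r->reserved_size = size;
}

static void space_use(struct demo_ring *r)
{
	assert(!r->reserved_in_use);
	r->reserved_in_use = true;
	r->reserved_tail = r->tail;
}

static void space_end(struct demo_ring *r)
{
	assert(r->reserved_in_use);
	/* only check when the ring did not wrap while the reservation was in use */
	if (r->tail > r->reserved_tail &&
	    r->tail > r->reserved_tail + r->reserved_size)
		printf("request reserved size too small: %d vs %d!\n",
		       r->tail - r->reserved_tail, r->reserved_size);
	r->reserved_size = 0;
	r->reserved_in_use = false;
}

int main(void)
{
	struct demo_ring r = { .tail = 128 };

	space_reserve(&r, 32);   /* e.g. the space kept for add-request commands */
	space_use(&r);
	r.tail += 48;            /* emit more than was reserved */
	space_end(&r);           /* prints the "too small" complaint */
	return 0;
}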
-
 
2317
 
-
 
2318
static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)
2023
	return i915_wait_seqno(ring, seqno);
2319
{
Line 2024... Line 2320...
2024
}
2320
	struct intel_ringbuffer *ringbuf = ring->buffer;
2025
 
2321
	int remain_usable = ringbuf->effective_size - ringbuf->tail;
2026
static int
2322
	int remain_actual = ringbuf->size - ringbuf->tail;
2027
intel_ring_alloc_seqno(struct intel_engine_cs *ring)
2323
	int ret, total_bytes, wait_bytes = 0;
-
 
2324
	bool need_wrap = false;
-
 
2325
 
-
 
2326
	if (ringbuf->reserved_in_use)
2028
{
2327
		total_bytes = bytes;
Line 2029... Line 2328...
2029
	if (ring->outstanding_lazy_seqno)
2328
	else
2030
		return 0;
2329
		total_bytes = bytes + ringbuf->reserved_size;
Line 2031... Line 2330...
2031
 
2330
 
2032
	if (ring->preallocated_lazy_request == NULL) {
2331
	if (unlikely(bytes > remain_usable)) {
2033
		struct drm_i915_gem_request *request;
2332
		/*
-
 
2333
		 * Not enough space for the basic request. So need to flush
2034
 
2334
		 * out the remainder and then wait for base + reserved.
2035
		request = kmalloc(sizeof(*request), GFP_KERNEL);
2335
		 */
Line -... Line 2336...
-
 
2336
		wait_bytes = remain_actual + total_bytes;
-
 
2337
		need_wrap = true;
-
 
2338
	} else {
-
 
2339
		if (unlikely(total_bytes > remain_usable)) {
2036
		if (request == NULL)
2340
			/*
2037
			return -ENOMEM;
2341
			 * The base request will fit but the reserved space
2038
 
2342
			 * falls off the end. So only need to wait for the
2039
		ring->preallocated_lazy_request = request;
2343
			 * reserved size after flushing out the remainder.
Line 2040... Line 2344...
2040
	}
2344
			 */
2041
 
2345
			wait_bytes = remain_actual + ringbuf->reserved_size;
2042
	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
2346
			need_wrap = true;
Line 2043... Line -...
2043
}
-
 
2044
 
-
 
2045
static int __intel_ring_prepare(struct intel_engine_cs *ring,
-
 
2046
			      int bytes)
-
 
2047
{
-
 
2048
	struct intel_ringbuffer *ringbuf = ring->buffer;
2347
		} else if (total_bytes > ringbuf->space) {
2049
	int ret;
2348
			/* No wrapping required, just waiting. */
2050
 
2349
			wait_bytes = total_bytes;
Line 2051... Line 2350...
2051
	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
2350
		}
2052
		ret = intel_wrap_ring_buffer(ring);
2351
	}
2053
		if (unlikely(ret))
2352
 
-
 
2353
	if (wait_bytes) {
2054
			return ret;
2354
		ret = ring_wait_for_space(ring, wait_bytes);
2055
	}
2355
		if (unlikely(ret))
Line 2056... Line 2356...
2056
 
2356
			return ret;
2057
	if (unlikely(ringbuf->space < bytes)) {
2357
 
Line 2058... Line 2358...
2058
		ret = ring_wait_for_space(ring, bytes);
2358
		if (need_wrap)
2059
		if (unlikely(ret))
2359
			__wrap_ring_buffer(ringbuf);
2060
			return ret;
2360
	}
2061
	}
2361
 
Line 2062... Line 2362...
2062
 
2362
	return 0;
2063
	return 0;
2363
}
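__intel_ring_prepare() above reduces to a small piece of arithmetic: how many bytes must become free (wait_bytes) and whether the tail has to wrap to the start first (need_wrap), taking the reserved allowance into account. The standalone function below reproduces that decision from the values visible in the listing; the struct and parameter names are invented for the example.

#include <stdbool.h>
#include <stdio.h>

struct prepare_result {
	int wait_bytes;   /* 0 means enough space is already free */
	bool need_wrap;
};

static struct prepare_result prepare(int bytes, int reserved_size,
				     bool reserved_in_use,
				     int tail, int size, int effective_size,
				     int space)
{
	struct prepare_result r = { 0, false };
	int remain_usable = effective_size - tail;
	int remain_actual = size - tail;
	int total_bytes = reserved_in_use ? bytes : bytes + reserved_size;

	if (bytes > remain_usable) {
		/* Not even the basic request fits before the end of the ring. */
		r.wait_bytes = remain_actual + total_bytes;
		r.need_wrap = true;
	} else if (total_bytes > remain_usable) {
		/* Request fits, but the reserved allowance falls off the end. */
		r.wait_bytes = remain_actual + reserved_size;
		r.need_wrap = true;
	} else if (total_bytes > space) {
		/* No wrap needed, just wait for space to be retired. */
		r.wait_bytes = total_bytes;
	}
	return r;
}

int main(void)
{
	/* tail is 100 bytes from the end of a 128 KiB ring with 4 KiB free */
	struct prepare_result r = prepare(64, 160, false,
					  32 * 4096 - 100, 32 * 4096,
					  32 * 4096, 4096);

	printf("wait_bytes=%d need_wrap=%d\n", r.wait_bytes, r.need_wrap);
	return 0;
}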
Line 2112... Line 2412...
2112
void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
2412
void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
2113
{
2413
{
2114
	struct drm_device *dev = ring->dev;
2414
	struct drm_device *dev = ring->dev;
2115
	struct drm_i915_private *dev_priv = dev->dev_private;
2415
	struct drm_i915_private *dev_priv = dev->dev_private;
Line 2116... Line -...
2116
 
-
 
2117
	BUG_ON(ring->outstanding_lazy_seqno);
-
 
2118
 
2416
 
2119
	if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
2417
	if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
2120
		I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
2418
		I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
2121
		I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
2419
		I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
2122
		if (HAS_VEBOX(dev))
2420
		if (HAS_VEBOX(dev))
Line 2158... Line 2456...
2158
	 */
2456
	 */
2159
       I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
2457
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
2160
		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
2458
		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
2161
}
2459
}
Line 2162... Line 2460...
2162
 
2460
 
2163
static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
2461
static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
2164
			   u32 invalidate, u32 flush)
2462
			       u32 invalidate, u32 flush)
-
 
2463
{
2165
{
2464
	struct intel_engine_cs *ring = req->ring;
2166
	uint32_t cmd;
2465
	uint32_t cmd;
Line 2167... Line 2466...
2167
	int ret;
2466
	int ret;
2168
 
2467
 
2169
	ret = intel_ring_begin(ring, 4);
2468
	ret = intel_ring_begin(req, 4);
Line 2170... Line 2469...
2170
	if (ret)
2469
	if (ret)
2171
		return ret;
2470
		return ret;
2172
 
2471
 
-
 
2472
	cmd = MI_FLUSH_DW;
-
 
2473
	if (INTEL_INFO(ring->dev)->gen >= 8)
-
 
2474
		cmd += 1;
-
 
2475
 
-
 
2476
	/* We always require a command barrier so that subsequent
-
 
2477
	 * commands, such as breadcrumb interrupts, are strictly ordered
-
 
2478
	 * wrt the contents of the write cache being flushed to memory
-
 
2479
	 * (and thus being coherent from the CPU).
2173
	cmd = MI_FLUSH_DW;
2480
	 */
2174
	if (INTEL_INFO(ring->dev)->gen >= 8)
2481
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
2175
		cmd += 1;
2482
 
2176
	/*
2483
	/*
2177
	 * Bspec vol 1c.5 - video engine command streamer:
2484
	 * Bspec vol 1c.5 - video engine command streamer:
2178
	 * "If ENABLED, all TLBs will be invalidated once the flush
2485
	 * "If ENABLED, all TLBs will be invalidated once the flush
2179
	 * operation is complete. This bit is only valid when the
2486
	 * operation is complete. This bit is only valid when the
2180
	 * Post-Sync Operation field is a value of 1h or 3h."
2487
	 * Post-Sync Operation field is a value of 1h or 3h."
2181
	 */
-
 
-
 
2488
	 */
2182
	if (invalidate & I915_GEM_GPU_DOMAINS)
2489
	if (invalidate & I915_GEM_GPU_DOMAINS)
2183
		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
2490
		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
2184
			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
2491
 
2185
	intel_ring_emit(ring, cmd);
2492
	intel_ring_emit(ring, cmd);
2186
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
2493
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
Line 2194... Line 2501...
2194
	intel_ring_advance(ring);
2501
	intel_ring_advance(ring);
2195
	return 0;
2502
	return 0;
2196
}
2503
}
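gen6_bsd_ring_flush() above builds its MI_FLUSH_DW command word incrementally: one extra length dword on gen8, the always-required post-sync store, and the TLB/BSD invalidation bits only when GPU domains are being invalidated. The sketch below shows just that flag composition; the DEMO_* bit values are placeholders, not the real MI_FLUSH_DW encoding from the i915 headers.

#include <stdint.h>
#include <stdio.h>

#define DEMO_FLUSH_DW          (0x26u << 23)  /* placeholder opcode field */
#define DEMO_STORE_INDEX       (1u << 21)     /* placeholder */
#define DEMO_OP_STOREDW        (1u << 14)     /* placeholder */
#define DEMO_INVALIDATE_TLB    (1u << 18)     /* placeholder */
#define DEMO_INVALIDATE_BSD    (1u << 7)      /* placeholder */
#define DEMO_GPU_DOMAINS       0x3u           /* placeholder domain mask */

static uint32_t build_flush_cmd(int gen, uint32_t invalidate)
{
	uint32_t cmd = DEMO_FLUSH_DW;

	if (gen >= 8)
		cmd += 1;       /* one extra dword for the 64-bit address */

	/* Always request the post-sync store so later commands are ordered
	 * against the flush reaching memory. */
	cmd |= DEMO_STORE_INDEX | DEMO_OP_STOREDW;

	if (invalidate & DEMO_GPU_DOMAINS)
		cmd |= DEMO_INVALIDATE_TLB | DEMO_INVALIDATE_BSD;

	return cmd;
}

int main(void)
{
	printf("gen7 cmd: 0x%08x\n", build_flush_cmd(7, DEMO_GPU_DOMAINS));
	printf("gen8 cmd: 0x%08x\n", build_flush_cmd(8, 0));
	return 0;
}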
Line 2197... Line 2504...
2197
 
2504
 
2198
static int
2505
static int
2199
gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
2506
gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
2200
			      u64 offset, u32 len,
2507
			      u64 offset, u32 len,
2201
			      unsigned flags)
2508
			      unsigned dispatch_flags)
-
 
2509
{
2202
{
2510
	struct intel_engine_cs *ring = req->ring;
-
 
2511
	bool ppgtt = USES_PPGTT(ring->dev) &&
2203
	bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE);
2512
			!(dispatch_flags & I915_DISPATCH_SECURE);
Line 2204... Line 2513...
2204
	int ret;
2513
	int ret;
2205
 
2514
 
2206
	ret = intel_ring_begin(ring, 4);
2515
	ret = intel_ring_begin(req, 4);
Line 2207... Line 2516...
2207
	if (ret)
2516
	if (ret)
2208
		return ret;
2517
		return ret;
-
 
2518
 
-
 
2519
	/* FIXME(BDW): Address space and security selectors. */
2209
 
2520
	intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
2210
	/* FIXME(BDW): Address space and security selectors. */
2521
			(dispatch_flags & I915_DISPATCH_RS ?
2211
	intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
2522
			 MI_BATCH_RESOURCE_STREAMER : 0));
2212
	intel_ring_emit(ring, lower_32_bits(offset));
2523
	intel_ring_emit(ring, lower_32_bits(offset));
Line 2213... Line 2524...
2213
	intel_ring_emit(ring, upper_32_bits(offset));
2524
	intel_ring_emit(ring, upper_32_bits(offset));
2214
	intel_ring_emit(ring, MI_NOOP);
2525
	intel_ring_emit(ring, MI_NOOP);
Line 2215... Line 2526...
2215
	intel_ring_advance(ring);
2526
	intel_ring_advance(ring);
2216
 
2527
 
2217
	return 0;
2528
	return 0;
2218
}
2529
}
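On gen8 the batch offset is a 64-bit address emitted as two dwords, low half first, via lower_32_bits()/upper_32_bits(). A quick standalone illustration of that split; the helpers here mirror the kernel macros' semantics but are local to the example.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t lower_32(uint64_t v) { return (uint32_t)v; }
static uint32_t upper_32(uint64_t v) { return (uint32_t)(v >> 32); }

int main(void)
{
	uint64_t offset = 0x0000000123456000ull;  /* example batch offset */

	/* emitted in the same order as the dispatch above: low, then high */
	printf("emit low  dword: 0x%08" PRIx32 "\n", lower_32(offset));
	printf("emit high dword: 0x%08" PRIx32 "\n", upper_32(offset));
	return 0;
}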
2219
 
2530
 
-
 
2531
static int
2220
static int
2532
hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
Line 2221... Line 2533...
2221
hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
2533
			     u64 offset, u32 len,
2222
			      u64 offset, u32 len,
2534
			     unsigned dispatch_flags)
2223
			      unsigned flags)
2535
{
Line 2224... Line 2536...
2224
{
2536
	struct intel_engine_cs *ring = req->ring;
2225
	int ret;
2537
	int ret;
2226
 
2538
 
2227
	ret = intel_ring_begin(ring, 2);
2539
	ret = intel_ring_begin(req, 2);
-
 
2540
	if (ret)
-
 
2541
		return ret;
2228
	if (ret)
2542
 
2229
		return ret;
2543
	intel_ring_emit(ring,
2230
 
2544
			MI_BATCH_BUFFER_START |
Line 2231... Line 2545...
2231
	intel_ring_emit(ring,
2545
			(dispatch_flags & I915_DISPATCH_SECURE ?
2232
			MI_BATCH_BUFFER_START |
2546
			 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
Line 2233... Line 2547...
2233
			(flags & I915_DISPATCH_SECURE ?
2547
			(dispatch_flags & I915_DISPATCH_RS ?
2234
			 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW));
2548
			 MI_BATCH_RESOURCE_STREAMER : 0));
2235
	/* bit0-7 is the length on GEN6+ */
2549
	/* bit0-7 is the length on GEN6+ */
2236
	intel_ring_emit(ring, offset);
2550
	intel_ring_emit(ring, offset);
2237
	intel_ring_advance(ring);
2551
	intel_ring_advance(ring);
-
 
2552
 
2238
 
2553
	return 0;
Line 2239... Line 2554...
2239
	return 0;
2554
}
2240
}
2555
 
2241
 
2556
static int
Line 2242... Line 2557...
2242
static int
2557
gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
2243
gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
2558
			      u64 offset, u32 len,
-
 
2559
			      unsigned dispatch_flags)
2244
			      u64 offset, u32 len,
2560
{
2245
			      unsigned flags)
2561
	struct intel_engine_cs *ring = req->ring;
2246
{
2562
	int ret;
2247
       int ret;
2563
 
Line 2248... Line 2564...
2248
 
2564
	ret = intel_ring_begin(req, 2);
2249
       ret = intel_ring_begin(ring, 2);
2565
	if (ret)
Line 2250... Line 2566...
2250
       if (ret)
2566
		return ret;
Line 2251... Line 2567...
2251
	       return ret;
2567
 
2252
 
2568
	intel_ring_emit(ring,
2253
	intel_ring_emit(ring,
2569
			MI_BATCH_BUFFER_START |
-
 
2570
			(dispatch_flags & I915_DISPATCH_SECURE ?
2254
			MI_BATCH_BUFFER_START |
2571
			 0 : MI_BATCH_NON_SECURE_I965));
2255
			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
-
 
2256
       /* bit0-7 is the length on GEN6+ */
2572
	/* bit0-7 is the length on GEN6+ */
2257
       intel_ring_emit(ring, offset);
2573
	intel_ring_emit(ring, offset);
Line 2258... Line 2574...
2258
       intel_ring_advance(ring);
2574
	intel_ring_advance(ring);
2259
 
2575
 
2260
       return 0;
2576
	return 0;
Line 2261... Line 2577...
2261
}
2577
}
2262
 
2578
 
2263
/* Blitter support (SandyBridge+) */
2579
/* Blitter support (SandyBridge+) */
-
 
2580
 
-
 
2581
static int gen6_ring_flush(struct drm_i915_gem_request *req,
-
 
2582
			   u32 invalidate, u32 flush)
-
 
2583
{
-
 
2584
	struct intel_engine_cs *ring = req->ring;
-
 
2585
	struct drm_device *dev = ring->dev;
-
 
2586
	uint32_t cmd;
-
 
2587
	int ret;
2264
 
2588
 
2265
static int gen6_ring_flush(struct intel_engine_cs *ring,
2589
	ret = intel_ring_begin(req, 4);
2266
			  u32 invalidate, u32 flush)
2590
	if (ret)
2267
{
2591
		return ret;
2268
	struct drm_device *dev = ring->dev;
2592
 
2269
	struct drm_i915_private *dev_priv = dev->dev_private;
2593
	cmd = MI_FLUSH_DW;
2270
	uint32_t cmd;
2594
	if (INTEL_INFO(dev)->gen >= 8)
2271
	int ret;
2595
		cmd += 1;
2272
 
-
 
2273
	ret = intel_ring_begin(ring, 4);
2596
 
2274
	if (ret)
2597
	/* We always require a command barrier so that subsequent
2275
		return ret;
2598
	 * commands, such as breadcrumb interrupts, are strictly ordered
2276
 
2599
	 * wrt the contents of the write cache being flushed to memory
2277
	cmd = MI_FLUSH_DW;
2600
	 * (and thus being coherent from the CPU).
2278
	if (INTEL_INFO(ring->dev)->gen >= 8)
2601
	 */
2279
		cmd += 1;
2602
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
2280
	/*
2603
 
2281
	 * Bspec vol 1c.3 - blitter engine command streamer:
2604
	/*
2282
	 * "If ENABLED, all TLBs will be invalidated once the flush
2605
	 * Bspec vol 1c.3 - blitter engine command streamer:
Line 2283... Line -...
2283
	 * operation is complete. This bit is only valid when the
-
 
2284
	 * Post-Sync Operation field is a value of 1h or 3h."
-
 
2285
	 */
-
 
2286
	if (invalidate & I915_GEM_DOMAIN_RENDER)
-
 
2287
		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
-
 
2288
			MI_FLUSH_DW_OP_STOREDW;
-
 
2289
	intel_ring_emit(ring, cmd);
-
 
2290
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
2606
	 * "If ENABLED, all TLBs will be invalidated once the flush
2291
	if (INTEL_INFO(ring->dev)->gen >= 8) {
2607
	 * operation is complete. This bit is only valid when the
Line 2292... Line 2608...
2292
		intel_ring_emit(ring, 0); /* upper addr */
2608
	 * Post-Sync Operation field is a value of 1h or 3h."
2293
		intel_ring_emit(ring, 0); /* value */
2609
	 */
Line 2334... Line 2650...
2334
				} else
2650
				} else
2335
					dev_priv->semaphore_obj = obj;
2651
					dev_priv->semaphore_obj = obj;
2336
			}
2652
			}
2337
		}
2653
		}
Line 2338... Line 2654...
2338
 
2654
 
2339
		ring->init_context = intel_ring_workarounds_emit;
2655
		ring->init_context = intel_rcs_ctx_init;
2340
		ring->add_request = gen6_add_request;
2656
		ring->add_request = gen6_add_request;
2341
		ring->flush = gen8_render_ring_flush;
2657
		ring->flush = gen8_render_ring_flush;
2342
		ring->irq_get = gen8_ring_get_irq;
2658
		ring->irq_get = gen8_ring_get_irq;
2343
		ring->irq_put = gen8_ring_put_irq;
2659
		ring->irq_put = gen8_ring_put_irq;
Line 2349... Line 2665...
2349
			ring->semaphore.sync_to = gen8_ring_sync;
2665
			ring->semaphore.sync_to = gen8_ring_sync;
2350
			ring->semaphore.signal = gen8_rcs_signal;
2666
			ring->semaphore.signal = gen8_rcs_signal;
2351
			GEN8_RING_SEMAPHORE_INIT;
2667
			GEN8_RING_SEMAPHORE_INIT;
2352
		}
2668
		}
2353
	} else if (INTEL_INFO(dev)->gen >= 6) {
2669
	} else if (INTEL_INFO(dev)->gen >= 6) {
-
 
2670
		ring->init_context = intel_rcs_ctx_init;
2354
       ring->add_request = gen6_add_request;
2671
		ring->add_request = gen6_add_request;
2355
		ring->flush = gen7_render_ring_flush;
2672
		ring->flush = gen7_render_ring_flush;
2356
		if (INTEL_INFO(dev)->gen == 6)
2673
		if (INTEL_INFO(dev)->gen == 6)
2357
		ring->flush = gen6_render_ring_flush;
2674
			ring->flush = gen6_render_ring_flush;
2358
		ring->irq_get = gen6_ring_get_irq;
2675
		ring->irq_get = gen6_ring_get_irq;
Line 2419... Line 2736...
2419
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
2736
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
2420
	else if (IS_I830(dev) || IS_845G(dev))
2737
	else if (IS_I830(dev) || IS_845G(dev))
2421
		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
2738
		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
2422
	else
2739
	else
2423
		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
2740
		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
2424
	ring->init = init_render_ring;
2741
	ring->init_hw = init_render_ring;
2425
	ring->cleanup = render_ring_cleanup;
2742
	ring->cleanup = render_ring_cleanup;
Line 2426... Line 2743...
2426
 
2743
 
2427
	/* Workaround batchbuffer to combat CS tlb bug. */
2744
	/* Workaround batchbuffer to combat CS tlb bug. */
2428
	if (HAS_BROKEN_CS_TLB(dev)) {
2745
	if (HAS_BROKEN_CS_TLB(dev)) {
Line 2441... Line 2758...
2441
 
2758
 
2442
		ring->scratch.obj = obj;
2759
		ring->scratch.obj = obj;
2443
		ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
2760
		ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
Line 2444... Line 2761...
2444
	}
2761
	}
-
 
2762
 
-
 
2763
	ret = intel_init_ring_buffer(dev, ring);
-
 
2764
	if (ret)
-
 
2765
		return ret;
-
 
2766
 
-
 
2767
	if (INTEL_INFO(dev)->gen >= 5) {
-
 
2768
		ret = intel_init_pipe_control(ring);
-
 
2769
		if (ret)
-
 
2770
			return ret;
-
 
2771
	}
2445
 
2772
 
Line 2446... Line 2773...
2446
	return intel_init_ring_buffer(dev, ring);
2773
	return 0;
2447
}
2774
}
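intel_init_render_ring_buffer() mostly consists of picking per-generation hooks (add_request, flush, irq handlers, dispatch_execbuffer, init_hw) and storing them in the engine structure. The toy C program below shows the same function-pointer table idea; the demo_engine type, the hook names and the generation cutoff are illustrative only.

#include <stdio.h>

struct demo_engine {
	const char *name;
	int (*add_request)(struct demo_engine *);
	int (*dispatch)(struct demo_engine *, unsigned long offset);
};

static int add_request_new(struct demo_engine *e)            { printf("%s: new-style request\n", e->name); return 0; }
static int add_request_legacy(struct demo_engine *e)         { printf("%s: legacy request\n", e->name); return 0; }
static int dispatch_new(struct demo_engine *e, unsigned long o)    { printf("%s: dispatch 0x%lx (new)\n", e->name, o); return 0; }
static int dispatch_legacy(struct demo_engine *e, unsigned long o) { printf("%s: dispatch 0x%lx (legacy)\n", e->name, o); return 0; }

static void init_engine(struct demo_engine *e, int gen)
{
	e->name = "render ring";
	/* pick the hook set for this generation, as the driver does above */
	if (gen >= 6) {
		e->add_request = add_request_new;
		e->dispatch = dispatch_new;
	} else {
		e->add_request = add_request_legacy;
		e->dispatch = dispatch_legacy;
	}
}

int main(void)
{
	struct demo_engine e;

	init_engine(&e, 8);
	e.add_request(&e);
	e.dispatch(&e, 0x1000);
	return 0;
}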
2448
 
2775
 
Line 2512... Line 2839...
2512
			ring->irq_get = i9xx_ring_get_irq;
2839
			ring->irq_get = i9xx_ring_get_irq;
2513
			ring->irq_put = i9xx_ring_put_irq;
2840
			ring->irq_put = i9xx_ring_put_irq;
2514
		}
2841
		}
2515
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
2842
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
2516
	}
2843
	}
2517
	ring->init = init_ring_common;
2844
	ring->init_hw = init_ring_common;
Line 2518... Line 2845...
2518
 
2845
 
2519
	return intel_init_ring_buffer(dev, ring);
2846
	return intel_init_ring_buffer(dev, ring);
Line 2520... Line 2847...
2520
}
2847
}
2521
 
2848
 
2522
/**
-
 
2523
 * Initialize the second BSD ring for Broadwell GT3.
2849
/**
2524
 * It is noted that this only exists on Broadwell GT3.
2850
 * Initialize the second BSD ring (eg. Broadwell GT3, Skylake GT3)
2525
 */
2851
 */
2526
int intel_init_bsd2_ring_buffer(struct drm_device *dev)
2852
int intel_init_bsd2_ring_buffer(struct drm_device *dev)
2527
{
2853
{
Line 2528... Line -...
2528
	struct drm_i915_private *dev_priv = dev->dev_private;
-
 
2529
	struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
-
 
2530
 
-
 
2531
	if ((INTEL_INFO(dev)->gen != 8)) {
-
 
2532
		DRM_ERROR("No dual-BSD ring on non-BDW machine\n");
-
 
2533
		return -EINVAL;
2854
	struct drm_i915_private *dev_priv = dev->dev_private;
2534
	}
2855
	struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
Line 2535... Line 2856...
2535
 
2856
 
2536
	ring->name = "bsd2 ring";
2857
	ring->name = "bsd2 ring";
Line 2551... Line 2872...
2551
	if (i915_semaphore_is_enabled(dev)) {
2872
	if (i915_semaphore_is_enabled(dev)) {
2552
		ring->semaphore.sync_to = gen8_ring_sync;
2873
		ring->semaphore.sync_to = gen8_ring_sync;
2553
		ring->semaphore.signal = gen8_xcs_signal;
2874
		ring->semaphore.signal = gen8_xcs_signal;
2554
		GEN8_RING_SEMAPHORE_INIT;
2875
		GEN8_RING_SEMAPHORE_INIT;
2555
	}
2876
	}
2556
	ring->init = init_ring_common;
2877
	ring->init_hw = init_ring_common;
Line 2557... Line 2878...
2557
 
2878
 
2558
	return intel_init_ring_buffer(dev, ring);
2879
	return intel_init_ring_buffer(dev, ring);
Line 2559... Line 2880...
2559
}
2880
}
Line 2608... Line 2929...
2608
	ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC;
2929
			ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC;
2609
	ring->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC;
2930
			ring->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC;
2610
	ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2931
			ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2611
		}
2932
		}
2612
	}
2933
	}
2613
	ring->init = init_ring_common;
2934
	ring->init_hw = init_ring_common;
Line 2614... Line 2935...
2614
 
2935
 
2615
	return intel_init_ring_buffer(dev, ring);
2936
	return intel_init_ring_buffer(dev, ring);
Line 2616... Line 2937...
2616
}
2937
}
Line 2659... Line 2980...
2659
	ring->semaphore.mbox.signal[BCS] = GEN6_BVESYNC;
2980
			ring->semaphore.mbox.signal[BCS] = GEN6_BVESYNC;
2660
	ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC;
2981
			ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC;
2661
	ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2982
			ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2662
		}
2983
		}
2663
	}
2984
	}
2664
	ring->init = init_ring_common;
2985
	ring->init_hw = init_ring_common;
Line 2665... Line 2986...
2665
 
2986
 
2666
	return intel_init_ring_buffer(dev, ring);
2987
	return intel_init_ring_buffer(dev, ring);
Line 2667... Line 2988...
2667
}
2988
}
2668
 
2989
 
2669
int
2990
int
-
 
2991
intel_ring_flush_all_caches(struct drm_i915_gem_request *req)
2670
intel_ring_flush_all_caches(struct intel_engine_cs *ring)
2992
{
Line 2671... Line 2993...
2671
{
2993
	struct intel_engine_cs *ring = req->ring;
2672
	int ret;
2994
	int ret;
Line 2673... Line 2995...
2673
 
2995
 
2674
	if (!ring->gpu_caches_dirty)
2996
	if (!ring->gpu_caches_dirty)
2675
		return 0;
2997
		return 0;
Line 2676... Line 2998...
2676
 
2998
 
Line 2677... Line 2999...
2677
	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
2999
	ret = ring->flush(req, 0, I915_GEM_GPU_DOMAINS);
2678
	if (ret)
3000
	if (ret)
2679
		return ret;
3001
		return ret;
Line 2680... Line 3002...
2680
 
3002
 
2681
	trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);
3003
	trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS);
2682
 
3004
 
-
 
3005
	ring->gpu_caches_dirty = false;
2683
	ring->gpu_caches_dirty = false;
3006
	return 0;
2684
	return 0;
3007
}
Line 2685... Line 3008...
2685
}
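intel_ring_flush_all_caches() is guarded by the gpu_caches_dirty flag: nothing is emitted unless something marked the caches dirty, and the flag is cleared once the flush has been queued. A minimal standalone model of that pattern (demo_ring and emit_flush are invented for the example):

#include <stdbool.h>
#include <stdio.h>

struct demo_ring {
	bool gpu_caches_dirty;
};

static int emit_flush(struct demo_ring *r)
{
	(void)r;                /* a real implementation would emit flush commands here */
	printf("emitting flush\n");
	return 0;
}

static int flush_all_caches(struct demo_ring *r)
{
	int ret;

	if (!r->gpu_caches_dirty)
		return 0;       /* nothing to do */

	ret = emit_flush(r);
	if (ret)
		return ret;

	r->gpu_caches_dirty = false;
	return 0;
}

int main(void)
{
	struct demo_ring r = { .gpu_caches_dirty = true };

	flush_all_caches(&r);   /* emits */
	flush_all_caches(&r);   /* no-op */
	return 0;
}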
3008
 
2686
 
3009
int
2687
int
3010
intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
Line 2688... Line 3011...
2688
intel_ring_invalidate_all_caches(struct intel_engine_cs *ring)
3011
{
2689
{
3012
	struct intel_engine_cs *ring = req->ring;
2690
	uint32_t flush_domains;
3013
	uint32_t flush_domains;
Line 2691... Line 3014...
2691
	int ret;
3014
	int ret;
Line 2692... Line 3015...
2692
 
3015
 
2693
	flush_domains = 0;
3016
	flush_domains = 0;
2694
	if (ring->gpu_caches_dirty)
3017
	if (ring->gpu_caches_dirty)