Subversion Repositories Kolibri OS

Rev

Rev 5354 | Rev 6320 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2332 Serge 1
/*
2
 * Copyright © 2008-2010 Intel Corporation
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 * IN THE SOFTWARE.
22
 *
23
 * Authors:
24
 *    Eric Anholt 
25
 *    Zou Nan hai 
26
 *    Xiang Hai hao
27
 *
28
 */
29
 
3031 serge 30
#include 
2332 Serge 31
#include "i915_drv.h"
3031 serge 32
#include 
2351 Serge 33
#include "i915_trace.h"
2332 Serge 34
#include "intel_drv.h"
35
 
5354 serge 36
bool
37
intel_ring_initialized(struct intel_engine_cs *ring)
38
{
39
	struct drm_device *dev = ring->dev;
5060 serge 40
 
5354 serge 41
	if (!dev)
42
		return false;
43
 
44
	if (i915.enable_execlists) {
45
		struct intel_context *dctx = ring->default_context;
46
		struct intel_ringbuffer *ringbuf = dctx->engine[ring->id].ringbuf;
47
 
48
		return ringbuf->obj;
49
	} else
50
		return ring->buffer && ring->buffer->obj;
51
}
52
 
53
int __intel_ring_space(int head, int tail, int size)
2332 Serge 54
{
6084 serge 55
	int space = head - tail;
56
	if (space <= 0)
5060 serge 57
		space += size;
6084 serge 58
	return space - I915_RING_FREE_SPACE;
2332 Serge 59
}
60
 
6084 serge 61
void intel_ring_update_space(struct intel_ringbuffer *ringbuf)
62
{
63
	if (ringbuf->last_retired_head != -1) {
64
		ringbuf->head = ringbuf->last_retired_head;
65
		ringbuf->last_retired_head = -1;
66
	}
67
 
68
	ringbuf->space = __intel_ring_space(ringbuf->head & HEAD_ADDR,
69
					    ringbuf->tail, ringbuf->size);
70
}
71
 
5354 serge 72
int intel_ring_space(struct intel_ringbuffer *ringbuf)
4560 Serge 73
{
6084 serge 74
	intel_ring_update_space(ringbuf);
75
	return ringbuf->space;
5060 serge 76
}
77
 
5354 serge 78
bool intel_ring_stopped(struct intel_engine_cs *ring)
5060 serge 79
{
4560 Serge 80
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
5060 serge 81
	return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
82
}
4560 Serge 83
 
6084 serge 84
static void __intel_ring_advance(struct intel_engine_cs *ring)
5060 serge 85
{
86
	struct intel_ringbuffer *ringbuf = ring->buffer;
87
	ringbuf->tail &= ringbuf->size - 1;
88
	if (intel_ring_stopped(ring))
4560 Serge 89
		return;
5060 serge 90
	ring->write_tail(ring, ringbuf->tail);
4560 Serge 91
}
92
 
3031 serge 93
static int
6084 serge 94
gen2_render_ring_flush(struct drm_i915_gem_request *req,
3031 serge 95
		       u32	invalidate_domains,
96
		       u32	flush_domains)
2332 Serge 97
{
6084 serge 98
	struct intel_engine_cs *ring = req->ring;
3031 serge 99
	u32 cmd;
100
	int ret;
2332 Serge 101
 
3031 serge 102
	cmd = MI_FLUSH;
103
	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
104
		cmd |= MI_NO_WRITE_FLUSH;
2332 Serge 105
 
3031 serge 106
	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
107
		cmd |= MI_READ_FLUSH;
2332 Serge 108
 
6084 serge 109
	ret = intel_ring_begin(req, 2);
3031 serge 110
	if (ret)
111
		return ret;
112
 
113
	intel_ring_emit(ring, cmd);
114
	intel_ring_emit(ring, MI_NOOP);
115
	intel_ring_advance(ring);
116
 
117
	return 0;
2332 Serge 118
}
119
 
120
static int
6084 serge 121
gen4_render_ring_flush(struct drm_i915_gem_request *req,
122
		       u32	invalidate_domains,
123
		       u32	flush_domains)
2332 Serge 124
{
6084 serge 125
	struct intel_engine_cs *ring = req->ring;
2332 Serge 126
	struct drm_device *dev = ring->dev;
127
	u32 cmd;
128
	int ret;
129
 
130
	/*
131
	 * read/write caches:
132
	 *
133
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
134
	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
135
	 * also flushed at 2d versus 3d pipeline switches.
136
	 *
137
	 * read-only caches:
138
	 *
139
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
140
	 * MI_READ_FLUSH is set, and is always flushed on 965.
141
	 *
142
	 * I915_GEM_DOMAIN_COMMAND may not exist?
143
	 *
144
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
145
	 * invalidated when MI_EXE_FLUSH is set.
146
	 *
147
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
148
	 * invalidated with every MI_FLUSH.
149
	 *
150
	 * TLBs:
151
	 *
152
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
153
	 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
154
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
155
	 * are flushed at any MI_FLUSH.
156
	 */
157
 
158
	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
3031 serge 159
	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
2332 Serge 160
		cmd &= ~MI_NO_WRITE_FLUSH;
161
	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
162
		cmd |= MI_EXE_FLUSH;
163
 
164
	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
165
	    (IS_G4X(dev) || IS_GEN5(dev)))
166
		cmd |= MI_INVALIDATE_ISP;
167
 
6084 serge 168
	ret = intel_ring_begin(req, 2);
2332 Serge 169
	if (ret)
170
		return ret;
171
 
172
	intel_ring_emit(ring, cmd);
173
	intel_ring_emit(ring, MI_NOOP);
174
	intel_ring_advance(ring);
175
 
176
	return 0;
177
}
178
 
2342 Serge 179
/**
180
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
181
 * implementing two workarounds on gen6.  From section 1.4.7.1
182
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
183
 *
184
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
185
 * produced by non-pipelined state commands), software needs to first
186
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
187
 * 0.
188
 *
189
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
190
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
191
 *
192
 * And the workaround for these two requires this workaround first:
193
 *
194
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
195
 * BEFORE the pipe-control with a post-sync op and no write-cache
196
 * flushes.
197
 *
198
 * And this last workaround is tricky because of the requirements on
199
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
200
 * volume 2 part 1:
201
 *
202
 *     "1 of the following must also be set:
203
 *      - Render Target Cache Flush Enable ([12] of DW1)
204
 *      - Depth Cache Flush Enable ([0] of DW1)
205
 *      - Stall at Pixel Scoreboard ([1] of DW1)
206
 *      - Depth Stall ([13] of DW1)
207
 *      - Post-Sync Operation ([13] of DW1)
208
 *      - Notify Enable ([8] of DW1)"
209
 *
210
 * The cache flushes require the workaround flush that triggered this
211
 * one, so we can't use it.  Depth stall would trigger the same.
212
 * Post-sync nonzero is what triggered this second workaround, so we
213
 * can't use that one either.  Notify enable is IRQs, which aren't
214
 * really our business.  That leaves only stall at scoreboard.
215
 */
216
static int
6084 serge 217
intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
2342 Serge 218
{
6084 serge 219
	struct intel_engine_cs *ring = req->ring;
5060 serge 220
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
2342 Serge 221
	int ret;
222
 
6084 serge 223
	ret = intel_ring_begin(req, 6);
2342 Serge 224
	if (ret)
225
		return ret;
226
 
227
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
228
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
229
			PIPE_CONTROL_STALL_AT_SCOREBOARD);
230
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
231
	intel_ring_emit(ring, 0); /* low dword */
232
	intel_ring_emit(ring, 0); /* high dword */
233
	intel_ring_emit(ring, MI_NOOP);
234
	intel_ring_advance(ring);
235
 
6084 serge 236
	ret = intel_ring_begin(req, 6);
2342 Serge 237
	if (ret)
238
		return ret;
239
 
240
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
241
	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
242
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
243
	intel_ring_emit(ring, 0);
244
	intel_ring_emit(ring, 0);
245
	intel_ring_emit(ring, MI_NOOP);
246
	intel_ring_advance(ring);
247
 
248
	return 0;
249
}
250
 
251
static int
6084 serge 252
gen6_render_ring_flush(struct drm_i915_gem_request *req,
253
		       u32 invalidate_domains, u32 flush_domains)
2342 Serge 254
{
6084 serge 255
	struct intel_engine_cs *ring = req->ring;
2342 Serge 256
	u32 flags = 0;
5060 serge 257
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
2342 Serge 258
	int ret;
259
 
260
	/* Force SNB workarounds for PIPE_CONTROL flushes */
6084 serge 261
	ret = intel_emit_post_sync_nonzero_flush(req);
3031 serge 262
	if (ret)
263
		return ret;
2342 Serge 264
 
265
	/* Just flush everything.  Experiments have shown that reducing the
266
	 * number of bits based on the write domains has little performance
267
	 * impact.
268
	 */
3031 serge 269
	if (flush_domains) {
270
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
271
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
272
		/*
273
		 * Ensure that any following seqno writes only happen
274
		 * when the render cache is indeed flushed.
275
		 */
276
		flags |= PIPE_CONTROL_CS_STALL;
277
	}
278
	if (invalidate_domains) {
279
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
280
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
281
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
282
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
283
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
284
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
285
		/*
286
		 * TLB invalidate requires a post-sync write.
287
		 */
3243 Serge 288
		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
3031 serge 289
	}
290
 
6084 serge 291
	ret = intel_ring_begin(req, 4);
3031 serge 292
	if (ret)
293
		return ret;
294
 
295
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
296
	intel_ring_emit(ring, flags);
297
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
298
	intel_ring_emit(ring, 0);
299
	intel_ring_advance(ring);
300
 
301
	return 0;
302
}
303
 
304
static int
6084 serge 305
gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
3031 serge 306
{
6084 serge 307
	struct intel_engine_cs *ring = req->ring;
3031 serge 308
	int ret;
309
 
6084 serge 310
	ret = intel_ring_begin(req, 4);
3031 serge 311
	if (ret)
312
		return ret;
313
 
314
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
315
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
316
			      PIPE_CONTROL_STALL_AT_SCOREBOARD);
317
	intel_ring_emit(ring, 0);
318
	intel_ring_emit(ring, 0);
319
	intel_ring_advance(ring);
320
 
321
	return 0;
322
}
323
 
324
static int
6084 serge 325
gen7_render_ring_flush(struct drm_i915_gem_request *req,
3031 serge 326
		       u32 invalidate_domains, u32 flush_domains)
327
{
6084 serge 328
	struct intel_engine_cs *ring = req->ring;
3031 serge 329
	u32 flags = 0;
5060 serge 330
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
3031 serge 331
	int ret;
332
 
333
	/*
334
	 * Ensure that any following seqno writes only happen when the render
335
	 * cache is indeed flushed.
336
	 *
337
	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
338
	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
339
	 * don't try to be clever and just set it unconditionally.
340
	 */
341
	flags |= PIPE_CONTROL_CS_STALL;
342
 
343
	/* Just flush everything.  Experiments have shown that reducing the
344
	 * number of bits based on the write domains has little performance
345
	 * impact.
346
	 */
347
	if (flush_domains) {
6084 serge 348
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3031 serge 349
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
6084 serge 350
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
3031 serge 351
	}
352
	if (invalidate_domains) {
353
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
6084 serge 354
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
355
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
356
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
357
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
358
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
5354 serge 359
		flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
3031 serge 360
		/*
361
		 * TLB invalidate requires a post-sync write.
362
		 */
363
		flags |= PIPE_CONTROL_QW_WRITE;
3480 Serge 364
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
2342 Serge 365
 
5354 serge 366
		flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
367
 
3031 serge 368
		/* Workaround: we must issue a pipe_control with CS-stall bit
369
		 * set before a pipe_control command that has the state cache
370
		 * invalidate bit set. */
6084 serge 371
		gen7_render_ring_cs_stall_wa(req);
3031 serge 372
	}
373
 
6084 serge 374
	ret = intel_ring_begin(req, 4);
2342 Serge 375
	if (ret)
376
		return ret;
377
 
3031 serge 378
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
2342 Serge 379
	intel_ring_emit(ring, flags);
3480 Serge 380
	intel_ring_emit(ring, scratch_addr);
3031 serge 381
	intel_ring_emit(ring, 0);
2342 Serge 382
	intel_ring_advance(ring);
383
 
384
	return 0;
385
}
386
 
4560 Serge 387
static int
6084 serge 388
gen8_emit_pipe_control(struct drm_i915_gem_request *req,
5060 serge 389
		       u32 flags, u32 scratch_addr)
390
{
6084 serge 391
	struct intel_engine_cs *ring = req->ring;
5060 serge 392
	int ret;
393
 
6084 serge 394
	ret = intel_ring_begin(req, 6);
5060 serge 395
	if (ret)
396
		return ret;
397
 
398
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
399
	intel_ring_emit(ring, flags);
400
	intel_ring_emit(ring, scratch_addr);
401
	intel_ring_emit(ring, 0);
402
	intel_ring_emit(ring, 0);
403
	intel_ring_emit(ring, 0);
404
	intel_ring_advance(ring);
405
 
406
	return 0;
407
}
408
 
409
static int
6084 serge 410
gen8_render_ring_flush(struct drm_i915_gem_request *req,
4560 Serge 411
		       u32 invalidate_domains, u32 flush_domains)
412
{
413
	u32 flags = 0;
6084 serge 414
	u32 scratch_addr = req->ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
4560 Serge 415
	int ret;
416
 
417
	flags |= PIPE_CONTROL_CS_STALL;
418
 
419
	if (flush_domains) {
420
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
421
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
6084 serge 422
		flags |= PIPE_CONTROL_FLUSH_ENABLE;
4560 Serge 423
	}
424
	if (invalidate_domains) {
425
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
426
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
427
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
428
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
429
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
430
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
431
		flags |= PIPE_CONTROL_QW_WRITE;
432
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
433
 
5060 serge 434
		/* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
6084 serge 435
		ret = gen8_emit_pipe_control(req,
5060 serge 436
					     PIPE_CONTROL_CS_STALL |
437
					     PIPE_CONTROL_STALL_AT_SCOREBOARD,
438
					     0);
6084 serge 439
		if (ret)
440
			return ret;
5060 serge 441
	}
4560 Serge 442
 
6084 serge 443
	return gen8_emit_pipe_control(req, flags, scratch_addr);
4560 Serge 444
}
445
 
5060 serge 446
static void ring_write_tail(struct intel_engine_cs *ring,
2332 Serge 447
			    u32 value)
448
{
5060 serge 449
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
2332 Serge 450
	I915_WRITE_TAIL(ring, value);
451
}
452
 
5060 serge 453
u64 intel_ring_get_active_head(struct intel_engine_cs *ring)
2332 Serge 454
{
5060 serge 455
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
456
	u64 acthd;
2332 Serge 457
 
5060 serge 458
	if (INTEL_INFO(ring->dev)->gen >= 8)
459
		acthd = I915_READ64_2x32(RING_ACTHD(ring->mmio_base),
460
					 RING_ACTHD_UDW(ring->mmio_base));
461
	else if (INTEL_INFO(ring->dev)->gen >= 4)
462
		acthd = I915_READ(RING_ACTHD(ring->mmio_base));
463
	else
464
		acthd = I915_READ(ACTHD);
465
 
466
	return acthd;
2332 Serge 467
}
468
 
5060 serge 469
static void ring_setup_phys_status_page(struct intel_engine_cs *ring)
4104 Serge 470
{
471
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
472
	u32 addr;
473
 
474
	addr = dev_priv->status_page_dmah->busaddr;
475
	if (INTEL_INFO(ring->dev)->gen >= 4)
476
		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
477
	I915_WRITE(HWS_PGA, addr);
478
}
479
 
6084 serge 480
static void intel_ring_setup_status_page(struct intel_engine_cs *ring)
481
{
482
	struct drm_device *dev = ring->dev;
483
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
484
	u32 mmio = 0;
485
 
486
	/* The ring status page addresses are no longer next to the rest of
487
	 * the ring registers as of gen7.
488
	 */
489
	if (IS_GEN7(dev)) {
490
		switch (ring->id) {
491
		case RCS:
492
			mmio = RENDER_HWS_PGA_GEN7;
493
			break;
494
		case BCS:
495
			mmio = BLT_HWS_PGA_GEN7;
496
			break;
497
		/*
498
		 * VCS2 actually doesn't exist on Gen7. Only shut up
499
		 * gcc switch check warning
500
		 */
501
		case VCS2:
502
		case VCS:
503
			mmio = BSD_HWS_PGA_GEN7;
504
			break;
505
		case VECS:
506
			mmio = VEBOX_HWS_PGA_GEN7;
507
			break;
508
		}
509
	} else if (IS_GEN6(ring->dev)) {
510
		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
511
	} else {
512
		/* XXX: gen8 returns to sanity */
513
		mmio = RING_HWS_PGA(ring->mmio_base);
514
	}
515
 
516
	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
517
	POSTING_READ(mmio);
518
 
519
	/*
520
	 * Flush the TLB for this page
521
	 *
522
	 * FIXME: These two bits have disappeared on gen8, so a question
523
	 * arises: do we still need this and if so how should we go about
524
	 * invalidating the TLB?
525
	 */
526
	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
527
		u32 reg = RING_INSTPM(ring->mmio_base);
528
 
529
		/* ring should be idle before issuing a sync flush*/
530
		WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
531
 
532
		I915_WRITE(reg,
533
			   _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
534
					      INSTPM_SYNC_FLUSH));
535
		if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
536
			     1000))
537
			DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
538
				  ring->name);
539
	}
540
}
541
 
5060 serge 542
static bool stop_ring(struct intel_engine_cs *ring)
2332 Serge 543
{
5060 serge 544
	struct drm_i915_private *dev_priv = to_i915(ring->dev);
2332 Serge 545
 
5060 serge 546
	if (!IS_GEN2(ring->dev)) {
547
		I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
5354 serge 548
		if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
549
			DRM_ERROR("%s : timed out trying to stop ring\n", ring->name);
550
			/* Sometimes we observe that the idle flag is not
551
			 * set even though the ring is empty. So double
552
			 * check before giving up.
553
			 */
554
			if (I915_READ_HEAD(ring) != I915_READ_TAIL(ring))
6084 serge 555
				return false;
5060 serge 556
		}
557
	}
3031 serge 558
 
2332 Serge 559
	I915_WRITE_CTL(ring, 0);
560
	I915_WRITE_HEAD(ring, 0);
561
	ring->write_tail(ring, 0);
562
 
5060 serge 563
	if (!IS_GEN2(ring->dev)) {
564
		(void)I915_READ_CTL(ring);
565
		I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
566
	}
2332 Serge 567
 
5060 serge 568
	return (I915_READ_HEAD(ring) & HEAD_ADDR) == 0;
569
}
570
 
571
static int init_ring_common(struct intel_engine_cs *ring)
572
{
573
	struct drm_device *dev = ring->dev;
574
	struct drm_i915_private *dev_priv = dev->dev_private;
575
	struct intel_ringbuffer *ringbuf = ring->buffer;
576
	struct drm_i915_gem_object *obj = ringbuf->obj;
577
	int ret = 0;
578
 
6084 serge 579
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5060 serge 580
 
581
	if (!stop_ring(ring)) {
582
		/* G45 ring initialization often fails to reset head to zero */
2332 Serge 583
		DRM_DEBUG_KMS("%s head not reset to zero "
584
			      "ctl %08x head %08x tail %08x start %08x\n",
585
			      ring->name,
586
			      I915_READ_CTL(ring),
587
			      I915_READ_HEAD(ring),
588
			      I915_READ_TAIL(ring),
589
			      I915_READ_START(ring));
590
 
5060 serge 591
		if (!stop_ring(ring)) {
2332 Serge 592
			DRM_ERROR("failed to set %s head to zero "
593
				  "ctl %08x head %08x tail %08x start %08x\n",
594
				  ring->name,
595
				  I915_READ_CTL(ring),
596
				  I915_READ_HEAD(ring),
597
				  I915_READ_TAIL(ring),
598
				  I915_READ_START(ring));
5060 serge 599
			ret = -EIO;
600
			goto out;
2332 Serge 601
		}
602
	}
603
 
5060 serge 604
	if (I915_NEED_GFX_HWS(dev))
605
		intel_ring_setup_status_page(ring);
606
	else
607
		ring_setup_phys_status_page(ring);
608
 
609
	/* Enforce ordering by reading HEAD register back */
610
	I915_READ_HEAD(ring);
611
 
3031 serge 612
	/* Initialize the ring. This must happen _after_ we've cleared the ring
613
	 * registers with the above sequence (the readback of the HEAD registers
614
	 * also enforces ordering), otherwise the hw might lose the new ring
615
	 * register values. */
4104 Serge 616
	I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj));
5354 serge 617
 
618
	/* WaClearRingBufHeadRegAtInit:ctg,elk */
619
	if (I915_READ_HEAD(ring))
620
		DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
621
			  ring->name, I915_READ_HEAD(ring));
622
	I915_WRITE_HEAD(ring, 0);
623
	(void)I915_READ_HEAD(ring);
624
 
2332 Serge 625
	I915_WRITE_CTL(ring,
5060 serge 626
			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES)
3031 serge 627
			| RING_VALID);
2332 Serge 628
 
629
	/* If the head is still not zero, the ring is dead */
3031 serge 630
	if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
4104 Serge 631
		     I915_READ_START(ring) == i915_gem_obj_ggtt_offset(obj) &&
3031 serge 632
		     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
2332 Serge 633
		DRM_ERROR("%s initialization failed "
5060 serge 634
			  "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
6084 serge 635
			  ring->name,
5060 serge 636
			  I915_READ_CTL(ring), I915_READ_CTL(ring) & RING_VALID,
637
			  I915_READ_HEAD(ring), I915_READ_TAIL(ring),
638
			  I915_READ_START(ring), (unsigned long)i915_gem_obj_ggtt_offset(obj));
3031 serge 639
		ret = -EIO;
640
		goto out;
2332 Serge 641
	}
642
 
6084 serge 643
	ringbuf->last_retired_head = -1;
644
	ringbuf->head = I915_READ_HEAD(ring);
645
	ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
646
	intel_ring_update_space(ringbuf);
5060 serge 647
 
4104 Serge 648
	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
649
 
3031 serge 650
out:
6084 serge 651
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
2332 Serge 652
 
3031 serge 653
	return ret;
2332 Serge 654
}
655
 
5354 serge 656
void
657
intel_fini_pipe_control(struct intel_engine_cs *ring)
2332 Serge 658
{
5354 serge 659
	struct drm_device *dev = ring->dev;
660
 
661
	if (ring->scratch.obj == NULL)
662
		return;
663
 
664
	if (INTEL_INFO(dev)->gen >= 5) {
665
		kunmap(sg_page(ring->scratch.obj->pages->sgl));
666
		i915_gem_object_ggtt_unpin(ring->scratch.obj);
667
	}
668
 
669
	drm_gem_object_unreference(&ring->scratch.obj->base);
670
	ring->scratch.obj = NULL;
671
}
672
 
673
int
674
intel_init_pipe_control(struct intel_engine_cs *ring)
675
{
2332 Serge 676
	int ret;
677
 
6084 serge 678
	WARN_ON(ring->scratch.obj);
2332 Serge 679
 
4104 Serge 680
	ring->scratch.obj = i915_gem_alloc_object(ring->dev, 4096);
681
	if (ring->scratch.obj == NULL) {
2332 Serge 682
		DRM_ERROR("Failed to allocate seqno page\n");
683
		ret = -ENOMEM;
684
		goto err;
685
	}
686
 
5060 serge 687
	ret = i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
688
	if (ret)
689
		goto err_unref;
2332 Serge 690
 
5060 serge 691
	ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0);
2332 Serge 692
	if (ret)
693
		goto err_unref;
694
 
4104 Serge 695
	ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(ring->scratch.obj);
5354 serge 696
	ring->scratch.cpu_page = kmap(sg_page(ring->scratch.obj->pages->sgl));
4104 Serge 697
	if (ring->scratch.cpu_page == NULL) {
698
		ret = -ENOMEM;
2332 Serge 699
		goto err_unpin;
4104 Serge 700
	}
2332 Serge 701
 
3480 Serge 702
	DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
4104 Serge 703
			 ring->name, ring->scratch.gtt_offset);
2332 Serge 704
	return 0;
705
 
706
err_unpin:
5060 serge 707
	i915_gem_object_ggtt_unpin(ring->scratch.obj);
2332 Serge 708
err_unref:
4104 Serge 709
	drm_gem_object_unreference(&ring->scratch.obj->base);
2332 Serge 710
err:
711
	return ret;
712
}
713
 
6084 serge 714
static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
5354 serge 715
{
716
	int ret, i;
6084 serge 717
	struct intel_engine_cs *ring = req->ring;
5354 serge 718
	struct drm_device *dev = ring->dev;
719
	struct drm_i915_private *dev_priv = dev->dev_private;
720
	struct i915_workarounds *w = &dev_priv->workarounds;
721
 
6084 serge 722
	if (w->count == 0)
5354 serge 723
		return 0;
724
 
725
	ring->gpu_caches_dirty = true;
6084 serge 726
	ret = intel_ring_flush_all_caches(req);
5354 serge 727
	if (ret)
728
		return ret;
729
 
6084 serge 730
	ret = intel_ring_begin(req, (w->count * 2 + 2));
5354 serge 731
	if (ret)
732
		return ret;
733
 
734
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count));
735
	for (i = 0; i < w->count; i++) {
736
		intel_ring_emit(ring, w->reg[i].addr);
737
		intel_ring_emit(ring, w->reg[i].value);
738
	}
739
	intel_ring_emit(ring, MI_NOOP);
740
 
741
	intel_ring_advance(ring);
742
 
743
	ring->gpu_caches_dirty = true;
6084 serge 744
	ret = intel_ring_flush_all_caches(req);
5354 serge 745
	if (ret)
746
		return ret;
747
 
748
	DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);
749
 
750
	return 0;
751
}
752
 
6084 serge 753
/*
 * Context init for the render engine: emit the register workarounds, then
 * initialise the render state.  A render-state failure is logged.
 * Returns 0 on success or the error of whichever step failed.
 */
static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
{
	int err = intel_ring_workarounds_emit(req);

	if (err != 0)
		return err;

	err = i915_gem_render_state_init(req);
	if (err)
		DRM_ERROR("init render state: %d\n", err);

	return err;
}
767
 
5354 serge 768
static int wa_add(struct drm_i915_private *dev_priv,
769
		  const u32 addr, const u32 mask, const u32 val)
770
{
771
	const u32 idx = dev_priv->workarounds.count;
772
 
773
	if (WARN_ON(idx >= I915_MAX_WA_REGS))
774
		return -ENOSPC;
775
 
776
	dev_priv->workarounds.reg[idx].addr = addr;
777
	dev_priv->workarounds.reg[idx].value = val;
778
	dev_priv->workarounds.reg[idx].mask = mask;
779
 
780
	dev_priv->workarounds.count++;
781
 
782
	return 0;
783
}
784
 
6084 serge 785
/*
 * Workaround-recording helpers.
 *
 * WA_REG() records one entry through wa_add().  It expects a variable named
 * dev_priv in the calling scope and, if wa_add() fails, returns that error
 * from the *enclosing function*.
 */
#define WA_REG(addr, mask, val) do { \
		const int wa_ret = wa_add(dev_priv, (addr), (mask), (val)); \
		if (wa_ret) \
			return wa_ret; \
	} while (0)

/* Masked-register variants: value is built with the _MASKED_* helpers. */
#define WA_SET_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
	WA_REG(addr, mask, _MASKED_FIELD(mask, value))

/*
 * Read-modify variants: the recorded value is derived from the register
 * contents read via I915_READ() at the time the workaround is recorded.
 */
#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask))
#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask))

/* Record a full 32-bit write of val. */
#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)
804
 
6084 serge 805
static int gen8_init_workarounds(struct intel_engine_cs *ring)
5354 serge 806
{
807
	struct drm_device *dev = ring->dev;
808
	struct drm_i915_private *dev_priv = dev->dev_private;
809
 
6084 serge 810
	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
5354 serge 811
 
6084 serge 812
	/* WaDisableAsyncFlipPerfMode:bdw,chv */
813
	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
5354 serge 814
 
6084 serge 815
	/* WaDisablePartialInstShootdown:bdw,chv */
816
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
817
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
5354 serge 818
 
819
	/* Use Force Non-Coherent whenever executing a 3D context. This is a
820
	 * workaround for for a possible hang in the unlikely event a TLB
821
	 * invalidation occurs during a PSD flush.
822
	 */
6084 serge 823
	/* WaForceEnableNonCoherent:bdw,chv */
824
	/* WaHdcDisableFetchWhenMasked:bdw,chv */
5354 serge 825
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
6084 serge 826
			  HDC_DONOT_FETCH_MEM_WHEN_MASKED |
827
			  HDC_FORCE_NON_COHERENT);
5354 serge 828
 
6084 serge 829
	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
830
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
831
	 *  polygons in the same 8x4 pixel/sample area to be processed without
832
	 *  stalling waiting for the earlier ones to write to Hierarchical Z
833
	 *  buffer."
834
	 *
835
	 * This optimization is off by default for BDW and CHV; turn it on.
836
	 */
837
	WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
5354 serge 838
 
6084 serge 839
	/* Wa4x4STCOptimizationDisable:bdw,chv */
840
	WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
841
 
5354 serge 842
	/*
843
	 * BSpec recommends 8x4 when MSAA is used,
844
	 * however in practice 16x4 seems fastest.
845
	 *
846
	 * Note that PS/WM thread counts depend on the WIZ hashing
847
	 * disable bit, which we don't touch here, but it's good
848
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
849
	 */
850
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
851
			    GEN6_WIZ_HASHING_MASK,
852
			    GEN6_WIZ_HASHING_16x4);
853
 
854
	return 0;
855
}
856
 
6084 serge 857
static int bdw_init_workarounds(struct intel_engine_cs *ring)
858
{
859
	int ret;
860
	struct drm_device *dev = ring->dev;
861
	struct drm_i915_private *dev_priv = dev->dev_private;
862
 
863
	ret = gen8_init_workarounds(ring);
864
	if (ret)
865
		return ret;
866
 
867
	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
868
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
869
 
870
	/* WaDisableDopClockGating:bdw */
871
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
872
			  DOP_CLOCK_GATING_DISABLE);
873
 
874
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
875
			  GEN8_SAMPLER_POWER_BYPASS_DIS);
876
 
877
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
878
			  /* WaForceContextSaveRestoreNonCoherent:bdw */
879
			  HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
880
			  /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
881
			  (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
882
 
883
	return 0;
884
}
885
 
5354 serge 886
static int chv_init_workarounds(struct intel_engine_cs *ring)
887
{
6084 serge 888
	int ret;
5354 serge 889
	struct drm_device *dev = ring->dev;
890
	struct drm_i915_private *dev_priv = dev->dev_private;
891
 
6084 serge 892
	ret = gen8_init_workarounds(ring);
893
	if (ret)
894
		return ret;
895
 
5354 serge 896
	/* WaDisableThreadStallDopClockGating:chv */
6084 serge 897
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
898
 
899
	/* Improve HiZ throughput on CHV. */
900
	WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
901
 
902
	return 0;
903
}
904
 
905
static int gen9_init_workarounds(struct intel_engine_cs *ring)
906
{
907
	struct drm_device *dev = ring->dev;
908
	struct drm_i915_private *dev_priv = dev->dev_private;
909
	uint32_t tmp;
910
 
911
	/* WaEnableLbsSlaRetryTimerDecrement:skl */
912
	I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
913
		   GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
914
 
915
	/* WaDisableKillLogic:bxt,skl */
916
	I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
917
		   ECOCHK_DIS_TLB);
918
 
919
	/* WaDisablePartialInstShootdown:skl,bxt */
5354 serge 920
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
6084 serge 921
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
5354 serge 922
 
6084 serge 923
	/* Syncing dependencies between camera and graphics:skl,bxt */
924
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
925
			  GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
926
 
927
	if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) == SKL_REVID_A0 ||
928
	    INTEL_REVID(dev) == SKL_REVID_B0)) ||
929
	    (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) {
930
		/* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */
931
		WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
932
				  GEN9_DG_MIRROR_FIX_ENABLE);
933
	}
934
 
935
	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) ||
936
	    (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) {
937
		/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
938
		WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
939
				  GEN9_RHWO_OPTIMIZATION_DISABLE);
940
		/*
941
		 * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set
942
		 * but we do that in per ctx batchbuffer as there is an issue
943
		 * with this register not getting restored on ctx restore
944
		 */
945
	}
946
 
947
	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) >= SKL_REVID_C0) ||
948
	    IS_BROXTON(dev)) {
949
		/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
950
		WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
951
				  GEN9_ENABLE_YV12_BUGFIX);
952
	}
953
 
954
	/* Wa4x4STCOptimizationDisable:skl,bxt */
955
	/* WaDisablePartialResolveInVc:skl,bxt */
956
	WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
957
					 GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
958
 
959
	/* WaCcsTlbPrefetchDisable:skl,bxt */
960
	WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
961
			  GEN9_CCS_TLB_PREFETCH_ENABLE);
962
 
963
	/* WaDisableMaskBasedCammingInRCC:skl,bxt */
964
	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_C0) ||
965
	    (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0))
966
		WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
967
				  PIXEL_MASK_CAMMING_DISABLE);
968
 
969
	/* WaForceContextSaveRestoreNonCoherent:skl,bxt */
970
	tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT;
971
	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_F0) ||
972
	    (IS_BROXTON(dev) && INTEL_REVID(dev) >= BXT_REVID_B0))
973
		tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE;
974
	WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp);
975
 
976
	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt */
977
	if (IS_SKYLAKE(dev) ||
978
	    (IS_BROXTON(dev) && INTEL_REVID(dev) <= BXT_REVID_B0)) {
979
		WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
980
				  GEN8_SAMPLER_POWER_BYPASS_DIS);
981
	}
982
 
983
	/* WaDisableSTUnitPowerOptimization:skl,bxt */
984
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
985
 
986
	return 0;
987
}
988
 
989
static int skl_tune_iz_hashing(struct intel_engine_cs *ring)
990
{
991
	struct drm_device *dev = ring->dev;
992
	struct drm_i915_private *dev_priv = dev->dev_private;
993
	u8 vals[3] = { 0, 0, 0 };
994
	unsigned int i;
995
 
996
	for (i = 0; i < 3; i++) {
997
		u8 ss;
998
 
999
		/*
1000
		 * Only consider slices where one, and only one, subslice has 7
1001
		 * EUs
1002
		 */
1003
		if (hweight8(dev_priv->info.subslice_7eu[i]) != 1)
1004
			continue;
1005
 
1006
		/*
1007
		 * subslice_7eu[i] != 0 (because of the check above) and
1008
		 * ss_max == 4 (maximum number of subslices possible per slice)
1009
		 *
1010
		 * ->    0 <= ss <= 3;
1011
		 */
1012
		ss = ffs(dev_priv->info.subslice_7eu[i]) - 1;
1013
		vals[i] = 3 - ss;
1014
	}
1015
 
1016
	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
1017
		return 0;
1018
 
1019
	/* Tune IZ hashing. See intel_device_info_runtime_init() */
1020
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
1021
			    GEN9_IZ_HASHING_MASK(2) |
1022
			    GEN9_IZ_HASHING_MASK(1) |
1023
			    GEN9_IZ_HASHING_MASK(0),
1024
			    GEN9_IZ_HASHING(2, vals[2]) |
1025
			    GEN9_IZ_HASHING(1, vals[1]) |
1026
			    GEN9_IZ_HASHING(0, vals[0]));
1027
 
1028
	return 0;
1029
}
1030
 
1031
static int skl_init_workarounds(struct intel_engine_cs *ring)
1032
{
1033
	int ret;
1034
	struct drm_device *dev = ring->dev;
1035
	struct drm_i915_private *dev_priv = dev->dev_private;
1036
 
1037
	ret = gen9_init_workarounds(ring);
1038
	if (ret)
1039
		return ret;
1040
 
1041
	if (INTEL_REVID(dev) <= SKL_REVID_D0) {
1042
		/* WaDisableHDCInvalidation:skl */
1043
		I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
1044
			   BDW_DISABLE_HDC_INVALIDATION);
1045
 
1046
		/* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
1047
		I915_WRITE(FF_SLICE_CS_CHICKEN2,
1048
			   _MASKED_BIT_ENABLE(GEN9_TSG_BARRIER_ACK_DISABLE));
1049
	}
1050
 
1051
	/* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes
1052
	 * involving this register should also be added to WA batch as required.
5354 serge 1053
	 */
6084 serge 1054
	if (INTEL_REVID(dev) <= SKL_REVID_E0)
1055
		/* WaDisableLSQCROPERFforOCL:skl */
1056
		I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
1057
			   GEN8_LQSC_RO_PERF_DIS);
5354 serge 1058
 
6084 serge 1059
	/* WaEnableGapsTsvCreditFix:skl */
1060
	if (IS_SKYLAKE(dev) && (INTEL_REVID(dev) >= SKL_REVID_C0)) {
1061
		I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
1062
					   GEN9_GAPS_TSV_CREDIT_DISABLE));
1063
	}
1064
 
1065
	/* WaDisablePowerCompilerClockGating:skl */
1066
	if (INTEL_REVID(dev) == SKL_REVID_B0)
1067
		WA_SET_BIT_MASKED(HIZ_CHICKEN,
1068
				  BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
1069
 
1070
	if (INTEL_REVID(dev) <= SKL_REVID_D0) {
1071
		/*
1072
		 *Use Force Non-Coherent whenever executing a 3D context. This
1073
		 * is a workaround for a possible hang in the unlikely event
1074
		 * a TLB invalidation occurs during a PSD flush.
1075
		 */
1076
		/* WaForceEnableNonCoherent:skl */
1077
		WA_SET_BIT_MASKED(HDC_CHICKEN0,
1078
				  HDC_FORCE_NON_COHERENT);
1079
	}
1080
 
1081
	if (INTEL_REVID(dev) == SKL_REVID_C0 ||
1082
	    INTEL_REVID(dev) == SKL_REVID_D0)
1083
		/* WaBarrierPerformanceFixDisable:skl */
1084
		WA_SET_BIT_MASKED(HDC_CHICKEN0,
1085
				  HDC_FENCE_DEST_SLM_DISABLE |
1086
				  HDC_BARRIER_PERFORMANCE_DISABLE);
1087
 
1088
	/* WaDisableSbeCacheDispatchPortSharing:skl */
1089
	if (INTEL_REVID(dev) <= SKL_REVID_F0) {
1090
		WA_SET_BIT_MASKED(
1091
			GEN7_HALF_SLICE_CHICKEN1,
1092
			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1093
	}
1094
 
1095
	return skl_tune_iz_hashing(ring);
1096
}
1097
 
1098
static int bxt_init_workarounds(struct intel_engine_cs *ring)
1099
{
1100
	int ret;
1101
	struct drm_device *dev = ring->dev;
1102
	struct drm_i915_private *dev_priv = dev->dev_private;
1103
 
1104
	ret = gen9_init_workarounds(ring);
1105
	if (ret)
1106
		return ret;
1107
 
1108
	/* WaStoreMultiplePTEenable:bxt */
1109
	/* This is a requirement according to Hardware specification */
1110
	if (INTEL_REVID(dev) == BXT_REVID_A0)
1111
		I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
1112
 
1113
	/* WaSetClckGatingDisableMedia:bxt */
1114
	if (INTEL_REVID(dev) == BXT_REVID_A0) {
1115
		I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
1116
					    ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
1117
	}
1118
 
1119
	/* WaDisableThreadStallDopClockGating:bxt */
1120
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
1121
			  STALL_DOP_GATING_DISABLE);
1122
 
1123
	/* WaDisableSbeCacheDispatchPortSharing:bxt */
1124
	if (INTEL_REVID(dev) <= BXT_REVID_B0) {
1125
		WA_SET_BIT_MASKED(
1126
			GEN7_HALF_SLICE_CHICKEN1,
1127
			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
1128
	}
1129
 
5354 serge 1130
	return 0;
1131
}
1132
 
1133
int init_workarounds_ring(struct intel_engine_cs *ring)
1134
{
1135
	struct drm_device *dev = ring->dev;
1136
	struct drm_i915_private *dev_priv = dev->dev_private;
1137
 
1138
	WARN_ON(ring->id != RCS);
1139
 
1140
	dev_priv->workarounds.count = 0;
1141
 
1142
	if (IS_BROADWELL(dev))
1143
		return bdw_init_workarounds(ring);
1144
 
1145
	if (IS_CHERRYVIEW(dev))
1146
		return chv_init_workarounds(ring);
1147
 
6084 serge 1148
	if (IS_SKYLAKE(dev))
1149
		return skl_init_workarounds(ring);
1150
 
1151
	if (IS_BROXTON(dev))
1152
		return bxt_init_workarounds(ring);
1153
 
5354 serge 1154
	return 0;
1155
}
1156
 
5060 serge 1157
static int init_render_ring(struct intel_engine_cs *ring)
2332 Serge 1158
{
1159
	struct drm_device *dev = ring->dev;
1160
	struct drm_i915_private *dev_priv = dev->dev_private;
1161
	int ret = init_ring_common(ring);
5060 serge 1162
	if (ret)
1163
		return ret;
2332 Serge 1164
 
5060 serge 1165
	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
1166
	if (INTEL_INFO(dev)->gen >= 4 && INTEL_INFO(dev)->gen < 7)
3031 serge 1167
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
3243 Serge 1168
 
1169
	/* We need to disable the AsyncFlip performance optimisations in order
1170
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
1171
	 * programmed to '1' on all products.
4104 Serge 1172
	 *
6084 serge 1173
	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
3243 Serge 1174
	 */
6084 serge 1175
	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
3243 Serge 1176
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
1177
 
1178
	/* Required for the hardware to program scanline values for waiting */
5060 serge 1179
	/* WaEnableFlushTlbInvalidationMode:snb */
3243 Serge 1180
	if (INTEL_INFO(dev)->gen == 6)
1181
		I915_WRITE(GFX_MODE,
5060 serge 1182
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
3243 Serge 1183
 
5060 serge 1184
	/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
6084 serge 1185
	if (IS_GEN7(dev))
1186
		I915_WRITE(GFX_MODE_GEN7,
5060 serge 1187
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
6084 serge 1188
			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
2332 Serge 1189
 
3031 serge 1190
	if (IS_GEN6(dev)) {
1191
		/* From the Sandybridge PRM, volume 1 part 3, page 24:
1192
		 * "If this bit is set, STCunit will have LRA as replacement
1193
		 *  policy. [...] This bit must be reset.  LRA replacement
1194
		 *  policy is not supported."
1195
		 */
1196
		I915_WRITE(CACHE_MODE_0,
1197
			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
2342 Serge 1198
	}
1199
 
6084 serge 1200
	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
3031 serge 1201
		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
1202
 
4560 Serge 1203
	if (HAS_L3_DPF(dev))
1204
		I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
3031 serge 1205
 
5354 serge 1206
	return init_workarounds_ring(ring);
2332 Serge 1207
}
1208
 
5060 serge 1209
static void render_ring_cleanup(struct intel_engine_cs *ring)
2332 Serge 1210
{
3480 Serge 1211
	struct drm_device *dev = ring->dev;
5128 serge 1212
	struct drm_i915_private *dev_priv = dev->dev_private;
3480 Serge 1213
 
5128 serge 1214
	if (dev_priv->semaphore_obj) {
1215
		i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
1216
		drm_gem_object_unreference(&dev_priv->semaphore_obj->base);
1217
		dev_priv->semaphore_obj = NULL;
1218
	}
1219
 
5354 serge 1220
	intel_fini_pipe_control(ring);
2332 Serge 1221
}
1222
 
6084 serge 1223
static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
5060 serge 1224
			   unsigned int num_dwords)
2332 Serge 1225
{
5060 serge 1226
#define MBOX_UPDATE_DWORDS 8
6084 serge 1227
	struct intel_engine_cs *signaller = signaller_req->ring;
5060 serge 1228
	struct drm_device *dev = signaller->dev;
1229
	struct drm_i915_private *dev_priv = dev->dev_private;
1230
	struct intel_engine_cs *waiter;
1231
	int i, ret, num_rings;
1232
 
1233
	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
1234
	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
1235
#undef MBOX_UPDATE_DWORDS
1236
 
6084 serge 1237
	ret = intel_ring_begin(signaller_req, num_dwords);
5060 serge 1238
	if (ret)
1239
		return ret;
1240
 
1241
	for_each_ring(waiter, dev_priv, i) {
6084 serge 1242
		u32 seqno;
5060 serge 1243
		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
1244
		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
1245
			continue;
1246
 
6084 serge 1247
		seqno = i915_gem_request_get_seqno(signaller_req);
5060 serge 1248
		intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
1249
		intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
1250
					   PIPE_CONTROL_QW_WRITE |
1251
					   PIPE_CONTROL_FLUSH_ENABLE);
1252
		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
1253
		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
6084 serge 1254
		intel_ring_emit(signaller, seqno);
5060 serge 1255
		intel_ring_emit(signaller, 0);
1256
		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
1257
					   MI_SEMAPHORE_TARGET(waiter->id));
1258
		intel_ring_emit(signaller, 0);
1259
	}
1260
 
1261
	return 0;
2332 Serge 1262
}
1263
 
6084 serge 1264
static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
5060 serge 1265
			   unsigned int num_dwords)
1266
{
1267
#define MBOX_UPDATE_DWORDS 6
6084 serge 1268
	struct intel_engine_cs *signaller = signaller_req->ring;
5060 serge 1269
	struct drm_device *dev = signaller->dev;
1270
	struct drm_i915_private *dev_priv = dev->dev_private;
1271
	struct intel_engine_cs *waiter;
1272
	int i, ret, num_rings;
1273
 
1274
	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
1275
	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
1276
#undef MBOX_UPDATE_DWORDS
1277
 
6084 serge 1278
	ret = intel_ring_begin(signaller_req, num_dwords);
5060 serge 1279
	if (ret)
1280
		return ret;
1281
 
1282
	for_each_ring(waiter, dev_priv, i) {
6084 serge 1283
		u32 seqno;
5060 serge 1284
		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
1285
		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
1286
			continue;
1287
 
6084 serge 1288
		seqno = i915_gem_request_get_seqno(signaller_req);
5060 serge 1289
		intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
1290
					   MI_FLUSH_DW_OP_STOREDW);
1291
		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
1292
					   MI_FLUSH_DW_USE_GTT);
1293
		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
6084 serge 1294
		intel_ring_emit(signaller, seqno);
5060 serge 1295
		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
1296
					   MI_SEMAPHORE_TARGET(waiter->id));
1297
		intel_ring_emit(signaller, 0);
1298
	}
1299
 
1300
	return 0;
1301
}
1302
 
6084 serge 1303
static int gen6_signal(struct drm_i915_gem_request *signaller_req,
5060 serge 1304
		       unsigned int num_dwords)
1305
{
6084 serge 1306
	struct intel_engine_cs *signaller = signaller_req->ring;
5060 serge 1307
	struct drm_device *dev = signaller->dev;
1308
	struct drm_i915_private *dev_priv = dev->dev_private;
1309
	struct intel_engine_cs *useless;
1310
	int i, ret, num_rings;
1311
 
1312
#define MBOX_UPDATE_DWORDS 3
1313
	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
1314
	num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
1315
#undef MBOX_UPDATE_DWORDS
1316
 
6084 serge 1317
	ret = intel_ring_begin(signaller_req, num_dwords);
5060 serge 1318
	if (ret)
1319
		return ret;
1320
 
1321
	for_each_ring(useless, dev_priv, i) {
1322
		u32 mbox_reg = signaller->semaphore.mbox.signal[i];
1323
		if (mbox_reg != GEN6_NOSYNC) {
6084 serge 1324
			u32 seqno = i915_gem_request_get_seqno(signaller_req);
5060 serge 1325
			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
1326
			intel_ring_emit(signaller, mbox_reg);
6084 serge 1327
			intel_ring_emit(signaller, seqno);
5060 serge 1328
		}
1329
	}
1330
 
1331
	/* If num_dwords was rounded, make sure the tail pointer is correct */
1332
	if (num_rings % 2 == 0)
1333
		intel_ring_emit(signaller, MI_NOOP);
1334
 
1335
	return 0;
1336
}
1337
 
2342 Serge 1338
/**
1339
 * gen6_add_request - Update the semaphore mailbox registers
1340
 *
6084 serge 1341
 * @request - request to write to the ring
2342 Serge 1342
 *
1343
 * Update the mailbox registers in the *other* rings with the current seqno.
1344
 * This acts like a signal in the canonical semaphore.
1345
 */
2332 Serge 1346
static int
6084 serge 1347
gen6_add_request(struct drm_i915_gem_request *req)
2332 Serge 1348
{
6084 serge 1349
	struct intel_engine_cs *ring = req->ring;
5060 serge 1350
	int ret;
2332 Serge 1351
 
5060 serge 1352
	if (ring->semaphore.signal)
6084 serge 1353
		ret = ring->semaphore.signal(req, 4);
5060 serge 1354
	else
6084 serge 1355
		ret = intel_ring_begin(req, 4);
4560 Serge 1356
 
2332 Serge 1357
	if (ret)
1358
		return ret;
1359
 
1360
	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
1361
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
6084 serge 1362
	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
2332 Serge 1363
	intel_ring_emit(ring, MI_USER_INTERRUPT);
4560 Serge 1364
	__intel_ring_advance(ring);
2332 Serge 1365
 
1366
	return 0;
1367
}
1368
 
3480 Serge 1369
static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
1370
					      u32 seqno)
1371
{
1372
	struct drm_i915_private *dev_priv = dev->dev_private;
1373
	return dev_priv->last_seqno < seqno;
1374
}
1375
 
2342 Serge 1376
/**
1377
 * intel_ring_sync - sync the waiter to the signaller on seqno
1378
 *
1379
 * @waiter - ring that is waiting
1380
 * @signaller - ring which has, or will signal
1381
 * @seqno - seqno which the waiter will block on
1382
 */
5060 serge 1383
 
2342 Serge 1384
static int
6084 serge 1385
gen8_ring_sync(struct drm_i915_gem_request *waiter_req,
5060 serge 1386
	       struct intel_engine_cs *signaller,
1387
	       u32 seqno)
1388
{
6084 serge 1389
	struct intel_engine_cs *waiter = waiter_req->ring;
5060 serge 1390
	struct drm_i915_private *dev_priv = waiter->dev->dev_private;
1391
	int ret;
1392
 
6084 serge 1393
	ret = intel_ring_begin(waiter_req, 4);
5060 serge 1394
	if (ret)
1395
		return ret;
1396
 
1397
	intel_ring_emit(waiter, MI_SEMAPHORE_WAIT |
1398
				MI_SEMAPHORE_GLOBAL_GTT |
1399
				MI_SEMAPHORE_POLL |
1400
				MI_SEMAPHORE_SAD_GTE_SDD);
1401
	intel_ring_emit(waiter, seqno);
1402
	intel_ring_emit(waiter,
1403
			lower_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
1404
	intel_ring_emit(waiter,
1405
			upper_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
1406
	intel_ring_advance(waiter);
1407
	return 0;
1408
}
1409
 
1410
static int
6084 serge 1411
gen6_ring_sync(struct drm_i915_gem_request *waiter_req,
5060 serge 1412
	       struct intel_engine_cs *signaller,
6084 serge 1413
	       u32 seqno)
2332 Serge 1414
{
6084 serge 1415
	struct intel_engine_cs *waiter = waiter_req->ring;
2342 Serge 1416
	u32 dw1 = MI_SEMAPHORE_MBOX |
1417
		  MI_SEMAPHORE_COMPARE |
1418
		  MI_SEMAPHORE_REGISTER;
5060 serge 1419
	u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id];
1420
	int ret;
2332 Serge 1421
 
3031 serge 1422
	/* Throughout all of the GEM code, seqno passed implies our current
1423
	 * seqno is >= the last seqno executed. However for hardware the
1424
	 * comparison is strictly greater than.
1425
	 */
1426
	seqno -= 1;
1427
 
5060 serge 1428
	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
3031 serge 1429
 
6084 serge 1430
	ret = intel_ring_begin(waiter_req, 4);
2332 Serge 1431
	if (ret)
1432
		return ret;
1433
 
3480 Serge 1434
	/* If seqno wrap happened, omit the wait with no-ops */
1435
	if (likely(!i915_gem_has_seqno_wrapped(waiter->dev, seqno))) {
5060 serge 1436
		intel_ring_emit(waiter, dw1 | wait_mbox);
6084 serge 1437
		intel_ring_emit(waiter, seqno);
1438
		intel_ring_emit(waiter, 0);
1439
		intel_ring_emit(waiter, MI_NOOP);
3480 Serge 1440
	} else {
1441
		intel_ring_emit(waiter, MI_NOOP);
1442
		intel_ring_emit(waiter, MI_NOOP);
1443
		intel_ring_emit(waiter, MI_NOOP);
1444
		intel_ring_emit(waiter, MI_NOOP);
1445
	}
2342 Serge 1446
	intel_ring_advance(waiter);
2332 Serge 1447
 
1448
	return 0;
1449
}
1450
 
1451
/*
 * Emit a four-dword PIPE_CONTROL on ring__: a depth-stalling qword write to
 * the global-GTT address addr__, with both data dwords zero.
 */
#define PIPE_CONTROL_FLUSH(ring__, addr__)				\
do {									\
	intel_ring_emit(ring__,						\
			GFX_OP_PIPE_CONTROL(4) |			\
			PIPE_CONTROL_QW_WRITE |				\
			PIPE_CONTROL_DEPTH_STALL);			\
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);	\
	intel_ring_emit(ring__, 0);					\
	intel_ring_emit(ring__, 0);					\
} while (0)
1459
 
1460
static int
6084 serge 1461
pc_render_add_request(struct drm_i915_gem_request *req)
2332 Serge 1462
{
6084 serge 1463
	struct intel_engine_cs *ring = req->ring;
5060 serge 1464
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
2332 Serge 1465
	int ret;
1466
 
1467
	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
1468
	 * incoherent with writes to memory, i.e. completely fubar,
1469
	 * so we need to use PIPE_NOTIFY instead.
1470
	 *
1471
	 * However, we also need to workaround the qword write
1472
	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
1473
	 * memory before requesting an interrupt.
1474
	 */
6084 serge 1475
	ret = intel_ring_begin(req, 32);
2332 Serge 1476
	if (ret)
1477
		return ret;
1478
 
2342 Serge 1479
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
1480
			PIPE_CONTROL_WRITE_FLUSH |
1481
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
4104 Serge 1482
	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
6084 serge 1483
	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
2332 Serge 1484
	intel_ring_emit(ring, 0);
1485
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
5060 serge 1486
	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
2332 Serge 1487
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
5060 serge 1488
	scratch_addr += 2 * CACHELINE_BYTES;
2332 Serge 1489
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
5060 serge 1490
	scratch_addr += 2 * CACHELINE_BYTES;
2332 Serge 1491
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
5060 serge 1492
	scratch_addr += 2 * CACHELINE_BYTES;
2332 Serge 1493
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
5060 serge 1494
	scratch_addr += 2 * CACHELINE_BYTES;
2332 Serge 1495
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
3031 serge 1496
 
2342 Serge 1497
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
1498
			PIPE_CONTROL_WRITE_FLUSH |
1499
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
2332 Serge 1500
			PIPE_CONTROL_NOTIFY);
4104 Serge 1501
	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
6084 serge 1502
	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
2332 Serge 1503
	intel_ring_emit(ring, 0);
4560 Serge 1504
	__intel_ring_advance(ring);
2332 Serge 1505
 
1506
	return 0;
1507
}
1508
 
1509
static u32
5060 serge 1510
gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
2342 Serge 1511
{
1512
	/* Workaround to force correct ordering between irq and seqno writes on
1513
	 * ivb (and maybe also on snb) by reading from a CS register (like
1514
	 * ACTHD) before reading the status page. */
5060 serge 1515
	if (!lazy_coherency) {
1516
		struct drm_i915_private *dev_priv = ring->dev->dev_private;
1517
		POSTING_READ(RING_ACTHD(ring->mmio_base));
1518
	}
1519
 
2342 Serge 1520
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
1521
}
1522
 
1523
static u32
5060 serge 1524
ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
2332 Serge 1525
{
1526
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
1527
}
1528
 
3480 Serge 1529
static void
5060 serge 1530
ring_set_seqno(struct intel_engine_cs *ring, u32 seqno)
3480 Serge 1531
{
1532
	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
1533
}
1534
 
2332 Serge 1535
static u32
5060 serge 1536
pc_render_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
2332 Serge 1537
{
4104 Serge 1538
	return ring->scratch.cpu_page[0];
2332 Serge 1539
}
1540
 
3480 Serge 1541
static void
5060 serge 1542
pc_render_set_seqno(struct intel_engine_cs *ring, u32 seqno)
3480 Serge 1543
{
4104 Serge 1544
	ring->scratch.cpu_page[0] = seqno;
3480 Serge 1545
}
1546
 
3031 serge 1547
static bool
5060 serge 1548
gen5_ring_get_irq(struct intel_engine_cs *ring)
2332 Serge 1549
{
3031 serge 1550
	struct drm_device *dev = ring->dev;
5060 serge 1551
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 1552
	unsigned long flags;
1553
 
5354 serge 1554
	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
3031 serge 1555
		return false;
1556
 
1557
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
4104 Serge 1558
	if (ring->irq_refcount++ == 0)
5060 serge 1559
		gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
3031 serge 1560
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1561
 
1562
	return true;
2332 Serge 1563
}
1564
 
1565
static void
5060 serge 1566
gen5_ring_put_irq(struct intel_engine_cs *ring)
2332 Serge 1567
{
3031 serge 1568
	struct drm_device *dev = ring->dev;
5060 serge 1569
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 1570
	unsigned long flags;
1571
 
1572
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
4104 Serge 1573
	if (--ring->irq_refcount == 0)
5060 serge 1574
		gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
3031 serge 1575
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
2332 Serge 1576
}
1577
 
3031 serge 1578
static bool
5060 serge 1579
i9xx_ring_get_irq(struct intel_engine_cs *ring)
2332 Serge 1580
{
3031 serge 1581
	struct drm_device *dev = ring->dev;
5060 serge 1582
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 1583
	unsigned long flags;
1584
 
5354 serge 1585
	if (!intel_irqs_enabled(dev_priv))
3031 serge 1586
		return false;
1587
 
1588
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1589
	if (ring->irq_refcount++ == 0) {
1590
		dev_priv->irq_mask &= ~ring->irq_enable_mask;
6084 serge 1591
		I915_WRITE(IMR, dev_priv->irq_mask);
1592
		POSTING_READ(IMR);
3031 serge 1593
	}
1594
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1595
 
1596
	return true;
2332 Serge 1597
}
1598
 
1599
static void
5060 serge 1600
i9xx_ring_put_irq(struct intel_engine_cs *ring)
2332 Serge 1601
{
3031 serge 1602
	struct drm_device *dev = ring->dev;
5060 serge 1603
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 1604
	unsigned long flags;
1605
 
1606
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1607
	if (--ring->irq_refcount == 0) {
1608
		dev_priv->irq_mask |= ring->irq_enable_mask;
6084 serge 1609
		I915_WRITE(IMR, dev_priv->irq_mask);
1610
		POSTING_READ(IMR);
3031 serge 1611
	}
1612
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
2332 Serge 1613
}
1614
 
1615
static bool
5060 serge 1616
i8xx_ring_get_irq(struct intel_engine_cs *ring)
2332 Serge 1617
{
1618
	struct drm_device *dev = ring->dev;
5060 serge 1619
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 1620
	unsigned long flags;
2332 Serge 1621
 
5354 serge 1622
	if (!intel_irqs_enabled(dev_priv))
2332 Serge 1623
		return false;
1624
 
3031 serge 1625
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
2332 Serge 1626
	if (ring->irq_refcount++ == 0) {
3031 serge 1627
		dev_priv->irq_mask &= ~ring->irq_enable_mask;
1628
		I915_WRITE16(IMR, dev_priv->irq_mask);
1629
		POSTING_READ16(IMR);
2332 Serge 1630
	}
3031 serge 1631
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
2332 Serge 1632
 
1633
	return true;
1634
}
1635
 
1636
static void
5060 serge 1637
i8xx_ring_put_irq(struct intel_engine_cs *ring)
2332 Serge 1638
{
1639
	struct drm_device *dev = ring->dev;
5060 serge 1640
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 1641
	unsigned long flags;
2332 Serge 1642
 
3031 serge 1643
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
2332 Serge 1644
	if (--ring->irq_refcount == 0) {
3031 serge 1645
		dev_priv->irq_mask |= ring->irq_enable_mask;
1646
		I915_WRITE16(IMR, dev_priv->irq_mask);
1647
		POSTING_READ16(IMR);
2332 Serge 1648
	}
3031 serge 1649
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
2332 Serge 1650
}
1651
 
1652
static int
6084 serge 1653
bsd_ring_flush(struct drm_i915_gem_request *req,
2332 Serge 1654
	       u32     invalidate_domains,
1655
	       u32     flush_domains)
1656
{
6084 serge 1657
	struct intel_engine_cs *ring = req->ring;
2332 Serge 1658
	int ret;
1659
 
6084 serge 1660
	ret = intel_ring_begin(req, 2);
2332 Serge 1661
	if (ret)
1662
		return ret;
1663
 
1664
	intel_ring_emit(ring, MI_FLUSH);
1665
	intel_ring_emit(ring, MI_NOOP);
1666
	intel_ring_advance(ring);
1667
	return 0;
1668
}
1669
 
1670
static int
6084 serge 1671
i9xx_add_request(struct drm_i915_gem_request *req)
2332 Serge 1672
{
6084 serge 1673
	struct intel_engine_cs *ring = req->ring;
2332 Serge 1674
	int ret;
1675
 
6084 serge 1676
	ret = intel_ring_begin(req, 4);
2332 Serge 1677
	if (ret)
1678
		return ret;
1679
 
1680
	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
1681
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
6084 serge 1682
	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
2332 Serge 1683
	intel_ring_emit(ring, MI_USER_INTERRUPT);
4560 Serge 1684
	__intel_ring_advance(ring);
2332 Serge 1685
 
1686
	return 0;
1687
}
1688
 
1689
static bool
5060 serge 1690
gen6_ring_get_irq(struct intel_engine_cs *ring)
2332 Serge 1691
{
1692
	struct drm_device *dev = ring->dev;
5060 serge 1693
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 1694
	unsigned long flags;
2332 Serge 1695
 
5354 serge 1696
	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
6084 serge 1697
		return false;
2332 Serge 1698
 
3031 serge 1699
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
2332 Serge 1700
	if (ring->irq_refcount++ == 0) {
4560 Serge 1701
		if (HAS_L3_DPF(dev) && ring->id == RCS)
4104 Serge 1702
			I915_WRITE_IMR(ring,
1703
				       ~(ring->irq_enable_mask |
4560 Serge 1704
					 GT_PARITY_ERROR(dev)));
3031 serge 1705
		else
1706
			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
5060 serge 1707
		gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
2332 Serge 1708
	}
3031 serge 1709
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
2332 Serge 1710
 
6084 serge 1711
	return true;
2332 Serge 1712
}
1713
 
1714
static void
5060 serge 1715
gen6_ring_put_irq(struct intel_engine_cs *ring)
2332 Serge 1716
{
1717
	struct drm_device *dev = ring->dev;
5060 serge 1718
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 1719
	unsigned long flags;
2332 Serge 1720
 
3031 serge 1721
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
2332 Serge 1722
	if (--ring->irq_refcount == 0) {
4560 Serge 1723
		if (HAS_L3_DPF(dev) && ring->id == RCS)
1724
			I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
3031 serge 1725
		else
1726
			I915_WRITE_IMR(ring, ~0);
5060 serge 1727
		gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
2332 Serge 1728
	}
3031 serge 1729
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
2332 Serge 1730
}
1731
 
4104 Serge 1732
static bool
5060 serge 1733
hsw_vebox_get_irq(struct intel_engine_cs *ring)
4104 Serge 1734
{
1735
	struct drm_device *dev = ring->dev;
1736
	struct drm_i915_private *dev_priv = dev->dev_private;
1737
	unsigned long flags;
1738
 
5354 serge 1739
	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
4104 Serge 1740
		return false;
1741
 
1742
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1743
	if (ring->irq_refcount++ == 0) {
1744
		I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
5060 serge 1745
		gen6_enable_pm_irq(dev_priv, ring->irq_enable_mask);
4104 Serge 1746
	}
1747
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1748
 
1749
	return true;
1750
}
1751
 
1752
static void
5060 serge 1753
hsw_vebox_put_irq(struct intel_engine_cs *ring)
4104 Serge 1754
{
1755
	struct drm_device *dev = ring->dev;
1756
	struct drm_i915_private *dev_priv = dev->dev_private;
1757
	unsigned long flags;
1758
 
1759
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1760
	if (--ring->irq_refcount == 0) {
1761
		I915_WRITE_IMR(ring, ~0);
5060 serge 1762
		gen6_disable_pm_irq(dev_priv, ring->irq_enable_mask);
4104 Serge 1763
	}
1764
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1765
}
1766
 
4560 Serge 1767
static bool
5060 serge 1768
gen8_ring_get_irq(struct intel_engine_cs *ring)
4560 Serge 1769
{
1770
	struct drm_device *dev = ring->dev;
1771
	struct drm_i915_private *dev_priv = dev->dev_private;
1772
	unsigned long flags;
1773
 
5354 serge 1774
	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
4560 Serge 1775
		return false;
1776
 
1777
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1778
	if (ring->irq_refcount++ == 0) {
1779
		if (HAS_L3_DPF(dev) && ring->id == RCS) {
1780
			I915_WRITE_IMR(ring,
1781
				       ~(ring->irq_enable_mask |
1782
					 GT_RENDER_L3_PARITY_ERROR_INTERRUPT));
1783
		} else {
1784
			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
1785
		}
1786
		POSTING_READ(RING_IMR(ring->mmio_base));
1787
	}
1788
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1789
 
1790
	return true;
1791
}
1792
 
1793
static void
5060 serge 1794
gen8_ring_put_irq(struct intel_engine_cs *ring)
4560 Serge 1795
{
1796
	struct drm_device *dev = ring->dev;
1797
	struct drm_i915_private *dev_priv = dev->dev_private;
1798
	unsigned long flags;
1799
 
1800
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1801
	if (--ring->irq_refcount == 0) {
1802
		if (HAS_L3_DPF(dev) && ring->id == RCS) {
1803
			I915_WRITE_IMR(ring,
1804
				       ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
1805
		} else {
1806
			I915_WRITE_IMR(ring, ~0);
1807
		}
1808
		POSTING_READ(RING_IMR(ring->mmio_base));
1809
	}
1810
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1811
}
1812
 
2332 Serge 1813
static int
6084 serge 1814
i965_dispatch_execbuffer(struct drm_i915_gem_request *req,
5060 serge 1815
			 u64 offset, u32 length,
6084 serge 1816
			 unsigned dispatch_flags)
2332 Serge 1817
{
6084 serge 1818
	struct intel_engine_cs *ring = req->ring;
2332 Serge 1819
	int ret;
1820
 
6084 serge 1821
	ret = intel_ring_begin(req, 2);
2332 Serge 1822
	if (ret)
1823
		return ret;
1824
 
1825
	intel_ring_emit(ring,
3031 serge 1826
			MI_BATCH_BUFFER_START |
1827
			MI_BATCH_GTT |
6084 serge 1828
			(dispatch_flags & I915_DISPATCH_SECURE ?
1829
 
2332 Serge 1830
	intel_ring_emit(ring, offset);
1831
	intel_ring_advance(ring);
1832
 
1833
	return 0;
1834
}
1835
 
3243 Serge 1836
/* Just userspace ABI convention to limit the wa batch bo to a resonable size */
1837
#define I830_BATCH_LIMIT (256*1024)
5128 serge 1838
#define I830_TLB_ENTRIES (2)
1839
#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
2332 Serge 1840
static int
6084 serge 1841
i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
1842
			 u64 offset, u32 len,
1843
			 unsigned dispatch_flags)
2332 Serge 1844
{
6084 serge 1845
	struct intel_engine_cs *ring = req->ring;
5128 serge 1846
	u32 cs_offset = ring->scratch.gtt_offset;
2332 Serge 1847
	int ret;
1848
 
6084 serge 1849
	ret = intel_ring_begin(req, 6);
1850
	if (ret)
1851
		return ret;
2332 Serge 1852
 
5128 serge 1853
	/* Evict the invalid PTE TLBs */
1854
	intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA);
1855
	intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096);
1856
	intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */
1857
	intel_ring_emit(ring, cs_offset);
1858
	intel_ring_emit(ring, 0xdeadbeef);
6084 serge 1859
	intel_ring_emit(ring, MI_NOOP);
1860
	intel_ring_advance(ring);
3243 Serge 1861
 
6084 serge 1862
	if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
3243 Serge 1863
		if (len > I830_BATCH_LIMIT)
1864
			return -ENOSPC;
1865
 
6084 serge 1866
		ret = intel_ring_begin(req, 6 + 2);
3243 Serge 1867
		if (ret)
1868
			return ret;
5128 serge 1869
 
1870
		/* Blit the batch (which has now all relocs applied) to the
1871
		 * stable batch scratch bo area (so that the CS never
1872
		 * stumbles over its tlb invalidation bug) ...
1873
		 */
1874
		intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA);
1875
		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096);
5139 serge 1876
		intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096);
3243 Serge 1877
		intel_ring_emit(ring, cs_offset);
1878
		intel_ring_emit(ring, 4096);
1879
		intel_ring_emit(ring, offset);
5128 serge 1880
 
3243 Serge 1881
		intel_ring_emit(ring, MI_FLUSH);
5128 serge 1882
		intel_ring_emit(ring, MI_NOOP);
1883
		intel_ring_advance(ring);
3243 Serge 1884
 
1885
		/* ... and execute it. */
5128 serge 1886
		offset = cs_offset;
1887
	}
1888
 
6084 serge 1889
	ret = intel_ring_begin(req, 4);
5128 serge 1890
	if (ret)
1891
		return ret;
1892
 
6084 serge 1893
	intel_ring_emit(ring, MI_BATCH_BUFFER);
1894
	intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
1895
 
5128 serge 1896
	intel_ring_emit(ring, offset + len - 8);
1897
	intel_ring_emit(ring, MI_NOOP);
3031 serge 1898
	intel_ring_advance(ring);
1899
 
1900
	return 0;
1901
}
1902
 
1903
static int
6084 serge 1904
i915_dispatch_execbuffer(struct drm_i915_gem_request *req,
5060 serge 1905
			 u64 offset, u32 len,
6084 serge 1906
			 unsigned dispatch_flags)
3031 serge 1907
{
6084 serge 1908
	struct intel_engine_cs *ring = req->ring;
3031 serge 1909
	int ret;
1910
 
6084 serge 1911
	ret = intel_ring_begin(req, 2);
1912
	if (ret)
1913
		return ret;
2332 Serge 1914
 
3031 serge 1915
	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
6084 serge 1916
	intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
1917
 
2332 Serge 1918
	intel_ring_advance(ring);
1919
 
1920
	return 0;
1921
}
1922
 
5060 serge 1923
static void cleanup_status_page(struct intel_engine_cs *ring)
2332 Serge 1924
{
1925
	struct drm_i915_gem_object *obj;
1926
 
1927
	obj = ring->status_page.obj;
1928
	if (obj == NULL)
1929
		return;
1930
 
5354 serge 1931
	kunmap(sg_page(obj->pages->sgl));
5060 serge 1932
	i915_gem_object_ggtt_unpin(obj);
2344 Serge 1933
	drm_gem_object_unreference(&obj->base);
2332 Serge 1934
	ring->status_page.obj = NULL;
1935
}
1936
 
5060 serge 1937
static int init_status_page(struct intel_engine_cs *ring)
2332 Serge 1938
{
1939
	struct drm_i915_gem_object *obj;
5060 serge 1940
 
1941
	if ((obj = ring->status_page.obj) == NULL) {
1942
		unsigned flags;
6084 serge 1943
		int ret;
2332 Serge 1944
 
5060 serge 1945
		obj = i915_gem_alloc_object(ring->dev, 4096);
6084 serge 1946
		if (obj == NULL) {
1947
			DRM_ERROR("Failed to allocate status page\n");
5060 serge 1948
			return -ENOMEM;
6084 serge 1949
		}
2332 Serge 1950
 
6084 serge 1951
		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
1952
		if (ret)
1953
			goto err_unref;
2332 Serge 1954
 
5060 serge 1955
		flags = 0;
1956
		if (!HAS_LLC(ring->dev))
1957
			/* On g33, we cannot place HWS above 256MiB, so
1958
			 * restrict its pinning to the low mappable arena.
1959
			 * Though this restriction is not documented for
1960
			 * gen4, gen5, or byt, they also behave similarly
1961
			 * and hang if the HWS is placed at the top of the
1962
			 * GTT. To generalise, it appears that all !llc
1963
			 * platforms have issues with us placing the HWS
1964
			 * above the mappable region (even though we never
1965
			 * actualy map it).
1966
			 */
1967
			flags |= PIN_MAPPABLE;
1968
		ret = i915_gem_obj_ggtt_pin(obj, 4096, flags);
1969
		if (ret) {
1970
err_unref:
1971
			drm_gem_object_unreference(&obj->base);
1972
			return ret;
1973
		}
1974
 
1975
		ring->status_page.obj = obj;
2332 Serge 1976
	}
1977
 
4104 Serge 1978
	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
5354 serge 1979
	ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
2332 Serge 1980
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1981
 
1982
	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
1983
			ring->name, ring->status_page.gfx_addr);
1984
 
1985
	return 0;
1986
}
1987
 
5060 serge 1988
static int init_phys_status_page(struct intel_engine_cs *ring)
3243 Serge 1989
{
6084 serge 1990
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
3243 Serge 1991
 
6084 serge 1992
	if (!dev_priv->status_page_dmah) {
1993
		dev_priv->status_page_dmah =
1994
			drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE);
1995
		if (!dev_priv->status_page_dmah)
1996
			return -ENOMEM;
1997
	}
3243 Serge 1998
 
6084 serge 1999
	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
2000
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
3243 Serge 2001
 
6084 serge 2002
	return 0;
3243 Serge 2003
}
2004
 
5354 serge 2005
void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
2332 Serge 2006
{
5060 serge 2007
	iounmap(ringbuf->virtual_start);
5354 serge 2008
	ringbuf->virtual_start = NULL;
5060 serge 2009
	i915_gem_object_ggtt_unpin(ringbuf->obj);
2010
}
2011
 
5354 serge 2012
int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
6084 serge 2013
				     struct intel_ringbuffer *ringbuf)
5060 serge 2014
{
2015
	struct drm_i915_private *dev_priv = to_i915(dev);
5354 serge 2016
	struct drm_i915_gem_object *obj = ringbuf->obj;
2332 Serge 2017
	int ret;
2340 Serge 2018
 
5354 serge 2019
	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
2020
	if (ret)
2021
		return ret;
2022
 
2023
	ret = i915_gem_object_set_to_gtt_domain(obj, true);
2024
	if (ret) {
2025
		i915_gem_object_ggtt_unpin(obj);
2026
		return ret;
2027
	}
2028
 
2029
	ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
2030
			i915_gem_obj_ggtt_offset(obj), ringbuf->size);
2031
	if (ringbuf->virtual_start == NULL) {
2032
		i915_gem_object_ggtt_unpin(obj);
2033
		return -EINVAL;
2034
	}
2035
 
6084 serge 2036
	return 0;
5354 serge 2037
}
2332 Serge 2038
 
6084 serge 2039
static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
5354 serge 2040
{
2041
	drm_gem_object_unreference(&ringbuf->obj->base);
2042
	ringbuf->obj = NULL;
2043
}
2044
 
6084 serge 2045
static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
2046
				      struct intel_ringbuffer *ringbuf)
5354 serge 2047
{
2048
	struct drm_i915_gem_object *obj;
2049
 
3480 Serge 2050
	obj = NULL;
4371 Serge 2051
	if (!HAS_LLC(dev))
5060 serge 2052
		obj = i915_gem_object_create_stolen(dev, ringbuf->size);
3480 Serge 2053
	if (obj == NULL)
5060 serge 2054
		obj = i915_gem_alloc_object(dev, ringbuf->size);
2055
	if (obj == NULL)
2056
		return -ENOMEM;
2332 Serge 2057
 
5060 serge 2058
	/* mark ring buffers as read-only from GPU side by default */
2059
	obj->gt_ro = 1;
2332 Serge 2060
 
5354 serge 2061
	ringbuf->obj = obj;
2332 Serge 2062
 
5060 serge 2063
	return 0;
2064
}
2065
 
6084 serge 2066
struct intel_ringbuffer *
2067
intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size)
2068
{
2069
	struct intel_ringbuffer *ring;
2070
	int ret;
2071
 
2072
	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
2073
	if (ring == NULL)
2074
		return ERR_PTR(-ENOMEM);
2075
 
2076
	ring->ring = engine;
2077
 
2078
	ring->size = size;
2079
	/* Workaround an erratum on the i830 which causes a hang if
2080
	 * the TAIL pointer points to within the last 2 cachelines
2081
	 * of the buffer.
2082
	 */
2083
	ring->effective_size = size;
2084
	if (IS_I830(engine->dev) || IS_845G(engine->dev))
2085
		ring->effective_size -= 2 * CACHELINE_BYTES;
2086
 
2087
	ring->last_retired_head = -1;
2088
	intel_ring_update_space(ring);
2089
 
2090
	ret = intel_alloc_ringbuffer_obj(engine->dev, ring);
2091
	if (ret) {
2092
		DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
2093
			  engine->name, ret);
2094
		kfree(ring);
2095
		return ERR_PTR(ret);
2096
	}
2097
 
2098
	return ring;
2099
}
2100
 
2101
/*
 * Release a ringbuffer created by intel_engine_create_ringbuffer(): drop
 * its backing object, then free the structure itself.
 */
void
intel_ringbuffer_free(struct intel_ringbuffer *ring)
{
	struct intel_ringbuffer *ringbuf = ring;

	intel_destroy_ringbuffer_obj(ringbuf);
	kfree(ringbuf);
}
2107
 
5060 serge 2108
static int intel_init_ring_buffer(struct drm_device *dev,
2109
				  struct intel_engine_cs *ring)
2110
{
6084 serge 2111
	struct intel_ringbuffer *ringbuf;
5060 serge 2112
	int ret;
2113
 
6084 serge 2114
	WARN_ON(ring->buffer);
5060 serge 2115
 
2116
	ring->dev = dev;
2117
	INIT_LIST_HEAD(&ring->active_list);
2118
	INIT_LIST_HEAD(&ring->request_list);
5354 serge 2119
	INIT_LIST_HEAD(&ring->execlist_queue);
6084 serge 2120
	i915_gem_batch_pool_init(dev, &ring->batch_pool);
5060 serge 2121
	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
2122
 
2123
	init_waitqueue_head(&ring->irq_queue);
2124
 
6084 serge 2125
	ringbuf = intel_engine_create_ringbuffer(ring, 32 * PAGE_SIZE);
2126
	if (IS_ERR(ringbuf))
2127
		return PTR_ERR(ringbuf);
2128
	ring->buffer = ringbuf;
2129
 
5060 serge 2130
	if (I915_NEED_GFX_HWS(dev)) {
2131
		ret = init_status_page(ring);
2132
		if (ret)
2133
			goto error;
2134
	} else {
2135
		BUG_ON(ring->id != RCS);
2136
		ret = init_phys_status_page(ring);
6084 serge 2137
		if (ret)
5060 serge 2138
			goto error;
2139
	}
2332 Serge 2140
 
6084 serge 2141
	ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
5060 serge 2142
	if (ret) {
6084 serge 2143
		DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
2144
				ring->name, ret);
2145
		intel_destroy_ringbuffer_obj(ringbuf);
5060 serge 2146
		goto error;
2147
	}
2148
 
2149
	ret = i915_cmd_parser_init_ring(ring);
2150
	if (ret)
2151
		goto error;
2152
 
2332 Serge 2153
	return 0;
2154
 
5060 serge 2155
error:
6084 serge 2156
	intel_ringbuffer_free(ringbuf);
5060 serge 2157
	ring->buffer = NULL;
2332 Serge 2158
	return ret;
2159
}
2160
 
5060 serge 2161
void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
2332 Serge 2162
{
5354 serge 2163
	struct drm_i915_private *dev_priv;
2332 Serge 2164
 
5060 serge 2165
	if (!intel_ring_initialized(ring))
2332 Serge 2166
		return;
2167
 
5354 serge 2168
	dev_priv = to_i915(ring->dev);
2169
 
5060 serge 2170
	intel_stop_ring_buffer(ring);
2171
	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
2332 Serge 2172
 
6084 serge 2173
	intel_unpin_ringbuffer_obj(ring->buffer);
2174
	intel_ringbuffer_free(ring->buffer);
2175
	ring->buffer = NULL;
2332 Serge 2176
 
2177
	if (ring->cleanup)
2178
		ring->cleanup(ring);
2179
 
6084 serge 2180
	cleanup_status_page(ring);
2332 Serge 2181
 
5060 serge 2182
	i915_cmd_parser_fini_ring(ring);
6084 serge 2183
	i915_gem_batch_pool_fini(&ring->batch_pool);
3031 serge 2184
}
2185
 
6084 serge 2186
static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
3031 serge 2187
{
5060 serge 2188
	struct intel_ringbuffer *ringbuf = ring->buffer;
3031 serge 2189
	struct drm_i915_gem_request *request;
6084 serge 2190
	unsigned space;
3031 serge 2191
	int ret;
2192
 
6084 serge 2193
	if (intel_ring_space(ringbuf) >= n)
2194
		return 0;
3031 serge 2195
 
6084 serge 2196
	/* The whole point of reserving space is to not wait! */
2197
	WARN_ON(ringbuf->reserved_in_use);
2332 Serge 2198
 
3031 serge 2199
	list_for_each_entry(request, &ring->request_list, list) {
6084 serge 2200
		space = __intel_ring_space(request->postfix, ringbuf->tail,
2201
					   ringbuf->size);
2202
		if (space >= n)
3031 serge 2203
			break;
2204
	}
2205
 
6084 serge 2206
	if (WARN_ON(&request->list == &ring->request_list))
3031 serge 2207
		return -ENOSPC;
2208
 
6084 serge 2209
	ret = i915_wait_request(request);
3031 serge 2210
	if (ret)
2211
		return ret;
2212
 
6084 serge 2213
	ringbuf->space = space;
3031 serge 2214
	return 0;
2215
}
2216
 
6084 serge 2217
static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
3031 serge 2218
{
3243 Serge 2219
	uint32_t __iomem *virt;
5060 serge 2220
	int rem = ringbuf->size - ringbuf->tail;
3243 Serge 2221
 
5060 serge 2222
	virt = ringbuf->virtual_start + ringbuf->tail;
3243 Serge 2223
	rem /= 4;
2224
	while (rem--)
2225
		iowrite32(MI_NOOP, virt++);
2226
 
5060 serge 2227
	ringbuf->tail = 0;
6084 serge 2228
	intel_ring_update_space(ringbuf);
3243 Serge 2229
}
2230
 
5060 serge 2231
int intel_ring_idle(struct intel_engine_cs *ring)
3243 Serge 2232
{
6084 serge 2233
	struct drm_i915_gem_request *req;
3243 Serge 2234
 
2235
	/* Wait upon the last request to be completed */
2236
	if (list_empty(&ring->request_list))
2237
		return 0;
2238
 
6084 serge 2239
	req = list_entry(ring->request_list.prev,
2240
			struct drm_i915_gem_request,
2241
			list);
3243 Serge 2242
 
6084 serge 2243
	/* Make sure we do not trigger any retires */
2244
	return __i915_wait_request(req,
2245
				   atomic_read(&to_i915(ring->dev)->gpu_error.reset_counter),
2246
				   to_i915(ring->dev)->mm.interruptible,
2247
				   NULL, NULL);
3243 Serge 2248
}
2249
 
6084 serge 2250
int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
3243 Serge 2251
{
6084 serge 2252
	request->ringbuf = request->ring->buffer;
2253
	return 0;
2254
}
3243 Serge 2255
 
6084 serge 2256
int intel_ring_reserve_space(struct drm_i915_gem_request *request)
2257
{
2258
	/*
2259
	 * The first call merely notes the reserve request and is common for
2260
	 * all back ends. The subsequent localised _begin() call actually
2261
	 * ensures that the reservation is available. Without the begin, if
2262
	 * the request creator immediately submitted the request without
2263
	 * adding any commands to it then there might not actually be
2264
	 * sufficient room for the submission commands.
2265
	 */
2266
	intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
4560 Serge 2267
 
6084 serge 2268
	return intel_ring_begin(request, 0);
2269
}
4560 Serge 2270
 
6084 serge 2271
void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size)
2272
{
2273
	WARN_ON(ringbuf->reserved_size);
2274
	WARN_ON(ringbuf->reserved_in_use);
2275
 
2276
	ringbuf->reserved_size = size;
2277
}
2278
 
2279
void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf)
2280
{
2281
	WARN_ON(ringbuf->reserved_in_use);
2282
 
2283
	ringbuf->reserved_size   = 0;
2284
	ringbuf->reserved_in_use = false;
2285
}
2286
 
2287
void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf)
2288
{
2289
	WARN_ON(ringbuf->reserved_in_use);
2290
 
2291
	ringbuf->reserved_in_use = true;
2292
	ringbuf->reserved_tail   = ringbuf->tail;
2293
}
2294
 
2295
void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
2296
{
2297
	WARN_ON(!ringbuf->reserved_in_use);
2298
	if (ringbuf->tail > ringbuf->reserved_tail) {
2299
		WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
2300
		     "request reserved size too small: %d vs %d!\n",
2301
		     ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
2302
	} else {
2303
		/*
2304
		 * The ring was wrapped while the reserved space was in use.
2305
		 * That means that some unknown amount of the ring tail was
2306
		 * no-op filled and skipped. Thus simply adding the ring size
2307
		 * to the tail and doing the above space check will not work.
2308
		 * Rather than attempt to track how much tail was skipped,
2309
		 * it is much simpler to say that also skipping the sanity
2310
		 * check every once in a while is not a big issue.
2311
		 */
4560 Serge 2312
	}
2313
 
6084 serge 2314
	ringbuf->reserved_size   = 0;
2315
	ringbuf->reserved_in_use = false;
3243 Serge 2316
}
2317
 
6084 serge 2318
static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)
3480 Serge 2319
{
5060 serge 2320
	struct intel_ringbuffer *ringbuf = ring->buffer;
6084 serge 2321
	int remain_usable = ringbuf->effective_size - ringbuf->tail;
2322
	int remain_actual = ringbuf->size - ringbuf->tail;
2323
	int ret, total_bytes, wait_bytes = 0;
2324
	bool need_wrap = false;
3480 Serge 2325
 
6084 serge 2326
	if (ringbuf->reserved_in_use)
2327
		total_bytes = bytes;
2328
	else
2329
		total_bytes = bytes + ringbuf->reserved_size;
2330
 
2331
	if (unlikely(bytes > remain_usable)) {
2332
		/*
2333
		 * Not enough space for the basic request. So need to flush
2334
		 * out the remainder and then wait for base + reserved.
2335
		 */
2336
		wait_bytes = remain_actual + total_bytes;
2337
		need_wrap = true;
2338
	} else {
2339
		if (unlikely(total_bytes > remain_usable)) {
2340
			/*
2341
			 * The base request will fit but the reserved space
2342
			 * falls off the end. So only need to to wait for the
2343
			 * reserved size after flushing out the remainder.
2344
			 */
2345
			wait_bytes = remain_actual + ringbuf->reserved_size;
2346
			need_wrap = true;
2347
		} else if (total_bytes > ringbuf->space) {
2348
			/* No wrapping required, just waiting. */
2349
			wait_bytes = total_bytes;
2350
		}
3480 Serge 2351
	}
2352
 
6084 serge 2353
	if (wait_bytes) {
2354
		ret = ring_wait_for_space(ring, wait_bytes);
3480 Serge 2355
		if (unlikely(ret))
2356
			return ret;
6084 serge 2357
 
2358
		if (need_wrap)
2359
			__wrap_ring_buffer(ringbuf);
3480 Serge 2360
	}
2361
 
2362
	return 0;
2363
}
2364
 
6084 serge 2365
int intel_ring_begin(struct drm_i915_gem_request *req,
2332 Serge 2366
		     int num_dwords)
2367
{
6084 serge 2368
	struct intel_engine_cs *ring;
2369
	struct drm_i915_private *dev_priv;
2332 Serge 2370
	int ret;
2371
 
6084 serge 2372
	WARN_ON(req == NULL);
2373
	ring = req->ring;
2374
	dev_priv = ring->dev->dev_private;
2375
 
3480 Serge 2376
	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
2377
				   dev_priv->mm.interruptible);
3031 serge 2378
	if (ret)
2379
		return ret;
2332 Serge 2380
 
4560 Serge 2381
	ret = __intel_ring_prepare(ring, num_dwords * sizeof(uint32_t));
2382
	if (ret)
2383
		return ret;
2384
 
5060 serge 2385
	ring->buffer->space -= num_dwords * sizeof(uint32_t);
4560 Serge 2386
	return 0;
3480 Serge 2387
}
2332 Serge 2388
 
5060 serge 2389
/* Align the ring tail to a cacheline boundary */
6084 serge 2390
int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
3480 Serge 2391
{
6084 serge 2392
	struct intel_engine_cs *ring = req->ring;
5060 serge 2393
	int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
2394
	int ret;
3480 Serge 2395
 
5060 serge 2396
	if (num_dwords == 0)
2397
		return 0;
2398
 
2399
	num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
6084 serge 2400
	ret = intel_ring_begin(req, num_dwords);
5060 serge 2401
	if (ret)
2402
		return ret;
2403
 
2404
	while (num_dwords--)
2405
		intel_ring_emit(ring, MI_NOOP);
2406
 
2407
	intel_ring_advance(ring);
2408
 
2409
	return 0;
2410
}
2411
 
2412
/*
 * Reset the engine's seqno state to @seqno.
 *
 * On gen6 and gen7 the RING_SYNC_0/1 registers (and RING_SYNC_2 when the
 * device has a VEBOX) are cleared first. The new value is then pushed
 * through the engine's set_seqno hook and mirrored into the hangcheck
 * state.
 */
void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	const int gen = INTEL_INFO(dev)->gen;

	if (gen == 6 || gen == 7) {
		I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
		I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
		if (HAS_VEBOX(dev))
			I915_WRITE(RING_SYNC_2(ring->mmio_base), 0);
	}

	ring->set_seqno(ring, seqno);
	ring->hangcheck.seqno = seqno;
}
2427
 
5060 serge 2428
static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring,
2332 Serge 2429
				     u32 value)
2430
{
5060 serge 2431
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
2332 Serge 2432
 
2433
       /* Every tail move must follow the sequence below */
3031 serge 2434
 
2435
	/* Disable notification that the ring is IDLE. The GT
2436
	 * will then assume that it is busy and bring it out of rc6.
2437
	 */
6084 serge 2438
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
3031 serge 2439
		   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
2332 Serge 2440
 
3031 serge 2441
	/* Clear the context id. Here be magic! */
2442
	I915_WRITE64(GEN6_BSD_RNCID, 0x0);
2443
 
2444
	/* Wait for the ring not to be idle, i.e. for it to wake up. */
6084 serge 2445
	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
3031 serge 2446
		      GEN6_BSD_SLEEP_INDICATOR) == 0,
6084 serge 2447
		     50))
3031 serge 2448
		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
2332 Serge 2449
 
3031 serge 2450
	/* Now that the ring is fully powered up, update the tail */
6084 serge 2451
	I915_WRITE_TAIL(ring, value);
3031 serge 2452
	POSTING_READ(RING_TAIL(ring->mmio_base));
2453
 
2454
	/* Let the ring send IDLE messages to the GT again,
2455
	 * and so let it sleep to conserve power when idle.
2456
	 */
6084 serge 2457
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
3031 serge 2458
		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
2332 Serge 2459
}
2460
 
6084 serge 2461
/*
 * Emit an MI_FLUSH_DW on the BSD ring.
 *
 * The command always carries a store-dword post-sync write to the HWS
 * scratch address; TLB and BSD invalidation are added when @invalidate
 * touches any GPU domain. @flush is not used. Four dwords are emitted on
 * every generation; on gen8+ the command dword is incremented by one.
 *
 * Returns 0 on success or the error from intel_ring_begin().
 */
static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
			       u32 invalidate, u32 flush)
{
	struct intel_engine_cs *ring = req->ring;
	uint32_t cmd = MI_FLUSH_DW;
	int ret;

	ret = intel_ring_begin(req, 4);
	if (ret)
		return ret;

	if (INTEL_INFO(ring->dev)->gen >= 8)
		cmd += 1;

	/* We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	/*
	 * Bspec vol 1c.5 - video engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (invalidate & I915_GEM_GPU_DOMAINS)
		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
	if (INTEL_INFO(ring->dev)->gen < 8) {
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, MI_NOOP);
	} else {
		intel_ring_emit(ring, 0); /* upper addr */
		intel_ring_emit(ring, 0); /* value */
	}
	intel_ring_advance(ring);

	return 0;
}
2504
 
2505
static int
6084 serge 2506
gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
5060 serge 2507
			      u64 offset, u32 len,
6084 serge 2508
			      unsigned dispatch_flags)
4560 Serge 2509
{
6084 serge 2510
	struct intel_engine_cs *ring = req->ring;
2511
	bool ppgtt = USES_PPGTT(ring->dev) &&
2512
			!(dispatch_flags & I915_DISPATCH_SECURE);
4560 Serge 2513
	int ret;
2514
 
6084 serge 2515
	ret = intel_ring_begin(req, 4);
4560 Serge 2516
	if (ret)
2517
		return ret;
2518
 
2519
	/* FIXME(BDW): Address space and security selectors. */
6084 serge 2520
	intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
2521
			(dispatch_flags & I915_DISPATCH_RS ?
2522
			 MI_BATCH_RESOURCE_STREAMER : 0));
5060 serge 2523
	intel_ring_emit(ring, lower_32_bits(offset));
2524
	intel_ring_emit(ring, upper_32_bits(offset));
4560 Serge 2525
	intel_ring_emit(ring, MI_NOOP);
2526
	intel_ring_advance(ring);
2527
 
2528
	return 0;
2529
}
2530
 
2531
static int
6084 serge 2532
hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
2533
			     u64 offset, u32 len,
2534
			     unsigned dispatch_flags)
3243 Serge 2535
{
6084 serge 2536
	struct intel_engine_cs *ring = req->ring;
3243 Serge 2537
	int ret;
2538
 
6084 serge 2539
	ret = intel_ring_begin(req, 2);
3243 Serge 2540
	if (ret)
2541
		return ret;
2542
 
2543
	intel_ring_emit(ring,
5354 serge 2544
			MI_BATCH_BUFFER_START |
6084 serge 2545
			(dispatch_flags & I915_DISPATCH_SECURE ?
2546
 
2547
			(dispatch_flags & I915_DISPATCH_RS ?
2548
			 MI_BATCH_RESOURCE_STREAMER : 0));
3243 Serge 2549
	/* bit0-7 is the length on GEN6+ */
2550
	intel_ring_emit(ring, offset);
2551
	intel_ring_advance(ring);
2552
 
2553
	return 0;
2554
}
2555
 
2556
static int
6084 serge 2557
gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
5060 serge 2558
			      u64 offset, u32 len,
6084 serge 2559
			      unsigned dispatch_flags)
2332 Serge 2560
{
6084 serge 2561
	struct intel_engine_cs *ring = req->ring;
2562
	int ret;
2332 Serge 2563
 
6084 serge 2564
	ret = intel_ring_begin(req, 2);
2565
	if (ret)
2566
		return ret;
2332 Serge 2567
 
3243 Serge 2568
	intel_ring_emit(ring,
2569
			MI_BATCH_BUFFER_START |
6084 serge 2570
			(dispatch_flags & I915_DISPATCH_SECURE ?
2571
 
2572
	/* bit0-7 is the length on GEN6+ */
2573
	intel_ring_emit(ring, offset);
2574
	intel_ring_advance(ring);
2332 Serge 2575
 
6084 serge 2576
	return 0;
2332 Serge 2577
}
2578
 
2579
/* Blitter support (SandyBridge+) */
2580
 
6084 serge 2581
/*
 * Emit an MI_FLUSH_DW on the ring.
 *
 * The command always carries a store-dword post-sync write to the HWS
 * scratch address; TLB invalidation is added when @invalidate includes
 * I915_GEM_DOMAIN_RENDER. @flush is not used. Four dwords are emitted on
 * every generation; on gen8+ the command dword is incremented by one.
 *
 * Returns 0 on success or the error from intel_ring_begin().
 */
static int gen6_ring_flush(struct drm_i915_gem_request *req,
			   u32 invalidate, u32 flush)
{
	struct intel_engine_cs *ring = req->ring;
	struct drm_device *dev = ring->dev;
	uint32_t cmd = MI_FLUSH_DW;
	int ret;

	ret = intel_ring_begin(req, 4);
	if (ret)
		return ret;

	if (INTEL_INFO(dev)->gen >= 8)
		cmd += 1;

	/* We always require a command barrier so that subsequent
	 * commands, such as breadcrumb interrupts, are strictly ordered
	 * wrt the contents of the write cache being flushed to memory
	 * (and thus being coherent from the CPU).
	 */
	cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;

	/*
	 * Bspec vol 1c.3 - blitter engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (invalidate & I915_GEM_DOMAIN_RENDER)
		cmd |= MI_INVALIDATE_TLB;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
	if (INTEL_INFO(dev)->gen < 8) {
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, MI_NOOP);
	} else {
		intel_ring_emit(ring, 0); /* upper addr */
		intel_ring_emit(ring, 0); /* value */
	}
	intel_ring_advance(ring);

	return 0;
}
2625
 
2626
int intel_init_render_ring_buffer(struct drm_device *dev)
2627
{
5060 serge 2628
	struct drm_i915_private *dev_priv = dev->dev_private;
2629
	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
2630
	struct drm_i915_gem_object *obj;
2631
	int ret;
2340 Serge 2632
 
3031 serge 2633
	ring->name = "render ring";
2634
	ring->id = RCS;
2635
	ring->mmio_base = RENDER_RING_BASE;
2636
 
5060 serge 2637
	if (INTEL_INFO(dev)->gen >= 8) {
2638
		if (i915_semaphore_is_enabled(dev)) {
2639
			obj = i915_gem_alloc_object(dev, 4096);
2640
			if (obj == NULL) {
2641
				DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n");
2642
				i915.semaphores = 0;
2643
			} else {
2644
				i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
2645
				ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK);
2646
				if (ret != 0) {
2647
					drm_gem_object_unreference(&obj->base);
2648
					DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
2649
					i915.semaphores = 0;
2650
				} else
2651
					dev_priv->semaphore_obj = obj;
2652
			}
2653
		}
5354 serge 2654
 
6084 serge 2655
		ring->init_context = intel_rcs_ctx_init;
5060 serge 2656
		ring->add_request = gen6_add_request;
2657
		ring->flush = gen8_render_ring_flush;
2658
		ring->irq_get = gen8_ring_get_irq;
2659
		ring->irq_put = gen8_ring_put_irq;
2660
		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
2661
		ring->get_seqno = gen6_ring_get_seqno;
2662
		ring->set_seqno = ring_set_seqno;
2663
		if (i915_semaphore_is_enabled(dev)) {
2664
			WARN_ON(!dev_priv->semaphore_obj);
2665
			ring->semaphore.sync_to = gen8_ring_sync;
2666
			ring->semaphore.signal = gen8_rcs_signal;
2667
			GEN8_RING_SEMAPHORE_INIT;
2668
		}
2669
	} else if (INTEL_INFO(dev)->gen >= 6) {
6084 serge 2670
		ring->init_context = intel_rcs_ctx_init;
2671
		ring->add_request = gen6_add_request;
3031 serge 2672
		ring->flush = gen7_render_ring_flush;
2673
		if (INTEL_INFO(dev)->gen == 6)
6084 serge 2674
			ring->flush = gen6_render_ring_flush;
3031 serge 2675
		ring->irq_get = gen6_ring_get_irq;
2676
		ring->irq_put = gen6_ring_put_irq;
4104 Serge 2677
		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
2342 Serge 2678
		ring->get_seqno = gen6_ring_get_seqno;
3480 Serge 2679
		ring->set_seqno = ring_set_seqno;
5060 serge 2680
		if (i915_semaphore_is_enabled(dev)) {
6084 serge 2681
			ring->semaphore.sync_to = gen6_ring_sync;
2682
			ring->semaphore.signal = gen6_signal;
2683
			/*
5060 serge 2684
			 * The current semaphore is only applied on pre-gen8
2685
			 * platform.  And there is no VCS2 ring on the pre-gen8
2686
			 * platform. So the semaphore between RCS and VCS2 is
2687
			 * initialized as INVALID.  Gen8 will initialize the
2688
			 * sema between VCS2 and RCS later.
6084 serge 2689
			 */
2690
			ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID;
2691
			ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_RV;
2692
			ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_RB;
2693
			ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_RVE;
2694
			ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2695
			ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC;
2696
			ring->semaphore.mbox.signal[VCS] = GEN6_VRSYNC;
2697
			ring->semaphore.mbox.signal[BCS] = GEN6_BRSYNC;
2698
			ring->semaphore.mbox.signal[VECS] = GEN6_VERSYNC;
2699
			ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
5060 serge 2700
		}
2332 Serge 2701
	} else if (IS_GEN5(dev)) {
6084 serge 2702
		ring->add_request = pc_render_add_request;
3031 serge 2703
		ring->flush = gen4_render_ring_flush;
2342 Serge 2704
		ring->get_seqno = pc_render_get_seqno;
3480 Serge 2705
		ring->set_seqno = pc_render_set_seqno;
3031 serge 2706
		ring->irq_get = gen5_ring_get_irq;
2707
		ring->irq_put = gen5_ring_put_irq;
4104 Serge 2708
		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT |
2709
					GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
3031 serge 2710
	} else {
2711
		ring->add_request = i9xx_add_request;
2712
		if (INTEL_INFO(dev)->gen < 4)
2713
			ring->flush = gen2_render_ring_flush;
2714
		else
2715
			ring->flush = gen4_render_ring_flush;
2716
		ring->get_seqno = ring_get_seqno;
3480 Serge 2717
		ring->set_seqno = ring_set_seqno;
3031 serge 2718
		if (IS_GEN2(dev)) {
2719
			ring->irq_get = i8xx_ring_get_irq;
2720
			ring->irq_put = i8xx_ring_put_irq;
2721
		} else {
2722
			ring->irq_get = i9xx_ring_get_irq;
2723
			ring->irq_put = i9xx_ring_put_irq;
2724
		}
2725
		ring->irq_enable_mask = I915_USER_INTERRUPT;
2332 Serge 2726
	}
3031 serge 2727
	ring->write_tail = ring_write_tail;
5060 serge 2728
 
3243 Serge 2729
	if (IS_HASWELL(dev))
2730
		ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
4560 Serge 2731
	else if (IS_GEN8(dev))
2732
		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
3243 Serge 2733
	else if (INTEL_INFO(dev)->gen >= 6)
3031 serge 2734
		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
2735
	else if (INTEL_INFO(dev)->gen >= 4)
2736
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
2737
	else if (IS_I830(dev) || IS_845G(dev))
2738
		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
2739
	else
2740
		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
6084 serge 2741
	ring->init_hw = init_render_ring;
3031 serge 2742
	ring->cleanup = render_ring_cleanup;
2332 Serge 2743
 
3243 Serge 2744
	/* Workaround batchbuffer to combat CS tlb bug. */
2745
	if (HAS_BROKEN_CS_TLB(dev)) {
5128 serge 2746
		obj = i915_gem_alloc_object(dev, I830_WA_SIZE);
3243 Serge 2747
		if (obj == NULL) {
2748
			DRM_ERROR("Failed to allocate batch bo\n");
2749
			return -ENOMEM;
2750
		}
2751
 
5060 serge 2752
		ret = i915_gem_obj_ggtt_pin(obj, 0, 0);
3243 Serge 2753
		if (ret != 0) {
2754
			drm_gem_object_unreference(&obj->base);
2755
			DRM_ERROR("Failed to ping batch bo\n");
2756
			return ret;
2757
		}
2758
 
4104 Serge 2759
		ring->scratch.obj = obj;
2760
		ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
2332 Serge 2761
	}
2340 Serge 2762
 
6084 serge 2763
	ret = intel_init_ring_buffer(dev, ring);
2764
	if (ret)
2765
		return ret;
2766
 
2767
	if (INTEL_INFO(dev)->gen >= 5) {
2768
		ret = intel_init_pipe_control(ring);
2769
		if (ret)
2770
			return ret;
2771
	}
2772
 
2773
	return 0;
2332 Serge 2774
}
2775
 
2776
int intel_init_bsd_ring_buffer(struct drm_device *dev)
2777
{
5060 serge 2778
	struct drm_i915_private *dev_priv = dev->dev_private;
2779
	struct intel_engine_cs *ring = &dev_priv->ring[VCS];
2332 Serge 2780
 
3031 serge 2781
	ring->name = "bsd ring";
2782
	ring->id = VCS;
2332 Serge 2783
 
3031 serge 2784
	ring->write_tail = ring_write_tail;
4560 Serge 2785
	if (INTEL_INFO(dev)->gen >= 6) {
3031 serge 2786
		ring->mmio_base = GEN6_BSD_RING_BASE;
2787
		/* gen6 bsd needs a special wa for tail updates */
2788
		if (IS_GEN6(dev))
2789
			ring->write_tail = gen6_bsd_ring_write_tail;
4104 Serge 2790
		ring->flush = gen6_bsd_ring_flush;
3031 serge 2791
		ring->add_request = gen6_add_request;
2792
		ring->get_seqno = gen6_ring_get_seqno;
3480 Serge 2793
		ring->set_seqno = ring_set_seqno;
4560 Serge 2794
		if (INTEL_INFO(dev)->gen >= 8) {
2795
			ring->irq_enable_mask =
2796
				GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
2797
			ring->irq_get = gen8_ring_get_irq;
2798
			ring->irq_put = gen8_ring_put_irq;
2799
			ring->dispatch_execbuffer =
2800
				gen8_ring_dispatch_execbuffer;
5060 serge 2801
			if (i915_semaphore_is_enabled(dev)) {
2802
				ring->semaphore.sync_to = gen8_ring_sync;
2803
				ring->semaphore.signal = gen8_xcs_signal;
2804
				GEN8_RING_SEMAPHORE_INIT;
2805
			}
4560 Serge 2806
		} else {
6084 serge 2807
			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
2808
			ring->irq_get = gen6_ring_get_irq;
2809
			ring->irq_put = gen6_ring_put_irq;
4560 Serge 2810
			ring->dispatch_execbuffer =
2811
				gen6_ring_dispatch_execbuffer;
5060 serge 2812
			if (i915_semaphore_is_enabled(dev)) {
6084 serge 2813
				ring->semaphore.sync_to = gen6_ring_sync;
2814
				ring->semaphore.signal = gen6_signal;
2815
				ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VR;
2816
				ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID;
2817
				ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VB;
2818
				ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_VVE;
2819
				ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2820
				ring->semaphore.mbox.signal[RCS] = GEN6_RVSYNC;
2821
				ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC;
2822
				ring->semaphore.mbox.signal[BCS] = GEN6_BVSYNC;
2823
				ring->semaphore.mbox.signal[VECS] = GEN6_VEVSYNC;
2824
				ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
5060 serge 2825
			}
4560 Serge 2826
		}
3031 serge 2827
	} else {
2828
		ring->mmio_base = BSD_RING_BASE;
2829
		ring->flush = bsd_ring_flush;
2830
		ring->add_request = i9xx_add_request;
2831
		ring->get_seqno = ring_get_seqno;
3480 Serge 2832
		ring->set_seqno = ring_set_seqno;
3031 serge 2833
		if (IS_GEN5(dev)) {
4104 Serge 2834
			ring->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
3031 serge 2835
			ring->irq_get = gen5_ring_get_irq;
2836
			ring->irq_put = gen5_ring_put_irq;
2837
		} else {
2838
			ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
2839
			ring->irq_get = i9xx_ring_get_irq;
2840
			ring->irq_put = i9xx_ring_put_irq;
2841
		}
2842
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
2843
	}
6084 serge 2844
	ring->init_hw = init_ring_common;
3031 serge 2845
 
2332 Serge 2846
	return intel_init_ring_buffer(dev, ring);
2847
}
2848
 
5060 serge 2849
/**
6084 serge 2850
 * Initialize the second BSD ring (eg. Broadwell GT3, Skylake GT3)
5060 serge 2851
 */
2852
int intel_init_bsd2_ring_buffer(struct drm_device *dev)
2853
{
2854
	struct drm_i915_private *dev_priv = dev->dev_private;
2855
	struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
2856
 
2857
	ring->name = "bsd2 ring";
2858
	ring->id = VCS2;
2859
 
2860
	ring->write_tail = ring_write_tail;
2861
	ring->mmio_base = GEN8_BSD2_RING_BASE;
2862
	ring->flush = gen6_bsd_ring_flush;
2863
	ring->add_request = gen6_add_request;
2864
	ring->get_seqno = gen6_ring_get_seqno;
2865
	ring->set_seqno = ring_set_seqno;
2866
	ring->irq_enable_mask =
2867
			GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
2868
	ring->irq_get = gen8_ring_get_irq;
2869
	ring->irq_put = gen8_ring_put_irq;
2870
	ring->dispatch_execbuffer =
2871
			gen8_ring_dispatch_execbuffer;
2872
	if (i915_semaphore_is_enabled(dev)) {
2873
		ring->semaphore.sync_to = gen8_ring_sync;
2874
		ring->semaphore.signal = gen8_xcs_signal;
2875
		GEN8_RING_SEMAPHORE_INIT;
2876
	}
6084 serge 2877
	ring->init_hw = init_ring_common;
5060 serge 2878
 
2879
	return intel_init_ring_buffer(dev, ring);
2880
}
2881
 
2332 Serge 2882
int intel_init_blt_ring_buffer(struct drm_device *dev)
2883
{
5060 serge 2884
	struct drm_i915_private *dev_priv = dev->dev_private;
2885
	struct intel_engine_cs *ring = &dev_priv->ring[BCS];
2332 Serge 2886
 
3031 serge 2887
	ring->name = "blitter ring";
2888
	ring->id = BCS;
2332 Serge 2889
 
3031 serge 2890
	ring->mmio_base = BLT_RING_BASE;
2891
	ring->write_tail = ring_write_tail;
4104 Serge 2892
	ring->flush = gen6_ring_flush;
3031 serge 2893
	ring->add_request = gen6_add_request;
2894
	ring->get_seqno = gen6_ring_get_seqno;
3480 Serge 2895
	ring->set_seqno = ring_set_seqno;
4560 Serge 2896
	if (INTEL_INFO(dev)->gen >= 8) {
2897
		ring->irq_enable_mask =
2898
			GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
2899
		ring->irq_get = gen8_ring_get_irq;
2900
		ring->irq_put = gen8_ring_put_irq;
2901
		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
5060 serge 2902
		if (i915_semaphore_is_enabled(dev)) {
2903
			ring->semaphore.sync_to = gen8_ring_sync;
2904
			ring->semaphore.signal = gen8_xcs_signal;
2905
			GEN8_RING_SEMAPHORE_INIT;
2906
		}
4560 Serge 2907
	} else {
6084 serge 2908
		ring->irq_enable_mask = GT_BLT_USER_INTERRUPT;
2909
		ring->irq_get = gen6_ring_get_irq;
2910
		ring->irq_put = gen6_ring_put_irq;
2911
		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
5060 serge 2912
		if (i915_semaphore_is_enabled(dev)) {
2913
			ring->semaphore.signal = gen6_signal;
6084 serge 2914
			ring->semaphore.sync_to = gen6_ring_sync;
2915
			/*
5060 serge 2916
			 * The current semaphore is only applied on pre-gen8
2917
			 * platform.  And there is no VCS2 ring on the pre-gen8
2918
			 * platform. So the semaphore between BCS and VCS2 is
2919
			 * initialized as INVALID.  Gen8 will initialize the
2920
			 * sema between BCS and VCS2 later.
6084 serge 2921
			 */
2922
			ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_BR;
2923
			ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_BV;
2924
			ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID;
2925
			ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_BVE;
2926
			ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2927
			ring->semaphore.mbox.signal[RCS] = GEN6_RBSYNC;
2928
			ring->semaphore.mbox.signal[VCS] = GEN6_VBSYNC;
2929
			ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC;
2930
			ring->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC;
2931
			ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
5060 serge 2932
		}
4560 Serge 2933
	}
6084 serge 2934
	ring->init_hw = init_ring_common;
3031 serge 2935
 
2332 Serge 2936
	return intel_init_ring_buffer(dev, ring);
2937
}
3031 serge 2938
 
4104 Serge 2939
int intel_init_vebox_ring_buffer(struct drm_device *dev)
2940
{
5060 serge 2941
	struct drm_i915_private *dev_priv = dev->dev_private;
2942
	struct intel_engine_cs *ring = &dev_priv->ring[VECS];
4104 Serge 2943
 
2944
	ring->name = "video enhancement ring";
2945
	ring->id = VECS;
2946
 
2947
	ring->mmio_base = VEBOX_RING_BASE;
2948
	ring->write_tail = ring_write_tail;
2949
	ring->flush = gen6_ring_flush;
2950
	ring->add_request = gen6_add_request;
2951
	ring->get_seqno = gen6_ring_get_seqno;
2952
	ring->set_seqno = ring_set_seqno;
4560 Serge 2953
 
2954
	if (INTEL_INFO(dev)->gen >= 8) {
2955
		ring->irq_enable_mask =
2956
			GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
2957
		ring->irq_get = gen8_ring_get_irq;
2958
		ring->irq_put = gen8_ring_put_irq;
2959
		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
5060 serge 2960
		if (i915_semaphore_is_enabled(dev)) {
2961
			ring->semaphore.sync_to = gen8_ring_sync;
2962
			ring->semaphore.signal = gen8_xcs_signal;
2963
			GEN8_RING_SEMAPHORE_INIT;
2964
		}
4560 Serge 2965
	} else {
6084 serge 2966
		ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
2967
		ring->irq_get = hsw_vebox_get_irq;
2968
		ring->irq_put = hsw_vebox_put_irq;
2969
		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
5060 serge 2970
		if (i915_semaphore_is_enabled(dev)) {
6084 serge 2971
			ring->semaphore.sync_to = gen6_ring_sync;
2972
			ring->semaphore.signal = gen6_signal;
2973
			ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VER;
2974
			ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_VEV;
2975
			ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VEB;
2976
			ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID;
2977
			ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2978
			ring->semaphore.mbox.signal[RCS] = GEN6_RVESYNC;
2979
			ring->semaphore.mbox.signal[VCS] = GEN6_VVESYNC;
2980
			ring->semaphore.mbox.signal[BCS] = GEN6_BVESYNC;
2981
			ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC;
2982
			ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
5060 serge 2983
		}
4560 Serge 2984
	}
6084 serge 2985
	ring->init_hw = init_ring_common;
4104 Serge 2986
 
2987
	return intel_init_ring_buffer(dev, ring);
2988
}
2989
 
3031 serge 2990
int
6084 serge 2991
intel_ring_flush_all_caches(struct drm_i915_gem_request *req)
3031 serge 2992
{
6084 serge 2993
	struct intel_engine_cs *ring = req->ring;
3031 serge 2994
	int ret;
2995
 
2996
	if (!ring->gpu_caches_dirty)
2997
		return 0;
2998
 
6084 serge 2999
	ret = ring->flush(req, 0, I915_GEM_GPU_DOMAINS);
3031 serge 3000
	if (ret)
3001
		return ret;
3002
 
6084 serge 3003
	trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS);
3031 serge 3004
 
3005
	ring->gpu_caches_dirty = false;
3006
	return 0;
3007
}
3008
 
3009
int
6084 serge 3010
intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
3031 serge 3011
{
6084 serge 3012
	struct intel_engine_cs *ring = req->ring;
3031 serge 3013
	uint32_t flush_domains;
3014
	int ret;
3015
 
3016
	flush_domains = 0;
3017
	if (ring->gpu_caches_dirty)
3018
		flush_domains = I915_GEM_GPU_DOMAINS;
3019
 
6084 serge 3020
	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
3031 serge 3021
	if (ret)
3022
		return ret;
3023
 
6084 serge 3024
	trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
3031 serge 3025
 
3026
	ring->gpu_caches_dirty = false;
3027
	return 0;
3028
}
5060 serge 3029
 
3030
void
3031
intel_stop_ring_buffer(struct intel_engine_cs *ring)
3032
{
3033
	int ret;
3034
 
3035
	if (!intel_ring_initialized(ring))
3036
		return;
3037
 
3038
	ret = intel_ring_idle(ring);
3039
	if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
3040
		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
3041
			  ring->name, ret);
3042
 
3043
	stop_ring(ring);
3044
}