Subversion Repositories Kolibri OS


Rev Author Line No. Line
2332 Serge 1
/*
2
 * Copyright © 2008-2010 Intel Corporation
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 * IN THE SOFTWARE.
22
 *
23
 * Authors:
24
 *    Eric Anholt 
25
 *    Zou Nan hai 
26
 *    Xiang Hai hao
27
 *
28
 */
29
 
3031 serge 30
#include 
2332 Serge 31
#include "i915_drv.h"
3031 serge 32
#include 
2351 Serge 33
#include "i915_trace.h"
2332 Serge 34
#include "intel_drv.h"
35
 
5354 serge 36
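/* A ring counts as initialized once its backing buffer object exists; with
 * execlists the ringbuffer lives in the default context rather than in
 * ring->buffer. */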
bool
37
intel_ring_initialized(struct intel_engine_cs *ring)
38
{
39
	struct drm_device *dev = ring->dev;
5060 serge 40
 
5354 serge 41
	if (!dev)
42
		return false;
43
 
44
	if (i915.enable_execlists) {
45
		struct intel_context *dctx = ring->default_context;
46
		struct intel_ringbuffer *ringbuf = dctx->engine[ring->id].ringbuf;
47
 
48
		return ringbuf->obj;
49
	} else
50
		return ring->buffer && ring->buffer->obj;
51
}
52
 
53
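/* Free space between tail and head, always keeping I915_RING_FREE_SPACE
 * bytes in reserve; a negative result wraps around the ring size. */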
int __intel_ring_space(int head, int tail, int size)
2332 Serge 54
{
5060 serge 55
	int space = head - (tail + I915_RING_FREE_SPACE);
2332 Serge 56
	if (space < 0)
5060 serge 57
		space += size;
2332 Serge 58
	return space;
59
}
60
 
5354 serge 61
int intel_ring_space(struct intel_ringbuffer *ringbuf)
4560 Serge 62
{
5354 serge 63
	return __intel_ring_space(ringbuf->head & HEAD_ADDR,
64
				  ringbuf->tail, ringbuf->size);
5060 serge 65
}
66
 
5354 serge 67
bool intel_ring_stopped(struct intel_engine_cs *ring)
5060 serge 68
{
4560 Serge 69
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
5060 serge 70
	return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
71
}
4560 Serge 72
 
5060 serge 73
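/* Wrap the software tail back into the ring and hand it to the hardware,
 * unless the ring has been marked as stopped for debugging. */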
void __intel_ring_advance(struct intel_engine_cs *ring)
74
{
75
	struct intel_ringbuffer *ringbuf = ring->buffer;
76
	ringbuf->tail &= ringbuf->size - 1;
77
	if (intel_ring_stopped(ring))
4560 Serge 78
		return;
5060 serge 79
	ring->write_tail(ring, ringbuf->tail);
4560 Serge 80
}
81
 
3031 serge 82
static int
5060 serge 83
gen2_render_ring_flush(struct intel_engine_cs *ring,
3031 serge 84
		       u32	invalidate_domains,
85
		       u32	flush_domains)
2332 Serge 86
{
3031 serge 87
	u32 cmd;
88
	int ret;
2332 Serge 89
 
3031 serge 90
	cmd = MI_FLUSH;
91
	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
92
		cmd |= MI_NO_WRITE_FLUSH;
2332 Serge 93
 
3031 serge 94
	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
95
		cmd |= MI_READ_FLUSH;
2332 Serge 96
 
3031 serge 97
	ret = intel_ring_begin(ring, 2);
98
	if (ret)
99
		return ret;
100
 
101
	intel_ring_emit(ring, cmd);
102
	intel_ring_emit(ring, MI_NOOP);
103
	intel_ring_advance(ring);
104
 
105
	return 0;
2332 Serge 106
}
107
 
108
static int
5060 serge 109
gen4_render_ring_flush(struct intel_engine_cs *ring,
2332 Serge 110
		  u32	invalidate_domains,
111
		  u32	flush_domains)
112
{
113
	struct drm_device *dev = ring->dev;
114
	u32 cmd;
115
	int ret;
116
 
117
	/*
118
	 * read/write caches:
119
	 *
120
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
121
	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
122
	 * also flushed at 2d versus 3d pipeline switches.
123
	 *
124
	 * read-only caches:
125
	 *
126
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
127
	 * MI_READ_FLUSH is set, and is always flushed on 965.
128
	 *
129
	 * I915_GEM_DOMAIN_COMMAND may not exist?
130
	 *
131
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
132
	 * invalidated when MI_EXE_FLUSH is set.
133
	 *
134
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
135
	 * invalidated with every MI_FLUSH.
136
	 *
137
	 * TLBs:
138
	 *
139
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
140
	 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
141
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
142
	 * are flushed at any MI_FLUSH.
143
	 */
144
 
145
	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
3031 serge 146
	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
2332 Serge 147
		cmd &= ~MI_NO_WRITE_FLUSH;
148
	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
149
		cmd |= MI_EXE_FLUSH;
150
 
151
	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
152
	    (IS_G4X(dev) || IS_GEN5(dev)))
153
		cmd |= MI_INVALIDATE_ISP;
154
 
155
	ret = intel_ring_begin(ring, 2);
156
	if (ret)
157
		return ret;
158
 
159
	intel_ring_emit(ring, cmd);
160
	intel_ring_emit(ring, MI_NOOP);
161
	intel_ring_advance(ring);
162
 
163
	return 0;
164
}
165
 
2342 Serge 166
/**
167
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
168
 * implementing two workarounds on gen6.  From section 1.4.7.1
169
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
170
 *
171
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
172
 * produced by non-pipelined state commands), software needs to first
173
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
174
 * 0.
175
 *
176
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
177
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
178
 *
179
 * And the workaround for these two requires this workaround first:
180
 *
181
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
182
 * BEFORE the pipe-control with a post-sync op and no write-cache
183
 * flushes.
184
 *
185
 * And this last workaround is tricky because of the requirements on
186
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
187
 * volume 2 part 1:
188
 *
189
 *     "1 of the following must also be set:
190
 *      - Render Target Cache Flush Enable ([12] of DW1)
191
 *      - Depth Cache Flush Enable ([0] of DW1)
192
 *      - Stall at Pixel Scoreboard ([1] of DW1)
193
 *      - Depth Stall ([13] of DW1)
194
 *      - Post-Sync Operation ([13] of DW1)
195
 *      - Notify Enable ([8] of DW1)"
196
 *
197
 * The cache flushes require the workaround flush that triggered this
198
 * one, so we can't use it.  Depth stall would trigger the same.
199
 * Post-sync nonzero is what triggered this second workaround, so we
200
 * can't use that one either.  Notify enable is IRQs, which aren't
201
 * really our business.  That leaves only stall at scoreboard.
202
 */
203
static int
5060 serge 204
intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
2342 Serge 205
{
5060 serge 206
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
2342 Serge 207
	int ret;
208
 
209
 
210
	ret = intel_ring_begin(ring, 6);
211
	if (ret)
212
		return ret;
213
 
214
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
215
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
216
			PIPE_CONTROL_STALL_AT_SCOREBOARD);
217
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
218
	intel_ring_emit(ring, 0); /* low dword */
219
	intel_ring_emit(ring, 0); /* high dword */
220
	intel_ring_emit(ring, MI_NOOP);
221
	intel_ring_advance(ring);
222
 
223
	ret = intel_ring_begin(ring, 6);
224
	if (ret)
225
		return ret;
226
 
227
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
228
	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
229
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
230
	intel_ring_emit(ring, 0);
231
	intel_ring_emit(ring, 0);
232
	intel_ring_emit(ring, MI_NOOP);
233
	intel_ring_advance(ring);
234
 
235
	return 0;
236
}
237
 
238
static int
5060 serge 239
gen6_render_ring_flush(struct intel_engine_cs *ring,
2342 Serge 240
                         u32 invalidate_domains, u32 flush_domains)
241
{
242
	u32 flags = 0;
5060 serge 243
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
2342 Serge 244
	int ret;
245
 
246
	/* Force SNB workarounds for PIPE_CONTROL flushes */
3031 serge 247
	ret = intel_emit_post_sync_nonzero_flush(ring);
248
	if (ret)
249
		return ret;
2342 Serge 250
 
251
	/* Just flush everything.  Experiments have shown that reducing the
252
	 * number of bits based on the write domains has little performance
253
	 * impact.
254
	 */
3031 serge 255
	if (flush_domains) {
256
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
257
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
258
		/*
259
		 * Ensure that any following seqno writes only happen
260
		 * when the render cache is indeed flushed.
261
		 */
262
		flags |= PIPE_CONTROL_CS_STALL;
263
	}
264
	if (invalidate_domains) {
265
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
266
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
267
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
268
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
269
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
270
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
271
		/*
272
		 * TLB invalidate requires a post-sync write.
273
		 */
3243 Serge 274
		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
3031 serge 275
	}
276
 
277
	ret = intel_ring_begin(ring, 4);
278
	if (ret)
279
		return ret;
280
 
281
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
282
	intel_ring_emit(ring, flags);
283
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
284
	intel_ring_emit(ring, 0);
285
	intel_ring_advance(ring);
286
 
287
	return 0;
288
}
289
 
290
static int
5060 serge 291
gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
3031 serge 292
{
293
	int ret;
294
 
295
	ret = intel_ring_begin(ring, 4);
296
	if (ret)
297
		return ret;
298
 
299
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
300
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
301
			      PIPE_CONTROL_STALL_AT_SCOREBOARD);
302
	intel_ring_emit(ring, 0);
303
	intel_ring_emit(ring, 0);
304
	intel_ring_advance(ring);
305
 
306
	return 0;
307
}
308
 
5060 serge 309
static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)
4104 Serge 310
{
311
	int ret;
312
 
313
	if (!ring->fbc_dirty)
314
		return 0;
315
 
4560 Serge 316
	ret = intel_ring_begin(ring, 6);
4104 Serge 317
	if (ret)
318
		return ret;
319
	/* WaFbcNukeOn3DBlt:ivb/hsw */
320
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
321
	intel_ring_emit(ring, MSG_FBC_REND_STATE);
322
	intel_ring_emit(ring, value);
4560 Serge 323
	intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) | MI_SRM_LRM_GLOBAL_GTT);
324
	intel_ring_emit(ring, MSG_FBC_REND_STATE);
325
	intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
4104 Serge 326
	intel_ring_advance(ring);
327
 
328
	ring->fbc_dirty = false;
329
	return 0;
330
}
331
 
3031 serge 332
static int
5060 serge 333
gen7_render_ring_flush(struct intel_engine_cs *ring,
3031 serge 334
		       u32 invalidate_domains, u32 flush_domains)
335
{
336
	u32 flags = 0;
5060 serge 337
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
3031 serge 338
	int ret;
339
 
340
	/*
341
	 * Ensure that any following seqno writes only happen when the render
342
	 * cache is indeed flushed.
343
	 *
344
	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
345
	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
346
	 * don't try to be clever and just set it unconditionally.
347
	 */
348
	flags |= PIPE_CONTROL_CS_STALL;
349
 
350
	/* Just flush everything.  Experiments have shown that reducing the
351
	 * number of bits based on the write domains has little performance
352
	 * impact.
353
	 */
354
	if (flush_domains) {
2342 Serge 355
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
3031 serge 356
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
357
	}
358
	if (invalidate_domains) {
359
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
2342 Serge 360
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
361
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
362
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
363
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
364
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
5354 serge 365
		flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
3031 serge 366
		/*
367
		 * TLB invalidate requires a post-sync write.
368
		 */
369
		flags |= PIPE_CONTROL_QW_WRITE;
3480 Serge 370
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
2342 Serge 371
 
5354 serge 372
		flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
373
 
3031 serge 374
		/* Workaround: we must issue a pipe_control with CS-stall bit
375
		 * set before a pipe_control command that has the state cache
376
		 * invalidate bit set. */
377
		gen7_render_ring_cs_stall_wa(ring);
378
	}
379
 
380
	ret = intel_ring_begin(ring, 4);
2342 Serge 381
	if (ret)
382
		return ret;
383
 
3031 serge 384
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
2342 Serge 385
	intel_ring_emit(ring, flags);
3480 Serge 386
	intel_ring_emit(ring, scratch_addr);
3031 serge 387
	intel_ring_emit(ring, 0);
2342 Serge 388
	intel_ring_advance(ring);
389
 
4560 Serge 390
	if (!invalidate_domains && flush_domains)
4104 Serge 391
		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
392
 
2342 Serge 393
	return 0;
394
}
395
 
4560 Serge 396
static int
5060 serge 397
gen8_emit_pipe_control(struct intel_engine_cs *ring,
398
		       u32 flags, u32 scratch_addr)
399
{
400
	int ret;
401
 
402
	ret = intel_ring_begin(ring, 6);
403
	if (ret)
404
		return ret;
405
 
406
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
407
	intel_ring_emit(ring, flags);
408
	intel_ring_emit(ring, scratch_addr);
409
	intel_ring_emit(ring, 0);
410
	intel_ring_emit(ring, 0);
411
	intel_ring_emit(ring, 0);
412
	intel_ring_advance(ring);
413
 
414
	return 0;
415
}
416
 
417
static int
418
gen8_render_ring_flush(struct intel_engine_cs *ring,
4560 Serge 419
		       u32 invalidate_domains, u32 flush_domains)
420
{
421
	u32 flags = 0;
5060 serge 422
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
4560 Serge 423
	int ret;
424
 
425
	flags |= PIPE_CONTROL_CS_STALL;
426
 
427
	if (flush_domains) {
428
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
429
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
430
	}
431
	if (invalidate_domains) {
432
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
433
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
434
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
435
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
436
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
437
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
438
		flags |= PIPE_CONTROL_QW_WRITE;
439
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
440
 
5060 serge 441
		/* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
442
		ret = gen8_emit_pipe_control(ring,
443
					     PIPE_CONTROL_CS_STALL |
444
					     PIPE_CONTROL_STALL_AT_SCOREBOARD,
445
					     0);
4560 Serge 446
		if (ret)
447
			return ret;
5060 serge 448
	}
4560 Serge 449
 
5354 serge 450
	ret = gen8_emit_pipe_control(ring, flags, scratch_addr);
451
	if (ret)
452
		return ret;
453
 
454
	if (!invalidate_domains && flush_domains)
455
		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
456
 
457
	return 0;
4560 Serge 458
}
459
 
5060 serge 460
static void ring_write_tail(struct intel_engine_cs *ring,
2332 Serge 461
			    u32 value)
462
{
5060 serge 463
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
2332 Serge 464
	I915_WRITE_TAIL(ring, value);
465
}
466
 
5060 serge 467
u64 intel_ring_get_active_head(struct intel_engine_cs *ring)
2332 Serge 468
{
5060 serge 469
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
470
	u64 acthd;
2332 Serge 471
 
5060 serge 472
	if (INTEL_INFO(ring->dev)->gen >= 8)
473
		acthd = I915_READ64_2x32(RING_ACTHD(ring->mmio_base),
474
					 RING_ACTHD_UDW(ring->mmio_base));
475
	else if (INTEL_INFO(ring->dev)->gen >= 4)
476
		acthd = I915_READ(RING_ACTHD(ring->mmio_base));
477
	else
478
		acthd = I915_READ(ACTHD);
479
 
480
	return acthd;
2332 Serge 481
}
482
 
5060 serge 483
static void ring_setup_phys_status_page(struct intel_engine_cs *ring)
4104 Serge 484
{
485
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
486
	u32 addr;
487
 
488
	addr = dev_priv->status_page_dmah->busaddr;
489
	if (INTEL_INFO(ring->dev)->gen >= 4)
490
		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
491
	I915_WRITE(HWS_PGA, addr);
492
}
493
 
5060 serge 494
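/* Ask the hardware to stop the ring (not possible on gen2), then clear CTL,
 * HEAD and TAIL; returns true once HEAD reads back as zero. */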
static bool stop_ring(struct intel_engine_cs *ring)
2332 Serge 495
{
5060 serge 496
	struct drm_i915_private *dev_priv = to_i915(ring->dev);
2332 Serge 497
 
5060 serge 498
	if (!IS_GEN2(ring->dev)) {
499
		I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
5354 serge 500
		if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
501
			DRM_ERROR("%s : timed out trying to stop ring\n", ring->name);
502
			/* Sometimes we observe that the idle flag is not
503
			 * set even though the ring is empty. So double
504
			 * check before giving up.
505
			 */
506
			if (I915_READ_HEAD(ring) != I915_READ_TAIL(ring))
5060 serge 507
				return false;
508
		}
509
	}
3031 serge 510
 
2332 Serge 511
	I915_WRITE_CTL(ring, 0);
512
	I915_WRITE_HEAD(ring, 0);
513
	ring->write_tail(ring, 0);
514
 
5060 serge 515
	if (!IS_GEN2(ring->dev)) {
516
		(void)I915_READ_CTL(ring);
517
		I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
518
	}
2332 Serge 519
 
5060 serge 520
	return (I915_READ_HEAD(ring) & HEAD_ADDR) == 0;
521
}
522
 
523
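/* Common legacy ringbuffer bring-up: stop the ring, set up the status page,
 * program START and CTL, then wait for the ring to report VALID with HEAD
 * at zero. */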
static int init_ring_common(struct intel_engine_cs *ring)
524
{
525
	struct drm_device *dev = ring->dev;
526
	struct drm_i915_private *dev_priv = dev->dev_private;
527
	struct intel_ringbuffer *ringbuf = ring->buffer;
528
	struct drm_i915_gem_object *obj = ringbuf->obj;
529
	int ret = 0;
530
 
531
	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
532
 
533
	if (!stop_ring(ring)) {
534
		/* G45 ring initialization often fails to reset head to zero */
2332 Serge 535
		DRM_DEBUG_KMS("%s head not reset to zero "
536
			      "ctl %08x head %08x tail %08x start %08x\n",
537
			      ring->name,
538
			      I915_READ_CTL(ring),
539
			      I915_READ_HEAD(ring),
540
			      I915_READ_TAIL(ring),
541
			      I915_READ_START(ring));
542
 
5060 serge 543
		if (!stop_ring(ring)) {
2332 Serge 544
			DRM_ERROR("failed to set %s head to zero "
545
				  "ctl %08x head %08x tail %08x start %08x\n",
546
				  ring->name,
547
				  I915_READ_CTL(ring),
548
				  I915_READ_HEAD(ring),
549
				  I915_READ_TAIL(ring),
550
				  I915_READ_START(ring));
5060 serge 551
			ret = -EIO;
552
			goto out;
2332 Serge 553
		}
554
	}
555
 
5060 serge 556
	if (I915_NEED_GFX_HWS(dev))
557
		intel_ring_setup_status_page(ring);
558
	else
559
		ring_setup_phys_status_page(ring);
560
 
561
	/* Enforce ordering by reading HEAD register back */
562
	I915_READ_HEAD(ring);
563
 
3031 serge 564
	/* Initialize the ring. This must happen _after_ we've cleared the ring
565
	 * registers with the above sequence (the readback of the HEAD registers
566
	 * also enforces ordering), otherwise the hw might lose the new ring
567
	 * register values. */
4104 Serge 568
	I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj));
5354 serge 569
 
570
	/* WaClearRingBufHeadRegAtInit:ctg,elk */
571
	if (I915_READ_HEAD(ring))
572
		DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
573
			  ring->name, I915_READ_HEAD(ring));
574
	I915_WRITE_HEAD(ring, 0);
575
	(void)I915_READ_HEAD(ring);
576
 
2332 Serge 577
	I915_WRITE_CTL(ring,
5060 serge 578
			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES)
3031 serge 579
			| RING_VALID);
2332 Serge 580
 
581
	/* If the head is still not zero, the ring is dead */
3031 serge 582
	if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
4104 Serge 583
		     I915_READ_START(ring) == i915_gem_obj_ggtt_offset(obj) &&
3031 serge 584
		     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
2332 Serge 585
		DRM_ERROR("%s initialization failed "
5060 serge 586
			  "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
2332 Serge 587
				ring->name,
5060 serge 588
			  I915_READ_CTL(ring), I915_READ_CTL(ring) & RING_VALID,
589
			  I915_READ_HEAD(ring), I915_READ_TAIL(ring),
590
			  I915_READ_START(ring), (unsigned long)i915_gem_obj_ggtt_offset(obj));
3031 serge 591
		ret = -EIO;
592
		goto out;
2332 Serge 593
	}
594
 
5060 serge 595
	ringbuf->head = I915_READ_HEAD(ring);
596
	ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
5354 serge 597
	ringbuf->space = intel_ring_space(ringbuf);
5060 serge 598
	ringbuf->last_retired_head = -1;
599
 
4104 Serge 600
	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
601
 
3031 serge 602
out:
4560 Serge 603
	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
2332 Serge 604
 
3031 serge 605
	return ret;
2332 Serge 606
}
607
 
5354 serge 608
void
609
intel_fini_pipe_control(struct intel_engine_cs *ring)
2332 Serge 610
{
5354 serge 611
	struct drm_device *dev = ring->dev;
612
 
613
	if (ring->scratch.obj == NULL)
614
		return;
615
 
616
	if (INTEL_INFO(dev)->gen >= 5) {
617
		kunmap(sg_page(ring->scratch.obj->pages->sgl));
618
		i915_gem_object_ggtt_unpin(ring->scratch.obj);
619
	}
620
 
621
	drm_gem_object_unreference(&ring->scratch.obj->base);
622
	ring->scratch.obj = NULL;
623
}
624
 
625
int
626
intel_init_pipe_control(struct intel_engine_cs *ring)
627
{
2332 Serge 628
	int ret;
629
 
4104 Serge 630
	if (ring->scratch.obj)
2332 Serge 631
		return 0;
632
 
4104 Serge 633
	ring->scratch.obj = i915_gem_alloc_object(ring->dev, 4096);
634
	if (ring->scratch.obj == NULL) {
2332 Serge 635
		DRM_ERROR("Failed to allocate seqno page\n");
636
		ret = -ENOMEM;
637
		goto err;
638
	}
639
 
5060 serge 640
	ret = i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
641
	if (ret)
642
		goto err_unref;
2332 Serge 643
 
5060 serge 644
	ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0);
2332 Serge 645
	if (ret)
646
		goto err_unref;
647
 
4104 Serge 648
	ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(ring->scratch.obj);
5354 serge 649
	ring->scratch.cpu_page = kmap(sg_page(ring->scratch.obj->pages->sgl));
4104 Serge 650
	if (ring->scratch.cpu_page == NULL) {
651
		ret = -ENOMEM;
2332 Serge 652
		goto err_unpin;
4104 Serge 653
	}
2332 Serge 654
 
3480 Serge 655
	DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
4104 Serge 656
			 ring->name, ring->scratch.gtt_offset);
2332 Serge 657
	return 0;
658
 
659
err_unpin:
5060 serge 660
	i915_gem_object_ggtt_unpin(ring->scratch.obj);
2332 Serge 661
err_unref:
4104 Serge 662
	drm_gem_object_unreference(&ring->scratch.obj->base);
2332 Serge 663
err:
664
	return ret;
665
}
666
 
5354 serge 667
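/* Replay the workaround register writes collected in dev_priv->workarounds
 * as a single MI_LOAD_REGISTER_IMM block, flushing GPU caches before and
 * after. */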
static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,
668
				       struct intel_context *ctx)
669
{
670
	int ret, i;
671
	struct drm_device *dev = ring->dev;
672
	struct drm_i915_private *dev_priv = dev->dev_private;
673
	struct i915_workarounds *w = &dev_priv->workarounds;
674
 
675
	if (WARN_ON(w->count == 0))
676
		return 0;
677
 
678
	ring->gpu_caches_dirty = true;
679
	ret = intel_ring_flush_all_caches(ring);
680
	if (ret)
681
		return ret;
682
 
683
	ret = intel_ring_begin(ring, (w->count * 2 + 2));
684
	if (ret)
685
		return ret;
686
 
687
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count));
688
	for (i = 0; i < w->count; i++) {
689
		intel_ring_emit(ring, w->reg[i].addr);
690
		intel_ring_emit(ring, w->reg[i].value);
691
	}
692
	intel_ring_emit(ring, MI_NOOP);
693
 
694
	intel_ring_advance(ring);
695
 
696
	ring->gpu_caches_dirty = true;
697
	ret = intel_ring_flush_all_caches(ring);
698
	if (ret)
699
		return ret;
700
 
701
	DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);
702
 
703
	return 0;
704
}
705
 
706
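/* Append one register/mask/value entry to the per-device workaround table
 * (bounded by I915_MAX_WA_REGS); it is applied later by
 * intel_ring_workarounds_emit(). */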
static int wa_add(struct drm_i915_private *dev_priv,
707
		  const u32 addr, const u32 mask, const u32 val)
708
{
709
	const u32 idx = dev_priv->workarounds.count;
710
 
711
	if (WARN_ON(idx >= I915_MAX_WA_REGS))
712
		return -ENOSPC;
713
 
714
	dev_priv->workarounds.reg[idx].addr = addr;
715
	dev_priv->workarounds.reg[idx].value = val;
716
	dev_priv->workarounds.reg[idx].mask = mask;
717
 
718
	dev_priv->workarounds.count++;
719
 
720
	return 0;
721
}
722
 
723
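/* Helpers for filling the workaround table; the *_MASKED variants use the
 * hardware's masked-write format so only the selected bits are touched. */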
#define WA_REG(addr, mask, val) { \
724
		const int r = wa_add(dev_priv, (addr), (mask), (val)); \
725
		if (r) \
726
			return r; \
727
	}
728
 
729
#define WA_SET_BIT_MASKED(addr, mask) \
730
	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
731
 
732
#define WA_CLR_BIT_MASKED(addr, mask) \
733
	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))
734
 
735
#define WA_SET_FIELD_MASKED(addr, mask, value) \
736
	WA_REG(addr, mask, _MASKED_FIELD(mask, value))
737
 
738
#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask))
739
#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask))
740
 
741
#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)
742
 
743
static int bdw_init_workarounds(struct intel_engine_cs *ring)
744
{
745
	struct drm_device *dev = ring->dev;
746
	struct drm_i915_private *dev_priv = dev->dev_private;
747
 
748
	/* WaDisablePartialInstShootdown:bdw */
749
	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
750
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
751
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE |
752
			  STALL_DOP_GATING_DISABLE);
753
 
754
	/* WaDisableDopClockGating:bdw */
755
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
756
			  DOP_CLOCK_GATING_DISABLE);
757
 
758
	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
759
			  GEN8_SAMPLER_POWER_BYPASS_DIS);
760
 
761
	/* Use Force Non-Coherent whenever executing a 3D context. This is a
762
	 * workaround for a possible hang in the unlikely event a TLB
763
	 * invalidation occurs during a PSD flush.
764
	 */
765
	/* WaDisableFenceDestinationToSLM:bdw (GT3 pre-production) */
766
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
767
			  HDC_FORCE_NON_COHERENT |
768
			  (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
769
 
770
	/* Wa4x4STCOptimizationDisable:bdw */
771
	WA_SET_BIT_MASKED(CACHE_MODE_1,
772
			  GEN8_4x4_STC_OPTIMIZATION_DISABLE);
773
 
774
	/*
775
	 * BSpec recommends 8x4 when MSAA is used,
776
	 * however in practice 16x4 seems fastest.
777
	 *
778
	 * Note that PS/WM thread counts depend on the WIZ hashing
779
	 * disable bit, which we don't touch here, but it's good
780
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
781
	 */
782
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
783
			    GEN6_WIZ_HASHING_MASK,
784
			    GEN6_WIZ_HASHING_16x4);
785
 
786
	return 0;
787
}
788
 
789
static int chv_init_workarounds(struct intel_engine_cs *ring)
790
{
791
	struct drm_device *dev = ring->dev;
792
	struct drm_i915_private *dev_priv = dev->dev_private;
793
 
794
	/* WaDisablePartialInstShootdown:chv */
795
	/* WaDisableThreadStallDopClockGating:chv */
796
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
797
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE |
798
			  STALL_DOP_GATING_DISABLE);
799
 
800
	/* Use Force Non-Coherent whenever executing a 3D context. This is a
801
	 * workaround for a possible hang in the unlikely event a TLB
802
	 * invalidation occurs during a PSD flush.
803
	 */
804
	/* WaForceEnableNonCoherent:chv */
805
	/* WaHdcDisableFetchWhenMasked:chv */
806
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
807
			  HDC_FORCE_NON_COHERENT |
808
			  HDC_DONOT_FETCH_MEM_WHEN_MASKED);
809
 
810
	return 0;
811
}
812
 
813
int init_workarounds_ring(struct intel_engine_cs *ring)
814
{
815
	struct drm_device *dev = ring->dev;
816
	struct drm_i915_private *dev_priv = dev->dev_private;
817
 
818
	WARN_ON(ring->id != RCS);
819
 
820
	dev_priv->workarounds.count = 0;
821
 
822
	if (IS_BROADWELL(dev))
823
		return bdw_init_workarounds(ring);
824
 
825
	if (IS_CHERRYVIEW(dev))
826
		return chv_init_workarounds(ring);
827
 
828
	return 0;
829
}
830
 
5060 serge 831
static int init_render_ring(struct intel_engine_cs *ring)
2332 Serge 832
{
833
	struct drm_device *dev = ring->dev;
834
	struct drm_i915_private *dev_priv = dev->dev_private;
835
	int ret = init_ring_common(ring);
5060 serge 836
	if (ret)
837
		return ret;
2332 Serge 838
 
5060 serge 839
	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
840
	if (INTEL_INFO(dev)->gen >= 4 && INTEL_INFO(dev)->gen < 7)
3031 serge 841
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
3243 Serge 842
 
843
	/* We need to disable the AsyncFlip performance optimisations in order
844
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
845
	 * programmed to '1' on all products.
4104 Serge 846
	 *
5060 serge 847
	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
3243 Serge 848
	 */
5354 serge 849
	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 9)
3243 Serge 850
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
851
 
852
	/* Required for the hardware to program scanline values for waiting */
5060 serge 853
	/* WaEnableFlushTlbInvalidationMode:snb */
3243 Serge 854
	if (INTEL_INFO(dev)->gen == 6)
855
		I915_WRITE(GFX_MODE,
5060 serge 856
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
3243 Serge 857
 
5060 serge 858
	/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
2332 Serge 859
	if (IS_GEN7(dev))
860
		I915_WRITE(GFX_MODE_GEN7,
5060 serge 861
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
3031 serge 862
			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
2332 Serge 863
 
2342 Serge 864
	if (INTEL_INFO(dev)->gen >= 5) {
5354 serge 865
		ret = intel_init_pipe_control(ring);
2332 Serge 866
		if (ret)
867
			return ret;
868
	}
869
 
3031 serge 870
	if (IS_GEN6(dev)) {
871
		/* From the Sandybridge PRM, volume 1 part 3, page 24:
872
		 * "If this bit is set, STCunit will have LRA as replacement
873
		 *  policy. [...] This bit must be reset.  LRA replacement
874
		 *  policy is not supported."
875
		 */
876
		I915_WRITE(CACHE_MODE_0,
877
			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
2342 Serge 878
	}
879
 
3031 serge 880
	if (INTEL_INFO(dev)->gen >= 6)
881
		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
882
 
4560 Serge 883
	if (HAS_L3_DPF(dev))
884
		I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
3031 serge 885
 
5354 serge 886
	return init_workarounds_ring(ring);
2332 Serge 887
}
888
 
5060 serge 889
static void render_ring_cleanup(struct intel_engine_cs *ring)
2332 Serge 890
{
3480 Serge 891
	struct drm_device *dev = ring->dev;
5128 serge 892
	struct drm_i915_private *dev_priv = dev->dev_private;
3480 Serge 893
 
5128 serge 894
	if (dev_priv->semaphore_obj) {
895
		i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
896
		drm_gem_object_unreference(&dev_priv->semaphore_obj->base);
897
		dev_priv->semaphore_obj = NULL;
898
	}
899
 
5354 serge 900
	intel_fini_pipe_control(ring);
2332 Serge 901
}
902
 
5060 serge 903
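/* gen8 semaphore signalling from the render ring: a PIPE_CONTROL QW write of
 * the new seqno into each waiter's GGTT slot, followed by
 * MI_SEMAPHORE_SIGNAL. */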
static int gen8_rcs_signal(struct intel_engine_cs *signaller,
904
			   unsigned int num_dwords)
2332 Serge 905
{
5060 serge 906
#define MBOX_UPDATE_DWORDS 8
907
	struct drm_device *dev = signaller->dev;
908
	struct drm_i915_private *dev_priv = dev->dev_private;
909
	struct intel_engine_cs *waiter;
910
	int i, ret, num_rings;
911
 
912
	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
913
	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
914
#undef MBOX_UPDATE_DWORDS
915
 
916
	ret = intel_ring_begin(signaller, num_dwords);
917
	if (ret)
918
		return ret;
919
 
920
	for_each_ring(waiter, dev_priv, i) {
921
		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
922
		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
923
			continue;
924
 
925
		intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
926
		intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
927
					   PIPE_CONTROL_QW_WRITE |
928
					   PIPE_CONTROL_FLUSH_ENABLE);
929
		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
930
		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
931
		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
932
		intel_ring_emit(signaller, 0);
933
		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
934
					   MI_SEMAPHORE_TARGET(waiter->id));
935
		intel_ring_emit(signaller, 0);
936
	}
937
 
938
	return 0;
2332 Serge 939
}
940
 
5060 serge 941
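/* Same as gen8_rcs_signal(), but for the non-render rings: the per-ring
 * seqno write is done with MI_FLUSH_DW's post-sync store instead of
 * PIPE_CONTROL. */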
static int gen8_xcs_signal(struct intel_engine_cs *signaller,
942
			   unsigned int num_dwords)
943
{
944
#define MBOX_UPDATE_DWORDS 6
945
	struct drm_device *dev = signaller->dev;
946
	struct drm_i915_private *dev_priv = dev->dev_private;
947
	struct intel_engine_cs *waiter;
948
	int i, ret, num_rings;
949
 
950
	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
951
	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
952
#undef MBOX_UPDATE_DWORDS
953
 
954
	ret = intel_ring_begin(signaller, num_dwords);
955
	if (ret)
956
		return ret;
957
 
958
	for_each_ring(waiter, dev_priv, i) {
959
		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
960
		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
961
			continue;
962
 
963
		intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
964
					   MI_FLUSH_DW_OP_STOREDW);
965
		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
966
					   MI_FLUSH_DW_USE_GTT);
967
		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
968
		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
969
		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
970
					   MI_SEMAPHORE_TARGET(waiter->id));
971
		intel_ring_emit(signaller, 0);
972
	}
973
 
974
	return 0;
975
}
976
 
977
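/* gen6/gen7 semaphore signalling: write the new seqno into every other
 * ring's mailbox register with MI_LOAD_REGISTER_IMM. */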
static int gen6_signal(struct intel_engine_cs *signaller,
978
		       unsigned int num_dwords)
979
{
980
	struct drm_device *dev = signaller->dev;
981
	struct drm_i915_private *dev_priv = dev->dev_private;
982
	struct intel_engine_cs *useless;
983
	int i, ret, num_rings;
984
 
985
#define MBOX_UPDATE_DWORDS 3
986
	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
987
	num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
988
#undef MBOX_UPDATE_DWORDS
989
 
990
	ret = intel_ring_begin(signaller, num_dwords);
991
	if (ret)
992
		return ret;
993
 
994
	for_each_ring(useless, dev_priv, i) {
995
		u32 mbox_reg = signaller->semaphore.mbox.signal[i];
996
		if (mbox_reg != GEN6_NOSYNC) {
997
			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
998
			intel_ring_emit(signaller, mbox_reg);
999
			intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
1000
		}
1001
	}
1002
 
1003
	/* If num_dwords was rounded, make sure the tail pointer is correct */
1004
	if (num_rings % 2 == 0)
1005
		intel_ring_emit(signaller, MI_NOOP);
1006
 
1007
	return 0;
1008
}
1009
 
2342 Serge 1010
/**
1011
 * gen6_add_request - Update the semaphore mailbox registers
1012
 *
1013
 * @ring - ring that is adding a request
1014
 * @seqno - return seqno stuck into the ring
1015
 *
1016
 * Update the mailbox registers in the *other* rings with the current seqno.
1017
 * This acts like a signal in the canonical semaphore.
1018
 */
2332 Serge 1019
static int
5060 serge 1020
gen6_add_request(struct intel_engine_cs *ring)
2332 Serge 1021
{
5060 serge 1022
	int ret;
2332 Serge 1023
 
5060 serge 1024
	if (ring->semaphore.signal)
1025
		ret = ring->semaphore.signal(ring, 4);
1026
	else
1027
		ret = intel_ring_begin(ring, 4);
4560 Serge 1028
 
2332 Serge 1029
	if (ret)
1030
		return ret;
1031
 
1032
	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
1033
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
4560 Serge 1034
	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
2332 Serge 1035
	intel_ring_emit(ring, MI_USER_INTERRUPT);
4560 Serge 1036
	__intel_ring_advance(ring);
2332 Serge 1037
 
1038
	return 0;
1039
}
1040
 
3480 Serge 1041
static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
1042
					      u32 seqno)
1043
{
1044
	struct drm_i915_private *dev_priv = dev->dev_private;
1045
	return dev_priv->last_seqno < seqno;
1046
}
1047
 
2342 Serge 1048
/**
1049
 * intel_ring_sync - sync the waiter to the signaller on seqno
1050
 *
1051
 * @waiter - ring that is waiting
1052
 * @signaller - ring which has, or will signal
1053
 * @seqno - seqno which the waiter will block on
1054
 */
5060 serge 1055
 
2342 Serge 1056
static int
5060 serge 1057
gen8_ring_sync(struct intel_engine_cs *waiter,
1058
	       struct intel_engine_cs *signaller,
1059
	       u32 seqno)
1060
{
1061
	struct drm_i915_private *dev_priv = waiter->dev->dev_private;
1062
	int ret;
1063
 
1064
	ret = intel_ring_begin(waiter, 4);
1065
	if (ret)
1066
		return ret;
1067
 
1068
	intel_ring_emit(waiter, MI_SEMAPHORE_WAIT |
1069
				MI_SEMAPHORE_GLOBAL_GTT |
1070
				MI_SEMAPHORE_POLL |
1071
				MI_SEMAPHORE_SAD_GTE_SDD);
1072
	intel_ring_emit(waiter, seqno);
1073
	intel_ring_emit(waiter,
1074
			lower_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
1075
	intel_ring_emit(waiter,
1076
			upper_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
1077
	intel_ring_advance(waiter);
1078
	return 0;
1079
}
1080
 
1081
static int
1082
gen6_ring_sync(struct intel_engine_cs *waiter,
1083
	       struct intel_engine_cs *signaller,
2332 Serge 1084
		u32 seqno)
1085
{
2342 Serge 1086
	u32 dw1 = MI_SEMAPHORE_MBOX |
1087
		  MI_SEMAPHORE_COMPARE |
1088
		  MI_SEMAPHORE_REGISTER;
5060 serge 1089
	u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id];
1090
	int ret;
2332 Serge 1091
 
3031 serge 1092
	/* Throughout all of the GEM code, seqno passed implies our current
1093
	 * seqno is >= the last seqno executed. However for hardware the
1094
	 * comparison is strictly greater than.
1095
	 */
1096
	seqno -= 1;
1097
 
5060 serge 1098
	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
3031 serge 1099
 
2342 Serge 1100
	ret = intel_ring_begin(waiter, 4);
2332 Serge 1101
	if (ret)
1102
		return ret;
1103
 
3480 Serge 1104
	/* If seqno wrap happened, omit the wait with no-ops */
1105
	if (likely(!i915_gem_has_seqno_wrapped(waiter->dev, seqno))) {
5060 serge 1106
		intel_ring_emit(waiter, dw1 | wait_mbox);
2342 Serge 1107
		intel_ring_emit(waiter, seqno);
1108
		intel_ring_emit(waiter, 0);
1109
		intel_ring_emit(waiter, MI_NOOP);
3480 Serge 1110
	} else {
1111
		intel_ring_emit(waiter, MI_NOOP);
1112
		intel_ring_emit(waiter, MI_NOOP);
1113
		intel_ring_emit(waiter, MI_NOOP);
1114
		intel_ring_emit(waiter, MI_NOOP);
1115
	}
2342 Serge 1116
	intel_ring_advance(waiter);
2332 Serge 1117
 
1118
	return 0;
1119
}
1120
 
1121
#define PIPE_CONTROL_FLUSH(ring__, addr__)					\
1122
do {									\
2342 Serge 1123
	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |		\
1124
		 PIPE_CONTROL_DEPTH_STALL);				\
2332 Serge 1125
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);			\
1126
	intel_ring_emit(ring__, 0);							\
1127
	intel_ring_emit(ring__, 0);							\
1128
} while (0)
1129
 
1130
static int
5060 serge 1131
pc_render_add_request(struct intel_engine_cs *ring)
2332 Serge 1132
{
5060 serge 1133
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
2332 Serge 1134
	int ret;
1135
 
1136
	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
1137
	 * incoherent with writes to memory, i.e. completely fubar,
1138
	 * so we need to use PIPE_NOTIFY instead.
1139
	 *
1140
	 * However, we also need to workaround the qword write
1141
	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
1142
	 * memory before requesting an interrupt.
1143
	 */
1144
	ret = intel_ring_begin(ring, 32);
1145
	if (ret)
1146
		return ret;
1147
 
2342 Serge 1148
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
1149
			PIPE_CONTROL_WRITE_FLUSH |
1150
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
4104 Serge 1151
	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
4560 Serge 1152
	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
2332 Serge 1153
	intel_ring_emit(ring, 0);
1154
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
5060 serge 1155
	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
2332 Serge 1156
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
5060 serge 1157
	scratch_addr += 2 * CACHELINE_BYTES;
2332 Serge 1158
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
5060 serge 1159
	scratch_addr += 2 * CACHELINE_BYTES;
2332 Serge 1160
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
5060 serge 1161
	scratch_addr += 2 * CACHELINE_BYTES;
2332 Serge 1162
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
5060 serge 1163
	scratch_addr += 2 * CACHELINE_BYTES;
2332 Serge 1164
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
3031 serge 1165
 
2342 Serge 1166
	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
1167
			PIPE_CONTROL_WRITE_FLUSH |
1168
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
2332 Serge 1169
			PIPE_CONTROL_NOTIFY);
4104 Serge 1170
	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
4560 Serge 1171
	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
2332 Serge 1172
	intel_ring_emit(ring, 0);
4560 Serge 1173
	__intel_ring_advance(ring);
2332 Serge 1174
 
1175
	return 0;
1176
}
1177
 
1178
static u32
5060 serge 1179
gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
2342 Serge 1180
{
1181
	/* Workaround to force correct ordering between irq and seqno writes on
1182
	 * ivb (and maybe also on snb) by reading from a CS register (like
1183
	 * ACTHD) before reading the status page. */
5060 serge 1184
	if (!lazy_coherency) {
1185
		struct drm_i915_private *dev_priv = ring->dev->dev_private;
1186
		POSTING_READ(RING_ACTHD(ring->mmio_base));
1187
	}
1188
 
2342 Serge 1189
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
1190
}
1191
 
1192
static u32
5060 serge 1193
ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
2332 Serge 1194
{
1195
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
1196
}
1197
 
3480 Serge 1198
static void
5060 serge 1199
ring_set_seqno(struct intel_engine_cs *ring, u32 seqno)
3480 Serge 1200
{
1201
	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
1202
}
1203
 
2332 Serge 1204
static u32
5060 serge 1205
pc_render_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
2332 Serge 1206
{
4104 Serge 1207
	return ring->scratch.cpu_page[0];
2332 Serge 1208
}
1209
 
3480 Serge 1210
static void
5060 serge 1211
pc_render_set_seqno(struct intel_engine_cs *ring, u32 seqno)
3480 Serge 1212
{
4104 Serge 1213
	ring->scratch.cpu_page[0] = seqno;
3480 Serge 1214
}
1215
 
3031 serge 1216
static bool
5060 serge 1217
gen5_ring_get_irq(struct intel_engine_cs *ring)
2332 Serge 1218
{
3031 serge 1219
	struct drm_device *dev = ring->dev;
5060 serge 1220
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 1221
	unsigned long flags;
1222
 
5354 serge 1223
	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
3031 serge 1224
		return false;
1225
 
1226
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
4104 Serge 1227
	if (ring->irq_refcount++ == 0)
5060 serge 1228
		gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
3031 serge 1229
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1230
 
1231
	return true;
2332 Serge 1232
}
1233
 
1234
static void
5060 serge 1235
gen5_ring_put_irq(struct intel_engine_cs *ring)
2332 Serge 1236
{
3031 serge 1237
	struct drm_device *dev = ring->dev;
5060 serge 1238
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 1239
	unsigned long flags;
1240
 
1241
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
4104 Serge 1242
	if (--ring->irq_refcount == 0)
5060 serge 1243
		gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
3031 serge 1244
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
2332 Serge 1245
}
1246
 
3031 serge 1247
static bool
5060 serge 1248
i9xx_ring_get_irq(struct intel_engine_cs *ring)
2332 Serge 1249
{
3031 serge 1250
	struct drm_device *dev = ring->dev;
5060 serge 1251
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 1252
	unsigned long flags;
1253
 
5354 serge 1254
	if (!intel_irqs_enabled(dev_priv))
3031 serge 1255
		return false;
1256
 
1257
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1258
	if (ring->irq_refcount++ == 0) {
1259
		dev_priv->irq_mask &= ~ring->irq_enable_mask;
2332 Serge 1260
		I915_WRITE(IMR, dev_priv->irq_mask);
1261
		POSTING_READ(IMR);
3031 serge 1262
	}
1263
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1264
 
1265
	return true;
2332 Serge 1266
}
1267
 
1268
static void
5060 serge 1269
i9xx_ring_put_irq(struct intel_engine_cs *ring)
2332 Serge 1270
{
3031 serge 1271
	struct drm_device *dev = ring->dev;
5060 serge 1272
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 1273
	unsigned long flags;
1274
 
1275
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1276
	if (--ring->irq_refcount == 0) {
1277
		dev_priv->irq_mask |= ring->irq_enable_mask;
2332 Serge 1278
		I915_WRITE(IMR, dev_priv->irq_mask);
1279
		POSTING_READ(IMR);
3031 serge 1280
	}
1281
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
2332 Serge 1282
}
1283
 
1284
static bool
5060 serge 1285
i8xx_ring_get_irq(struct intel_engine_cs *ring)
2332 Serge 1286
{
1287
	struct drm_device *dev = ring->dev;
5060 serge 1288
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 1289
	unsigned long flags;
2332 Serge 1290
 
5354 serge 1291
	if (!intel_irqs_enabled(dev_priv))
2332 Serge 1292
		return false;
1293
 
3031 serge 1294
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
2332 Serge 1295
	if (ring->irq_refcount++ == 0) {
3031 serge 1296
		dev_priv->irq_mask &= ~ring->irq_enable_mask;
1297
		I915_WRITE16(IMR, dev_priv->irq_mask);
1298
		POSTING_READ16(IMR);
2332 Serge 1299
	}
3031 serge 1300
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
2332 Serge 1301
 
1302
	return true;
1303
}
1304
 
1305
static void
5060 serge 1306
i8xx_ring_put_irq(struct intel_engine_cs *ring)
2332 Serge 1307
{
1308
	struct drm_device *dev = ring->dev;
5060 serge 1309
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 1310
	unsigned long flags;
2332 Serge 1311
 
3031 serge 1312
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
2332 Serge 1313
	if (--ring->irq_refcount == 0) {
3031 serge 1314
		dev_priv->irq_mask |= ring->irq_enable_mask;
1315
		I915_WRITE16(IMR, dev_priv->irq_mask);
1316
		POSTING_READ16(IMR);
2332 Serge 1317
	}
3031 serge 1318
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
2332 Serge 1319
}
1320
 
5060 serge 1321
void intel_ring_setup_status_page(struct intel_engine_cs *ring)
2332 Serge 1322
{
1323
	struct drm_device *dev = ring->dev;
5060 serge 1324
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
2332 Serge 1325
	u32 mmio = 0;
1326
 
1327
	/* The ring status page addresses are no longer next to the rest of
1328
	 * the ring registers as of gen7.
1329
	 */
1330
	if (IS_GEN7(dev)) {
1331
		switch (ring->id) {
3031 serge 1332
		case RCS:
2332 Serge 1333
			mmio = RENDER_HWS_PGA_GEN7;
1334
			break;
3031 serge 1335
		case BCS:
2332 Serge 1336
			mmio = BLT_HWS_PGA_GEN7;
1337
			break;
5060 serge 1338
		/*
1339
		 * VCS2 actually doesn't exist on Gen7. Only shut up
1340
		 * gcc switch check warning
1341
		 */
1342
		case VCS2:
3031 serge 1343
		case VCS:
2332 Serge 1344
			mmio = BSD_HWS_PGA_GEN7;
1345
			break;
4104 Serge 1346
		case VECS:
1347
			mmio = VEBOX_HWS_PGA_GEN7;
1348
			break;
2332 Serge 1349
		}
1350
	} else if (IS_GEN6(ring->dev)) {
1351
		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
1352
	} else {
4560 Serge 1353
		/* XXX: gen8 returns to sanity */
2332 Serge 1354
		mmio = RING_HWS_PGA(ring->mmio_base);
1355
	}
1356
 
1357
	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
1358
	POSTING_READ(mmio);
3746 Serge 1359
 
5060 serge 1360
	/*
1361
	 * Flush the TLB for this page
1362
	 *
1363
	 * FIXME: These two bits have disappeared on gen8, so a question
1364
	 * arises: do we still need this and if so how should we go about
1365
	 * invalidating the TLB?
1366
	 */
1367
	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
4104 Serge 1368
		u32 reg = RING_INSTPM(ring->mmio_base);
5060 serge 1369
 
1370
		/* ring should be idle before issuing a sync flush */
1371
		WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
1372
 
4104 Serge 1373
		I915_WRITE(reg,
1374
			   _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
1375
					      INSTPM_SYNC_FLUSH));
1376
		if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
1377
			     1000))
1378
			DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
1379
				  ring->name);
1380
	}
2332 Serge 1381
}
1382
 
1383
static int
5060 serge 1384
bsd_ring_flush(struct intel_engine_cs *ring,
2332 Serge 1385
	       u32     invalidate_domains,
1386
	       u32     flush_domains)
1387
{
1388
	int ret;
1389
 
1390
	ret = intel_ring_begin(ring, 2);
1391
	if (ret)
1392
		return ret;
1393
 
1394
	intel_ring_emit(ring, MI_FLUSH);
1395
	intel_ring_emit(ring, MI_NOOP);
1396
	intel_ring_advance(ring);
1397
	return 0;
1398
}
1399
 
1400
static int
5060 serge 1401
i9xx_add_request(struct intel_engine_cs *ring)
2332 Serge 1402
{
1403
	int ret;
1404
 
1405
	ret = intel_ring_begin(ring, 4);
1406
	if (ret)
1407
		return ret;
1408
 
1409
	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
1410
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
4560 Serge 1411
	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
2332 Serge 1412
	intel_ring_emit(ring, MI_USER_INTERRUPT);
4560 Serge 1413
	__intel_ring_advance(ring);
2332 Serge 1414
 
1415
	return 0;
1416
}
1417
 
1418
static bool
5060 serge 1419
gen6_ring_get_irq(struct intel_engine_cs *ring)
2332 Serge 1420
{
1421
	struct drm_device *dev = ring->dev;
5060 serge 1422
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 1423
	unsigned long flags;
2332 Serge 1424
 
5354 serge 1425
	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
2332 Serge 1426
	       return false;
1427
 
3031 serge 1428
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
2332 Serge 1429
	if (ring->irq_refcount++ == 0) {
4560 Serge 1430
		if (HAS_L3_DPF(dev) && ring->id == RCS)
4104 Serge 1431
			I915_WRITE_IMR(ring,
1432
				       ~(ring->irq_enable_mask |
4560 Serge 1433
					 GT_PARITY_ERROR(dev)));
3031 serge 1434
		else
1435
			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
5060 serge 1436
		gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
2332 Serge 1437
	}
3031 serge 1438
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
2332 Serge 1439
 
2351 Serge 1440
	return true;
2332 Serge 1441
}
1442
 
1443
static void
5060 serge 1444
gen6_ring_put_irq(struct intel_engine_cs *ring)
2332 Serge 1445
{
1446
	struct drm_device *dev = ring->dev;
5060 serge 1447
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 1448
	unsigned long flags;
2332 Serge 1449
 
3031 serge 1450
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
2332 Serge 1451
	if (--ring->irq_refcount == 0) {
4560 Serge 1452
		if (HAS_L3_DPF(dev) && ring->id == RCS)
1453
			I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
3031 serge 1454
		else
1455
			I915_WRITE_IMR(ring, ~0);
5060 serge 1456
		gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
2332 Serge 1457
	}
3031 serge 1458
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
2332 Serge 1459
}
1460
 
4104 Serge 1461
static bool
5060 serge 1462
hsw_vebox_get_irq(struct intel_engine_cs *ring)
4104 Serge 1463
{
1464
	struct drm_device *dev = ring->dev;
1465
	struct drm_i915_private *dev_priv = dev->dev_private;
1466
	unsigned long flags;
1467
 
5354 serge 1468
	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
4104 Serge 1469
		return false;
1470
 
1471
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1472
	if (ring->irq_refcount++ == 0) {
1473
		I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
5060 serge 1474
		gen6_enable_pm_irq(dev_priv, ring->irq_enable_mask);
4104 Serge 1475
	}
1476
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1477
 
1478
	return true;
1479
}
1480
 
1481
static void
5060 serge 1482
hsw_vebox_put_irq(struct intel_engine_cs *ring)
4104 Serge 1483
{
1484
	struct drm_device *dev = ring->dev;
1485
	struct drm_i915_private *dev_priv = dev->dev_private;
1486
	unsigned long flags;
1487
 
1488
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1489
	if (--ring->irq_refcount == 0) {
1490
		I915_WRITE_IMR(ring, ~0);
5060 serge 1491
		gen6_disable_pm_irq(dev_priv, ring->irq_enable_mask);
4104 Serge 1492
	}
1493
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1494
}
1495
 
4560 Serge 1496
static bool
5060 serge 1497
gen8_ring_get_irq(struct intel_engine_cs *ring)
4560 Serge 1498
{
1499
	struct drm_device *dev = ring->dev;
1500
	struct drm_i915_private *dev_priv = dev->dev_private;
1501
	unsigned long flags;
1502
 
5354 serge 1503
	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
4560 Serge 1504
		return false;
1505
 
1506
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1507
	if (ring->irq_refcount++ == 0) {
1508
		if (HAS_L3_DPF(dev) && ring->id == RCS) {
1509
			I915_WRITE_IMR(ring,
1510
				       ~(ring->irq_enable_mask |
1511
					 GT_RENDER_L3_PARITY_ERROR_INTERRUPT));
1512
		} else {
1513
			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
1514
		}
1515
		POSTING_READ(RING_IMR(ring->mmio_base));
1516
	}
1517
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1518
 
1519
	return true;
1520
}
1521
 
1522
static void
5060 serge 1523
gen8_ring_put_irq(struct intel_engine_cs *ring)
4560 Serge 1524
{
1525
	struct drm_device *dev = ring->dev;
1526
	struct drm_i915_private *dev_priv = dev->dev_private;
1527
	unsigned long flags;
1528
 
1529
	spin_lock_irqsave(&dev_priv->irq_lock, flags);
1530
	if (--ring->irq_refcount == 0) {
1531
		if (HAS_L3_DPF(dev) && ring->id == RCS) {
1532
			I915_WRITE_IMR(ring,
1533
				       ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
1534
		} else {
1535
			I915_WRITE_IMR(ring, ~0);
1536
		}
1537
		POSTING_READ(RING_IMR(ring->mmio_base));
1538
	}
1539
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
1540
}
1541
 
2332 Serge 1542
static int
5060 serge 1543
i965_dispatch_execbuffer(struct intel_engine_cs *ring,
1544
			 u64 offset, u32 length,
3243 Serge 1545
			 unsigned flags)
2332 Serge 1546
{
1547
	int ret;
1548
 
1549
	ret = intel_ring_begin(ring, 2);
1550
	if (ret)
1551
		return ret;
1552
 
1553
	intel_ring_emit(ring,
3031 serge 1554
			MI_BATCH_BUFFER_START |
1555
			MI_BATCH_GTT |
3243 Serge 1556
			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
2332 Serge 1557
	intel_ring_emit(ring, offset);
1558
	intel_ring_advance(ring);
1559
 
1560
	return 0;
1561
}
1562
 
3243 Serge 1563
/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
1564
#define I830_BATCH_LIMIT (256*1024)
5128 serge 1565
#define I830_TLB_ENTRIES (2)
1566
#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
2332 Serge 1567
static int
5060 serge 1568
i830_dispatch_execbuffer(struct intel_engine_cs *ring,
1569
				u64 offset, u32 len,
3243 Serge 1570
				unsigned flags)
2332 Serge 1571
{
5128 serge 1572
	u32 cs_offset = ring->scratch.gtt_offset;
2332 Serge 1573
	int ret;
1574
 
5128 serge 1575
	ret = intel_ring_begin(ring, 6);
2332 Serge 1576
	if (ret)
1577
		return ret;
1578
 
5128 serge 1579
	/* Evict the invalid PTE TLBs */
1580
	intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA);
1581
	intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096);
1582
	intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */
1583
	intel_ring_emit(ring, cs_offset);
1584
	intel_ring_emit(ring, 0xdeadbeef);
3243 Serge 1585
	intel_ring_emit(ring, MI_NOOP);
1586
	intel_ring_advance(ring);
1587
 
5128 serge 1588
	if ((flags & I915_DISPATCH_PINNED) == 0) {
3243 Serge 1589
		if (len > I830_BATCH_LIMIT)
1590
			return -ENOSPC;
1591
 
5128 serge 1592
		ret = intel_ring_begin(ring, 6 + 2);
3243 Serge 1593
		if (ret)
1594
			return ret;
5128 serge 1595
 
1596
		/* Blit the batch (which has now all relocs applied) to the
1597
		 * stable batch scratch bo area (so that the CS never
1598
		 * stumbles over its tlb invalidation bug) ...
1599
		 */
1600
		intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA);
1601
		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096);
5139 serge 1602
		intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096);
3243 Serge 1603
		intel_ring_emit(ring, cs_offset);
1604
		intel_ring_emit(ring, 4096);
1605
		intel_ring_emit(ring, offset);
5128 serge 1606
 
3243 Serge 1607
		intel_ring_emit(ring, MI_FLUSH);
5128 serge 1608
		intel_ring_emit(ring, MI_NOOP);
1609
		intel_ring_advance(ring);
3243 Serge 1610
 
1611
		/* ... and execute it. */
5128 serge 1612
		offset = cs_offset;
1613
	}
1614
 
1615
	ret = intel_ring_begin(ring, 4);
1616
	if (ret)
1617
		return ret;
1618
 
3243 Serge 1619
	intel_ring_emit(ring, MI_BATCH_BUFFER);
5128 serge 1620
	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
1621
	intel_ring_emit(ring, offset + len - 8);
1622
	intel_ring_emit(ring, MI_NOOP);
3031 serge 1623
	intel_ring_advance(ring);
1624
 
1625
	return 0;
1626
}
1627
 
1628
static int
5060 serge 1629
i915_dispatch_execbuffer(struct intel_engine_cs *ring,
1630
			 u64 offset, u32 len,
3243 Serge 1631
			 unsigned flags)
3031 serge 1632
{
1633
	int ret;
1634
 
2332 Serge 1635
	ret = intel_ring_begin(ring, 2);
1636
	if (ret)
1637
		return ret;
1638
 
3031 serge 1639
	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
3243 Serge 1640
	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
2332 Serge 1641
	intel_ring_advance(ring);
1642
 
1643
	return 0;
1644
}
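/*
 * The helpers below manage the hardware status page (HWS), a single page
 * the GPU writes status (e.g. completed seqnos) into. Platforms with a GFX
 * HWS keep it in a GEM object pinned into the GGTT and kmap()ed for the
 * CPU; init_phys_status_page() covers the older physically-addressed case.
 */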
1645
 
5060 serge 1646
static void cleanup_status_page(struct intel_engine_cs *ring)
2332 Serge 1647
{
1648
	struct drm_i915_gem_object *obj;
1649
 
1650
	obj = ring->status_page.obj;
1651
	if (obj == NULL)
1652
		return;
1653
 
5354 serge 1654
	kunmap(sg_page(obj->pages->sgl));
5060 serge 1655
	i915_gem_object_ggtt_unpin(obj);
2344 Serge 1656
	drm_gem_object_unreference(&obj->base);
2332 Serge 1657
	ring->status_page.obj = NULL;
1658
}
1659
 
5060 serge 1660
static int init_status_page(struct intel_engine_cs *ring)
2332 Serge 1661
{
1662
	struct drm_i915_gem_object *obj;
5060 serge 1663
 
1664
	if ((obj = ring->status_page.obj) == NULL) {
1665
		unsigned flags;
2332 Serge 1666
		int ret;
1667

5060 serge 1668
		obj = i915_gem_alloc_object(ring->dev, 4096);
2332 Serge 1669
		if (obj == NULL) {
1670
			DRM_ERROR("Failed to allocate status page\n");
5060 serge 1671
			return -ENOMEM;
2332 Serge 1672
		}
1673

5060 serge 1674
		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
1675
		if (ret)
1676
			goto err_unref;
2332 Serge 1677
 
5060 serge 1678
		flags = 0;
1679
		if (!HAS_LLC(ring->dev))
1680
			/* On g33, we cannot place HWS above 256MiB, so
1681
			 * restrict its pinning to the low mappable arena.
1682
			 * Though this restriction is not documented for
1683
			 * gen4, gen5, or byt, they also behave similarly
1684
			 * and hang if the HWS is placed at the top of the
1685
			 * GTT. To generalise, it appears that all !llc
1686
			 * platforms have issues with us placing the HWS
1687
			 * above the mappable region (even though we never
1688
			 * actually map it).
1689
			 */
1690
			flags |= PIN_MAPPABLE;
1691
		ret = i915_gem_obj_ggtt_pin(obj, 4096, flags);
1692
		if (ret) {
1693
err_unref:
1694
			drm_gem_object_unreference(&obj->base);
1695
			return ret;
1696
		}
1697
 
1698
		ring->status_page.obj = obj;
2332 Serge 1699
	}
1700
 
4104 Serge 1701
	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
5354 serge 1702
	ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
2332 Serge 1703
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1704
 
1705
	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
1706
			ring->name, ring->status_page.gfx_addr);
1707
 
1708
	return 0;
1709
}
1710
 
5060 serge 1711
static int init_phys_status_page(struct intel_engine_cs *ring)
3243 Serge 1712
{
1713
    struct drm_i915_private *dev_priv = ring->dev->dev_private;
1714
 
1715
    if (!dev_priv->status_page_dmah) {
1716
        dev_priv->status_page_dmah =
1717
            drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE);
1718
        if (!dev_priv->status_page_dmah)
1719
            return -ENOMEM;
1720
    }
1721
 
1722
    ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
1723
    memset(ring->status_page.page_addr, 0, PAGE_SIZE);
1724
 
1725
    return 0;
1726
}
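/*
 * The ring buffer itself is pinned into the mappable GGTT aperture and
 * mapped write-combined via ioremap_wc() below, so the CPU can write
 * commands straight into it; unpinning simply reverses both steps.
 */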
1727
 
5354 serge 1728
void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
2332 Serge 1729
{
5060 serge 1730
	iounmap(ringbuf->virtual_start);
5354 serge 1731
	ringbuf->virtual_start = NULL;
5060 serge 1732
	i915_gem_object_ggtt_unpin(ringbuf->obj);
1733
}
1734
 
5354 serge 1735
int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
5060 serge 1736
				      struct intel_ringbuffer *ringbuf)
1737
{
1738
	struct drm_i915_private *dev_priv = to_i915(dev);
5354 serge 1739
	struct drm_i915_gem_object *obj = ringbuf->obj;
2332 Serge 1740
	int ret;
2340 Serge 1741
 
5354 serge 1742
	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
1743
	if (ret)
1744
		return ret;
1745
 
1746
	ret = i915_gem_object_set_to_gtt_domain(obj, true);
1747
	if (ret) {
1748
		i915_gem_object_ggtt_unpin(obj);
1749
		return ret;
1750
	}
1751
 
1752
	ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
1753
			i915_gem_obj_ggtt_offset(obj), ringbuf->size);
1754
	if (ringbuf->virtual_start == NULL) {
1755
		i915_gem_object_ggtt_unpin(obj);
1756
		return -EINVAL;
1757
	}
1758
 
5060 serge 1759
	return 0;
5354 serge 1760
}
2332 Serge 1761
 
5354 serge 1762
void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
1763
{
1764
	drm_gem_object_unreference(&ringbuf->obj->base);
1765
	ringbuf->obj = NULL;
1766
}
1767
 
1768
int intel_alloc_ringbuffer_obj(struct drm_device *dev,
1769
			       struct intel_ringbuffer *ringbuf)
1770
{
1771
	struct drm_i915_gem_object *obj;
1772
 
3480 Serge 1773
	obj = NULL;
4371 Serge 1774
	if (!HAS_LLC(dev))
5060 serge 1775
		obj = i915_gem_object_create_stolen(dev, ringbuf->size);
3480 Serge 1776
	if (obj == NULL)
5060 serge 1777
		obj = i915_gem_alloc_object(dev, ringbuf->size);
1778
	if (obj == NULL)
1779
		return -ENOMEM;
2332 Serge 1780
 
5060 serge 1781
	/* mark ring buffers as read-only from GPU side by default */
1782
	obj->gt_ro = 1;
2332 Serge 1783
 
5354 serge 1784
	ringbuf->obj = obj;
2332 Serge 1785
 
5060 serge 1786
	return 0;
1787
}
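/*
 * Common setup for the legacy (non-execlist) submission path below:
 * allocate the ringbuffer struct and status page, allocate/pin/map the ring
 * object, shrink effective_size for the i830/845G tail erratum, set up the
 * command parser and finally run the engine-specific ->init() hook.
 */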
1788
 
1789
static int intel_init_ring_buffer(struct drm_device *dev,
1790
				  struct intel_engine_cs *ring)
1791
{
1792
	struct intel_ringbuffer *ringbuf = ring->buffer;
1793
	int ret;
1794
 
1795
	if (ringbuf == NULL) {
1796
		ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
1797
		if (!ringbuf)
1798
			return -ENOMEM;
1799
		ring->buffer = ringbuf;
1800
	}
1801
 
1802
	ring->dev = dev;
1803
	INIT_LIST_HEAD(&ring->active_list);
1804
	INIT_LIST_HEAD(&ring->request_list);
5354 serge 1805
	INIT_LIST_HEAD(&ring->execlist_queue);
5060 serge 1806
	ringbuf->size = 32 * PAGE_SIZE;
5354 serge 1807
	ringbuf->ring = ring;
5060 serge 1808
	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
1809
 
1810
	init_waitqueue_head(&ring->irq_queue);
1811
 
1812
	if (I915_NEED_GFX_HWS(dev)) {
1813
		ret = init_status_page(ring);
1814
		if (ret)
1815
			goto error;
1816
	} else {
1817
		BUG_ON(ring->id != RCS);
1818
		ret = init_phys_status_page(ring);
2332 Serge 1819
		if (ret)
5060 serge 1820
			goto error;
1821
	}
2332 Serge 1822
 
5354 serge 1823
	if (ringbuf->obj == NULL) {
5060 serge 1824
		ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
1825
		if (ret) {
5354 serge 1826
			DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
1827
					ring->name, ret);
1828
			goto error;
1829
		}
1830
 
1831
		ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
1832
		if (ret) {
1833
			DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
1834
					ring->name, ret);
1835
			intel_destroy_ringbuffer_obj(ringbuf);
5060 serge 1836
			goto error;
1837
		}
5354 serge 1838
	}
5060 serge 1839
 
2332 Serge 1840
	/* Workaround an erratum on the i830 which causes a hang if
1841
	 * the TAIL pointer points to within the last 2 cachelines
1842
	 * of the buffer.
1843
	 */
5060 serge 1844
	ringbuf->effective_size = ringbuf->size;
1845
	if (IS_I830(dev) || IS_845G(dev))
1846
		ringbuf->effective_size -= 2 * CACHELINE_BYTES;
2340 Serge 1847
 
5060 serge 1848
	ret = i915_cmd_parser_init_ring(ring);
1849
	if (ret)
1850
		goto error;
1851
 
1852
	ret = ring->init(ring);
1853
	if (ret)
1854
		goto error;
1855
 
2332 Serge 1856
	return 0;
1857
 
5060 serge 1858
error:
1859
	kfree(ringbuf);
1860
	ring->buffer = NULL;
2332 Serge 1861
	return ret;
1862
}
1863
 
5060 serge 1864
void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
2332 Serge 1865
{
5354 serge 1866
	struct drm_i915_private *dev_priv;
1867
	struct intel_ringbuffer *ringbuf;
2332 Serge 1868
 
5060 serge 1869
	if (!intel_ring_initialized(ring))
2332 Serge 1870
		return;
1871
 
5354 serge 1872
	dev_priv = to_i915(ring->dev);
1873
	ringbuf = ring->buffer;
1874
 
5060 serge 1875
	intel_stop_ring_buffer(ring);
1876
	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
2332 Serge 1877
 
5354 serge 1878
	intel_unpin_ringbuffer_obj(ringbuf);
5060 serge 1879
	intel_destroy_ringbuffer_obj(ringbuf);
4560 Serge 1880
	ring->preallocated_lazy_request = NULL;
1881
	ring->outstanding_lazy_seqno = 0;
2332 Serge 1882
 
1883
	if (ring->cleanup)
1884
		ring->cleanup(ring);
1885
 
5060 serge 1886
//	cleanup_status_page(ring);
2332 Serge 1887
 
5060 serge 1888
	i915_cmd_parser_fini_ring(ring);
2332 Serge 1889
 
5060 serge 1890
	kfree(ringbuf);
1891
	ring->buffer = NULL;
3031 serge 1892
}
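/*
 * intel_ring_wait_request() below tries to free ring space without busy
 * waiting: it reuses space released by already-retired requests, or finds
 * the oldest request whose completion yields at least @n bytes, waits for
 * its seqno and retires it. -ENOSPC means no such request exists.
 */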
1893
 
5060 serge 1894
static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
3031 serge 1895
{
5060 serge 1896
	struct intel_ringbuffer *ringbuf = ring->buffer;
3031 serge 1897
	struct drm_i915_gem_request *request;
1898
	u32 seqno = 0;
1899
	int ret;
1900
 
5060 serge 1901
	if (ringbuf->last_retired_head != -1) {
1902
		ringbuf->head = ringbuf->last_retired_head;
1903
		ringbuf->last_retired_head = -1;
3031 serge 1904
 
5354 serge 1905
		ringbuf->space = intel_ring_space(ringbuf);
5060 serge 1906
		if (ringbuf->space >= n)
2332 Serge 1907
			return 0;
1908
	}
1909
 
3031 serge 1910
	list_for_each_entry(request, &ring->request_list, list) {
5354 serge 1911
		if (__intel_ring_space(request->tail, ringbuf->tail,
1912
				       ringbuf->size) >= n) {
3031 serge 1913
			seqno = request->seqno;
1914
			break;
1915
		}
1916
	}
1917
 
1918
	if (seqno == 0)
1919
		return -ENOSPC;
1920
 
5060 serge 1921
	ret = i915_wait_seqno(ring, seqno);
3031 serge 1922
	if (ret)
1923
		return ret;
1924
 
5060 serge 1925
	i915_gem_retire_requests_ring(ring);
1926
	ringbuf->head = ringbuf->last_retired_head;
1927
	ringbuf->last_retired_head = -1;
3031 serge 1928
 
5354 serge 1929
	ringbuf->space = intel_ring_space(ringbuf);
3031 serge 1930
	return 0;
1931
}
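/*
 * Fallback when no queued request can be retired: poll the hardware HEAD
 * register (with a 60 second timeout and wedged-GPU checks) until enough
 * space frees up in the ring.
 */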
1932
 
5060 serge 1933
static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
3031 serge 1934
{
1935
	struct drm_device *dev = ring->dev;
1936
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 1937
	struct intel_ringbuffer *ringbuf = ring->buffer;
3031 serge 1938
	unsigned long end;
1939
	int ret;
1940
 
1941
	ret = intel_ring_wait_request(ring, n);
1942
	if (ret != -ENOSPC)
1943
		return ret;
1944
 
4560 Serge 1945
	/* force the tail write in case we have been skipping them */
1946
	__intel_ring_advance(ring);
1947
 
3031 serge 1948
	/* With GEM the hangcheck timer should kick us out of the loop,
1949
	 * leaving it early runs the risk of corrupting GEM state (due
1950
	 * to running on almost untested codepaths). But on resume
1951
	 * timers don't work yet, so prevent a complete hang in that
1952
	 * case by choosing an insanely large timeout. */
5060 serge 1953
	end = jiffies + 60 * HZ;
3031 serge 1954
 
5060 serge 1955
	trace_i915_ring_wait_begin(ring);
2332 Serge 1956
	do {
5060 serge 1957
		ringbuf->head = I915_READ_HEAD(ring);
5354 serge 1958
		ringbuf->space = intel_ring_space(ringbuf);
5060 serge 1959
		if (ringbuf->space >= n) {
1960
			ret = 0;
1961
			break;
2332 Serge 1962
		}
1963
 
1964
		msleep(1);
3031 serge 1965
 
3480 Serge 1966
		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
1967
					   dev_priv->mm.interruptible);
3031 serge 1968
		if (ret)
5060 serge 1969
			break;
1970
 
1971
		if (time_after(jiffies, end)) {
1972
			ret = -EBUSY;
1973
			break;
1974
		}
1975
	} while (1);
1976
	trace_i915_ring_wait_end(ring);
3031 serge 1977
	return ret;
2332 Serge 1978
}
1979
 
5060 serge 1980
static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
3243 Serge 1981
{
1982
	uint32_t __iomem *virt;
5060 serge 1983
	struct intel_ringbuffer *ringbuf = ring->buffer;
1984
	int rem = ringbuf->size - ringbuf->tail;
3243 Serge 1985
 
5060 serge 1986
	if (ringbuf->space < rem) {
3243 Serge 1987
		int ret = ring_wait_for_space(ring, rem);
1988
		if (ret)
1989
			return ret;
1990
	}
1991
 
5060 serge 1992
	virt = ringbuf->virtual_start + ringbuf->tail;
3243 Serge 1993
	rem /= 4;
1994
	while (rem--)
1995
		iowrite32(MI_NOOP, virt++);
1996
 
5060 serge 1997
	ringbuf->tail = 0;
5354 serge 1998
	ringbuf->space = intel_ring_space(ringbuf);
3243 Serge 1999
 
2000
	return 0;
2001
}
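/*
 * intel_ring_idle() flushes any outstanding lazy request and then waits for
 * the most recently queued request on this ring to complete.
 */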
2002
 
5060 serge 2003
int intel_ring_idle(struct intel_engine_cs *ring)
3243 Serge 2004
{
2005
	u32 seqno;
2006
	int ret;
2007
 
2008
	/* We need to add any requests required to flush the objects and ring */
4560 Serge 2009
	if (ring->outstanding_lazy_seqno) {
4104 Serge 2010
		ret = i915_add_request(ring, NULL);
3243 Serge 2011
		if (ret)
2012
			return ret;
2013
	}
2014
 
2015
	/* Wait upon the last request to be completed */
2016
	if (list_empty(&ring->request_list))
2017
		return 0;
2018
 
2019
	seqno = list_entry(ring->request_list.prev,
2020
			   struct drm_i915_gem_request,
2021
			   list)->seqno;
2022
 
2023
	return i915_wait_seqno(ring, seqno);
2024
}
2025
 
2026
static int
5060 serge 2027
intel_ring_alloc_seqno(struct intel_engine_cs *ring)
3243 Serge 2028
{
4560 Serge 2029
	if (ring->outstanding_lazy_seqno)
3243 Serge 2030
		return 0;
2031
 
4560 Serge 2032
	if (ring->preallocated_lazy_request == NULL) {
2033
		struct drm_i915_gem_request *request;
2034
 
2035
		request = kmalloc(sizeof(*request), GFP_KERNEL);
2036
		if (request == NULL)
2037
			return -ENOMEM;
2038
 
2039
		ring->preallocated_lazy_request = request;
2040
	}
2041
 
2042
	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
3243 Serge 2043
}
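/*
 * __intel_ring_prepare() guarantees room for the next commands: it wraps to
 * the start of the buffer (padding the tail with MI_NOOPs) if the request
 * would run past effective_size, then waits until enough space is free.
 */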
2044
 
5060 serge 2045
static int __intel_ring_prepare(struct intel_engine_cs *ring,
3480 Serge 2046
			      int bytes)
2047
{
5060 serge 2048
	struct intel_ringbuffer *ringbuf = ring->buffer;
3480 Serge 2049
	int ret;
2050
 
5060 serge 2051
	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
3480 Serge 2052
		ret = intel_wrap_ring_buffer(ring);
2053
		if (unlikely(ret))
2054
			return ret;
2055
	}
2056
 
5060 serge 2057
	if (unlikely(ringbuf->space < bytes)) {
3480 Serge 2058
		ret = ring_wait_for_space(ring, bytes);
2059
		if (unlikely(ret))
2060
			return ret;
2061
	}
2062
 
2063
	return 0;
2064
}
2065
 
5060 serge 2066
int intel_ring_begin(struct intel_engine_cs *ring,
2332 Serge 2067
		     int num_dwords)
2068
{
5060 serge 2069
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
2332 Serge 2070
	int ret;
2071
 
3480 Serge 2072
	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
2073
				   dev_priv->mm.interruptible);
3031 serge 2074
	if (ret)
2075
		return ret;
2332 Serge 2076
 
4560 Serge 2077
	ret = __intel_ring_prepare(ring, num_dwords * sizeof(uint32_t));
2078
	if (ret)
2079
		return ret;
2080
 
3243 Serge 2081
	/* Preallocate the olr before touching the ring */
2082
	ret = intel_ring_alloc_seqno(ring);
2083
	if (ret)
2084
		return ret;
2085
 
5060 serge 2086
	ring->buffer->space -= num_dwords * sizeof(uint32_t);
4560 Serge 2087
	return 0;
3480 Serge 2088
}
2332 Serge 2089
 
5060 serge 2090
/* Align the ring tail to a cacheline boundary */
2091
int intel_ring_cacheline_align(struct intel_engine_cs *ring)
3480 Serge 2092
{
5060 serge 2093
	int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
2094
	int ret;
3480 Serge 2095
 
5060 serge 2096
	if (num_dwords == 0)
2097
		return 0;
2098
 
2099
	num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
2100
	ret = intel_ring_begin(ring, num_dwords);
2101
	if (ret)
2102
		return ret;
2103
 
2104
	while (num_dwords--)
2105
		intel_ring_emit(ring, MI_NOOP);
2106
 
2107
	intel_ring_advance(ring);
2108
 
2109
	return 0;
2110
}
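/*
 * Reset the ring's notion of the current seqno: clear the gen6/gen7
 * semaphore mailbox sync registers and update both the engine seqno and the
 * hangcheck seqno.
 */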
2111
 
2112
void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
2113
{
2114
	struct drm_device *dev = ring->dev;
2115
	struct drm_i915_private *dev_priv = dev->dev_private;
2116
 
4560 Serge 2117
	BUG_ON(ring->outstanding_lazy_seqno);
3480 Serge 2118
 
5060 serge 2119
	if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
3480 Serge 2120
		I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
2121
		I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
5060 serge 2122
		if (HAS_VEBOX(dev))
4104 Serge 2123
			I915_WRITE(RING_SYNC_2(ring->mmio_base), 0);
2332 Serge 2124
	}
2125
 
3480 Serge 2126
	ring->set_seqno(ring, seqno);
4104 Serge 2127
	ring->hangcheck.seqno = seqno;
2332 Serge 2128
}
2129
 
5060 serge 2130
static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring,
2332 Serge 2131
				     u32 value)
2132
{
5060 serge 2133
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
2332 Serge 2134
 
2135
       /* Every tail move must follow the sequence below */
3031 serge 2136
 
2137
	/* Disable notification that the ring is IDLE. The GT
2138
	 * will then assume that it is busy and bring it out of rc6.
2139
	 */
2332 Serge 2140
       I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
3031 serge 2141
		   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
2332 Serge 2142
 
3031 serge 2143
	/* Clear the context id. Here be magic! */
2144
	I915_WRITE64(GEN6_BSD_RNCID, 0x0);
2145
 
2146
	/* Wait for the ring not to be idle, i.e. for it to wake up. */
2332 Serge 2147
       if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
3031 serge 2148
		      GEN6_BSD_SLEEP_INDICATOR) == 0,
2332 Serge 2149
                       50))
3031 serge 2150
		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
2332 Serge 2151
 
3031 serge 2152
	/* Now that the ring is fully powered up, update the tail */
2332 Serge 2153
       I915_WRITE_TAIL(ring, value);
3031 serge 2154
	POSTING_READ(RING_TAIL(ring->mmio_base));
2155
 
2156
	/* Let the ring send IDLE messages to the GT again,
2157
	 * and so let it sleep to conserve power when idle.
2158
	 */
2332 Serge 2159
       I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
3031 serge 2160
		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
2332 Serge 2161
}
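/*
 * MI_FLUSH_DW based flush for the gen6+ video ring. On gen8 the command is
 * one dword longer to carry the upper 32 bits of the post-sync address,
 * hence the "cmd += 1" length adjustment below.
 */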
2162
 
5060 serge 2163
static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
2332 Serge 2164
			   u32 invalidate, u32 flush)
2165
{
2166
	uint32_t cmd;
2167
	int ret;
2168
 
2169
	ret = intel_ring_begin(ring, 4);
2170
	if (ret)
2171
		return ret;
2172
 
2173
	cmd = MI_FLUSH_DW;
4560 Serge 2174
	if (INTEL_INFO(ring->dev)->gen >= 8)
2175
		cmd += 1;
3243 Serge 2176
	/*
2177
	 * Bspec vol 1c.5 - video engine command streamer:
2178
	 * "If ENABLED, all TLBs will be invalidated once the flush
2179
	 * operation is complete. This bit is only valid when the
2180
	 * Post-Sync Operation field is a value of 1h or 3h."
2181
	 */
2332 Serge 2182
	if (invalidate & I915_GEM_GPU_DOMAINS)
3243 Serge 2183
		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
2184
			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
2332 Serge 2185
	intel_ring_emit(ring, cmd);
3243 Serge 2186
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
4560 Serge 2187
	if (INTEL_INFO(ring->dev)->gen >= 8) {
2188
		intel_ring_emit(ring, 0); /* upper addr */
2189
		intel_ring_emit(ring, 0); /* value */
2190
	} else  {
2332 Serge 2191
		intel_ring_emit(ring, 0);
2192
		intel_ring_emit(ring, MI_NOOP);
4560 Serge 2193
	}
2332 Serge 2194
	intel_ring_advance(ring);
2195
	return 0;
2196
}
2197
 
2198
static int
5060 serge 2199
gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
2200
			      u64 offset, u32 len,
4560 Serge 2201
			      unsigned flags)
2202
{
5354 serge 2203
	bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE);
4560 Serge 2204
	int ret;
2205
 
2206
	ret = intel_ring_begin(ring, 4);
2207
	if (ret)
2208
		return ret;
2209
 
2210
	/* FIXME(BDW): Address space and security selectors. */
2211
	intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
5060 serge 2212
	intel_ring_emit(ring, lower_32_bits(offset));
2213
	intel_ring_emit(ring, upper_32_bits(offset));
4560 Serge 2214
	intel_ring_emit(ring, MI_NOOP);
2215
	intel_ring_advance(ring);
2216
 
2217
	return 0;
2218
}
2219
 
2220
static int
5060 serge 2221
hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
2222
			      u64 offset, u32 len,
3243 Serge 2223
			      unsigned flags)
2224
{
2225
	int ret;
2226
 
2227
	ret = intel_ring_begin(ring, 2);
2228
	if (ret)
2229
		return ret;
2230
 
2231
	intel_ring_emit(ring,
5354 serge 2232
			MI_BATCH_BUFFER_START |
2233
			(flags & I915_DISPATCH_SECURE ?
2234
			 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW));
3243 Serge 2235
	/* bit0-7 is the length on GEN6+ */
2236
	intel_ring_emit(ring, offset);
2237
	intel_ring_advance(ring);
2238
 
2239
	return 0;
2240
}
2241
 
2242
static int
5060 serge 2243
gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
2244
			      u64 offset, u32 len,
3243 Serge 2245
			      unsigned flags)
2332 Serge 2246
{
2247
       int ret;
2248
 
2249
       ret = intel_ring_begin(ring, 2);
2250
       if (ret)
2251
	       return ret;
2252
 
3243 Serge 2253
	intel_ring_emit(ring,
2254
			MI_BATCH_BUFFER_START |
2255
			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
2332 Serge 2256
       /* bit0-7 is the length on GEN6+ */
2257
       intel_ring_emit(ring, offset);
2258
       intel_ring_advance(ring);
2259
 
2260
       return 0;
2261
}
2262
 
2263
/* Blitter support (SandyBridge+) */
2264
 
5060 serge 2265
static int gen6_ring_flush(struct intel_engine_cs *ring,
2332 Serge 2266
			  u32 invalidate, u32 flush)
2267
{
4104 Serge 2268
	struct drm_device *dev = ring->dev;
5354 serge 2269
	struct drm_i915_private *dev_priv = dev->dev_private;
2332 Serge 2270
	uint32_t cmd;
2271
	int ret;
2272
 
3031 serge 2273
	ret = intel_ring_begin(ring, 4);
2332 Serge 2274
	if (ret)
2275
		return ret;
2276
 
2277
	cmd = MI_FLUSH_DW;
4560 Serge 2278
	if (INTEL_INFO(ring->dev)->gen >= 8)
2279
		cmd += 1;
3243 Serge 2280
	/*
2281
	 * Bspec vol 1c.3 - blitter engine command streamer:
2282
	 * "If ENABLED, all TLBs will be invalidated once the flush
2283
	 * operation is complete. This bit is only valid when the
2284
	 * Post-Sync Operation field is a value of 1h or 3h."
2285
	 */
2332 Serge 2286
	if (invalidate & I915_GEM_DOMAIN_RENDER)
3243 Serge 2287
		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
2288
			MI_FLUSH_DW_OP_STOREDW;
2332 Serge 2289
	intel_ring_emit(ring, cmd);
3243 Serge 2290
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
4560 Serge 2291
	if (INTEL_INFO(ring->dev)->gen >= 8) {
2292
		intel_ring_emit(ring, 0); /* upper addr */
2293
		intel_ring_emit(ring, 0); /* value */
2294
	} else  {
2332 Serge 2295
		intel_ring_emit(ring, 0);
2296
		intel_ring_emit(ring, MI_NOOP);
4560 Serge 2297
	}
2332 Serge 2298
	intel_ring_advance(ring);
4104 Serge 2299
 
5354 serge 2300
	if (!invalidate && flush) {
2301
		if (IS_GEN7(dev))
4104 Serge 2302
			return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);
5354 serge 2303
		else if (IS_BROADWELL(dev))
2304
			dev_priv->fbc.need_sw_cache_clean = true;
2305
	}
4104 Serge 2306
 
2332 Serge 2307
	return 0;
2308
}
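/*
 * Render engine setup: the flush/irq/seqno/dispatch vfuncs are chosen per
 * generation (gen8, gen6/7, gen5, gen2-4), semaphores are wired up when
 * enabled, and platforms with the broken CS TLB get the scratch batch used
 * by the i830 workaround above.
 */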
2309
 
2310
int intel_init_render_ring_buffer(struct drm_device *dev)
2311
{
5060 serge 2312
	struct drm_i915_private *dev_priv = dev->dev_private;
2313
	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
2314
	struct drm_i915_gem_object *obj;
2315
	int ret;
2340 Serge 2316
 
3031 serge 2317
	ring->name = "render ring";
2318
	ring->id = RCS;
2319
	ring->mmio_base = RENDER_RING_BASE;
2320
 
5060 serge 2321
	if (INTEL_INFO(dev)->gen >= 8) {
2322
		if (i915_semaphore_is_enabled(dev)) {
2323
			obj = i915_gem_alloc_object(dev, 4096);
2324
			if (obj == NULL) {
2325
				DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n");
2326
				i915.semaphores = 0;
2327
			} else {
2328
				i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
2329
				ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK);
2330
				if (ret != 0) {
2331
					drm_gem_object_unreference(&obj->base);
2332
					DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
2333
					i915.semaphores = 0;
2334
				} else
2335
					dev_priv->semaphore_obj = obj;
2336
			}
2337
		}
5354 serge 2338
 
2339
		ring->init_context = intel_ring_workarounds_emit;
5060 serge 2340
		ring->add_request = gen6_add_request;
2341
		ring->flush = gen8_render_ring_flush;
2342
		ring->irq_get = gen8_ring_get_irq;
2343
		ring->irq_put = gen8_ring_put_irq;
2344
		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
2345
		ring->get_seqno = gen6_ring_get_seqno;
2346
		ring->set_seqno = ring_set_seqno;
2347
		if (i915_semaphore_is_enabled(dev)) {
2348
			WARN_ON(!dev_priv->semaphore_obj);
2349
			ring->semaphore.sync_to = gen8_ring_sync;
2350
			ring->semaphore.signal = gen8_rcs_signal;
2351
			GEN8_RING_SEMAPHORE_INIT;
2352
		}
2353
	} else if (INTEL_INFO(dev)->gen >= 6) {
2339 Serge 2354
		ring->add_request = gen6_add_request;
3031 serge 2355
		ring->flush = gen7_render_ring_flush;
2356
		if (INTEL_INFO(dev)->gen == 6)
2342 Serge 2357
			ring->flush = gen6_render_ring_flush;
3031 serge 2358
		ring->irq_get = gen6_ring_get_irq;
2359
		ring->irq_put = gen6_ring_put_irq;
4104 Serge 2360
		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
2342 Serge 2361
		ring->get_seqno = gen6_ring_get_seqno;
3480 Serge 2362
		ring->set_seqno = ring_set_seqno;
5060 serge 2363
		if (i915_semaphore_is_enabled(dev)) {
2364
			ring->semaphore.sync_to = gen6_ring_sync;
2365
			ring->semaphore.signal = gen6_signal;
2366
			/*
2367
			 * The current semaphore is only applied on pre-gen8
2368
			 * platform.  And there is no VCS2 ring on the pre-gen8
2369
			 * platform. So the semaphore between RCS and VCS2 is
2370
			 * initialized as INVALID.  Gen8 will initialize the
2371
			 * sema between VCS2 and RCS later.
2372
			 */
2373
			ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID;
2374
			ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_RV;
2375
			ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_RB;
2376
			ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_RVE;
2377
			ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2378
			ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC;
2379
			ring->semaphore.mbox.signal[VCS] = GEN6_VRSYNC;
2380
			ring->semaphore.mbox.signal[BCS] = GEN6_BRSYNC;
2381
			ring->semaphore.mbox.signal[VECS] = GEN6_VERSYNC;
2382
			ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2383
		}
2332 Serge 2384
	} else if (IS_GEN5(dev)) {
2339 Serge 2385
       ring->add_request = pc_render_add_request;
3031 serge 2386
		ring->flush = gen4_render_ring_flush;
2342 Serge 2387
		ring->get_seqno = pc_render_get_seqno;
3480 Serge 2388
		ring->set_seqno = pc_render_set_seqno;
3031 serge 2389
		ring->irq_get = gen5_ring_get_irq;
2390
		ring->irq_put = gen5_ring_put_irq;
4104 Serge 2391
		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT |
2392
					GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
3031 serge 2393
	} else {
2394
		ring->add_request = i9xx_add_request;
2395
		if (INTEL_INFO(dev)->gen < 4)
2396
			ring->flush = gen2_render_ring_flush;
2397
		else
2398
			ring->flush = gen4_render_ring_flush;
2399
		ring->get_seqno = ring_get_seqno;
3480 Serge 2400
		ring->set_seqno = ring_set_seqno;
3031 serge 2401
		if (IS_GEN2(dev)) {
2402
			ring->irq_get = i8xx_ring_get_irq;
2403
			ring->irq_put = i8xx_ring_put_irq;
2404
		} else {
2405
			ring->irq_get = i9xx_ring_get_irq;
2406
			ring->irq_put = i9xx_ring_put_irq;
2407
		}
2408
		ring->irq_enable_mask = I915_USER_INTERRUPT;
2332 Serge 2409
	}
3031 serge 2410
	ring->write_tail = ring_write_tail;
5060 serge 2411
 
3243 Serge 2412
	if (IS_HASWELL(dev))
2413
		ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
4560 Serge 2414
	else if (IS_GEN8(dev))
2415
		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
3243 Serge 2416
	else if (INTEL_INFO(dev)->gen >= 6)
3031 serge 2417
		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
2418
	else if (INTEL_INFO(dev)->gen >= 4)
2419
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
2420
	else if (IS_I830(dev) || IS_845G(dev))
2421
		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
2422
	else
2423
		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
2424
	ring->init = init_render_ring;
2425
	ring->cleanup = render_ring_cleanup;
2332 Serge 2426
 
3243 Serge 2427
	/* Workaround batchbuffer to combat CS tlb bug. */
2428
	if (HAS_BROKEN_CS_TLB(dev)) {
5128 serge 2429
		obj = i915_gem_alloc_object(dev, I830_WA_SIZE);
3243 Serge 2430
		if (obj == NULL) {
2431
			DRM_ERROR("Failed to allocate batch bo\n");
2432
			return -ENOMEM;
2433
		}
2434
 
5060 serge 2435
		ret = i915_gem_obj_ggtt_pin(obj, 0, 0);
3243 Serge 2436
		if (ret != 0) {
2437
			drm_gem_object_unreference(&obj->base);
2438
			DRM_ERROR("Failed to ping batch bo\n");
2439
			return ret;
2440
		}
2441
 
4104 Serge 2442
		ring->scratch.obj = obj;
2443
		ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
2332 Serge 2444
	}
2340 Serge 2445
 
2332 Serge 2446
	return intel_init_ring_buffer(dev, ring);
2447
}
2448
 
2449
int intel_init_bsd_ring_buffer(struct drm_device *dev)
2450
{
5060 serge 2451
	struct drm_i915_private *dev_priv = dev->dev_private;
2452
	struct intel_engine_cs *ring = &dev_priv->ring[VCS];
2332 Serge 2453
 
3031 serge 2454
	ring->name = "bsd ring";
2455
	ring->id = VCS;
2332 Serge 2456
 
3031 serge 2457
	ring->write_tail = ring_write_tail;
4560 Serge 2458
	if (INTEL_INFO(dev)->gen >= 6) {
3031 serge 2459
		ring->mmio_base = GEN6_BSD_RING_BASE;
2460
		/* gen6 bsd needs a special wa for tail updates */
2461
		if (IS_GEN6(dev))
2462
			ring->write_tail = gen6_bsd_ring_write_tail;
4104 Serge 2463
		ring->flush = gen6_bsd_ring_flush;
3031 serge 2464
		ring->add_request = gen6_add_request;
2465
		ring->get_seqno = gen6_ring_get_seqno;
3480 Serge 2466
		ring->set_seqno = ring_set_seqno;
4560 Serge 2467
		if (INTEL_INFO(dev)->gen >= 8) {
2468
			ring->irq_enable_mask =
2469
				GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
2470
			ring->irq_get = gen8_ring_get_irq;
2471
			ring->irq_put = gen8_ring_put_irq;
2472
			ring->dispatch_execbuffer =
2473
				gen8_ring_dispatch_execbuffer;
5060 serge 2474
			if (i915_semaphore_is_enabled(dev)) {
2475
				ring->semaphore.sync_to = gen8_ring_sync;
2476
				ring->semaphore.signal = gen8_xcs_signal;
2477
				GEN8_RING_SEMAPHORE_INIT;
2478
			}
4560 Serge 2479
		} else {
4104 Serge 2480
			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
3031 serge 2481
			ring->irq_get = gen6_ring_get_irq;
2482
			ring->irq_put = gen6_ring_put_irq;
4560 Serge 2483
			ring->dispatch_execbuffer =
2484
				gen6_ring_dispatch_execbuffer;
5060 serge 2485
			if (i915_semaphore_is_enabled(dev)) {
2486
				ring->semaphore.sync_to = gen6_ring_sync;
2487
				ring->semaphore.signal = gen6_signal;
2488
				ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VR;
2489
				ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID;
2490
				ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VB;
2491
				ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_VVE;
2492
				ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2493
				ring->semaphore.mbox.signal[RCS] = GEN6_RVSYNC;
2494
				ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC;
2495
				ring->semaphore.mbox.signal[BCS] = GEN6_BVSYNC;
2496
				ring->semaphore.mbox.signal[VECS] = GEN6_VEVSYNC;
2497
				ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2498
			}
4560 Serge 2499
		}
3031 serge 2500
	} else {
2501
		ring->mmio_base = BSD_RING_BASE;
2502
		ring->flush = bsd_ring_flush;
2503
		ring->add_request = i9xx_add_request;
2504
		ring->get_seqno = ring_get_seqno;
3480 Serge 2505
		ring->set_seqno = ring_set_seqno;
3031 serge 2506
		if (IS_GEN5(dev)) {
4104 Serge 2507
			ring->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
3031 serge 2508
			ring->irq_get = gen5_ring_get_irq;
2509
			ring->irq_put = gen5_ring_put_irq;
2510
		} else {
2511
			ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
2512
			ring->irq_get = i9xx_ring_get_irq;
2513
			ring->irq_put = i9xx_ring_put_irq;
2514
		}
2515
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
2516
	}
2517
	ring->init = init_ring_common;
2518
 
2332 Serge 2519
	return intel_init_ring_buffer(dev, ring);
2520
}
2521
 
5060 serge 2522
/**
2523
 * Initialize the second BSD ring for Broadwell GT3.
2524
 * It is noted that this only exists on Broadwell GT3.
2525
 */
2526
int intel_init_bsd2_ring_buffer(struct drm_device *dev)
2527
{
2528
	struct drm_i915_private *dev_priv = dev->dev_private;
2529
	struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
2530
 
2531
	if ((INTEL_INFO(dev)->gen != 8)) {
2532
		DRM_ERROR("No dual-BSD ring on non-BDW machine\n");
2533
		return -EINVAL;
2534
	}
2535
 
2536
	ring->name = "bsd2 ring";
2537
	ring->id = VCS2;
2538
 
2539
	ring->write_tail = ring_write_tail;
2540
	ring->mmio_base = GEN8_BSD2_RING_BASE;
2541
	ring->flush = gen6_bsd_ring_flush;
2542
	ring->add_request = gen6_add_request;
2543
	ring->get_seqno = gen6_ring_get_seqno;
2544
	ring->set_seqno = ring_set_seqno;
2545
	ring->irq_enable_mask =
2546
			GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
2547
	ring->irq_get = gen8_ring_get_irq;
2548
	ring->irq_put = gen8_ring_put_irq;
2549
	ring->dispatch_execbuffer =
2550
			gen8_ring_dispatch_execbuffer;
2551
	if (i915_semaphore_is_enabled(dev)) {
2552
		ring->semaphore.sync_to = gen8_ring_sync;
2553
		ring->semaphore.signal = gen8_xcs_signal;
2554
		GEN8_RING_SEMAPHORE_INIT;
2555
	}
2556
	ring->init = init_ring_common;
2557
 
2558
	return intel_init_ring_buffer(dev, ring);
2559
}
2560
 
2332 Serge 2561
int intel_init_blt_ring_buffer(struct drm_device *dev)
2562
{
5060 serge 2563
	struct drm_i915_private *dev_priv = dev->dev_private;
2564
	struct intel_engine_cs *ring = &dev_priv->ring[BCS];
2332 Serge 2565
 
3031 serge 2566
	ring->name = "blitter ring";
2567
	ring->id = BCS;
2332 Serge 2568
 
3031 serge 2569
	ring->mmio_base = BLT_RING_BASE;
2570
	ring->write_tail = ring_write_tail;
4104 Serge 2571
	ring->flush = gen6_ring_flush;
3031 serge 2572
	ring->add_request = gen6_add_request;
2573
	ring->get_seqno = gen6_ring_get_seqno;
3480 Serge 2574
	ring->set_seqno = ring_set_seqno;
4560 Serge 2575
	if (INTEL_INFO(dev)->gen >= 8) {
2576
		ring->irq_enable_mask =
2577
			GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
2578
		ring->irq_get = gen8_ring_get_irq;
2579
		ring->irq_put = gen8_ring_put_irq;
2580
		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
5060 serge 2581
		if (i915_semaphore_is_enabled(dev)) {
2582
			ring->semaphore.sync_to = gen8_ring_sync;
2583
			ring->semaphore.signal = gen8_xcs_signal;
2584
			GEN8_RING_SEMAPHORE_INIT;
2585
		}
4560 Serge 2586
	} else {
4104 Serge 2587
		ring->irq_enable_mask = GT_BLT_USER_INTERRUPT;
3031 serge 2588
		ring->irq_get = gen6_ring_get_irq;
2589
		ring->irq_put = gen6_ring_put_irq;
2590
		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
5060 serge 2591
		if (i915_semaphore_is_enabled(dev)) {
2592
			ring->semaphore.signal = gen6_signal;
2593
			ring->semaphore.sync_to = gen6_ring_sync;
2594
			/*
2595
			 * The current semaphore is only applied on pre-gen8
2596
			 * platform.  And there is no VCS2 ring on the pre-gen8
2597
			 * platform. So the semaphore between BCS and VCS2 is
2598
			 * initialized as INVALID.  Gen8 will initialize the
2599
			 * sema between BCS and VCS2 later.
2600
			 */
2601
			ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_BR;
2602
			ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_BV;
2603
			ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID;
2604
			ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_BVE;
2605
			ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2606
			ring->semaphore.mbox.signal[RCS] = GEN6_RBSYNC;
2607
			ring->semaphore.mbox.signal[VCS] = GEN6_VBSYNC;
2608
			ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC;
2609
			ring->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC;
2610
			ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2611
		}
4560 Serge 2612
	}
3031 serge 2613
	ring->init = init_ring_common;
2614
 
2332 Serge 2615
	return intel_init_ring_buffer(dev, ring);
2616
}
3031 serge 2617
 
4104 Serge 2618
int intel_init_vebox_ring_buffer(struct drm_device *dev)
2619
{
5060 serge 2620
	struct drm_i915_private *dev_priv = dev->dev_private;
2621
	struct intel_engine_cs *ring = &dev_priv->ring[VECS];
4104 Serge 2622
 
2623
	ring->name = "video enhancement ring";
2624
	ring->id = VECS;
2625
 
2626
	ring->mmio_base = VEBOX_RING_BASE;
2627
	ring->write_tail = ring_write_tail;
2628
	ring->flush = gen6_ring_flush;
2629
	ring->add_request = gen6_add_request;
2630
	ring->get_seqno = gen6_ring_get_seqno;
2631
	ring->set_seqno = ring_set_seqno;
4560 Serge 2632
 
2633
	if (INTEL_INFO(dev)->gen >= 8) {
2634
		ring->irq_enable_mask =
2635
			GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
2636
		ring->irq_get = gen8_ring_get_irq;
2637
		ring->irq_put = gen8_ring_put_irq;
2638
		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
5060 serge 2639
		if (i915_semaphore_is_enabled(dev)) {
2640
			ring->semaphore.sync_to = gen8_ring_sync;
2641
			ring->semaphore.signal = gen8_xcs_signal;
2642
			GEN8_RING_SEMAPHORE_INIT;
2643
		}
4560 Serge 2644
	} else {
4104 Serge 2645
		ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
2646
		ring->irq_get = hsw_vebox_get_irq;
2647
		ring->irq_put = hsw_vebox_put_irq;
2648
		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
5060 serge 2649
		if (i915_semaphore_is_enabled(dev)) {
2650
			ring->semaphore.sync_to = gen6_ring_sync;
2651
			ring->semaphore.signal = gen6_signal;
2652
			ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VER;
2653
			ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_VEV;
2654
			ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VEB;
2655
			ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID;
2656
			ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
2657
			ring->semaphore.mbox.signal[RCS] = GEN6_RVESYNC;
2658
			ring->semaphore.mbox.signal[VCS] = GEN6_VVESYNC;
2659
			ring->semaphore.mbox.signal[BCS] = GEN6_BVESYNC;
2660
			ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC;
2661
			ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
2662
		}
4560 Serge 2663
	}
4104 Serge 2664
	ring->init = init_ring_common;
2665
 
2666
	return intel_init_ring_buffer(dev, ring);
2667
}
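/*
 * gpu_caches_dirty tracks whether a flush of the GPU domains is still
 * outstanding; the two helpers below emit the ring's ->flush() only when
 * needed and clear the flag afterwards.
 */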
2668
 
3031 serge 2669
int
5060 serge 2670
intel_ring_flush_all_caches(struct intel_engine_cs *ring)
3031 serge 2671
{
2672
	int ret;
2673
 
2674
	if (!ring->gpu_caches_dirty)
2675
		return 0;
2676
 
2677
	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
2678
	if (ret)
2679
		return ret;
2680
 
2681
	trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);
2682
 
2683
	ring->gpu_caches_dirty = false;
2684
	return 0;
2685
}
2686
 
2687
int
5060 serge 2688
intel_ring_invalidate_all_caches(struct intel_engine_cs *ring)
3031 serge 2689
{
2690
	uint32_t flush_domains;
2691
	int ret;
2692
 
2693
	flush_domains = 0;
2694
	if (ring->gpu_caches_dirty)
2695
		flush_domains = I915_GEM_GPU_DOMAINS;
2696
 
2697
	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
2698
	if (ret)
2699
		return ret;
2700
 
2701
	trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
2702
 
2703
	ring->gpu_caches_dirty = false;
2704
	return 0;
2705
}
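/*
 * Quiesce the engine before teardown: wait for it to go idle (unless a GPU
 * reset is already in progress) and then stop the ring via stop_ring().
 */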
5060 serge 2706
 
2707
void
2708
intel_stop_ring_buffer(struct intel_engine_cs *ring)
2709
{
2710
	int ret;
2711
 
2712
	if (!intel_ring_initialized(ring))
2713
		return;
2714
 
2715
	ret = intel_ring_idle(ring);
2716
	if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
2717
		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
2718
			  ring->name, ret);
2719
 
2720
	stop_ring(ring);
2721
}