36,6 → 36,16 |
//#include "i915_trace.h" |
#include "intel_drv.h" |
|
/* |
* 965+ support PIPE_CONTROL commands, which provide finer grained control |
* over cache flushing. |
*/ |
struct pipe_control { |
struct drm_i915_gem_object *obj; |
volatile u32 *cpu_page; |
u32 gtt_offset; |
}; |
|
static inline int ring_space(struct intel_ring_buffer *ring) |
{ |
int space = (ring->head & HEAD_ADDR) - (ring->tail + 8); |
125,6 → 135,118 |
return 0; |
} |
|
/** |
* Emits a PIPE_CONTROL with a non-zero post-sync operation, for |
* implementing two workarounds on gen6. From section 1.4.7.1 |
* "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: |
* |
* [DevSNB-C+{W/A}] Before any depth stall flush (including those |
* produced by non-pipelined state commands), software needs to first |
* send a PIPE_CONTROL with no bits set except Post-Sync Operation != |
* 0. |
* |
* [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable |
* =1, a PIPE_CONTROL with any non-zero post-sync-op is required. |
* |
* And the workaround for these two requires this workaround first: |
* |
* [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent |
* BEFORE the pipe-control with a post-sync op and no write-cache |
* flushes. |
* |
* And this last workaround is tricky because of the requirements on |
* that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM |
* volume 2 part 1: |
* |
* "1 of the following must also be set: |
* - Render Target Cache Flush Enable ([12] of DW1) |
* - Depth Cache Flush Enable ([0] of DW1) |
* - Stall at Pixel Scoreboard ([1] of DW1) |
* - Depth Stall ([13] of DW1) |
* - Post-Sync Operation ([13] of DW1) |
* - Notify Enable ([8] of DW1)" |
* |
* The cache flushes require the workaround flush that triggered this |
* one, so we can't use it. Depth stall would trigger the same. |
* Post-sync nonzero is what triggered this second workaround, so we |
* can't use that one either. Notify enable is IRQs, which aren't |
* really our business. That leaves only stall at scoreboard. |
*/ |
static int |
intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring) |
{ |
struct pipe_control *pc = ring->private; |
u32 scratch_addr = pc->gtt_offset + 128; |
	int ret;

ret = intel_ring_begin(ring, 6); |
if (ret) |
return ret; |
|
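	/* Workaround stall: emit a CS-stall PIPE_CONTROL with only the
	 * stall-at-scoreboard bit set, as required before the post-sync op
	 * (see the comment above). */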
intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); |
intel_ring_emit(ring, PIPE_CONTROL_CS_STALL | |
PIPE_CONTROL_STALL_AT_SCOREBOARD); |
intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */ |
intel_ring_emit(ring, 0); /* low dword */ |
intel_ring_emit(ring, 0); /* high dword */ |
intel_ring_emit(ring, MI_NOOP); |
intel_ring_advance(ring); |
|
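	/* Now the actual post-sync op: a QW write to scratch space with no
	 * cache-flush bits set, satisfying the two post-sync workarounds. */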
ret = intel_ring_begin(ring, 6); |
if (ret) |
return ret; |
|
intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); |
intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE); |
intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */ |
intel_ring_emit(ring, 0); |
intel_ring_emit(ring, 0); |
intel_ring_emit(ring, MI_NOOP); |
intel_ring_advance(ring); |
|
return 0; |
} |
|
static int |
gen6_render_ring_flush(struct intel_ring_buffer *ring, |
u32 invalidate_domains, u32 flush_domains) |
{ |
u32 flags = 0; |
struct pipe_control *pc = ring->private; |
u32 scratch_addr = pc->gtt_offset + 128; |
int ret; |
|
	/* Force SNB workarounds for PIPE_CONTROL flushes */
	ret = intel_emit_post_sync_nonzero_flush(ring);
	if (ret)
		return ret;
|
/* Just flush everything. Experiments have shown that reducing the |
* number of bits based on the write domains has little performance |
* impact. |
*/ |
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; |
flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; |
flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; |
flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; |
flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; |
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; |
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; |
|
ret = intel_ring_begin(ring, 6); |
if (ret) |
return ret; |
|
intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); |
intel_ring_emit(ring, flags); |
intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); |
intel_ring_emit(ring, 0); /* lower dword */ |
	intel_ring_emit(ring, 0); /* upper dword */
intel_ring_emit(ring, MI_NOOP); |
intel_ring_advance(ring); |
|
return 0; |
} |
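
/*
 * Callers reach this through ring->flush(); a full flush, e.g.
 *	ring->flush(ring, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
 * is typical -- as noted above, the domain arguments are currently
 * ignored and everything is flushed and invalidated regardless.
 */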
|
static void ring_write_tail(struct intel_ring_buffer *ring, |
u32 value) |
{ |
205,16 → 327,6 |
return 0; |
} |
|
static int |
init_pipe_control(struct intel_ring_buffer *ring) |
{ |
295,13 → 407,17 |
GFX_MODE_ENABLE(GFX_REPLAY_MODE)); |
} |
|
if (INTEL_INFO(dev)->gen >= 5) { |
ret = init_pipe_control(ring); |
if (ret) |
return ret; |
} |
|
if (INTEL_INFO(dev)->gen >= 6) { |
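		/* INSTPM is a masked register: the upper 16 bits select
		 * which of the lower bits the write actually updates. */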
I915_WRITE(INSTPM, |
INSTPM_FORCE_ORDERING << 16 | INSTPM_FORCE_ORDERING); |
} |
|
return ret; |
} |
|
314,35 → 430,33 |
} |
|
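/**
 * update_mboxes - emit a semaphore mailbox update for one other ring
 *
 * @ring - ring doing the signalling
 * @seqno - seqno to store in the other ring's mailbox
 * @mmio_offset - mailbox register of the ring being signalled
 */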
static void |
update_mboxes(struct intel_ring_buffer *ring,
	      u32 seqno,
	      u32 mmio_offset)
{
	intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
			      MI_SEMAPHORE_GLOBAL_GTT |
			      MI_SEMAPHORE_REGISTER |
			      MI_SEMAPHORE_UPDATE);
	intel_ring_emit(ring, seqno);
	intel_ring_emit(ring, mmio_offset);
} |
|
/** |
* gen6_add_request - Update the semaphore mailbox registers |
* |
* @ring - ring that is adding a request |
* @seqno - return seqno stuck into the ring |
* |
* Update the mailbox registers in the *other* rings with the current seqno. |
* This acts like a signal in the canonical semaphore. |
*/ |
static int |
gen6_add_request(struct intel_ring_buffer *ring, |
		 u32 *seqno)
{ |
u32 mbox1_reg; |
u32 mbox2_reg; |
int ret; |
|
ret = intel_ring_begin(ring, 10); |
349,48 → 463,98 |
if (ret) |
return ret; |
|
mbox1_reg = ring->signal_mbox[0]; |
mbox2_reg = ring->signal_mbox[1]; |
|
*seqno = i915_gem_get_seqno(ring->dev); |
|
update_mboxes(ring, *seqno, mbox1_reg); |
update_mboxes(ring, *seqno, mbox2_reg); |
intel_ring_emit(ring, MI_STORE_DWORD_INDEX); |
intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); |
intel_ring_emit(ring, *seqno); |
intel_ring_emit(ring, MI_USER_INTERRUPT); |
intel_ring_advance(ring); |
|
return 0; |
} |
|
/** |
* intel_ring_sync - sync the waiter to the signaller on seqno |
* |
* @waiter - ring that is waiting |
* @signaller - ring which has, or will signal |
* @seqno - seqno which the waiter will block on |
*/ |
static int |
intel_ring_sync(struct intel_ring_buffer *waiter, |
struct intel_ring_buffer *signaller, |
int ring, |
u32 seqno) |
{ |
int ret; |
u32 dw1 = MI_SEMAPHORE_MBOX | |
MI_SEMAPHORE_COMPARE | |
MI_SEMAPHORE_REGISTER; |
|
ret = intel_ring_begin(waiter, 4); |
if (ret) |
return ret; |
|
intel_ring_emit(waiter, dw1 | signaller->semaphore_register[ring]); |
intel_ring_emit(waiter, seqno); |
intel_ring_emit(waiter, 0); |
intel_ring_emit(waiter, MI_NOOP); |
intel_ring_advance(waiter); |
|
return 0; |
} |
|
/* VCS->RCS (RVSYNC) or BCS->RCS (RBSYNC) */ |
int |
render_ring_sync_to(struct intel_ring_buffer *waiter, |
struct intel_ring_buffer *signaller, |
u32 seqno) |
{ |
// WARN_ON(signaller->semaphore_register[RCS] == MI_SEMAPHORE_SYNC_INVALID); |
return intel_ring_sync(waiter, |
signaller, |
RCS, |
seqno); |
} |
|
/* RCS->VCS (VRSYNC) or BCS->VCS (VBSYNC) */ |
int |
gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter, |
struct intel_ring_buffer *signaller, |
u32 seqno) |
{ |
// WARN_ON(signaller->semaphore_register[VCS] == MI_SEMAPHORE_SYNC_INVALID); |
return intel_ring_sync(waiter, |
signaller, |
VCS, |
seqno); |
} |
|
/* RCS->BCS (BRSYNC) or VCS->BCS (BVSYNC) */ |
int |
gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter, |
struct intel_ring_buffer *signaller, |
u32 seqno) |
{ |
// WARN_ON(signaller->semaphore_register[BCS] == MI_SEMAPHORE_SYNC_INVALID); |
return intel_ring_sync(waiter, |
signaller, |
BCS, |
seqno); |
} |
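
/*
 * Usage sketch: to stall the blitter until the render ring passes `seqno',
 * a caller would issue
 *	gen6_blt_ring_sync_to(blt_ring, render_ring, seqno);
 * which emits an MI_SEMAPHORE_MBOX compare in the blitter ring against the
 * mailbox that the render ring's add_request keeps updated via
 * update_mboxes().
 */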
|
#define PIPE_CONTROL_FLUSH(ring__, addr__) \ |
do { \ |
intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | \ |
PIPE_CONTROL_DEPTH_STALL); \ |
intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT); \ |
intel_ring_emit(ring__, 0); \ |
intel_ring_emit(ring__, 0); \ |
418,8 → 582,9 |
if (ret) |
return ret; |
|
intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | |
PIPE_CONTROL_WRITE_FLUSH | |
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); |
intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT); |
intel_ring_emit(ring, seqno); |
intel_ring_emit(ring, 0); |
434,8 → 599,9 |
PIPE_CONTROL_FLUSH(ring, scratch_addr); |
scratch_addr += 128; |
PIPE_CONTROL_FLUSH(ring, scratch_addr); |
intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | |
PIPE_CONTROL_WRITE_FLUSH | |
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | |
PIPE_CONTROL_NOTIFY); |
intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT); |
intel_ring_emit(ring, seqno); |
469,6 → 635,19 |
} |
|
static u32 |
gen6_ring_get_seqno(struct intel_ring_buffer *ring) |
{ |
struct drm_device *dev = ring->dev; |
|
/* Workaround to force correct ordering between irq and seqno writes on |
* ivb (and maybe also on snb) by reading from a CS register (like |
* ACTHD) before reading the status page. */ |
if (IS_GEN7(dev)) |
intel_ring_get_active_head(ring); |
return intel_read_status_page(ring, I915_GEM_HWS_INDEX); |
} |
|
static u32 |
ring_get_seqno(struct intel_ring_buffer *ring) |
{ |
return intel_read_status_page(ring, I915_GEM_HWS_INDEX); |
513,7 → 692,6 |
POSTING_READ(IMR); |
} |
|
static bool |
render_ring_get_irq(struct intel_ring_buffer *ring) |
{ |
553,7 → 731,6 |
} |
spin_unlock(&ring->irq_lock); |
} |
|
void intel_ring_setup_status_page(struct intel_ring_buffer *ring) |
{ |
626,8 → 803,6 |
return 0; |
} |
|
static bool |
gen6_ring_get_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag) |
{ |
637,6 → 812,12 |
if (!dev->irq_enabled) |
return false; |
|
	/* It looks like we need to prevent the gt from suspending while waiting
	 * for a notify irq, otherwise irqs seem to get lost on at least the
	 * blt/bsd rings on ivb. */
if (IS_GEN7(dev)) |
gen6_gt_force_wake_get(dev_priv); |
|
spin_lock(&ring->irq_lock); |
if (ring->irq_refcount++ == 0) { |
ring->irq_mask &= ~rflag; |
661,6 → 842,9 |
ironlake_disable_irq(dev_priv, gflag); |
} |
spin_unlock(&ring->irq_lock); |
|
if (IS_GEN7(dev)) |
gen6_gt_force_wake_put(dev_priv); |
} |
|
static bool |
698,7 → 882,6 |
} |
spin_unlock(&ring->irq_lock); |
} |
|
static int |
ring_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 length) |
828,7 → 1011,7 |
INIT_LIST_HEAD(&ring->gpu_write_list); |
|
// init_waitqueue_head(&ring->irq_queue); |
spin_lock_init(&ring->irq_lock); |
ring->irq_mask = ~0; |
|
if (I915_NEED_GFX_HWS(dev)) { |
1038,11 → 1221,16 |
.write_tail = ring_write_tail, |
.flush = render_ring_flush, |
.add_request = render_ring_add_request, |
.get_seqno = ring_get_seqno, |
.irq_get = render_ring_get_irq, |
.irq_put = render_ring_put_irq, |
.dispatch_execbuffer = render_ring_dispatch_execbuffer, |
// .cleanup = render_ring_cleanup, |
.sync_to = render_ring_sync_to, |
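	/* semaphore_register[] is indexed by the waiting ring's id (RCS, VCS,
	 * BCS); this ring's own slot stays MI_SEMAPHORE_SYNC_INVALID. */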
.semaphore_register = {MI_SEMAPHORE_SYNC_INVALID, |
MI_SEMAPHORE_SYNC_RV, |
MI_SEMAPHORE_SYNC_RB}, |
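	/* mailbox registers in the other rings which add_request updates */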
.signal_mbox = {GEN6_VRSYNC, GEN6_BRSYNC}, |
}; |
|
/* ring buffer for bit-stream decoder */ |
1056,9 → 1244,9 |
.write_tail = ring_write_tail, |
.flush = bsd_ring_flush, |
.add_request = ring_add_request, |
.get_seqno = ring_get_seqno, |
.irq_get = bsd_ring_get_irq, |
.irq_put = bsd_ring_put_irq, |
.dispatch_execbuffer = ring_dispatch_execbuffer, |
}; |
|
1124,8 → 1312,6 |
return 0; |
} |
|
static bool |
gen6_render_ring_get_irq(struct intel_ring_buffer *ring) |
{ |
1158,8 → 1344,6 |
GEN6_BSD_USER_INTERRUPT); |
} |
|
/* ring buffer for Video Codec for Gen6+ */ |
static const struct intel_ring_buffer gen6_bsd_ring = { |
.name = "gen6 bsd ring", |
1170,13 → 1354,17 |
.write_tail = gen6_bsd_ring_write_tail, |
.flush = gen6_ring_flush, |
.add_request = gen6_add_request, |
.get_seqno = gen6_ring_get_seqno, |
.irq_get = gen6_bsd_ring_get_irq, |
.irq_put = gen6_bsd_ring_put_irq, |
.dispatch_execbuffer = gen6_ring_dispatch_execbuffer, |
.sync_to = gen6_bsd_ring_sync_to, |
.semaphore_register = {MI_SEMAPHORE_SYNC_VR, |
MI_SEMAPHORE_SYNC_INVALID, |
MI_SEMAPHORE_SYNC_VB}, |
.signal_mbox = {GEN6_RVSYNC, GEN6_BVSYNC}, |
}; |
|
/* Blitter support (SandyBridge+) */ |
|
static bool |
1194,7 → 1382,6 |
GT_BLT_USER_INTERRUPT, |
GEN6_BLITTER_USER_INTERRUPT); |
} |
|
|
/* Workaround for some stepping of SNB, |
1302,11 → 1489,16 |
.write_tail = ring_write_tail, |
.flush = blt_ring_flush, |
.add_request = gen6_add_request, |
.get_seqno = gen6_ring_get_seqno, |
.irq_get = blt_ring_get_irq, |
.irq_put = blt_ring_put_irq, |
.dispatch_execbuffer = gen6_ring_dispatch_execbuffer, |
// .cleanup = blt_ring_cleanup, |
.sync_to = gen6_blt_ring_sync_to, |
.semaphore_register = {MI_SEMAPHORE_SYNC_BR, |
MI_SEMAPHORE_SYNC_BV, |
MI_SEMAPHORE_SYNC_INVALID}, |
.signal_mbox = {GEN6_RBSYNC, GEN6_VBSYNC}, |
}; |
|
int intel_init_render_ring_buffer(struct drm_device *dev) |
1317,11 → 1509,13 |
*ring = render_ring; |
if (INTEL_INFO(dev)->gen >= 6) { |
ring->add_request = gen6_add_request; |
ring->flush = gen6_render_ring_flush; |
ring->irq_get = gen6_render_ring_get_irq; |
ring->irq_put = gen6_render_ring_put_irq; |
ring->get_seqno = gen6_ring_get_seqno; |
} else if (IS_GEN5(dev)) { |
ring->add_request = pc_render_add_request; |
ring->get_seqno = pc_render_get_seqno; |
} |
|
if (!I915_NEED_GFX_HWS(dev)) { |