Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 2341 → Rev 2342

/drivers/video/drm/i915/intel_ringbuffer.c
36,6 → 36,16
//#include "i915_trace.h"
#include "intel_drv.h"
 
/*
* 965+ support PIPE_CONTROL commands, which provide finer grained control
* over cache flushing.
*/
struct pipe_control {
struct drm_i915_gem_object *obj;
volatile u32 *cpu_page;
u32 gtt_offset;
};
 
static inline int ring_space(struct intel_ring_buffer *ring)
{
int space = (ring->head & HEAD_ADDR) - (ring->tail + 8);
125,6 → 135,118
return 0;
}
 
/**
* Emits a PIPE_CONTROL with a non-zero post-sync operation, for
* implementing two workarounds on gen6. From section 1.4.7.1
* "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
*
* [DevSNB-C+{W/A}] Before any depth stall flush (including those
* produced by non-pipelined state commands), software needs to first
* send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
* 0.
*
* [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
* =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
*
* And the workaround for these two requires this workaround first:
*
* [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
* BEFORE the pipe-control with a post-sync op and no write-cache
* flushes.
*
* And this last workaround is tricky because of the requirements on
* that bit. From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
* volume 2 part 1:
*
* "1 of the following must also be set:
* - Render Target Cache Flush Enable ([12] of DW1)
* - Depth Cache Flush Enable ([0] of DW1)
* - Stall at Pixel Scoreboard ([1] of DW1)
* - Depth Stall ([13] of DW1)
* - Post-Sync Operation ([13] of DW1)
* - Notify Enable ([8] of DW1)"
*
* The cache flushes require the workaround flush that triggered this
* one, so we can't use it. Depth stall would trigger the same.
* Post-sync nonzero is what triggered this second workaround, so we
* can't use that one either. Notify enable is IRQs, which aren't
* really our business. That leaves only stall at scoreboard.
*/
static int
intel_emit_post_sync_nonzero_flush(struct intel_ring_buffer *ring)
{
struct pipe_control *pc = ring->private;
u32 scratch_addr = pc->gtt_offset + 128;
int ret;
 
 
ret = intel_ring_begin(ring, 6);
if (ret)
return ret;
 
intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_STALL_AT_SCOREBOARD);
intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
intel_ring_emit(ring, 0); /* low dword */
intel_ring_emit(ring, 0); /* high dword */
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
 
ret = intel_ring_begin(ring, 6);
if (ret)
return ret;
 
intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
intel_ring_emit(ring, 0);
intel_ring_emit(ring, 0);
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
 
return 0;
}
 
static int
gen6_render_ring_flush(struct intel_ring_buffer *ring,
u32 invalidate_domains, u32 flush_domains)
{
u32 flags = 0;
struct pipe_control *pc = ring->private;
u32 scratch_addr = pc->gtt_offset + 128;
int ret;
 
/* Force SNB workarounds for PIPE_CONTROL flushes */
intel_emit_post_sync_nonzero_flush(ring);
 
/* Just flush everything. Experiments have shown that reducing the
* number of bits based on the write domains has little performance
* impact.
*/
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
 
ret = intel_ring_begin(ring, 6);
if (ret)
return ret;
 
intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
intel_ring_emit(ring, flags);
intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
intel_ring_emit(ring, 0); /* lower dword */
intel_ring_emit(ring, 0); /* uppwer dword */
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
 
return 0;
}
 
static void ring_write_tail(struct intel_ring_buffer *ring,
u32 value)
{
205,16 → 327,6
return 0;
}
 
/*
* 965+ support PIPE_CONTROL commands, which provide finer grained control
* over cache flushing.
*/
struct pipe_control {
struct drm_i915_gem_object *obj;
volatile u32 *cpu_page;
u32 gtt_offset;
};
 
static int
init_pipe_control(struct intel_ring_buffer *ring)
{
295,13 → 407,17
GFX_MODE_ENABLE(GFX_REPLAY_MODE));
}
 
if (INTEL_INFO(dev)->gen >= 6) {
} else if (IS_GEN5(dev)) {
if (INTEL_INFO(dev)->gen >= 5) {
ret = init_pipe_control(ring);
if (ret)
return ret;
}
 
if (INTEL_INFO(dev)->gen >= 6) {
I915_WRITE(INSTPM,
INSTPM_FORCE_ORDERING << 16 | INSTPM_FORCE_ORDERING);
}
 
return ret;
}
 
314,35 → 430,33
}
 
static void
update_semaphore(struct intel_ring_buffer *ring, int i, u32 seqno)
update_mboxes(struct intel_ring_buffer *ring,
u32 seqno,
u32 mmio_offset)
{
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
int id;
 
/*
* cs -> 1 = vcs, 0 = bcs
* vcs -> 1 = bcs, 0 = cs,
* bcs -> 1 = cs, 0 = vcs.
*/
id = ring - dev_priv->ring;
id += 2 - i;
id %= 3;
 
intel_ring_emit(ring,
MI_SEMAPHORE_MBOX |
intel_ring_emit(ring, MI_SEMAPHORE_MBOX |
MI_SEMAPHORE_GLOBAL_GTT |
MI_SEMAPHORE_REGISTER |
MI_SEMAPHORE_UPDATE);
intel_ring_emit(ring, seqno);
intel_ring_emit(ring,
RING_SYNC_0(dev_priv->ring[id].mmio_base) + 4*i);
intel_ring_emit(ring, mmio_offset);
}
 
/**
* gen6_add_request - Update the semaphore mailbox registers
*
* @ring - ring that is adding a request
* @seqno - return seqno stuck into the ring
*
* Update the mailbox registers in the *other* rings with the current seqno.
* This acts like a signal in the canonical semaphore.
*/
static int
gen6_add_request(struct intel_ring_buffer *ring,
u32 *result)
u32 *seqno)
{
u32 seqno;
u32 mbox1_reg;
u32 mbox2_reg;
int ret;
 
ret = intel_ring_begin(ring, 10);
349,48 → 463,98
if (ret)
return ret;
 
seqno = i915_gem_get_seqno(ring->dev);
update_semaphore(ring, 0, seqno);
update_semaphore(ring, 1, seqno);
mbox1_reg = ring->signal_mbox[0];
mbox2_reg = ring->signal_mbox[1];
 
*seqno = i915_gem_get_seqno(ring->dev);
 
update_mboxes(ring, *seqno, mbox1_reg);
update_mboxes(ring, *seqno, mbox2_reg);
intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
intel_ring_emit(ring, seqno);
intel_ring_emit(ring, *seqno);
intel_ring_emit(ring, MI_USER_INTERRUPT);
intel_ring_advance(ring);
 
*result = seqno;
return 0;
}
 
int
intel_ring_sync(struct intel_ring_buffer *ring,
struct intel_ring_buffer *to,
/**
* intel_ring_sync - sync the waiter to the signaller on seqno
*
* @waiter - ring that is waiting
* @signaller - ring which has, or will signal
* @seqno - seqno which the waiter will block on
*/
static int
intel_ring_sync(struct intel_ring_buffer *waiter,
struct intel_ring_buffer *signaller,
int ring,
u32 seqno)
{
int ret;
u32 dw1 = MI_SEMAPHORE_MBOX |
MI_SEMAPHORE_COMPARE |
MI_SEMAPHORE_REGISTER;
 
ret = intel_ring_begin(ring, 4);
ret = intel_ring_begin(waiter, 4);
if (ret)
return ret;
 
intel_ring_emit(ring,
MI_SEMAPHORE_MBOX |
MI_SEMAPHORE_REGISTER |
intel_ring_sync_index(ring, to) << 17 |
MI_SEMAPHORE_COMPARE);
intel_ring_emit(ring, seqno);
intel_ring_emit(ring, 0);
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
intel_ring_emit(waiter, dw1 | signaller->semaphore_register[ring]);
intel_ring_emit(waiter, seqno);
intel_ring_emit(waiter, 0);
intel_ring_emit(waiter, MI_NOOP);
intel_ring_advance(waiter);
 
return 0;
}
 
/* VCS->RCS (RVSYNC) or BCS->RCS (RBSYNC) */
int
render_ring_sync_to(struct intel_ring_buffer *waiter,
struct intel_ring_buffer *signaller,
u32 seqno)
{
// WARN_ON(signaller->semaphore_register[RCS] == MI_SEMAPHORE_SYNC_INVALID);
return intel_ring_sync(waiter,
signaller,
RCS,
seqno);
}
 
/* RCS->VCS (VRSYNC) or BCS->VCS (VBSYNC) */
int
gen6_bsd_ring_sync_to(struct intel_ring_buffer *waiter,
struct intel_ring_buffer *signaller,
u32 seqno)
{
// WARN_ON(signaller->semaphore_register[VCS] == MI_SEMAPHORE_SYNC_INVALID);
return intel_ring_sync(waiter,
signaller,
VCS,
seqno);
}
 
/* RCS->BCS (BRSYNC) or VCS->BCS (BVSYNC) */
int
gen6_blt_ring_sync_to(struct intel_ring_buffer *waiter,
struct intel_ring_buffer *signaller,
u32 seqno)
{
// WARN_ON(signaller->semaphore_register[BCS] == MI_SEMAPHORE_SYNC_INVALID);
return intel_ring_sync(waiter,
signaller,
BCS,
seqno);
}
 
 
 
#define PIPE_CONTROL_FLUSH(ring__, addr__) \
do { \
intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL | PIPE_CONTROL_QW_WRITE | \
PIPE_CONTROL_DEPTH_STALL | 2); \
intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | \
PIPE_CONTROL_DEPTH_STALL); \
intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT); \
intel_ring_emit(ring__, 0); \
intel_ring_emit(ring__, 0); \
418,8 → 582,9
if (ret)
return ret;
 
intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_WC_FLUSH | PIPE_CONTROL_TC_FLUSH);
intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_WRITE_FLUSH |
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
intel_ring_emit(ring, seqno);
intel_ring_emit(ring, 0);
434,8 → 599,9
PIPE_CONTROL_FLUSH(ring, scratch_addr);
scratch_addr += 128;
PIPE_CONTROL_FLUSH(ring, scratch_addr);
intel_ring_emit(ring, GFX_OP_PIPE_CONTROL | PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_WC_FLUSH | PIPE_CONTROL_TC_FLUSH |
intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
PIPE_CONTROL_WRITE_FLUSH |
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_NOTIFY);
intel_ring_emit(ring, pc->gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
intel_ring_emit(ring, seqno);
469,6 → 635,19
}
 
static u32
gen6_ring_get_seqno(struct intel_ring_buffer *ring)
{
struct drm_device *dev = ring->dev;
 
/* Workaround to force correct ordering between irq and seqno writes on
* ivb (and maybe also on snb) by reading from a CS register (like
* ACTHD) before reading the status page. */
if (IS_GEN7(dev))
intel_ring_get_active_head(ring);
return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}
 
static u32
ring_get_seqno(struct intel_ring_buffer *ring)
{
return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
513,7 → 692,6
POSTING_READ(IMR);
}
 
#if 0
static bool
render_ring_get_irq(struct intel_ring_buffer *ring)
{
553,7 → 731,6
}
spin_unlock(&ring->irq_lock);
}
#endif
 
void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
{
626,8 → 803,6
return 0;
}
 
#if 0
 
static bool
gen6_ring_get_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag)
{
637,6 → 812,12
if (!dev->irq_enabled)
return false;
 
/* It looks like we need to prevent the gt from suspending while waiting
* for an notifiy irq, otherwise irqs seem to get lost on at least the
* blt/bsd rings on ivb. */
if (IS_GEN7(dev))
gen6_gt_force_wake_get(dev_priv);
 
spin_lock(&ring->irq_lock);
if (ring->irq_refcount++ == 0) {
ring->irq_mask &= ~rflag;
661,6 → 842,9
ironlake_disable_irq(dev_priv, gflag);
}
spin_unlock(&ring->irq_lock);
 
if (IS_GEN7(dev))
gen6_gt_force_wake_put(dev_priv);
}
 
static bool
698,7 → 882,6
}
spin_unlock(&ring->irq_lock);
}
#endif
 
static int
ring_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 length)
828,7 → 1011,7
INIT_LIST_HEAD(&ring->gpu_write_list);
 
// init_waitqueue_head(&ring->irq_queue);
// spin_lock_init(&ring->irq_lock);
spin_lock_init(&ring->irq_lock);
ring->irq_mask = ~0;
 
if (I915_NEED_GFX_HWS(dev)) {
1038,11 → 1221,16
.write_tail = ring_write_tail,
.flush = render_ring_flush,
.add_request = render_ring_add_request,
// .get_seqno = ring_get_seqno,
// .irq_get = render_ring_get_irq,
// .irq_put = render_ring_put_irq,
.get_seqno = ring_get_seqno,
.irq_get = render_ring_get_irq,
.irq_put = render_ring_put_irq,
.dispatch_execbuffer = render_ring_dispatch_execbuffer,
// .cleanup = render_ring_cleanup,
.sync_to = render_ring_sync_to,
.semaphore_register = {MI_SEMAPHORE_SYNC_INVALID,
MI_SEMAPHORE_SYNC_RV,
MI_SEMAPHORE_SYNC_RB},
.signal_mbox = {GEN6_VRSYNC, GEN6_BRSYNC},
};
 
/* ring buffer for bit-stream decoder */
1056,9 → 1244,9
.write_tail = ring_write_tail,
.flush = bsd_ring_flush,
.add_request = ring_add_request,
// .get_seqno = ring_get_seqno,
// .irq_get = bsd_ring_get_irq,
// .irq_put = bsd_ring_put_irq,
.get_seqno = ring_get_seqno,
.irq_get = bsd_ring_get_irq,
.irq_put = bsd_ring_put_irq,
.dispatch_execbuffer = ring_dispatch_execbuffer,
};
 
1124,8 → 1312,6
return 0;
}
 
#if 0
 
static bool
gen6_render_ring_get_irq(struct intel_ring_buffer *ring)
{
1158,8 → 1344,6
GEN6_BSD_USER_INTERRUPT);
}
 
#endif
 
/* ring buffer for Video Codec for Gen6+ */
static const struct intel_ring_buffer gen6_bsd_ring = {
.name = "gen6 bsd ring",
1170,13 → 1354,17
.write_tail = gen6_bsd_ring_write_tail,
.flush = gen6_ring_flush,
.add_request = gen6_add_request,
// .get_seqno = ring_get_seqno,
// .irq_get = gen6_bsd_ring_get_irq,
// .irq_put = gen6_bsd_ring_put_irq,
.get_seqno = gen6_ring_get_seqno,
.irq_get = gen6_bsd_ring_get_irq,
.irq_put = gen6_bsd_ring_put_irq,
.dispatch_execbuffer = gen6_ring_dispatch_execbuffer,
.sync_to = gen6_bsd_ring_sync_to,
.semaphore_register = {MI_SEMAPHORE_SYNC_VR,
MI_SEMAPHORE_SYNC_INVALID,
MI_SEMAPHORE_SYNC_VB},
.signal_mbox = {GEN6_RVSYNC, GEN6_BVSYNC},
};
 
#if 0
/* Blitter support (SandyBridge+) */
 
static bool
1194,7 → 1382,6
GT_BLT_USER_INTERRUPT,
GEN6_BLITTER_USER_INTERRUPT);
}
#endif
 
 
/* Workaround for some stepping of SNB,
1302,11 → 1489,16
.write_tail = ring_write_tail,
.flush = blt_ring_flush,
.add_request = gen6_add_request,
// .get_seqno = ring_get_seqno,
// .irq_get = blt_ring_get_irq,
// .irq_put = blt_ring_put_irq,
.get_seqno = gen6_ring_get_seqno,
.irq_get = blt_ring_get_irq,
.irq_put = blt_ring_put_irq,
.dispatch_execbuffer = gen6_ring_dispatch_execbuffer,
// .cleanup = blt_ring_cleanup,
.sync_to = gen6_blt_ring_sync_to,
.semaphore_register = {MI_SEMAPHORE_SYNC_BR,
MI_SEMAPHORE_SYNC_BV,
MI_SEMAPHORE_SYNC_INVALID},
.signal_mbox = {GEN6_RBSYNC, GEN6_VBSYNC},
};
 
int intel_init_render_ring_buffer(struct drm_device *dev)
1317,11 → 1509,13
*ring = render_ring;
if (INTEL_INFO(dev)->gen >= 6) {
ring->add_request = gen6_add_request;
// ring->irq_get = gen6_render_ring_get_irq;
// ring->irq_put = gen6_render_ring_put_irq;
ring->flush = gen6_render_ring_flush;
ring->irq_get = gen6_render_ring_get_irq;
ring->irq_put = gen6_render_ring_put_irq;
ring->get_seqno = gen6_ring_get_seqno;
} else if (IS_GEN5(dev)) {
ring->add_request = pc_render_add_request;
// ring->get_seqno = pc_render_get_seqno;
ring->get_seqno = pc_render_get_seqno;
}
 
if (!I915_NEED_GFX_HWS(dev)) {