#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_

#include <linux/hashtable.h>

#define I915_CMD_HASH_ORDER 9

/*
 * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
 * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
 * Gen4+ BSpec "vol1c Memory Interface and Command Stream" / 5.3.4.5 "Ring Buffer Use"
 *
 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the same
 * cacheline, the Head Pointer must not be greater than the Tail
 * Pointer."
 */
#define I915_RING_FREE_SPACE 64

struct intel_hw_status_page {
	u32 *page_addr;
	unsigned int gfx_addr;
	struct drm_i915_gem_object *obj;
};

#define I915_READ_TAIL(ring) I915_READ(RING_TAIL((ring)->mmio_base))
#define I915_WRITE_TAIL(ring, val) I915_WRITE(RING_TAIL((ring)->mmio_base), val)

#define I915_READ_START(ring) I915_READ(RING_START((ring)->mmio_base))
#define I915_WRITE_START(ring, val) I915_WRITE(RING_START((ring)->mmio_base), val)

#define I915_READ_HEAD(ring)  I915_READ(RING_HEAD((ring)->mmio_base))
#define I915_WRITE_HEAD(ring, val) I915_WRITE(RING_HEAD((ring)->mmio_base), val)

#define I915_READ_CTL(ring) I915_READ(RING_CTL((ring)->mmio_base))
#define I915_WRITE_CTL(ring, val) I915_WRITE(RING_CTL((ring)->mmio_base), val)
#define I915_READ_IMR(ring) I915_READ(RING_IMR((ring)->mmio_base))
#define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val)

#define I915_READ_MODE(ring) I915_READ(RING_MI_MODE((ring)->mmio_base))
#define I915_WRITE_MODE(ring, val) I915_WRITE(RING_MI_MODE((ring)->mmio_base), val)

/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
 * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
 */
#define i915_semaphore_seqno_size sizeof(uint64_t)
#define GEN8_SIGNAL_OFFSET(__ring, to)			     \
	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
	((__ring)->id * I915_NUM_RINGS * i915_semaphore_seqno_size) + \
	(i915_semaphore_seqno_size * (to)))

#define GEN8_WAIT_OFFSET(__ring, from)			     \
	(i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
	((from) * I915_NUM_RINGS * i915_semaphore_seqno_size) + \
	(i915_semaphore_seqno_size * (__ring)->id))
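/*
 * Worked example (a sketch, relative to the semaphore page's GGTT base,
 * with I915_NUM_RINGS == 5 and 8-byte slots; "rcs" being the render
 * engine): GEN8_SIGNAL_OFFSET(rcs, VCS) = 0 * 5 * 8 + 8 * 1 = 0x08, and
 * GEN8_WAIT_OFFSET(rcs, VCS) = 1 * 5 * 8 + 8 * 0 = 0x28, matching the
 * signal/wait tables documented in struct intel_engine_cs below.
 */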

#define GEN8_RING_SEMAPHORE_INIT do {	\
	if (!dev_priv->semaphore_obj) {	\
		break;			\
	}				\
	ring->semaphore.signal_ggtt[RCS] = GEN8_SIGNAL_OFFSET(ring, RCS); \
	ring->semaphore.signal_ggtt[VCS] = GEN8_SIGNAL_OFFSET(ring, VCS); \
	ring->semaphore.signal_ggtt[BCS] = GEN8_SIGNAL_OFFSET(ring, BCS); \
	ring->semaphore.signal_ggtt[VECS] = GEN8_SIGNAL_OFFSET(ring, VECS); \
	ring->semaphore.signal_ggtt[VCS2] = GEN8_SIGNAL_OFFSET(ring, VCS2); \
	ring->semaphore.signal_ggtt[ring->id] = MI_SEMAPHORE_SYNC_INVALID; \
	} while(0)
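/*
 * Usage note (sketch): the macro above expects "dev_priv" and "ring" in
 * scope and is invoked from the gen8 engine init paths once
 * dev_priv->semaphore_obj has been allocated; the "break" makes it a
 * no-op when semaphores are disabled and the object was never created.
 */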

enum intel_ring_hangcheck_action {
	HANGCHECK_IDLE = 0,
	HANGCHECK_WAIT,
	HANGCHECK_ACTIVE,
	HANGCHECK_ACTIVE_LOOP,
	HANGCHECK_KICK,
	HANGCHECK_HUNG,
};

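/* Score threshold at which hangcheck declares a ring hung. */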
#define HANGCHECK_SCORE_RING_HUNG 31

struct intel_ring_hangcheck {
	u64 acthd;
	u64 max_acthd;
	u32 seqno;
	int score;
	enum intel_ring_hangcheck_action action;
	int deadlock;
};

struct intel_ringbuffer {
	struct drm_i915_gem_object *obj;
	void __iomem *virtual_start;

	u32 head;
	u32 tail;
	int space;
	int size;
	int effective_size;

	/** We track the position of the requests in the ring buffer, and
	 * when each is retired we increment last_retired_head as the GPU
	 * must have finished processing the request and so we know we
	 * can advance the ringbuffer up to that position where the
	 * request was retired.
	 *
	 * last_retired_head is set to -1 after the value is consumed so
	 * we can detect new retirements.
	 */
	u32 last_retired_head;
};

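/*
 * Free-space bookkeeping (a sketch of the logic in intel_ringbuffer.c):
 * the ring is a circular buffer, and I915_RING_FREE_SPACE bytes are kept
 * in reserve so head never catches up with tail, roughly:
 *
 *	space = head - (tail + I915_RING_FREE_SPACE);
 *	if (space < 0)
 *		space += size;
 */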
struct intel_engine_cs {
	const char *name;
	enum intel_ring_id {
		RCS = 0x0,
		VCS,
		BCS,
		VECS,
		VCS2
	} id;
#define I915_NUM_RINGS 5
#define LAST_USER_RING (VECS + 1)
	u32 mmio_base;
	struct drm_device *dev;
	struct intel_ringbuffer *buffer;

	struct intel_hw_status_page status_page;

	unsigned irq_refcount; /* protected by dev_priv->irq_lock */
	u32 irq_enable_mask;	/* bitmask to enable ring interrupt */
	u32 trace_irq_seqno;
	bool __must_check (*irq_get)(struct intel_engine_cs *ring);
	void (*irq_put)(struct intel_engine_cs *ring);

	int (*init)(struct intel_engine_cs *ring);

	void (*write_tail)(struct intel_engine_cs *ring,
			   u32 value);
	int __must_check (*flush)(struct intel_engine_cs *ring,
				  u32 invalidate_domains,
				  u32 flush_domains);
	int (*add_request)(struct intel_engine_cs *ring);
	/* Some chipsets are not quite as coherent as advertised and need
	 * an expensive kick to force a true read of the up-to-date seqno.
	 * However, the up-to-date seqno is not always required and the last
	 * seen value is good enough. Note that the seqno will always be
	 * monotonic, even if not coherent.
	 */
	u32 (*get_seqno)(struct intel_engine_cs *ring,
			 bool lazy_coherency);
	void (*set_seqno)(struct intel_engine_cs *ring,
			  u32 seqno);
	int (*dispatch_execbuffer)(struct intel_engine_cs *ring,
				   u64 offset, u32 length,
				   unsigned flags);
#define I915_DISPATCH_SECURE 0x1
#define I915_DISPATCH_PINNED 0x2
	void (*cleanup)(struct intel_engine_cs *ring);

	/* GEN8 signal/wait table - never trust comments!
	 *       signal to     signal to     signal to     signal to     signal to
	 *         RCS            VCS           BCS           VECS          VCS2
	 *      --------------------------------------------------------------------
	 *  RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) |
	 *      |-------------------------------------------------------------------
	 *  VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) |
	 *      |-------------------------------------------------------------------
	 *  BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) |
	 *      |-------------------------------------------------------------------
	 * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) | NOP  (0x90) | VCS2 (0x98) |
	 *      |-------------------------------------------------------------------
	 * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP  (0xc0) |
	 *      |-------------------------------------------------------------------
	 *
	 * Generalization:
	 *  f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
	 *  ie. transpose of g(x, y)
	 *
	 *       sync from     sync from     sync from     sync from     sync from
	 *         RCS            VCS           BCS           VECS          VCS2
	 *      --------------------------------------------------------------------
	 *  RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) |
	 *      |-------------------------------------------------------------------
	 *  VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) |
	 *      |-------------------------------------------------------------------
	 *  BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) |
	 *      |-------------------------------------------------------------------
	 * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) | NOP  (0x90) | VCS2 (0xb8) |
	 *      |-------------------------------------------------------------------
	 * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) | NOP  (0xc0) |
	 *      |-------------------------------------------------------------------
	 *
	 * Generalization:
	 *  g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
	 *  ie. transpose of f(x, y)
	 */
	struct {
		u32 sync_seqno[I915_NUM_RINGS-1];

		union {
			struct {
				/* our mbox written by others */
				u32 wait[I915_NUM_RINGS];
				/* mboxes this ring signals to */
				u32 signal[I915_NUM_RINGS];
			} mbox;
			u64 signal_ggtt[I915_NUM_RINGS];
		};

		/* AKA wait() */
		int (*sync_to)(struct intel_engine_cs *ring,
			       struct intel_engine_cs *to,
			       u32 seqno);
		int (*signal)(struct intel_engine_cs *signaller,
			      /* num_dwords needed by caller */
			      unsigned int num_dwords);
	} semaphore;
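	/*
	 * Note: pre-gen8 engines signal through the MMIO mbox registers
	 * in the union above; gen8+ signals via the GGTT addresses in
	 * signal_ggtt instead (see GEN8_SIGNAL_OFFSET() and the table).
	 */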

	/**
	 * List of objects currently involved in rendering from the
	 * ringbuffer.
	 *
	 * Includes buffers having the contents of their GPU caches
	 * flushed, not necessarily primitives.  last_rendering_seqno
	 * represents when the rendering involved will be completed.
	 *
	 * A reference is held on the buffer while on this list.
	 */
	struct list_head active_list;

	/**
	 * List of breadcrumbs associated with GPU requests currently
	 * outstanding.
	 */
	struct list_head request_list;

	/**
	 * Do we have some not yet emitted requests outstanding?
	 */
	struct drm_i915_gem_request *preallocated_lazy_request;
	u32 outstanding_lazy_seqno;
	bool gpu_caches_dirty;
	bool fbc_dirty;

	wait_queue_head_t irq_queue;

	struct intel_context *default_context;
	struct intel_context *last_context;

	struct intel_ring_hangcheck hangcheck;

	struct {
		struct drm_i915_gem_object *obj;
		u32 gtt_offset;
		volatile u32 *cpu_page;
	} scratch;

	bool needs_cmd_parser;

	/*
	 * Table of commands the command parser needs to know about
	 * for this ring.
	 */
	DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

	/*
	 * Table of registers allowed in commands that read/write registers.
	 */
	const u32 *reg_table;
	int reg_count;

	/*
	 * Table of registers allowed in commands that read/write registers, but
	 * only from the DRM master.
	 */
	const u32 *master_reg_table;
	int master_reg_count;

	/*
	 * Returns the bitmask for the length field of the specified command.
	 * Return 0 for an unrecognized/invalid command.
	 *
	 * If the command parser finds an entry for a command in the ring's
	 * cmd_tables, it gets the command's length based on the table entry.
	 * If not, it calls this function to determine the per-ring length field
	 * encoding for the command (i.e. certain opcode ranges use certain bits
	 * to encode the command length in the header).
	 */
	u32 (*get_cmd_length_mask)(u32 cmd_header);
};
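/*
 * Illustrative sketch of a get_cmd_length_mask() hook (not the driver's
 * actual per-ring implementation): key off the instruction client field
 * in bits 31:29 of the command header, returning the MI-style 6-bit
 * length mask, or 0 to reject an unrecognized command.
 */
#if 0	/* example only, not built */
static u32 example_get_cmd_length_mask(u32 cmd_header)
{
	if ((cmd_header >> 29) == 0)	/* MI client */
		return 0x3F;		/* length lives in bits 5:0 */

	return 0;			/* unrecognized: reject the batch */
}
#endif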

static inline bool
intel_ring_initialized(struct intel_engine_cs *ring)
{
	return ring->buffer && ring->buffer->obj;
}

static inline unsigned
intel_ring_flag(struct intel_engine_cs *ring)
{
	return 1 << ring->id;
}

static inline u32
intel_ring_sync_index(struct intel_engine_cs *ring,
		      struct intel_engine_cs *other)
{
	int idx;

	/*
	 * rcs -> 0 = vcs, 1 = bcs, 2 = vecs, 3 = vcs2;
	 * vcs -> 0 = bcs, 1 = vecs, 2 = vcs2, 3 = rcs;
	 * bcs -> 0 = vecs, 1 = vcs2, 2 = rcs, 3 = vcs;
	 * vecs -> 0 = vcs2, 1 = rcs, 2 = vcs, 3 = bcs;
	 * vcs2 -> 0 = rcs, 1 = vcs, 2 = bcs, 3 = vecs;
	 */

	idx = (other - ring) - 1;
	if (idx < 0)
		idx += I915_NUM_RINGS;

	return idx;
}
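/*
 * Worked example (assuming the engines sit in dev_priv->ring[] in enum
 * order): ring == &ring[VCS], other == &ring[VCS2] gives
 * idx = (4 - 1) - 1 = 2, matching "vcs -> 2 = vcs2" above.
 */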

static inline u32
intel_read_status_page(struct intel_engine_cs *ring,
		       int reg)
{
	/* Ensure that the compiler doesn't optimize away the load. */
	barrier();
	return ring->status_page.page_addr[reg];
}

static inline void
intel_write_status_page(struct intel_engine_cs *ring,
			int reg, u32 value)
{
	ring->status_page.page_addr[reg] = value;
}

/**
 * Reads a dword out of the status page, which is written to from the command
 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 * MI_STORE_DATA_IMM.
 *
 * The following dwords have a reserved meaning:
 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 * 0x04: ring 0 head pointer
 * 0x05: ring 1 head pointer (915-class)
 * 0x06: ring 2 head pointer (915-class)
 * 0x10-0x1b: Context status DWords (GM45)
 * 0x1f: Last written status offset. (GM45)
 *
 * The area from dword 0x20 to 0x3ff is available for driver usage.
 */
#define I915_GEM_HWS_INDEX		0x20
#define I915_GEM_HWS_SCRATCH_INDEX	0x30
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)

void intel_stop_ring_buffer(struct intel_engine_cs *ring);
void intel_cleanup_ring_buffer(struct intel_engine_cs *ring);

int __must_check intel_ring_begin(struct intel_engine_cs *ring, int n);
int __must_check intel_ring_cacheline_align(struct intel_engine_cs *ring);
static inline void intel_ring_emit(struct intel_engine_cs *ring,
				   u32 data)
{
	struct intel_ringbuffer *ringbuf = ring->buffer;
	iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
	ringbuf->tail += 4;
}
static inline void intel_ring_advance(struct intel_engine_cs *ring)
{
	struct intel_ringbuffer *ringbuf = ring->buffer;
	ringbuf->tail &= ringbuf->size - 1;
}
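/*
 * Typical emit sequence (a sketch of the pattern used throughout the
 * driver): reserve space for a known number of dwords, emit exactly
 * that many, then advance the tail.
 */
#if 0	/* example only, not built */
	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_NOOP);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
#endif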
void __intel_ring_advance(struct intel_engine_cs *ring);

int __must_check intel_ring_idle(struct intel_engine_cs *ring);
void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno);
int intel_ring_flush_all_caches(struct intel_engine_cs *ring);
int intel_ring_invalidate_all_caches(struct intel_engine_cs *ring);

int intel_init_render_ring_buffer(struct drm_device *dev);
int intel_init_bsd_ring_buffer(struct drm_device *dev);
int intel_init_bsd2_ring_buffer(struct drm_device *dev);
int intel_init_blt_ring_buffer(struct drm_device *dev);
int intel_init_vebox_ring_buffer(struct drm_device *dev);

u64 intel_ring_get_active_head(struct intel_engine_cs *ring);
void intel_ring_setup_status_page(struct intel_engine_cs *ring);

static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
{
	return ringbuf->tail;
}

static inline u32 intel_ring_get_seqno(struct intel_engine_cs *ring)
{
	BUG_ON(ring->outstanding_lazy_seqno == 0);
	return ring->outstanding_lazy_seqno;
}

static inline void i915_trace_irq_get(struct intel_engine_cs *ring, u32 seqno)
{
	if (ring->trace_irq_seqno == 0 && ring->irq_get(ring))
		ring->trace_irq_seqno = seqno;