#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_

#include <linux/hashtable.h>

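/*
 * Used below as the bucket-bits argument to DECLARE_HASHTABLE(): each
 * ring's command-parser hash table gets 1 << 9 = 512 buckets.
 */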
#define I915_CMD_HASH_ORDER 9

/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
 * but keeps the logic simple. Indeed, the whole purpose of this macro is just
 * to give some inclination as to some of the magic values used in the various
 * workarounds!
 */
#define CACHELINE_BYTES 64

/*
 * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
 * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
 * Gen4+ BSpec "vol1c Memory Interface and Command Stream" / 5.3.4.5 "Ring Buffer Use"
 *
 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the same
 * cacheline, the Head Pointer must not be greater than the Tail
 * Pointer."
 */
#define I915_RING_FREE_SPACE 64
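/*
 * Illustrative sketch only (not necessarily the driver's exact helper):
 * the reserve is typically honoured by computing
 *
 *     space = head - tail;
 *     if (space <= 0)
 *             space += size;
 *     space -= I915_RING_FREE_SPACE;
 *
 * so that the reported free space never lets the tail advance right up to
 * the head and violate the rule quoted above.
 */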

struct  intel_hw_status_page {
        u32             *page_addr;
        unsigned int    gfx_addr;
        struct          drm_i915_gem_object *obj;
};

#define I915_READ_TAIL(ring) I915_READ(RING_TAIL((ring)->mmio_base))
#define I915_WRITE_TAIL(ring, val) I915_WRITE(RING_TAIL((ring)->mmio_base), val)

#define I915_READ_START(ring) I915_READ(RING_START((ring)->mmio_base))
#define I915_WRITE_START(ring, val) I915_WRITE(RING_START((ring)->mmio_base), val)

#define I915_READ_HEAD(ring)  I915_READ(RING_HEAD((ring)->mmio_base))
#define I915_WRITE_HEAD(ring, val) I915_WRITE(RING_HEAD((ring)->mmio_base), val)

#define I915_READ_CTL(ring) I915_READ(RING_CTL((ring)->mmio_base))
#define I915_WRITE_CTL(ring, val) I915_WRITE(RING_CTL((ring)->mmio_base), val)

#define I915_READ_IMR(ring) I915_READ(RING_IMR((ring)->mmio_base))
#define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val)

#define I915_READ_MODE(ring) I915_READ(RING_MI_MODE((ring)->mmio_base))
#define I915_WRITE_MODE(ring, val) I915_WRITE(RING_MI_MODE((ring)->mmio_base), val)

/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
 * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
 */
#define i915_semaphore_seqno_size sizeof(uint64_t)
#define GEN8_SIGNAL_OFFSET(__ring, to)                       \
        (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
        ((__ring)->id * I915_NUM_RINGS * i915_semaphore_seqno_size) +   \
        (i915_semaphore_seqno_size * (to)))

#define GEN8_WAIT_OFFSET(__ring, from)                       \
        (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
        ((from) * I915_NUM_RINGS * i915_semaphore_seqno_size) + \
        (i915_semaphore_seqno_size * (__ring)->id))
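
/*
 * Worked example (values taken from the definitions above): with
 * i915_semaphore_seqno_size == 8 and I915_NUM_RINGS == 5, the RCS ring
 * (id 0) signals the VCS ring (id 1) at
 *
 *     GEN8_SIGNAL_OFFSET(rcs, VCS) = base + (0 * 5 * 8) + (8 * 1) = base + 0x08
 *
 * and the VCS ring waits on RCS at the same slot,
 *
 *     GEN8_WAIT_OFFSET(vcs, RCS) = base + (0 * 5 * 8) + (8 * 1) = base + 0x08,
 *
 * matching the signal/wait tables documented in struct intel_engine_cs
 * below. GEN8_RING_SEMAPHORE_INIT fills a ring's signal_ggtt[] array with
 * exactly these offsets and marks the ring's own (self-signal) slot invalid.
 */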

#define GEN8_RING_SEMAPHORE_INIT do { \
        if (!dev_priv->semaphore_obj) { \
                break; \
        } \
        ring->semaphore.signal_ggtt[RCS] = GEN8_SIGNAL_OFFSET(ring, RCS); \
        ring->semaphore.signal_ggtt[VCS] = GEN8_SIGNAL_OFFSET(ring, VCS); \
        ring->semaphore.signal_ggtt[BCS] = GEN8_SIGNAL_OFFSET(ring, BCS); \
        ring->semaphore.signal_ggtt[VECS] = GEN8_SIGNAL_OFFSET(ring, VECS); \
        ring->semaphore.signal_ggtt[VCS2] = GEN8_SIGNAL_OFFSET(ring, VCS2); \
        ring->semaphore.signal_ggtt[ring->id] = MI_SEMAPHORE_SYNC_INVALID; \
        } while(0)

enum intel_ring_hangcheck_action {
        HANGCHECK_IDLE = 0,
        HANGCHECK_WAIT,
        HANGCHECK_ACTIVE,
        HANGCHECK_ACTIVE_LOOP,
        HANGCHECK_KICK,
        HANGCHECK_HUNG,
};

#define HANGCHECK_SCORE_RING_HUNG 31

struct intel_ring_hangcheck {
        u64 acthd;
        u64 max_acthd;
        u32 seqno;
        int score;
        enum intel_ring_hangcheck_action action;
        int deadlock;
};

struct intel_ringbuffer {
        struct drm_i915_gem_object *obj;
        void            __iomem *virtual_start;

        struct intel_engine_cs *ring;

        /*
         * FIXME: This backpointer is an artifact of the history of how the
         * execlist patches came into being. It will get removed once the basic
         * code has landed.
         */
        struct intel_context *FIXME_lrc_ctx;

        u32             head;
        u32             tail;
        int             space;
        int             size;
        int             effective_size;

        /** We track the position of the requests in the ring buffer, and
         * when each is retired we increment last_retired_head as the GPU
         * must have finished processing the request and so we know we
         * can advance the ringbuffer up to that position.
         *
         * last_retired_head is set to -1 after the value is consumed so
         * we can detect new retirements.
         */
        u32             last_retired_head;
};

struct  intel_engine_cs {
        const char      *name;
        enum intel_ring_id {
                RCS = 0x0,
                VCS,
                BCS,
                VECS,
                VCS2
        } id;
#define I915_NUM_RINGS 5
#define LAST_USER_RING (VECS + 1)
        u32             mmio_base;
        struct          drm_device *dev;
        struct intel_ringbuffer *buffer;

        struct intel_hw_status_page status_page;

        unsigned irq_refcount; /* protected by dev_priv->irq_lock */
        u32             irq_enable_mask;        /* bitmask to enable ring interrupt */
        u32             trace_irq_seqno;
        bool __must_check (*irq_get)(struct intel_engine_cs *ring);
        void            (*irq_put)(struct intel_engine_cs *ring);

        int             (*init)(struct intel_engine_cs *ring);

        int             (*init_context)(struct intel_engine_cs *ring,
                                        struct intel_context *ctx);

        void            (*write_tail)(struct intel_engine_cs *ring,
                                      u32 value);
        int __must_check (*flush)(struct intel_engine_cs *ring,
                                  u32   invalidate_domains,
                                  u32   flush_domains);
        int             (*add_request)(struct intel_engine_cs *ring);
        /* Some chipsets are not quite as coherent as advertised and need
         * an expensive kick to force a true read of the up-to-date seqno.
         * However, the up-to-date seqno is not always required and the last
         * seen value is good enough. Note that the seqno will always be
         * monotonic, even if not coherent.
         */
        u32             (*get_seqno)(struct intel_engine_cs *ring,
                                     bool lazy_coherency);
        void            (*set_seqno)(struct intel_engine_cs *ring,
                                     u32 seqno);
        int             (*dispatch_execbuffer)(struct intel_engine_cs *ring,
                                               u64 offset, u32 length,
                                               unsigned flags);
#define I915_DISPATCH_SECURE 0x1
#define I915_DISPATCH_PINNED 0x2
        void            (*cleanup)(struct intel_engine_cs *ring);

        /* GEN8 signal/wait table - never trust comments!
         *        signal to     signal to    signal to   signal to      signal to
         *          RCS            VCS          BCS        VECS          VCS2
         *      --------------------------------------------------------------------
         *  RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) |
         *      |-------------------------------------------------------------------
         *  VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) |
         *      |-------------------------------------------------------------------
         *  BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) |
         *      |-------------------------------------------------------------------
         * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) |  NOP (0x90) | VCS2 (0x98) |
         *      |-------------------------------------------------------------------
         * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP  (0xc0) |
         *      |-------------------------------------------------------------------
         *
         * Generalization:
         *  f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
         *  i.e. transpose of g(x, y)
         *
         *       sync from      sync from    sync from    sync from     sync from
         *          RCS            VCS          BCS        VECS          VCS2
         *      --------------------------------------------------------------------
         *  RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) |
         *      |-------------------------------------------------------------------
         *  VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) |
         *      |-------------------------------------------------------------------
         *  BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) |
         *      |-------------------------------------------------------------------
         * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) |  NOP (0x90) | VCS2 (0xb8) |
         *      |-------------------------------------------------------------------
         * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) |  NOP (0xc0) |
         *      |-------------------------------------------------------------------
         *
         * Generalization:
         *  g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
         *  i.e. transpose of f(x, y)
         */
        struct {
                u32     sync_seqno[I915_NUM_RINGS-1];

                union {
                struct {
        /* our mbox written by others */
                        u32             wait[I915_NUM_RINGS];
        /* mboxes this ring signals to */
                        u32             signal[I915_NUM_RINGS];
                } mbox;
                        u64             signal_ggtt[I915_NUM_RINGS];
                };

                /* AKA wait() */
                int     (*sync_to)(struct intel_engine_cs *ring,
                                   struct intel_engine_cs *to,
                                   u32 seqno);
                int     (*signal)(struct intel_engine_cs *signaller,
                                  /* num_dwords needed by caller */
                                  unsigned int num_dwords);
        } semaphore;

        /* Execlists */
        spinlock_t execlist_lock;
        struct list_head execlist_queue;
        struct list_head execlist_retired_req_list;
        u8 next_context_status_buffer;
        u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
        int             (*emit_request)(struct intel_ringbuffer *ringbuf);
        int             (*emit_flush)(struct intel_ringbuffer *ringbuf,
                                      u32 invalidate_domains,
                                      u32 flush_domains);
        int             (*emit_bb_start)(struct intel_ringbuffer *ringbuf,
                                         u64 offset, unsigned flags);
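        /*
         * Note: the emit_* hooks above are the execlist-mode counterparts of
         * add_request/flush/dispatch_execbuffer; they take the target
         * intel_ringbuffer explicitly rather than operating on the engine's
         * default legacy ring.
         */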

        /**
         * List of objects currently involved in rendering from the
         * ringbuffer.
         *
         * Includes buffers having the contents of their GPU caches
         * flushed, not necessarily primitives.  last_rendering_seqno
         * represents when the rendering involved will be completed.
         *
         * A reference is held on the buffer while on this list.
         */
        struct list_head active_list;

        /**
         * List of breadcrumbs associated with GPU requests currently
         * outstanding.
         */
        struct list_head request_list;

        /**
         * Do we have some not yet emitted requests outstanding?
         */
        struct drm_i915_gem_request *preallocated_lazy_request;
        u32 outstanding_lazy_seqno;
        bool gpu_caches_dirty;
        bool fbc_dirty;

        wait_queue_head_t irq_queue;

        struct intel_context *default_context;
        struct intel_context *last_context;

        struct intel_ring_hangcheck hangcheck;

        struct {
                struct drm_i915_gem_object *obj;
                u32 gtt_offset;
                volatile u32 *cpu_page;
        } scratch;

        bool needs_cmd_parser;

        /*
         * Table of commands the command parser needs to know about
         * for this ring.
         */
        DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

        /*
         * Table of registers allowed in commands that read/write registers.
         */
        const u32 *reg_table;
        int reg_count;

        /*
         * Table of registers allowed in commands that read/write registers, but
         * only from the DRM master.
         */
        const u32 *master_reg_table;
        int master_reg_count;

        /*
         * Returns the bitmask for the length field of the specified command.
         * Return 0 for an unrecognized/invalid command.
         *
         * If the command parser finds an entry for a command in the ring's
         * cmd_tables, it gets the command's length based on the table entry.
         * If not, it calls this function to determine the per-ring length field
         * encoding for the command (i.e. certain opcode ranges use certain bits
         * to encode the command length in the header).
         */
        u32 (*get_cmd_length_mask)(u32 cmd_header);
};

bool intel_ring_initialized(struct intel_engine_cs *ring);

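/* Convert a ring id into the single-bit mask used in ring-mask bitfields
 * (RCS -> 0x1, VCS -> 0x2, BCS -> 0x4, ...).
 */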
static inline unsigned
intel_ring_flag(struct intel_engine_cs *ring)
{
        return 1 << ring->id;
}

static inline u32
intel_ring_sync_index(struct intel_engine_cs *ring,
                      struct intel_engine_cs *other)
{
        int idx;

        /*
         * rcs -> 0 = vcs, 1 = bcs, 2 = vecs, 3 = vcs2;
         * vcs -> 0 = bcs, 1 = vecs, 2 = vcs2, 3 = rcs;
         * bcs -> 0 = vecs, 1 = vcs2, 2 = rcs, 3 = vcs;
         * vecs -> 0 = vcs2, 1 = rcs, 2 = vcs, 3 = bcs;
         * vcs2 -> 0 = rcs, 1 = vcs, 2 = bcs, 3 = vecs;
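         *
         * e.g. for the vcs ring waiting on rcs: (0 - 1) - 1 = -2, which
         * wraps to 3, matching the vcs row above.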
         */

        idx = (other - ring) - 1;
        if (idx < 0)
                idx += I915_NUM_RINGS;

        return idx;
}

static inline u32
intel_read_status_page(struct intel_engine_cs *ring,
                       int reg)
{
        /* Ensure that the compiler doesn't optimize away the load. */
        barrier();
        return ring->status_page.page_addr[reg];
}

static inline void
intel_write_status_page(struct intel_engine_cs *ring,
                        int reg, u32 value)
{
        ring->status_page.page_addr[reg] = value;
}

/**
 * Reads a dword out of the status page, which is written to from the command
 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 * MI_STORE_DATA_IMM.
 *
 * The following dwords have a reserved meaning:
 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 * 0x04: ring 0 head pointer
 * 0x05: ring 1 head pointer (915-class)
 * 0x06: ring 2 head pointer (915-class)
 * 0x10-0x1b: Context status DWords (GM45)
 * 0x1f: Last written status offset. (GM45)
 *
 * The area from dword 0x20 to 0x3ff is available for driver usage.
 */
#define I915_GEM_HWS_INDEX              0x20
#define I915_GEM_HWS_SCRATCH_INDEX      0x30
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
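/*
 * Both indices sit in the 0x20-0x3ff dword range reserved for driver use
 * above; the shift presumably turns the dword index into the offset form
 * expected by the MI_STORE_DWORD_INDEX command.
 */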

void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
                                     struct intel_ringbuffer *ringbuf);
void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
int intel_alloc_ringbuffer_obj(struct drm_device *dev,
                               struct intel_ringbuffer *ringbuf);

void intel_stop_ring_buffer(struct intel_engine_cs *ring);
void intel_cleanup_ring_buffer(struct intel_engine_cs *ring);

int __must_check intel_ring_begin(struct intel_engine_cs *ring, int n);
int __must_check intel_ring_cacheline_align(struct intel_engine_cs *ring);
static inline void intel_ring_emit(struct intel_engine_cs *ring,
                                   u32 data)
{
        struct intel_ringbuffer *ringbuf = ring->buffer;
        iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
        ringbuf->tail += 4;
}
static inline void intel_ring_advance(struct intel_engine_cs *ring)
{
        struct intel_ringbuffer *ringbuf = ring->buffer;
        ringbuf->tail &= ringbuf->size - 1;
}
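
/*
 * Illustrative usage sketch: callers reserve space with intel_ring_begin(),
 * write one dword per intel_ring_emit() call, then wrap the tail with
 * intel_ring_advance(), e.g.
 *
 *     ret = intel_ring_begin(ring, 2);
 *     if (ret)
 *             return ret;
 *     intel_ring_emit(ring, MI_NOOP);
 *     intel_ring_emit(ring, MI_NOOP);
 *     intel_ring_advance(ring);
 */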
int __intel_ring_space(int head, int tail, int size);
int intel_ring_space(struct intel_ringbuffer *ringbuf);
bool intel_ring_stopped(struct intel_engine_cs *ring);
void __intel_ring_advance(struct intel_engine_cs *ring);

int __must_check intel_ring_idle(struct intel_engine_cs *ring);
void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno);
int intel_ring_flush_all_caches(struct intel_engine_cs *ring);
int intel_ring_invalidate_all_caches(struct intel_engine_cs *ring);

void intel_fini_pipe_control(struct intel_engine_cs *ring);
int intel_init_pipe_control(struct intel_engine_cs *ring);

int intel_init_render_ring_buffer(struct drm_device *dev);
int intel_init_bsd_ring_buffer(struct drm_device *dev);
int intel_init_bsd2_ring_buffer(struct drm_device *dev);
int intel_init_blt_ring_buffer(struct drm_device *dev);
int intel_init_vebox_ring_buffer(struct drm_device *dev);

u64 intel_ring_get_active_head(struct intel_engine_cs *ring);
void intel_ring_setup_status_page(struct intel_engine_cs *ring);

int init_workarounds_ring(struct intel_engine_cs *ring);

static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
{
        return ringbuf->tail;
}

static inline u32 intel_ring_get_seqno(struct intel_engine_cs *ring)
{
        BUG_ON(ring->outstanding_lazy_seqno == 0);
        return ring->outstanding_lazy_seqno;
}

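/* Take an irq reference on behalf of the tracer the first time it asks
 * (trace_irq_seqno == 0) and remember the seqno it is interested in; the
 * reference is expected to be dropped elsewhere once that seqno retires.
 */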
static inline void i915_trace_irq_get(struct intel_engine_cs *ring, u32 seqno)
{
        if (ring->trace_irq_seqno == 0 && ring->irq_get(ring))
                ring->trace_irq_seqno = seqno;
}

#endif /* _INTEL_RINGBUFFER_H_ */