Subversion Repositories Kolibri OS

Rev 5354 (intel_ringbuffer.h)

#ifndef _INTEL_RINGBUFFER_H_
#define _INTEL_RINGBUFFER_H_

#include <linux/hashtable.h>
#include "i915_gem_batch_pool.h"

#define I915_CMD_HASH_ORDER 9

/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill,
 * but keeps the logic simple. Indeed, the whole purpose of this macro is just
 * to give some indication as to some of the magic values used in the various
 * workarounds!
 */
#define CACHELINE_BYTES 64
#define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t))

/*
 * Gen2 BSpec "1. Programming Environment" / 1.4.4.6 "Ring Buffer Use"
 * Gen3 BSpec "vol1c Memory Interface Functions" / 2.3.4.5 "Ring Buffer Use"
 * Gen4+ BSpec "vol1c Memory Interface and Command Stream" / 5.3.4.5 "Ring Buffer Use"
 *
 * "If the Ring Buffer Head Pointer and the Tail Pointer are on the same
 * cacheline, the Head Pointer must not be greater than the Tail
 * Pointer."
 */
#define I915_RING_FREE_SPACE 64

struct  intel_hw_status_page {
        u32             *page_addr;
        unsigned int    gfx_addr;
        struct          drm_i915_gem_object *obj;
};

#define I915_READ_TAIL(ring) I915_READ(RING_TAIL((ring)->mmio_base))
#define I915_WRITE_TAIL(ring, val) I915_WRITE(RING_TAIL((ring)->mmio_base), val)

#define I915_READ_START(ring) I915_READ(RING_START((ring)->mmio_base))
#define I915_WRITE_START(ring, val) I915_WRITE(RING_START((ring)->mmio_base), val)

#define I915_READ_HEAD(ring)  I915_READ(RING_HEAD((ring)->mmio_base))
#define I915_WRITE_HEAD(ring, val) I915_WRITE(RING_HEAD((ring)->mmio_base), val)

#define I915_READ_CTL(ring) I915_READ(RING_CTL((ring)->mmio_base))
#define I915_WRITE_CTL(ring, val) I915_WRITE(RING_CTL((ring)->mmio_base), val)

#define I915_READ_IMR(ring) I915_READ(RING_IMR((ring)->mmio_base))
#define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val)

#define I915_READ_MODE(ring) I915_READ(RING_MI_MODE((ring)->mmio_base))
#define I915_WRITE_MODE(ring, val) I915_WRITE(RING_MI_MODE((ring)->mmio_base), val)

/* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
 * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
 */
#define i915_semaphore_seqno_size sizeof(uint64_t)
#define GEN8_SIGNAL_OFFSET(__ring, to)                       \
        (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
        ((__ring)->id * I915_NUM_RINGS * i915_semaphore_seqno_size) +   \
        (i915_semaphore_seqno_size * (to)))

#define GEN8_WAIT_OFFSET(__ring, from)                       \
        (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \
        ((from) * I915_NUM_RINGS * i915_semaphore_seqno_size) + \
        (i915_semaphore_seqno_size * (__ring)->id))

#define GEN8_RING_SEMAPHORE_INIT do { \
        if (!dev_priv->semaphore_obj) { \
                break; \
        } \
        ring->semaphore.signal_ggtt[RCS] = GEN8_SIGNAL_OFFSET(ring, RCS); \
        ring->semaphore.signal_ggtt[VCS] = GEN8_SIGNAL_OFFSET(ring, VCS); \
        ring->semaphore.signal_ggtt[BCS] = GEN8_SIGNAL_OFFSET(ring, BCS); \
        ring->semaphore.signal_ggtt[VECS] = GEN8_SIGNAL_OFFSET(ring, VECS); \
        ring->semaphore.signal_ggtt[VCS2] = GEN8_SIGNAL_OFFSET(ring, VCS2); \
        ring->semaphore.signal_ggtt[ring->id] = MI_SEMAPHORE_SYNC_INVALID; \
        } while(0)

enum intel_ring_hangcheck_action {
        HANGCHECK_IDLE = 0,
        HANGCHECK_WAIT,
        HANGCHECK_ACTIVE,
        HANGCHECK_ACTIVE_LOOP,
        HANGCHECK_KICK,
        HANGCHECK_HUNG,
};

#define HANGCHECK_SCORE_RING_HUNG 31

struct intel_ring_hangcheck {
        u64 acthd;
        u64 max_acthd;
        u32 seqno;
        int score;
        enum intel_ring_hangcheck_action action;
        int deadlock;
};

struct intel_ringbuffer {
        struct drm_i915_gem_object *obj;
        void __iomem *virtual_start;

        struct intel_engine_cs *ring;

        u32 head;
        u32 tail;
        int space;
        int size;
        int effective_size;
        int reserved_size;
        int reserved_tail;
        bool reserved_in_use;

        /** We track the position of the requests in the ring buffer, and
         * when each is retired we increment last_retired_head as the GPU
         * must have finished processing the request and so we know we
         * can advance the ringbuffer up to that position.
         *
         * last_retired_head is set to -1 after the value is consumed so
         * we can detect new retirements.
         */
        u32 last_retired_head;
};

struct  intel_context;
struct drm_i915_reg_descriptor;

/*
 * we use a single page to load ctx workarounds so all of these
 * values are expressed in dwords
 *
 * struct i915_wa_ctx_bb:
 *  offset: specifies batch starting position, also helpful in case
 *    we want to have multiple batches at different offsets based on
 *    some criteria. It is not a requirement at the moment but provides
 *    an option for future use.
 *  size: size of the batch in DWORDS
 */
struct  i915_ctx_workarounds {
        struct i915_wa_ctx_bb {
                u32 offset;
                u32 size;
        } indirect_ctx, per_ctx;
        struct drm_i915_gem_object *obj;
};

struct  intel_engine_cs {
        const char      *name;
        enum intel_ring_id {
                RCS = 0x0,
                VCS,
                BCS,
                VECS,
                VCS2
        } id;
#define I915_NUM_RINGS 5
#define LAST_USER_RING (VECS + 1)
        u32             mmio_base;
        struct          drm_device *dev;
        struct intel_ringbuffer *buffer;

        /*
         * A pool of objects to use as shadow copies of client batch buffers
         * when the command parser is enabled. Prevents the client from
         * modifying the batch contents after software parsing.
         */
        struct i915_gem_batch_pool batch_pool;

        struct intel_hw_status_page status_page;
        struct i915_ctx_workarounds wa_ctx;

        unsigned irq_refcount; /* protected by dev_priv->irq_lock */
        u32             irq_enable_mask;        /* bitmask to enable ring interrupt */
        struct drm_i915_gem_request *trace_irq_req;
        bool __must_check (*irq_get)(struct intel_engine_cs *ring);
        void            (*irq_put)(struct intel_engine_cs *ring);

        int             (*init_hw)(struct intel_engine_cs *ring);

        int             (*init_context)(struct drm_i915_gem_request *req);

        void            (*write_tail)(struct intel_engine_cs *ring,
                                      u32 value);
        int __must_check (*flush)(struct drm_i915_gem_request *req,
                                  u32   invalidate_domains,
                                  u32   flush_domains);
        int             (*add_request)(struct drm_i915_gem_request *req);
        /* Some chipsets are not quite as coherent as advertised and need
         * an expensive kick to force a true read of the up-to-date seqno.
         * However, the up-to-date seqno is not always required and the last
         * seen value is good enough. Note that the seqno will always be
         * monotonic, even if not coherent.
         */
        u32             (*get_seqno)(struct intel_engine_cs *ring,
                                     bool lazy_coherency);
        void            (*set_seqno)(struct intel_engine_cs *ring,
                                     u32 seqno);
        int             (*dispatch_execbuffer)(struct drm_i915_gem_request *req,
                                               u64 offset, u32 length,
                                               unsigned dispatch_flags);
#define I915_DISPATCH_SECURE 0x1
#define I915_DISPATCH_PINNED 0x2
#define I915_DISPATCH_RS     0x4
        void            (*cleanup)(struct intel_engine_cs *ring);

        /* GEN8 signal/wait table - never trust comments!
         *        signal to     signal to    signal to   signal to      signal to
         *          RCS            VCS          BCS        VECS          VCS2
         *      --------------------------------------------------------------------
         *  RCS | NOP (0x00) | VCS (0x08) | BCS (0x10) | VECS (0x18) | VCS2 (0x20) |
         *      |-------------------------------------------------------------------
         *  VCS | RCS (0x28) | NOP (0x30) | BCS (0x38) | VECS (0x40) | VCS2 (0x48) |
         *      |-------------------------------------------------------------------
         *  BCS | RCS (0x50) | VCS (0x58) | NOP (0x60) | VECS (0x68) | VCS2 (0x70) |
         *      |-------------------------------------------------------------------
         * VECS | RCS (0x78) | VCS (0x80) | BCS (0x88) |  NOP (0x90) | VCS2 (0x98) |
         *      |-------------------------------------------------------------------
         * VCS2 | RCS (0xa0) | VCS (0xa8) | BCS (0xb0) | VECS (0xb8) | NOP  (0xc0) |
         *      |-------------------------------------------------------------------
         *
         * Generalization:
         *  f(x, y) := (x->id * NUM_RINGS * seqno_size) + (seqno_size * y->id)
         *  i.e. transpose of g(x, y)
         *
         *       sync from      sync from    sync from    sync from     sync from
         *          RCS            VCS          BCS        VECS          VCS2
         *      --------------------------------------------------------------------
         *  RCS | NOP (0x00) | VCS (0x28) | BCS (0x50) | VECS (0x78) | VCS2 (0xa0) |
         *      |-------------------------------------------------------------------
         *  VCS | RCS (0x08) | NOP (0x30) | BCS (0x58) | VECS (0x80) | VCS2 (0xa8) |
         *      |-------------------------------------------------------------------
         *  BCS | RCS (0x10) | VCS (0x38) | NOP (0x60) | VECS (0x88) | VCS2 (0xb0) |
         *      |-------------------------------------------------------------------
         * VECS | RCS (0x18) | VCS (0x40) | BCS (0x68) |  NOP (0x90) | VCS2 (0xb8) |
         *      |-------------------------------------------------------------------
         * VCS2 | RCS (0x20) | VCS (0x48) | BCS (0x70) | VECS (0x98) |  NOP (0xc0) |
         *      |-------------------------------------------------------------------
         *
         * Generalization:
         *  g(x, y) := (y->id * NUM_RINGS * seqno_size) + (seqno_size * x->id)
         *  i.e. transpose of f(x, y)
         */
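        /*
         * Editor's worked example (not part of the original comment): for
         * x = VCS (id 1) signalling to y = BCS (id 2), with NUM_RINGS = 5
         * and seqno_size = 8, f(x, y) = 1 * 5 * 8 + 8 * 2 = 0x38, matching
         * the VCS row / "signal to BCS" column above; that is exactly what
         * GEN8_SIGNAL_OFFSET(ring, to) adds to the semaphore object's GGTT
         * offset.  The same slot appears transposed in the wait table:
         * g(BCS, VCS) = 1 * 5 * 8 + 8 * 2 = 0x38 as well, so the dword that
         * VCS signals is the one BCS polls via GEN8_WAIT_OFFSET().
         */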
        struct {
                u32     sync_seqno[I915_NUM_RINGS-1];

                union {
                        struct {
                                /* our mbox written by others */
                                u32             wait[I915_NUM_RINGS];
                                /* mboxes this ring signals to */
                                u32             signal[I915_NUM_RINGS];
                        } mbox;
                        u64             signal_ggtt[I915_NUM_RINGS];
                };

                /* AKA wait() */
                int     (*sync_to)(struct drm_i915_gem_request *to_req,
                                   struct intel_engine_cs *from,
                                   u32 seqno);
                int     (*signal)(struct drm_i915_gem_request *signaller_req,
                                  /* num_dwords needed by caller */
                                  unsigned int num_dwords);
        } semaphore;

        /* Execlists */
        spinlock_t execlist_lock;
        struct list_head execlist_queue;
        struct list_head execlist_retired_req_list;
        u8 next_context_status_buffer;
        u32             irq_keep_mask; /* bitmask for interrupts that should not be masked */
        int             (*emit_request)(struct drm_i915_gem_request *request);
        int             (*emit_flush)(struct drm_i915_gem_request *request,
                                      u32 invalidate_domains,
                                      u32 flush_domains);
        int             (*emit_bb_start)(struct drm_i915_gem_request *req,
                                         u64 offset, unsigned dispatch_flags);

        /**
         * List of objects currently involved in rendering from the
         * ringbuffer.
         *
         * Includes buffers having the contents of their GPU caches
         * flushed, not necessarily primitives.  last_read_req
         * represents when the rendering involved will be completed.
         *
         * A reference is held on the buffer while on this list.
         */
        struct list_head active_list;

        /**
         * List of breadcrumbs associated with GPU requests currently
         * outstanding.
         */
        struct list_head request_list;

        /**
         * Seqno of request most recently submitted to request_list.
         * Used exclusively by hang checker to avoid grabbing lock while
         * inspecting request list.
         */
        u32 last_submitted_seqno;

        bool gpu_caches_dirty;

        wait_queue_head_t irq_queue;

        struct intel_context *default_context;
        struct intel_context *last_context;

        struct intel_ring_hangcheck hangcheck;

        struct {
                struct drm_i915_gem_object *obj;
                u32 gtt_offset;
                volatile u32 *cpu_page;
        } scratch;

        bool needs_cmd_parser;

        /*
         * Table of commands the command parser needs to know about
         * for this ring.
         */
        DECLARE_HASHTABLE(cmd_hash, I915_CMD_HASH_ORDER);

        /*
         * Table of registers allowed in commands that read/write registers.
         */
        const struct drm_i915_reg_descriptor *reg_table;
        int reg_count;

        /*
         * Table of registers allowed in commands that read/write registers, but
         * only from the DRM master.
         */
        const struct drm_i915_reg_descriptor *master_reg_table;
        int master_reg_count;

        /*
         * Returns the bitmask for the length field of the specified command.
         * Return 0 for an unrecognized/invalid command.
         *
         * If the command parser finds an entry for a command in the ring's
         * cmd_tables, it gets the command's length based on the table entry.
         * If not, it calls this function to determine the per-ring length field
         * encoding for the command (i.e. certain opcode ranges use certain bits
         * to encode the command length in the header).
         */
        u32 (*get_cmd_length_mask)(u32 cmd_header);
};

bool intel_ring_initialized(struct intel_engine_cs *ring);

static inline unsigned
intel_ring_flag(struct intel_engine_cs *ring)
{
        return 1 << ring->id;
}

static inline u32
intel_ring_sync_index(struct intel_engine_cs *ring,
                      struct intel_engine_cs *other)
{
        int idx;

        /*
         * rcs -> 0 = vcs, 1 = bcs, 2 = vecs, 3 = vcs2;
         * vcs -> 0 = bcs, 1 = vecs, 2 = vcs2, 3 = rcs;
         * bcs -> 0 = vecs, 1 = vcs2, 2 = rcs, 3 = vcs;
         * vecs -> 0 = vcs2, 1 = rcs, 2 = vcs, 3 = bcs;
         * vcs2 -> 0 = rcs, 1 = vcs, 2 = bcs, 3 = vecs;
         */

        idx = (other - ring) - 1;
        if (idx < 0)
                idx += I915_NUM_RINGS;

        return idx;
}
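
/*
 * Editor's note (not in the original header): the pointer subtraction above
 * assumes the engines sit in one contiguous array (dev_priv->ring[]), so
 * "other - ring" equals the difference of their ids.  Worked examples:
 * ring = vcs (id 1), other = vecs (id 3) gives idx = (3 - 1) - 1 = 1,
 * matching "vcs -> 1 = vecs" above; ring = vcs2 (id 4), other = rcs (id 0)
 * gives (0 - 4) - 1 = -5, wrapped by + I915_NUM_RINGS to 0, matching
 * "vcs2 -> 0 = rcs".
 */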

static inline void
intel_flush_status_page(struct intel_engine_cs *ring, int reg)
{
        drm_clflush_virt_range(&ring->status_page.page_addr[reg],
                               sizeof(uint32_t));
}

static inline u32
intel_read_status_page(struct intel_engine_cs *ring,
                       int reg)
{
        /* Ensure that the compiler doesn't optimize away the load. */
        barrier();
        return ring->status_page.page_addr[reg];
}

static inline void
intel_write_status_page(struct intel_engine_cs *ring,
                        int reg, u32 value)
{
        ring->status_page.page_addr[reg] = value;
}

/**
 * Reads a dword out of the status page, which is written to from the command
 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 * MI_STORE_DATA_IMM.
 *
 * The following dwords have a reserved meaning:
 * 0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 * 0x04: ring 0 head pointer
 * 0x05: ring 1 head pointer (915-class)
 * 0x06: ring 2 head pointer (915-class)
 * 0x10-0x1b: Context status DWords (GM45)
 * 0x1f: Last written status offset. (GM45)
 * 0x20-0x2f: Reserved (Gen6+)
 *
 * The area from dword 0x30 to 0x3ff is available for driver usage.
 */
#define I915_GEM_HWS_INDEX              0x30
#define I915_GEM_HWS_SCRATCH_INDEX      0x40
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
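
/*
 * Illustrative usage (editorial addition, not part of the original header):
 * the driver keeps its breadcrumb seqno in the driver-usage area described
 * above, at I915_GEM_HWS_INDEX, and reads it back with the helper defined
 * earlier, e.g.
 *
 *      u32 completed = intel_read_status_page(ring, I915_GEM_HWS_INDEX);
 *
 * Whether a coherency kick is required before trusting the value is
 * engine/generation specific; see the get_seqno() hook and its
 * lazy_coherency argument.
 */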

struct intel_ringbuffer *
intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size);
int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
                                     struct intel_ringbuffer *ringbuf);
void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf);
void intel_ringbuffer_free(struct intel_ringbuffer *ring);

void intel_stop_ring_buffer(struct intel_engine_cs *ring);
void intel_cleanup_ring_buffer(struct intel_engine_cs *ring);

int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request);

int __must_check intel_ring_begin(struct drm_i915_gem_request *req, int n);
int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
static inline void intel_ring_emit(struct intel_engine_cs *ring,
                                   u32 data)
{
        struct intel_ringbuffer *ringbuf = ring->buffer;
        iowrite32(data, ringbuf->virtual_start + ringbuf->tail);
        ringbuf->tail += 4;
}
static inline void intel_ring_advance(struct intel_engine_cs *ring)
{
        struct intel_ringbuffer *ringbuf = ring->buffer;
        ringbuf->tail &= ringbuf->size - 1;
}
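
/*
 * Usage sketch (editorial addition, not part of the original header):
 * callers reserve space with intel_ring_begin() for a known number of
 * dwords, emit exactly that many, then wrap the software tail with
 * intel_ring_advance().  A minimal sequence, assuming a valid request whose
 * engine is "ring" and with MI_NOOP taken from i915_reg.h:
 *
 *      ret = intel_ring_begin(req, 2);
 *      if (ret)
 *              return ret;
 *      intel_ring_emit(ring, MI_NOOP);
 *      intel_ring_emit(ring, MI_NOOP);
 *      intel_ring_advance(ring);
 *
 * Note that intel_ring_advance() only masks the CPU-side tail; the hardware
 * tail register is written later through the engine's write_tail() /
 * request submission path.
 */
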
int __intel_ring_space(int head, int tail, int size);
void intel_ring_update_space(struct intel_ringbuffer *ringbuf);
int intel_ring_space(struct intel_ringbuffer *ringbuf);
bool intel_ring_stopped(struct intel_engine_cs *ring);
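
/*
 * Editor's sketch of the space accounting (an assumption about the .c side,
 * consistent with the I915_RING_FREE_SPACE comment near the top of this
 * file): the free space reported for a ring is the distance from the
 * software tail forward (with wrap) to the hardware head, minus the 64-byte
 * margin that keeps head and tail off the same cacheline.  For example,
 * with size = 4096, head = 64 and tail = 4032, the wrap leaves 128 bytes,
 * of which 64 remain usable after the margin.
 */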

int __must_check intel_ring_idle(struct intel_engine_cs *ring);
void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno);
int intel_ring_flush_all_caches(struct drm_i915_gem_request *req);
int intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req);

void intel_fini_pipe_control(struct intel_engine_cs *ring);
int intel_init_pipe_control(struct intel_engine_cs *ring);

int intel_init_render_ring_buffer(struct drm_device *dev);
int intel_init_bsd_ring_buffer(struct drm_device *dev);
int intel_init_bsd2_ring_buffer(struct drm_device *dev);
int intel_init_blt_ring_buffer(struct drm_device *dev);
int intel_init_vebox_ring_buffer(struct drm_device *dev);

u64 intel_ring_get_active_head(struct intel_engine_cs *ring);

int init_workarounds_ring(struct intel_engine_cs *ring);

static inline u32 intel_ring_get_tail(struct intel_ringbuffer *ringbuf)
{
        return ringbuf->tail;
}

/*
 * Arbitrary size for largest possible 'add request' sequence. The code paths
 * are complex and variable. Empirical measurement shows that the worst case
 * is ILK at 136 words. Reserving too much is better than reserving too little
 * as that allows for corner cases that might have been missed. So the figure
 * has been rounded up to 160 words.
 */
#define MIN_SPACE_FOR_ADD_REQUEST       160

/*
 * Reserve space in the ring to guarantee that the i915_add_request() call
 * will always have sufficient room to do its stuff. The request creation
 * code calls this automatically.
 */
void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size);
/* Cancel the reservation, e.g. because the request is being discarded. */
void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf);
/* Use the reserved space - for use by i915_add_request() only. */
void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf);
/* Finish with the reserved space - for use by i915_add_request() only. */
void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf);

/* Legacy ringbuffer specific portion of reservation code: */
int intel_ring_reserve_space(struct drm_i915_gem_request *request);
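
/*
 * Lifecycle sketch (editorial addition, following the comments above):
 * request creation reserves MIN_SPACE_FOR_ADD_REQUEST up front, and
 * i915_add_request() later consumes that reservation:
 *
 *      intel_ring_reserved_space_reserve(ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
 *      ... build the request, emitting commands as usual ...
 *      intel_ring_reserved_space_use(ringbuf);   // inside i915_add_request()
 *      ... emit the final flush and breadcrumb ...
 *      intel_ring_reserved_space_end(ringbuf);
 *
 * If the request is abandoned instead, intel_ring_reserved_space_cancel()
 * returns the reservation.
 */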

#endif /* _INTEL_RINGBUFFER_H_ */