Subversion Repositories: KolibriOS

Rev 6660

  1. /*
  2.  * Copyright © 2008-2010 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  *
  23.  * Authors:
  24.  *    Eric Anholt <eric@anholt.net>
  25.  *    Zou Nan hai <nanhai.zou@intel.com>
  26.  *    Xiang Hai hao <haihao.xiang@intel.com>
  27.  *
  28.  */
  29.  
  30. #include <linux/log2.h>
  31. #include <drm/drmP.h>
  32. #include "i915_drv.h"
  33. #include <drm/i915_drm.h>
  34. #include "i915_trace.h"
  35. #include "intel_drv.h"
  36.  
  37. int __intel_ring_space(int head, int tail, int size)
  38. {
  39.         int space = head - tail;
  40.         if (space <= 0)
  41.                 space += size;
  42.         return space - I915_RING_FREE_SPACE;
  43. }
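/*
 * Worked example for the calculation above (values are illustrative, not
 * taken from real hardware state): with size == 4096, head == 512 and
 * tail == 3584, head - tail is -3072, which wraps to -3072 + 4096 = 1024
 * bytes between the tail and the head; the function then returns
 * 1024 - I915_RING_FREE_SPACE, i.e. the wrapped distance minus the small
 * reserve the driver always keeps free in the ring.
 */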
  44.  
  45. void intel_ring_update_space(struct intel_ringbuffer *ringbuf)
  46. {
  47.         if (ringbuf->last_retired_head != -1) {
  48.                 ringbuf->head = ringbuf->last_retired_head;
  49.                 ringbuf->last_retired_head = -1;
  50.         }
  51.  
  52.         ringbuf->space = __intel_ring_space(ringbuf->head & HEAD_ADDR,
  53.                                             ringbuf->tail, ringbuf->size);
  54. }
  55.  
  56. int intel_ring_space(struct intel_ringbuffer *ringbuf)
  57. {
  58.         intel_ring_update_space(ringbuf);
  59.         return ringbuf->space;
  60. }
  61.  
  62. bool intel_ring_stopped(struct intel_engine_cs *ring)
  63. {
  64.         struct drm_i915_private *dev_priv = ring->dev->dev_private;
  65.         return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
  66. }
  67.  
  68. static void __intel_ring_advance(struct intel_engine_cs *ring)
  69. {
  70.         struct intel_ringbuffer *ringbuf = ring->buffer;
  71.         ringbuf->tail &= ringbuf->size - 1;
  72.         if (intel_ring_stopped(ring))
  73.                 return;
  74.         ring->write_tail(ring, ringbuf->tail);
  75. }
  76.  
  77. static int
  78. gen2_render_ring_flush(struct drm_i915_gem_request *req,
  79.                        u32      invalidate_domains,
  80.                        u32      flush_domains)
  81. {
  82.         struct intel_engine_cs *ring = req->ring;
  83.         u32 cmd;
  84.         int ret;
  85.  
  86.         cmd = MI_FLUSH;
  87.         if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
  88.                 cmd |= MI_NO_WRITE_FLUSH;
  89.  
  90.         if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
  91.                 cmd |= MI_READ_FLUSH;
  92.  
  93.         ret = intel_ring_begin(req, 2);
  94.         if (ret)
  95.                 return ret;
  96.  
  97.         intel_ring_emit(ring, cmd);
  98.         intel_ring_emit(ring, MI_NOOP);
  99.         intel_ring_advance(ring);
  100.  
  101.         return 0;
  102. }
  103.  
  104. static int
  105. gen4_render_ring_flush(struct drm_i915_gem_request *req,
  106.                        u32      invalidate_domains,
  107.                        u32      flush_domains)
  108. {
  109.         struct intel_engine_cs *ring = req->ring;
  110.         struct drm_device *dev = ring->dev;
  111.         u32 cmd;
  112.         int ret;
  113.  
  114.         /*
  115.          * read/write caches:
  116.          *
  117.          * I915_GEM_DOMAIN_RENDER is always invalidated, but is
  118.          * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
  119.          * also flushed at 2d versus 3d pipeline switches.
  120.          *
  121.          * read-only caches:
  122.          *
  123.          * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
  124.          * MI_READ_FLUSH is set, and is always flushed on 965.
  125.          *
  126.          * I915_GEM_DOMAIN_COMMAND may not exist?
  127.          *
  128.          * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
  129.          * invalidated when MI_EXE_FLUSH is set.
  130.          *
  131.          * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
  132.          * invalidated with every MI_FLUSH.
  133.          *
  134.          * TLBs:
  135.          *
  136.          * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
  137.          * and I915_GEM_DOMAIN_CPU are invalidated at PTE write, and
  138.          * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
  139.          * are flushed at any MI_FLUSH.
  140.          */
  141.  
  142.         cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
  143.         if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
  144.                 cmd &= ~MI_NO_WRITE_FLUSH;
  145.         if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
  146.                 cmd |= MI_EXE_FLUSH;
  147.  
  148.         if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
  149.             (IS_G4X(dev) || IS_GEN5(dev)))
  150.                 cmd |= MI_INVALIDATE_ISP;
  151.  
  152.         ret = intel_ring_begin(req, 2);
  153.         if (ret)
  154.                 return ret;
  155.  
  156.         intel_ring_emit(ring, cmd);
  157.         intel_ring_emit(ring, MI_NOOP);
  158.         intel_ring_advance(ring);
  159.  
  160.         return 0;
  161. }
  162.  
  163. /**
  164.  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
  165.  * implementing two workarounds on gen6.  From section 1.4.7.1
  166.  * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
  167.  *
  168.  * [DevSNB-C+{W/A}] Before any depth stall flush (including those
  169.  * produced by non-pipelined state commands), software needs to first
  170.  * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
  171.  * 0.
  172.  *
  173.  * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
  174.  * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
  175.  *
  176.  * And the workaround for these two requires this workaround first:
  177.  *
  178.  * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
  179.  * BEFORE the pipe-control with a post-sync op and no write-cache
  180.  * flushes.
  181.  *
  182.  * And this last workaround is tricky because of the requirements on
  183.  * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
  184.  * volume 2 part 1:
  185.  *
  186.  *     "1 of the following must also be set:
  187.  *      - Render Target Cache Flush Enable ([12] of DW1)
  188.  *      - Depth Cache Flush Enable ([0] of DW1)
  189.  *      - Stall at Pixel Scoreboard ([1] of DW1)
  190.  *      - Depth Stall ([13] of DW1)
  191.  *      - Post-Sync Operation ([13] of DW1)
  192.  *      - Notify Enable ([8] of DW1)"
  193.  *
  194.  * The cache flushes require the workaround flush that triggered this
  195.  * one, so we can't use it.  Depth stall would trigger the same.
  196.  * Post-sync nonzero is what triggered this second workaround, so we
  197.  * can't use that one either.  Notify enable is IRQs, which aren't
  198.  * really our business.  That leaves only stall at scoreboard.
  199.  */
  200. static int
  201. intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
  202. {
  203.         struct intel_engine_cs *ring = req->ring;
  204.         u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
  205.         int ret;
  206.  
  207.         ret = intel_ring_begin(req, 6);
  208.         if (ret)
  209.                 return ret;
  210.  
  211.         intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
  212.         intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
  213.                         PIPE_CONTROL_STALL_AT_SCOREBOARD);
  214.         intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
  215.         intel_ring_emit(ring, 0); /* low dword */
  216.         intel_ring_emit(ring, 0); /* high dword */
  217.         intel_ring_emit(ring, MI_NOOP);
  218.         intel_ring_advance(ring);
  219.  
  220.         ret = intel_ring_begin(req, 6);
  221.         if (ret)
  222.                 return ret;
  223.  
  224.         intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
  225.         intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
  226.         intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
  227.         intel_ring_emit(ring, 0);
  228.         intel_ring_emit(ring, 0);
  229.         intel_ring_emit(ring, MI_NOOP);
  230.         intel_ring_advance(ring);
  231.  
  232.         return 0;
  233. }
  234.  
  235. static int
  236. gen6_render_ring_flush(struct drm_i915_gem_request *req,
  237.                        u32 invalidate_domains, u32 flush_domains)
  238. {
  239.         struct intel_engine_cs *ring = req->ring;
  240.         u32 flags = 0;
  241.         u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
  242.         int ret;
  243.  
  244.         /* Force SNB workarounds for PIPE_CONTROL flushes */
  245.         ret = intel_emit_post_sync_nonzero_flush(req);
  246.         if (ret)
  247.                 return ret;
  248.  
  249.         /* Just flush everything.  Experiments have shown that reducing the
  250.          * number of bits based on the write domains has little performance
  251.          * impact.
  252.          */
  253.         if (flush_domains) {
  254.                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
  255.                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
  256.                 /*
  257.                  * Ensure that any following seqno writes only happen
  258.                  * when the render cache is indeed flushed.
  259.                  */
  260.                 flags |= PIPE_CONTROL_CS_STALL;
  261.         }
  262.         if (invalidate_domains) {
  263.                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
  264.                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
  265.                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
  266.                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
  267.                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
  268.                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
  269.                 /*
  270.                  * TLB invalidate requires a post-sync write.
  271.                  */
  272.                 flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
  273.         }
  274.  
  275.         ret = intel_ring_begin(req, 4);
  276.         if (ret)
  277.                 return ret;
  278.  
  279.         intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
  280.         intel_ring_emit(ring, flags);
  281.         intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
  282.         intel_ring_emit(ring, 0);
  283.         intel_ring_advance(ring);
  284.  
  285.         return 0;
  286. }
  287.  
  288. static int
  289. gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
  290. {
  291.         struct intel_engine_cs *ring = req->ring;
  292.         int ret;
  293.  
  294.         ret = intel_ring_begin(req, 4);
  295.         if (ret)
  296.                 return ret;
  297.  
  298.         intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
  299.         intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
  300.                               PIPE_CONTROL_STALL_AT_SCOREBOARD);
  301.         intel_ring_emit(ring, 0);
  302.         intel_ring_emit(ring, 0);
  303.         intel_ring_advance(ring);
  304.  
  305.         return 0;
  306. }
  307.  
  308. static int
  309. gen7_render_ring_flush(struct drm_i915_gem_request *req,
  310.                        u32 invalidate_domains, u32 flush_domains)
  311. {
  312.         struct intel_engine_cs *ring = req->ring;
  313.         u32 flags = 0;
  314.         u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
  315.         int ret;
  316.  
  317.         /*
  318.          * Ensure that any following seqno writes only happen when the render
  319.          * cache is indeed flushed.
  320.          *
  321.          * Workaround: 4th PIPE_CONTROL command (except the ones with only
  322.          * read-cache invalidate bits set) must have the CS_STALL bit set. We
  323.          * don't try to be clever and just set it unconditionally.
  324.          */
  325.         flags |= PIPE_CONTROL_CS_STALL;
  326.  
  327.         /* Just flush everything.  Experiments have shown that reducing the
  328.          * number of bits based on the write domains has little performance
  329.          * impact.
  330.          */
  331.         if (flush_domains) {
  332.                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
  333.                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
  334.                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
  335.                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
  336.         }
  337.         if (invalidate_domains) {
  338.                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
  339.                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
  340.                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
  341.                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
  342.                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
  343.                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
  344.                 flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
  345.                 /*
  346.                  * TLB invalidate requires a post-sync write.
  347.                  */
  348.                 flags |= PIPE_CONTROL_QW_WRITE;
  349.                 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
  350.  
  351.                 flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
  352.  
  353.                 /* Workaround: we must issue a pipe_control with CS-stall bit
  354.                  * set before a pipe_control command that has the state cache
  355.                  * invalidate bit set. */
  356.                 gen7_render_ring_cs_stall_wa(req);
  357.         }
  358.  
  359.         ret = intel_ring_begin(req, 4);
  360.         if (ret)
  361.                 return ret;
  362.  
  363.         intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
  364.         intel_ring_emit(ring, flags);
  365.         intel_ring_emit(ring, scratch_addr);
  366.         intel_ring_emit(ring, 0);
  367.         intel_ring_advance(ring);
  368.  
  369.         return 0;
  370. }
  371.  
  372. static int
  373. gen8_emit_pipe_control(struct drm_i915_gem_request *req,
  374.                        u32 flags, u32 scratch_addr)
  375. {
  376.         struct intel_engine_cs *ring = req->ring;
  377.         int ret;
  378.  
  379.         ret = intel_ring_begin(req, 6);
  380.         if (ret)
  381.                 return ret;
  382.  
  383.         intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
  384.         intel_ring_emit(ring, flags);
  385.         intel_ring_emit(ring, scratch_addr);
  386.         intel_ring_emit(ring, 0);
  387.         intel_ring_emit(ring, 0);
  388.         intel_ring_emit(ring, 0);
  389.         intel_ring_advance(ring);
  390.  
  391.         return 0;
  392. }
  393.  
  394. static int
  395. gen8_render_ring_flush(struct drm_i915_gem_request *req,
  396.                        u32 invalidate_domains, u32 flush_domains)
  397. {
  398.         u32 flags = 0;
  399.         u32 scratch_addr = req->ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
  400.         int ret;
  401.  
  402.         flags |= PIPE_CONTROL_CS_STALL;
  403.  
  404.         if (flush_domains) {
  405.                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
  406.                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
  407.                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
  408.                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
  409.         }
  410.         if (invalidate_domains) {
  411.                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
  412.                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
  413.                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
  414.                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
  415.                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
  416.                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
  417.                 flags |= PIPE_CONTROL_QW_WRITE;
  418.                 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
  419.  
  420.                 /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
  421.                 ret = gen8_emit_pipe_control(req,
  422.                                              PIPE_CONTROL_CS_STALL |
  423.                                              PIPE_CONTROL_STALL_AT_SCOREBOARD,
  424.                                              0);
  425.                 if (ret)
  426.                         return ret;
  427.         }
  428.  
  429.         return gen8_emit_pipe_control(req, flags, scratch_addr);
  430. }
  431.  
  432. static void ring_write_tail(struct intel_engine_cs *ring,
  433.                             u32 value)
  434. {
  435.         struct drm_i915_private *dev_priv = ring->dev->dev_private;
  436.         I915_WRITE_TAIL(ring, value);
  437. }
  438.  
  439. u64 intel_ring_get_active_head(struct intel_engine_cs *ring)
  440. {
  441.         struct drm_i915_private *dev_priv = ring->dev->dev_private;
  442.         u64 acthd;
  443.  
  444.         if (INTEL_INFO(ring->dev)->gen >= 8)
  445.                 acthd = I915_READ64_2x32(RING_ACTHD(ring->mmio_base),
  446.                                          RING_ACTHD_UDW(ring->mmio_base));
  447.         else if (INTEL_INFO(ring->dev)->gen >= 4)
  448.                 acthd = I915_READ(RING_ACTHD(ring->mmio_base));
  449.         else
  450.                 acthd = I915_READ(ACTHD);
  451.  
  452.         return acthd;
  453. }
  454.  
  455. static void ring_setup_phys_status_page(struct intel_engine_cs *ring)
  456. {
  457.         struct drm_i915_private *dev_priv = ring->dev->dev_private;
  458.         u32 addr;
  459.  
  460.         addr = dev_priv->status_page_dmah->busaddr;
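        /* The full bus address may not fit in the low 32 bits written below:
         * shifting busaddr right by 28 and masking with 0xf0 folds address
         * bits 35:32 into bits 7:4 of the value programmed into HWS_PGA.
         */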
  461.         if (INTEL_INFO(ring->dev)->gen >= 4)
  462.                 addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
  463.         I915_WRITE(HWS_PGA, addr);
  464. }
  465.  
  466. static void intel_ring_setup_status_page(struct intel_engine_cs *ring)
  467. {
  468.         struct drm_device *dev = ring->dev;
  469.         struct drm_i915_private *dev_priv = ring->dev->dev_private;
  470.         i915_reg_t mmio;
  471.  
  472.         /* The ring status page addresses are no longer next to the rest of
  473.          * the ring registers as of gen7.
  474.          */
  475.         if (IS_GEN7(dev)) {
  476.                 switch (ring->id) {
  477.                 case RCS:
  478.                         mmio = RENDER_HWS_PGA_GEN7;
  479.                         break;
  480.                 case BCS:
  481.                         mmio = BLT_HWS_PGA_GEN7;
  482.                         break;
  483.                 /*
  484.                  * VCS2 doesn't actually exist on Gen7; it is listed
  485.                  * here only to silence the gcc switch-check warning.
  486.                  */
  487.                 case VCS2:
  488.                 case VCS:
  489.                         mmio = BSD_HWS_PGA_GEN7;
  490.                         break;
  491.                 case VECS:
  492.                         mmio = VEBOX_HWS_PGA_GEN7;
  493.                         break;
  494.                 }
  495.         } else if (IS_GEN6(ring->dev)) {
  496.                 mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
  497.         } else {
  498.                 /* XXX: gen8 returns to sanity */
  499.                 mmio = RING_HWS_PGA(ring->mmio_base);
  500.         }
  501.  
  502.         I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
  503.         POSTING_READ(mmio);
  504.  
  505.         /*
  506.          * Flush the TLB for this page
  507.          *
  508.          * FIXME: These two bits have disappeared on gen8, so a question
  509.          * arises: do we still need this and if so how should we go about
  510.          * invalidating the TLB?
  511.          */
  512.         if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
  513.                 i915_reg_t reg = RING_INSTPM(ring->mmio_base);
  514.  
  515.                 /* ring should be idle before issuing a sync flush */
  516.                 WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
  517.  
  518.                 I915_WRITE(reg,
  519.                            _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
  520.                                               INSTPM_SYNC_FLUSH));
  521.                 if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
  522.                              1000))
  523.                         DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
  524.                                   ring->name);
  525.         }
  526. }
  527.  
  528. static bool stop_ring(struct intel_engine_cs *ring)
  529. {
  530.         struct drm_i915_private *dev_priv = to_i915(ring->dev);
  531.  
  532.         if (!IS_GEN2(ring->dev)) {
  533.                 I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
  534.                 if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
  535.                         DRM_ERROR("%s : timed out trying to stop ring\n", ring->name);
  536.                         /* Sometimes we observe that the idle flag is not
  537.                          * set even though the ring is empty. So double
  538.                          * check before giving up.
  539.                          */
  540.                         if (I915_READ_HEAD(ring) != I915_READ_TAIL(ring))
  541.                                 return false;
  542.                 }
  543.         }
  544.  
  545.         I915_WRITE_CTL(ring, 0);
  546.         I915_WRITE_HEAD(ring, 0);
  547.         ring->write_tail(ring, 0);
  548.  
  549.         if (!IS_GEN2(ring->dev)) {
  550.                 (void)I915_READ_CTL(ring);
  551.                 I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
  552.         }
  553.  
  554.         return (I915_READ_HEAD(ring) & HEAD_ADDR) == 0;
  555. }
  556.  
  557. static int init_ring_common(struct intel_engine_cs *ring)
  558. {
  559.         struct drm_device *dev = ring->dev;
  560.         struct drm_i915_private *dev_priv = dev->dev_private;
  561.         struct intel_ringbuffer *ringbuf = ring->buffer;
  562.         struct drm_i915_gem_object *obj = ringbuf->obj;
  563.         int ret = 0;
  564.  
  565.         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
  566.  
  567.         if (!stop_ring(ring)) {
  568.                 /* G45 ring initialization often fails to reset head to zero */
  569.                 DRM_DEBUG_KMS("%s head not reset to zero "
  570.                               "ctl %08x head %08x tail %08x start %08x\n",
  571.                               ring->name,
  572.                               I915_READ_CTL(ring),
  573.                               I915_READ_HEAD(ring),
  574.                               I915_READ_TAIL(ring),
  575.                               I915_READ_START(ring));
  576.  
  577.                 if (!stop_ring(ring)) {
  578.                         DRM_ERROR("failed to set %s head to zero "
  579.                                   "ctl %08x head %08x tail %08x start %08x\n",
  580.                                   ring->name,
  581.                                   I915_READ_CTL(ring),
  582.                                   I915_READ_HEAD(ring),
  583.                                   I915_READ_TAIL(ring),
  584.                                   I915_READ_START(ring));
  585.                         ret = -EIO;
  586.                         goto out;
  587.                 }
  588.         }
  589.  
  590.         if (I915_NEED_GFX_HWS(dev))
  591.                 intel_ring_setup_status_page(ring);
  592.         else
  593.                 ring_setup_phys_status_page(ring);
  594.  
  595.         /* Enforce ordering by reading HEAD register back */
  596.         I915_READ_HEAD(ring);
  597.  
  598.         /* Initialize the ring. This must happen _after_ we've cleared the ring
  599.          * registers with the above sequence (the readback of the HEAD registers
  600.          * also enforces ordering), otherwise the hw might lose the new ring
  601.          * register values. */
  602.         I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj));
  603.  
  604.         /* WaClearRingBufHeadRegAtInit:ctg,elk */
  605.         if (I915_READ_HEAD(ring))
  606.                 DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
  607.                           ring->name, I915_READ_HEAD(ring));
  608.         I915_WRITE_HEAD(ring, 0);
  609.         (void)I915_READ_HEAD(ring);
  610.  
  611.         I915_WRITE_CTL(ring,
  612.                         ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES)
  613.                         | RING_VALID);
  614.  
  615.         /* If the head is still not zero, the ring is dead */
  616.         if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
  617.                      I915_READ_START(ring) == i915_gem_obj_ggtt_offset(obj) &&
  618.                      (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
  619.                 DRM_ERROR("%s initialization failed "
  620.                           "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
  621.                           ring->name,
  622.                           I915_READ_CTL(ring), I915_READ_CTL(ring) & RING_VALID,
  623.                           I915_READ_HEAD(ring), I915_READ_TAIL(ring),
  624.                           I915_READ_START(ring), (unsigned long)i915_gem_obj_ggtt_offset(obj));
  625.                 ret = -EIO;
  626.                 goto out;
  627.         }
  628.  
  629.         ringbuf->last_retired_head = -1;
  630.         ringbuf->head = I915_READ_HEAD(ring);
  631.         ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
  632.         intel_ring_update_space(ringbuf);
  633.  
  634.         memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
  635.  
  636. out:
  637.         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
  638.  
  639.         return ret;
  640. }
  641.  
  642. void
  643. intel_fini_pipe_control(struct intel_engine_cs *ring)
  644. {
  645.         struct drm_device *dev = ring->dev;
  646.  
  647.         if (ring->scratch.obj == NULL)
  648.                 return;
  649.  
  650.         if (INTEL_INFO(dev)->gen >= 5) {
  651.                 kunmap(sg_page(ring->scratch.obj->pages->sgl));
  652.                 i915_gem_object_ggtt_unpin(ring->scratch.obj);
  653.         }
  654.  
  655.         drm_gem_object_unreference(&ring->scratch.obj->base);
  656.         ring->scratch.obj = NULL;
  657. }
  658.  
  659. int
  660. intel_init_pipe_control(struct intel_engine_cs *ring)
  661. {
  662.         int ret;
  663.  
  664.         WARN_ON(ring->scratch.obj);
  665.  
  666.         ring->scratch.obj = i915_gem_alloc_object(ring->dev, 4096);
  667.         if (ring->scratch.obj == NULL) {
  668.                 DRM_ERROR("Failed to allocate seqno page\n");
  669.                 ret = -ENOMEM;
  670.                 goto err;
  671.         }
  672.  
  673.         ret = i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
  674.         if (ret)
  675.                 goto err_unref;
  676.  
  677.         ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0);
  678.         if (ret)
  679.                 goto err_unref;
  680.  
  681.         ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(ring->scratch.obj);
  682.         ring->scratch.cpu_page = kmap(sg_page(ring->scratch.obj->pages->sgl));
  683.         if (ring->scratch.cpu_page == NULL) {
  684.                 ret = -ENOMEM;
  685.                 goto err_unpin;
  686.         }
  687.  
  688.         DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
  689.                          ring->name, ring->scratch.gtt_offset);
  690.         return 0;
  691.  
  692. err_unpin:
  693.         i915_gem_object_ggtt_unpin(ring->scratch.obj);
  694. err_unref:
  695.         drm_gem_object_unreference(&ring->scratch.obj->base);
  696. err:
  697.         return ret;
  698. }
  699.  
  700. static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
  701. {
  702.         int ret, i;
  703.         struct intel_engine_cs *ring = req->ring;
  704.         struct drm_device *dev = ring->dev;
  705.         struct drm_i915_private *dev_priv = dev->dev_private;
  706.         struct i915_workarounds *w = &dev_priv->workarounds;
  707.  
  708.         if (w->count == 0)
  709.                 return 0;
  710.  
  711.         ring->gpu_caches_dirty = true;
  712.         ret = intel_ring_flush_all_caches(req);
  713.         if (ret)
  714.                 return ret;
  715.  
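        /* Dword budget for the block emitted below: one MI_LOAD_REGISTER_IMM
         * header, two dwords (register offset and value) per workaround, and
         * a trailing MI_NOOP, i.e. w->count * 2 + 2 dwords in total.
         */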
  716.         ret = intel_ring_begin(req, (w->count * 2 + 2));
  717.         if (ret)
  718.                 return ret;
  719.  
  720.         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count));
  721.         for (i = 0; i < w->count; i++) {
  722.                 intel_ring_emit_reg(ring, w->reg[i].addr);
  723.                 intel_ring_emit(ring, w->reg[i].value);
  724.         }
  725.         intel_ring_emit(ring, MI_NOOP);
  726.  
  727.         intel_ring_advance(ring);
  728.  
  729.         ring->gpu_caches_dirty = true;
  730.         ret = intel_ring_flush_all_caches(req);
  731.         if (ret)
  732.                 return ret;
  733.  
  734.         DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);
  735.  
  736.         return 0;
  737. }
  738.  
  739. static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
  740. {
  741.         int ret;
  742.  
  743.         ret = intel_ring_workarounds_emit(req);
  744.         if (ret != 0)
  745.                 return ret;
  746.  
  747.         ret = i915_gem_render_state_init(req);
  748.         if (ret)
  749.                 DRM_ERROR("init render state: %d\n", ret);
  750.  
  751.         return ret;
  752. }
  753.  
  754. static int wa_add(struct drm_i915_private *dev_priv,
  755.                   i915_reg_t addr,
  756.                   const u32 mask, const u32 val)
  757. {
  758.         const u32 idx = dev_priv->workarounds.count;
  759.  
  760.         if (WARN_ON(idx >= I915_MAX_WA_REGS))
  761.                 return -ENOSPC;
  762.  
  763.         dev_priv->workarounds.reg[idx].addr = addr;
  764.         dev_priv->workarounds.reg[idx].value = val;
  765.         dev_priv->workarounds.reg[idx].mask = mask;
  766.  
  767.         dev_priv->workarounds.count++;
  768.  
  769.         return 0;
  770. }
  771.  
  772. #define WA_REG(addr, mask, val) do { \
  773.                 const int r = wa_add(dev_priv, (addr), (mask), (val)); \
  774.                 if (r) \
  775.                         return r; \
  776.         } while (0)
  777.  
  778. #define WA_SET_BIT_MASKED(addr, mask) \
  779.         WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
  780.  
  781. #define WA_CLR_BIT_MASKED(addr, mask) \
  782.         WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))
  783.  
  784. #define WA_SET_FIELD_MASKED(addr, mask, value) \
  785.         WA_REG(addr, mask, _MASKED_FIELD(mask, value))
  786.  
  787. #define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask))
  788. #define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask))
  789.  
  790. #define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)
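/*
 * Illustrative sketch of how the WA_* helpers above are meant to be used
 * (the register and bit names here are hypothetical, not part of the
 * driver): the macros only record an entry in dev_priv->workarounds; the
 * writes are replayed onto the hardware later by
 * intel_ring_workarounds_emit() using MI_LOAD_REGISTER_IMM.  They rely on
 * a local dev_priv variable and on the enclosing function returning int.
 */
#if 0   /* example only, never compiled */
static int example_init_workarounds(struct intel_engine_cs *ring)
{
        struct drm_i915_private *dev_priv = ring->dev->dev_private;

        /* WaExampleWorkaround:xyz (hypothetical) */
        WA_SET_BIT_MASKED(EXAMPLE_CHICKEN_REG, EXAMPLE_DISABLE_BIT);

        return 0;
}
#endif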
  791.  
  792. static int gen8_init_workarounds(struct intel_engine_cs *ring)
  793. {
  794.         struct drm_device *dev = ring->dev;
  795.         struct drm_i915_private *dev_priv = dev->dev_private;
  796.  
  797.         WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
  798.  
  799.         /* WaDisableAsyncFlipPerfMode:bdw,chv */
  800.         WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
  801.  
  802.         /* WaDisablePartialInstShootdown:bdw,chv */
  803.         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
  804.                           PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
  805.  
  806.         /* Use Force Non-Coherent whenever executing a 3D context. This is a
  807.          * workaround for a possible hang in the unlikely event a TLB
  808.          * invalidation occurs during a PSD flush.
  809.          */
  810.         /* WaForceEnableNonCoherent:bdw,chv */
  811.         /* WaHdcDisableFetchWhenMasked:bdw,chv */
  812.         WA_SET_BIT_MASKED(HDC_CHICKEN0,
  813.                           HDC_DONOT_FETCH_MEM_WHEN_MASKED |
  814.                           HDC_FORCE_NON_COHERENT);
  815.  
  816.         /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
  817.          * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
  818.          *  polygons in the same 8x4 pixel/sample area to be processed without
  819.          *  stalling waiting for the earlier ones to write to Hierarchical Z
  820.          *  buffer."
  821.          *
  822.          * This optimization is off by default for BDW and CHV; turn it on.
  823.          */
  824.         WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
  825.  
  826.         /* Wa4x4STCOptimizationDisable:bdw,chv */
  827.         WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
  828.  
  829.         /*
  830.          * BSpec recommends 8x4 when MSAA is used,
  831.          * however in practice 16x4 seems fastest.
  832.          *
  833.          * Note that PS/WM thread counts depend on the WIZ hashing
  834.          * disable bit, which we don't touch here, but it's good
  835.          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
  836.          */
  837.         WA_SET_FIELD_MASKED(GEN7_GT_MODE,
  838.                             GEN6_WIZ_HASHING_MASK,
  839.                             GEN6_WIZ_HASHING_16x4);
  840.  
  841.         return 0;
  842. }
  843.  
  844. static int bdw_init_workarounds(struct intel_engine_cs *ring)
  845. {
  846.         int ret;
  847.         struct drm_device *dev = ring->dev;
  848.         struct drm_i915_private *dev_priv = dev->dev_private;
  849.  
  850.         ret = gen8_init_workarounds(ring);
  851.         if (ret)
  852.                 return ret;
  853.  
  854.         /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
  855.         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
  856.  
  857.         /* WaDisableDopClockGating:bdw */
  858.         WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
  859.                           DOP_CLOCK_GATING_DISABLE);
  860.  
  861.         WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
  862.                           GEN8_SAMPLER_POWER_BYPASS_DIS);
  863.  
  864.         WA_SET_BIT_MASKED(HDC_CHICKEN0,
  865.                           /* WaForceContextSaveRestoreNonCoherent:bdw */
  866.                           HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
  867.                           /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
  868.                           (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
  869.  
  870.         return 0;
  871. }
  872.  
  873. static int chv_init_workarounds(struct intel_engine_cs *ring)
  874. {
  875.         int ret;
  876.         struct drm_device *dev = ring->dev;
  877.         struct drm_i915_private *dev_priv = dev->dev_private;
  878.  
  879.         ret = gen8_init_workarounds(ring);
  880.         if (ret)
  881.                 return ret;
  882.  
  883.         /* WaDisableThreadStallDopClockGating:chv */
  884.         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
  885.  
  886.         /* Improve HiZ throughput on CHV. */
  887.         WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
  888.  
  889.         return 0;
  890. }
  891.  
  892. static int gen9_init_workarounds(struct intel_engine_cs *ring)
  893. {
  894.         struct drm_device *dev = ring->dev;
  895.         struct drm_i915_private *dev_priv = dev->dev_private;
  896.         uint32_t tmp;
  897.  
  898.         /* WaEnableLbsSlaRetryTimerDecrement:skl */
  899.         I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
  900.                    GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
  901.  
  902.         /* WaDisableKillLogic:bxt,skl */
  903.         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
  904.                    ECOCHK_DIS_TLB);
  905.  
  906.         /* WaDisablePartialInstShootdown:skl,bxt */
  907.         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
  908.                           PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
  909.  
  910.         /* Syncing dependencies between camera and graphics:skl,bxt */
  911.         WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
  912.                           GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
  913.  
  914.         /* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */
  915.         if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
  916.             IS_BXT_REVID(dev, 0, BXT_REVID_A1))
  917.                 WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
  918.                                   GEN9_DG_MIRROR_FIX_ENABLE);
  919.  
  920.         /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
  921.         if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
  922.             IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
  923.                 WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
  924.                                   GEN9_RHWO_OPTIMIZATION_DISABLE);
  925.                 /*
  926.                  * The WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be
  927.                  * set, but we do that in the per-context batchbuffer because this
  928.                  * register does not get restored on context restore
  929.                  */
  930.         }
  931.  
  932.         /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
  933.         if (IS_SKL_REVID(dev, SKL_REVID_C0, REVID_FOREVER) || IS_BROXTON(dev))
  934.                 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
  935.                                   GEN9_ENABLE_YV12_BUGFIX);
  936.  
  937.         /* Wa4x4STCOptimizationDisable:skl,bxt */
  938.         /* WaDisablePartialResolveInVc:skl,bxt */
  939.         WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
  940.                                          GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
  941.  
  942.         /* WaCcsTlbPrefetchDisable:skl,bxt */
  943.         WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
  944.                           GEN9_CCS_TLB_PREFETCH_ENABLE);
  945.  
  946.         /* WaDisableMaskBasedCammingInRCC:skl,bxt */
  947.         if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_C0) ||
  948.             IS_BXT_REVID(dev, 0, BXT_REVID_A1))
  949.                 WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
  950.                                   PIXEL_MASK_CAMMING_DISABLE);
  951.  
  952.         /* WaForceContextSaveRestoreNonCoherent:skl,bxt */
  953.         tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT;
  954.         if (IS_SKL_REVID(dev, SKL_REVID_F0, REVID_FOREVER) ||
  955.             IS_BXT_REVID(dev, BXT_REVID_B0, REVID_FOREVER))
  956.                 tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE;
  957.         WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp);
  958.  
  959.         /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt */
  960.         if (IS_SKYLAKE(dev) || IS_BXT_REVID(dev, 0, BXT_REVID_B0))
  961.                 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
  962.                                   GEN8_SAMPLER_POWER_BYPASS_DIS);
  963.  
  964.         /* WaDisableSTUnitPowerOptimization:skl,bxt */
  965.         WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
  966.  
  967.         return 0;
  968. }
  969.  
  970. static int skl_tune_iz_hashing(struct intel_engine_cs *ring)
  971. {
  972.         struct drm_device *dev = ring->dev;
  973.         struct drm_i915_private *dev_priv = dev->dev_private;
  974.         u8 vals[3] = { 0, 0, 0 };
  975.         unsigned int i;
  976.  
  977.         for (i = 0; i < 3; i++) {
  978.                 u8 ss;
  979.  
  980.                 /*
  981.                  * Only consider slices where one, and only one, subslice has 7
  982.                  * EUs
  983.                  */
  984.                 if (!is_power_of_2(dev_priv->info.subslice_7eu[i]))
  985.                         continue;
  986.  
  987.                 /*
  988.                  * subslice_7eu[i] != 0 (because of the check above) and
  989.                  * ss_max == 4 (maximum number of subslices possible per slice)
  990.                  *
  991.                  * ->    0 <= ss <= 3;
  992.                  */
  993.                 ss = ffs(dev_priv->info.subslice_7eu[i]) - 1;
  994.                 vals[i] = 3 - ss;
  995.         }
  996.  
  997.         if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
  998.                 return 0;
  999.  
  1000.         /* Tune IZ hashing. See intel_device_info_runtime_init() */
  1001.         WA_SET_FIELD_MASKED(GEN7_GT_MODE,
  1002.                             GEN9_IZ_HASHING_MASK(2) |
  1003.                             GEN9_IZ_HASHING_MASK(1) |
  1004.                             GEN9_IZ_HASHING_MASK(0),
  1005.                             GEN9_IZ_HASHING(2, vals[2]) |
  1006.                             GEN9_IZ_HASHING(1, vals[1]) |
  1007.                             GEN9_IZ_HASHING(0, vals[0]));
  1008.  
  1009.         return 0;
  1010. }
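/*
 * Worked example for skl_tune_iz_hashing() (fuse values are hypothetical):
 * if subslice_7eu[0] == 0x4, exactly one subslice (subslice 2) of slice 0
 * has 7 EUs, so ffs(0x4) - 1 == 2 and vals[0] = 3 - 2 = 1, and slice 0 is
 * programmed with GEN9_IZ_HASHING(0, 1).  A slice with subslice_7eu == 0x5
 * has two candidate subslices, fails the is_power_of_2() check and keeps
 * the default vals[] entry of 0.
 */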
  1011.  
  1012. static int skl_init_workarounds(struct intel_engine_cs *ring)
  1013. {
  1014.         int ret;
  1015.         struct drm_device *dev = ring->dev;
  1016.         struct drm_i915_private *dev_priv = dev->dev_private;
  1017.  
  1018.         ret = gen9_init_workarounds(ring);
  1019.         if (ret)
  1020.                 return ret;
  1021.  
  1022.         if (IS_SKL_REVID(dev, 0, SKL_REVID_D0)) {
  1023.                 /* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
  1024.                 I915_WRITE(FF_SLICE_CS_CHICKEN2,
  1025.                            _MASKED_BIT_ENABLE(GEN9_TSG_BARRIER_ACK_DISABLE));
  1026.         }
  1027.  
  1028.         /* GEN8_L3SQCREG4 has a dependency on the WA batch, so any new changes
  1029.          * involving this register should also be added to the WA batch as required.
  1030.          */
  1031.         if (IS_SKL_REVID(dev, 0, SKL_REVID_E0))
  1032.                 /* WaDisableLSQCROPERFforOCL:skl */
  1033.                 I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
  1034.                            GEN8_LQSC_RO_PERF_DIS);
  1035.  
  1036.         /* WaEnableGapsTsvCreditFix:skl */
  1037.         if (IS_SKL_REVID(dev, SKL_REVID_C0, REVID_FOREVER)) {
  1038.                 I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
  1039.                                            GEN9_GAPS_TSV_CREDIT_DISABLE));
  1040.         }
  1041.  
  1042.         /* WaDisablePowerCompilerClockGating:skl */
  1043.         if (IS_SKL_REVID(dev, SKL_REVID_B0, SKL_REVID_B0))
  1044.                 WA_SET_BIT_MASKED(HIZ_CHICKEN,
  1045.                                   BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
  1046.  
  1047.         /* This is tied to WaForceContextSaveRestoreNonCoherent */
  1048.         if (IS_SKL_REVID(dev, 0, REVID_FOREVER)) {
  1049.                 /*
  1050.                  * Use Force Non-Coherent whenever executing a 3D context. This
  1051.                  * is a workaround for a possible hang in the unlikely event
  1052.                  * a TLB invalidation occurs during a PSD flush.
  1053.                  */
  1054.                 /* WaForceEnableNonCoherent:skl */
  1055.                 WA_SET_BIT_MASKED(HDC_CHICKEN0,
  1056.                                   HDC_FORCE_NON_COHERENT);
  1057.  
  1058.                 /* WaDisableHDCInvalidation:skl */
  1059.                 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
  1060.                            BDW_DISABLE_HDC_INVALIDATION);
  1061.         }
  1062.  
  1063.         /* WaBarrierPerformanceFixDisable:skl */
  1064.         if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_D0))
  1065.                 WA_SET_BIT_MASKED(HDC_CHICKEN0,
  1066.                                   HDC_FENCE_DEST_SLM_DISABLE |
  1067.                                   HDC_BARRIER_PERFORMANCE_DISABLE);
  1068.  
  1069.         /* WaDisableSbeCacheDispatchPortSharing:skl */
  1070.         if (IS_SKL_REVID(dev, 0, SKL_REVID_F0))
  1071.                 WA_SET_BIT_MASKED(
  1072.                         GEN7_HALF_SLICE_CHICKEN1,
  1073.                         GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
  1074.  
  1075.         return skl_tune_iz_hashing(ring);
  1076. }
  1077.  
  1078. static int bxt_init_workarounds(struct intel_engine_cs *ring)
  1079. {
  1080.         int ret;
  1081.         struct drm_device *dev = ring->dev;
  1082.         struct drm_i915_private *dev_priv = dev->dev_private;
  1083.  
  1084.         ret = gen9_init_workarounds(ring);
  1085.         if (ret)
  1086.                 return ret;
  1087.  
  1088.         /* WaStoreMultiplePTEenable:bxt */
  1089.         /* This is a requirement according to the hardware specification */
  1090.         if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
  1091.                 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
  1092.  
  1093.         /* WaSetClckGatingDisableMedia:bxt */
  1094.         if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
  1095.                 I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
  1096.                                             ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
  1097.         }
  1098.  
  1099.         /* WaDisableThreadStallDopClockGating:bxt */
  1100.         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
  1101.                           STALL_DOP_GATING_DISABLE);
  1102.  
  1103.         /* WaDisableSbeCacheDispatchPortSharing:bxt */
  1104.         if (IS_BXT_REVID(dev, 0, BXT_REVID_B0)) {
  1105.                 WA_SET_BIT_MASKED(
  1106.                         GEN7_HALF_SLICE_CHICKEN1,
  1107.                         GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
  1108.         }
  1109.  
  1110.         return 0;
  1111. }
  1112.  
  1113. int init_workarounds_ring(struct intel_engine_cs *ring)
  1114. {
  1115.         struct drm_device *dev = ring->dev;
  1116.         struct drm_i915_private *dev_priv = dev->dev_private;
  1117.  
  1118.         WARN_ON(ring->id != RCS);
  1119.  
  1120.         dev_priv->workarounds.count = 0;
  1121.  
  1122.         if (IS_BROADWELL(dev))
  1123.                 return bdw_init_workarounds(ring);
  1124.  
  1125.         if (IS_CHERRYVIEW(dev))
  1126.                 return chv_init_workarounds(ring);
  1127.  
  1128.         if (IS_SKYLAKE(dev))
  1129.                 return skl_init_workarounds(ring);
  1130.  
  1131.         if (IS_BROXTON(dev))
  1132.                 return bxt_init_workarounds(ring);
  1133.  
  1134.         return 0;
  1135. }
  1136.  
  1137. static int init_render_ring(struct intel_engine_cs *ring)
  1138. {
  1139.         struct drm_device *dev = ring->dev;
  1140.         struct drm_i915_private *dev_priv = dev->dev_private;
  1141.         int ret = init_ring_common(ring);
  1142.         if (ret)
  1143.                 return ret;
  1144.  
  1145.         /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
  1146.         if (INTEL_INFO(dev)->gen >= 4 && INTEL_INFO(dev)->gen < 7)
  1147.                 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
  1148.  
  1149.         /* We need to disable the AsyncFlip performance optimisations in order
  1150.          * to use MI_WAIT_FOR_EVENT within the CS. It should already be
  1151.          * programmed to '1' on all products.
  1152.          *
  1153.          * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
  1154.          */
  1155.         if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
  1156.                 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
  1157.  
  1158.         /* Required for the hardware to program scanline values for waiting */
  1159.         /* WaEnableFlushTlbInvalidationMode:snb */
  1160.         if (INTEL_INFO(dev)->gen == 6)
  1161.                 I915_WRITE(GFX_MODE,
  1162.                            _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
  1163.  
  1164.         /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
  1165.         if (IS_GEN7(dev))
  1166.                 I915_WRITE(GFX_MODE_GEN7,
  1167.                            _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
  1168.                            _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
  1169.  
  1170.         if (IS_GEN6(dev)) {
  1171.                 /* From the Sandybridge PRM, volume 1 part 3, page 24:
  1172.                  * "If this bit is set, STCunit will have LRA as replacement
  1173.                  *  policy. [...] This bit must be reset.  LRA replacement
  1174.                  *  policy is not supported."
  1175.                  */
  1176.                 I915_WRITE(CACHE_MODE_0,
  1177.                            _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
  1178.         }
  1179.  
  1180.         if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
  1181.                 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
  1182.  
  1183.         if (HAS_L3_DPF(dev))
  1184.                 I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
  1185.  
  1186.         return init_workarounds_ring(ring);
  1187. }
  1188.  
  1189. static void render_ring_cleanup(struct intel_engine_cs *ring)
  1190. {
  1191.         struct drm_device *dev = ring->dev;
  1192.         struct drm_i915_private *dev_priv = dev->dev_private;
  1193.  
  1194.         if (dev_priv->semaphore_obj) {
  1195.                 i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
  1196.                 drm_gem_object_unreference(&dev_priv->semaphore_obj->base);
  1197.                 dev_priv->semaphore_obj = NULL;
  1198.         }
  1199.  
  1200.         intel_fini_pipe_control(ring);
  1201. }
  1202.  
  1203. static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
  1204.                            unsigned int num_dwords)
  1205. {
  1206. #define MBOX_UPDATE_DWORDS 8
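        /* 8 dwords per waiter: a six-dword PIPE_CONTROL that writes the seqno
         * to the waiter's semaphore slot, plus a two-dword MI_SEMAPHORE_SIGNAL.
         */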
  1207.         struct intel_engine_cs *signaller = signaller_req->ring;
  1208.         struct drm_device *dev = signaller->dev;
  1209.         struct drm_i915_private *dev_priv = dev->dev_private;
  1210.         struct intel_engine_cs *waiter;
  1211.         int i, ret, num_rings;
  1212.  
  1213.         num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
  1214.         num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
  1215. #undef MBOX_UPDATE_DWORDS
  1216.  
  1217.         ret = intel_ring_begin(signaller_req, num_dwords);
  1218.         if (ret)
  1219.                 return ret;
  1220.  
  1221.         for_each_ring(waiter, dev_priv, i) {
  1222.                 u32 seqno;
  1223.                 u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
  1224.                 if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
  1225.                         continue;
  1226.  
  1227.                 seqno = i915_gem_request_get_seqno(signaller_req);
  1228.                 intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
  1229.                 intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
  1230.                                            PIPE_CONTROL_QW_WRITE |
  1231.                                            PIPE_CONTROL_FLUSH_ENABLE);
  1232.                 intel_ring_emit(signaller, lower_32_bits(gtt_offset));
  1233.                 intel_ring_emit(signaller, upper_32_bits(gtt_offset));
  1234.                 intel_ring_emit(signaller, seqno);
  1235.                 intel_ring_emit(signaller, 0);
  1236.                 intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
  1237.                                            MI_SEMAPHORE_TARGET(waiter->id));
  1238.                 intel_ring_emit(signaller, 0);
  1239.         }
  1240.  
  1241.         return 0;
  1242. }
  1243.  
  1244. static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
  1245.                            unsigned int num_dwords)
  1246. {
  1247. #define MBOX_UPDATE_DWORDS 6
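        /* 6 dwords per waiter: a four-dword MI_FLUSH_DW that stores the seqno
         * to the waiter's semaphore slot, plus a two-dword MI_SEMAPHORE_SIGNAL.
         */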
  1248.         struct intel_engine_cs *signaller = signaller_req->ring;
  1249.         struct drm_device *dev = signaller->dev;
  1250.         struct drm_i915_private *dev_priv = dev->dev_private;
  1251.         struct intel_engine_cs *waiter;
  1252.         int i, ret, num_rings;
  1253.  
  1254.         num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
  1255.         num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
  1256. #undef MBOX_UPDATE_DWORDS
  1257.  
  1258.         ret = intel_ring_begin(signaller_req, num_dwords);
  1259.         if (ret)
  1260.                 return ret;
  1261.  
  1262.         for_each_ring(waiter, dev_priv, i) {
  1263.                 u32 seqno;
  1264.                 u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
  1265.                 if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
  1266.                         continue;
  1267.  
  1268.                 seqno = i915_gem_request_get_seqno(signaller_req);
  1269.                 intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
  1270.                                            MI_FLUSH_DW_OP_STOREDW);
  1271.                 intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
  1272.                                            MI_FLUSH_DW_USE_GTT);
  1273.                 intel_ring_emit(signaller, upper_32_bits(gtt_offset));
  1274.                 intel_ring_emit(signaller, seqno);
  1275.                 intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
  1276.                                            MI_SEMAPHORE_TARGET(waiter->id));
  1277.                 intel_ring_emit(signaller, 0);
  1278.         }
  1279.  
  1280.         return 0;
  1281. }
  1282.  
  1283. static int gen6_signal(struct drm_i915_gem_request *signaller_req,
  1284.                        unsigned int num_dwords)
  1285. {
  1286.         struct intel_engine_cs *signaller = signaller_req->ring;
  1287.         struct drm_device *dev = signaller->dev;
  1288.         struct drm_i915_private *dev_priv = dev->dev_private;
  1289.         struct intel_engine_cs *useless;
  1290.         int i, ret, num_rings;
  1291.  
  1292. #define MBOX_UPDATE_DWORDS 3
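        /* 3 dwords per mailbox: MI_LOAD_REGISTER_IMM(1), the mailbox register
         * offset, and the seqno; the total is rounded up to an even number of
         * dwords below.
         */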
  1293.         num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
  1294.         num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
  1295. #undef MBOX_UPDATE_DWORDS
  1296.  
  1297.         ret = intel_ring_begin(signaller_req, num_dwords);
  1298.         if (ret)
  1299.                 return ret;
  1300.  
  1301.         for_each_ring(useless, dev_priv, i) {
  1302.                 i915_reg_t mbox_reg = signaller->semaphore.mbox.signal[i];
  1303.  
  1304.                 if (i915_mmio_reg_valid(mbox_reg)) {
  1305.                         u32 seqno = i915_gem_request_get_seqno(signaller_req);
  1306.  
  1307.                         intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
  1308.                         intel_ring_emit_reg(signaller, mbox_reg);
  1309.                         intel_ring_emit(signaller, seqno);
  1310.                 }
  1311.         }
  1312.  
  1313.         /* If num_dwords was rounded, make sure the tail pointer is correct */
  1314.         if (num_rings % 2 == 0)
  1315.                 intel_ring_emit(signaller, MI_NOOP);
  1316.  
  1317.         return 0;
  1318. }
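
/*
 * Editor's sketch (not part of the driver): the dword accounting used by
 * gen6_signal above, under the assumption that each mailbox update costs
 * MBOX_UPDATE_DWORDS == 3 dwords (LRI + register + seqno) and that ring
 * emission must stay qword (two-dword) aligned.  Names are illustrative.
 */
static unsigned int example_signal_dwords(unsigned int num_rings,
                                          unsigned int base_dwords)
{
        const unsigned int mbox_update_dwords = 3;
        unsigned int payload = (num_rings - 1) * mbox_update_dwords;

        /* Round the payload up to an even dword count (qword alignment). */
        payload = (payload + 1) & ~1u;

        /*
         * An even number of rings gives an odd payload before rounding, so
         * one MI_NOOP of padding is emitted at the end - exactly the
         * "num_rings % 2 == 0" check in gen6_signal.
         */
        return base_dwords + payload;
}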
  1319.  
  1320. /**
  1321.  * gen6_add_request - Update the semaphore mailbox registers
  1322.  *
  1323.  * @req: request to write to the ring
  1324.  *
  1325.  * Update the mailbox registers in the *other* rings with the current seqno.
  1326.  * This acts like a signal in the canonical semaphore.
  1327.  */
  1328. static int
  1329. gen6_add_request(struct drm_i915_gem_request *req)
  1330. {
  1331.         struct intel_engine_cs *ring = req->ring;
  1332.         int ret;
  1333.  
  1334.         if (ring->semaphore.signal)
  1335.                 ret = ring->semaphore.signal(req, 4);
  1336.         else
  1337.                 ret = intel_ring_begin(req, 4);
  1338.  
  1339.         if (ret)
  1340.                 return ret;
  1341.  
  1342.         intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
  1343.         intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
  1344.         intel_ring_emit(ring, i915_gem_request_get_seqno(req));
  1345.         intel_ring_emit(ring, MI_USER_INTERRUPT);
  1346.         __intel_ring_advance(ring);
  1347.  
  1348.         return 0;
  1349. }
  1350.  
  1351. static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
  1352.                                               u32 seqno)
  1353. {
  1354.         struct drm_i915_private *dev_priv = dev->dev_private;
  1355.         return dev_priv->last_seqno < seqno;
  1356. }
  1357.  
  1358. /**
  1359.  * intel_ring_sync - sync the waiter to the signaller on seqno
  1360.  *
  1361.  * @waiter_req: request on the ring that is waiting
  1362.  * @signaller: ring which has, or will, signal
  1363.  * @seqno: seqno which the waiter will block on
  1364.  */
  1365.  
  1366. static int
  1367. gen8_ring_sync(struct drm_i915_gem_request *waiter_req,
  1368.                struct intel_engine_cs *signaller,
  1369.                u32 seqno)
  1370. {
  1371.         struct intel_engine_cs *waiter = waiter_req->ring;
  1372.         struct drm_i915_private *dev_priv = waiter->dev->dev_private;
  1373.         int ret;
  1374.  
  1375.         ret = intel_ring_begin(waiter_req, 4);
  1376.         if (ret)
  1377.                 return ret;
  1378.  
  1379.         intel_ring_emit(waiter, MI_SEMAPHORE_WAIT |
  1380.                                 MI_SEMAPHORE_GLOBAL_GTT |
  1381.                                 MI_SEMAPHORE_POLL |
  1382.                                 MI_SEMAPHORE_SAD_GTE_SDD);
  1383.         intel_ring_emit(waiter, seqno);
  1384.         intel_ring_emit(waiter,
  1385.                         lower_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
  1386.         intel_ring_emit(waiter,
  1387.                         upper_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
  1388.         intel_ring_advance(waiter);
  1389.         return 0;
  1390. }
  1391.  
  1392. static int
  1393. gen6_ring_sync(struct drm_i915_gem_request *waiter_req,
  1394.                struct intel_engine_cs *signaller,
  1395.                u32 seqno)
  1396. {
  1397.         struct intel_engine_cs *waiter = waiter_req->ring;
  1398.         u32 dw1 = MI_SEMAPHORE_MBOX |
  1399.                   MI_SEMAPHORE_COMPARE |
  1400.                   MI_SEMAPHORE_REGISTER;
  1401.         u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id];
  1402.         int ret;
  1403.  
  1404.         /* Throughout the GEM code, a passed seqno means the wait is
  1405.          * satisfied once the current seqno is >= that value. The hardware
  1406.          * comparison, however, is strictly greater than, so wait on seqno - 1.
  1407.          */
  1408.         seqno -= 1;
  1409.  
  1410.         WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
  1411.  
  1412.         ret = intel_ring_begin(waiter_req, 4);
  1413.         if (ret)
  1414.                 return ret;
  1415.  
  1416.         /* If seqno wrap happened, omit the wait with no-ops */
  1417.         if (likely(!i915_gem_has_seqno_wrapped(waiter->dev, seqno))) {
  1418.                 intel_ring_emit(waiter, dw1 | wait_mbox);
  1419.                 intel_ring_emit(waiter, seqno);
  1420.                 intel_ring_emit(waiter, 0);
  1421.                 intel_ring_emit(waiter, MI_NOOP);
  1422.         } else {
  1423.                 intel_ring_emit(waiter, MI_NOOP);
  1424.                 intel_ring_emit(waiter, MI_NOOP);
  1425.                 intel_ring_emit(waiter, MI_NOOP);
  1426.                 intel_ring_emit(waiter, MI_NOOP);
  1427.         }
  1428.         intel_ring_advance(waiter);
  1429.  
  1430.         return 0;
  1431. }
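
/*
 * Editor's sketch (not part of the driver): the off-by-one applied in
 * gen6_ring_sync above.  GEM semantics are "signalled once the completed
 * seqno is >= target", while the MI_SEMAPHORE_MBOX compare is strictly
 * greater-than, so the operand written into the ring is target - 1.
 * Illustrative names only; the wrap case is handled separately above.
 */
static int example_semaphore_passes(unsigned int completed, unsigned int target)
{
        unsigned int operand = target - 1;      /* what gen6_ring_sync emits */

        /* Hardware test: strictly greater than the programmed operand. */
        return completed > operand;             /* equivalent to completed >= target */
}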
  1432.  
  1433. #define PIPE_CONTROL_FLUSH(ring__, addr__)                                      \
  1434. do {                                                                    \
  1435.         intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |                \
  1436.                  PIPE_CONTROL_DEPTH_STALL);                             \
  1437.         intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);                    \
  1438.         intel_ring_emit(ring__, 0);                                                     \
  1439.         intel_ring_emit(ring__, 0);                                                     \
  1440. } while (0)
  1441.  
  1442. static int
  1443. pc_render_add_request(struct drm_i915_gem_request *req)
  1444. {
  1445.         struct intel_engine_cs *ring = req->ring;
  1446.         u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
  1447.         int ret;
  1448.  
  1449.         /* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
  1450.          * incoherent with writes to memory, i.e. completely fubar,
  1451.          * so we need to use PIPE_NOTIFY instead.
  1452.          *
  1453.          * However, we also need to work around the qword write
  1454.          * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
  1455.          * memory before requesting an interrupt.
  1456.          */
  1457.         ret = intel_ring_begin(req, 32);
  1458.         if (ret)
  1459.                 return ret;
  1460.  
  1461.         intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
  1462.                         PIPE_CONTROL_WRITE_FLUSH |
  1463.                         PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
  1464.         intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
  1465.         intel_ring_emit(ring, i915_gem_request_get_seqno(req));
  1466.         intel_ring_emit(ring, 0);
  1467.         PIPE_CONTROL_FLUSH(ring, scratch_addr);
  1468.         scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
  1469.         PIPE_CONTROL_FLUSH(ring, scratch_addr);
  1470.         scratch_addr += 2 * CACHELINE_BYTES;
  1471.         PIPE_CONTROL_FLUSH(ring, scratch_addr);
  1472.         scratch_addr += 2 * CACHELINE_BYTES;
  1473.         PIPE_CONTROL_FLUSH(ring, scratch_addr);
  1474.         scratch_addr += 2 * CACHELINE_BYTES;
  1475.         PIPE_CONTROL_FLUSH(ring, scratch_addr);
  1476.         scratch_addr += 2 * CACHELINE_BYTES;
  1477.         PIPE_CONTROL_FLUSH(ring, scratch_addr);
  1478.  
  1479.         intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
  1480.                         PIPE_CONTROL_WRITE_FLUSH |
  1481.                         PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
  1482.                         PIPE_CONTROL_NOTIFY);
  1483.         intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
  1484.         intel_ring_emit(ring, i915_gem_request_get_seqno(req));
  1485.         intel_ring_emit(ring, 0);
  1486.         __intel_ring_advance(ring);
  1487.  
  1488.         return 0;
  1489. }
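
/*
 * Editor's sketch (not part of the driver): the scratch layout used by
 * pc_render_add_request above.  Each of the six padding PIPE_CONTROLs
 * writes into its own slot, spaced two cachelines apart so that no two
 * writes share a cacheline.  Assumes 64-byte cachelines; names are
 * illustrative.
 */
static void example_scratch_slots(unsigned int scratch_base,
                                  unsigned int slots[6])
{
        const unsigned int cacheline = 64;
        unsigned int addr = scratch_base + 2 * cacheline;
        int i;

        for (i = 0; i < 6; i++) {
                slots[i] = addr;        /* base + 0x80, 0x100, 0x180, ... */
                addr += 2 * cacheline;  /* keep writes on separate cachelines */
        }
}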
  1490.  
  1491. static u32
  1492. gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
  1493. {
  1494.         /* Workaround to force correct ordering between irq and seqno writes on
  1495.          * ivb (and maybe also on snb) by reading from a CS register (like
  1496.          * ACTHD) before reading the status page. */
  1497.         if (!lazy_coherency) {
  1498.                 struct drm_i915_private *dev_priv = ring->dev->dev_private;
  1499.                 POSTING_READ(RING_ACTHD(ring->mmio_base));
  1500.         }
  1501.  
  1502.         return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
  1503. }
  1504.  
  1505. static u32
  1506. ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
  1507. {
  1508.         return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
  1509. }
  1510.  
  1511. static void
  1512. ring_set_seqno(struct intel_engine_cs *ring, u32 seqno)
  1513. {
  1514.         intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
  1515. }
  1516.  
  1517. static u32
  1518. pc_render_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
  1519. {
  1520.         return ring->scratch.cpu_page[0];
  1521. }
  1522.  
  1523. static void
  1524. pc_render_set_seqno(struct intel_engine_cs *ring, u32 seqno)
  1525. {
  1526.         ring->scratch.cpu_page[0] = seqno;
  1527. }
  1528.  
  1529. static bool
  1530. gen5_ring_get_irq(struct intel_engine_cs *ring)
  1531. {
  1532.         struct drm_device *dev = ring->dev;
  1533.         struct drm_i915_private *dev_priv = dev->dev_private;
  1534.         unsigned long flags;
  1535.  
  1536.         if (WARN_ON(!intel_irqs_enabled(dev_priv)))
  1537.                 return false;
  1538.  
  1539.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1540.         if (ring->irq_refcount++ == 0)
  1541.                 gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
  1542.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1543.  
  1544.         return true;
  1545. }
  1546.  
  1547. static void
  1548. gen5_ring_put_irq(struct intel_engine_cs *ring)
  1549. {
  1550.         struct drm_device *dev = ring->dev;
  1551.         struct drm_i915_private *dev_priv = dev->dev_private;
  1552.         unsigned long flags;
  1553.  
  1554.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1555.         if (--ring->irq_refcount == 0)
  1556.                 gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
  1557.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1558. }
  1559.  
  1560. static bool
  1561. i9xx_ring_get_irq(struct intel_engine_cs *ring)
  1562. {
  1563.         struct drm_device *dev = ring->dev;
  1564.         struct drm_i915_private *dev_priv = dev->dev_private;
  1565.         unsigned long flags;
  1566.  
  1567.         if (!intel_irqs_enabled(dev_priv))
  1568.                 return false;
  1569.  
  1570.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1571.         if (ring->irq_refcount++ == 0) {
  1572.                 dev_priv->irq_mask &= ~ring->irq_enable_mask;
  1573.                 I915_WRITE(IMR, dev_priv->irq_mask);
  1574.                 POSTING_READ(IMR);
  1575.         }
  1576.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1577.  
  1578.         return true;
  1579. }
  1580.  
  1581. static void
  1582. i9xx_ring_put_irq(struct intel_engine_cs *ring)
  1583. {
  1584.         struct drm_device *dev = ring->dev;
  1585.         struct drm_i915_private *dev_priv = dev->dev_private;
  1586.         unsigned long flags;
  1587.  
  1588.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1589.         if (--ring->irq_refcount == 0) {
  1590.                 dev_priv->irq_mask |= ring->irq_enable_mask;
  1591.                 I915_WRITE(IMR, dev_priv->irq_mask);
  1592.                 POSTING_READ(IMR);
  1593.         }
  1594.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1595. }
  1596.  
  1597. static bool
  1598. i8xx_ring_get_irq(struct intel_engine_cs *ring)
  1599. {
  1600.         struct drm_device *dev = ring->dev;
  1601.         struct drm_i915_private *dev_priv = dev->dev_private;
  1602.         unsigned long flags;
  1603.  
  1604.         if (!intel_irqs_enabled(dev_priv))
  1605.                 return false;
  1606.  
  1607.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1608.         if (ring->irq_refcount++ == 0) {
  1609.                 dev_priv->irq_mask &= ~ring->irq_enable_mask;
  1610.                 I915_WRITE16(IMR, dev_priv->irq_mask);
  1611.                 POSTING_READ16(IMR);
  1612.         }
  1613.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1614.  
  1615.         return true;
  1616. }
  1617.  
  1618. static void
  1619. i8xx_ring_put_irq(struct intel_engine_cs *ring)
  1620. {
  1621.         struct drm_device *dev = ring->dev;
  1622.         struct drm_i915_private *dev_priv = dev->dev_private;
  1623.         unsigned long flags;
  1624.  
  1625.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1626.         if (--ring->irq_refcount == 0) {
  1627.                 dev_priv->irq_mask |= ring->irq_enable_mask;
  1628.                 I915_WRITE16(IMR, dev_priv->irq_mask);
  1629.                 POSTING_READ16(IMR);
  1630.         }
  1631.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1632. }
  1633.  
  1634. static int
  1635. bsd_ring_flush(struct drm_i915_gem_request *req,
  1636.                u32     invalidate_domains,
  1637.                u32     flush_domains)
  1638. {
  1639.         struct intel_engine_cs *ring = req->ring;
  1640.         int ret;
  1641.  
  1642.         ret = intel_ring_begin(req, 2);
  1643.         if (ret)
  1644.                 return ret;
  1645.  
  1646.         intel_ring_emit(ring, MI_FLUSH);
  1647.         intel_ring_emit(ring, MI_NOOP);
  1648.         intel_ring_advance(ring);
  1649.         return 0;
  1650. }
  1651.  
  1652. static int
  1653. i9xx_add_request(struct drm_i915_gem_request *req)
  1654. {
  1655.         struct intel_engine_cs *ring = req->ring;
  1656.         int ret;
  1657.  
  1658.         ret = intel_ring_begin(req, 4);
  1659.         if (ret)
  1660.                 return ret;
  1661.  
  1662.         intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
  1663.         intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
  1664.         intel_ring_emit(ring, i915_gem_request_get_seqno(req));
  1665.         intel_ring_emit(ring, MI_USER_INTERRUPT);
  1666.         __intel_ring_advance(ring);
  1667.  
  1668.         return 0;
  1669. }
  1670.  
  1671. static bool
  1672. gen6_ring_get_irq(struct intel_engine_cs *ring)
  1673. {
  1674.         struct drm_device *dev = ring->dev;
  1675.         struct drm_i915_private *dev_priv = dev->dev_private;
  1676.         unsigned long flags;
  1677.  
  1678.         if (WARN_ON(!intel_irqs_enabled(dev_priv)))
  1679.                 return false;
  1680.  
  1681.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1682.         if (ring->irq_refcount++ == 0) {
  1683.                 if (HAS_L3_DPF(dev) && ring->id == RCS)
  1684.                         I915_WRITE_IMR(ring,
  1685.                                        ~(ring->irq_enable_mask |
  1686.                                          GT_PARITY_ERROR(dev)));
  1687.                 else
  1688.                         I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
  1689.                 gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
  1690.         }
  1691.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1692.  
  1693.         return true;
  1694. }
  1695.  
  1696. static void
  1697. gen6_ring_put_irq(struct intel_engine_cs *ring)
  1698. {
  1699.         struct drm_device *dev = ring->dev;
  1700.         struct drm_i915_private *dev_priv = dev->dev_private;
  1701.         unsigned long flags;
  1702.  
  1703.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1704.         if (--ring->irq_refcount == 0) {
  1705.                 if (HAS_L3_DPF(dev) && ring->id == RCS)
  1706.                         I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
  1707.                 else
  1708.                         I915_WRITE_IMR(ring, ~0);
  1709.                 gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
  1710.         }
  1711.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1712. }
  1713.  
  1714. static bool
  1715. hsw_vebox_get_irq(struct intel_engine_cs *ring)
  1716. {
  1717.         struct drm_device *dev = ring->dev;
  1718.         struct drm_i915_private *dev_priv = dev->dev_private;
  1719.         unsigned long flags;
  1720.  
  1721.         if (WARN_ON(!intel_irqs_enabled(dev_priv)))
  1722.                 return false;
  1723.  
  1724.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1725.         if (ring->irq_refcount++ == 0) {
  1726.                 I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
  1727.                 gen6_enable_pm_irq(dev_priv, ring->irq_enable_mask);
  1728.         }
  1729.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1730.  
  1731.         return true;
  1732. }
  1733.  
  1734. static void
  1735. hsw_vebox_put_irq(struct intel_engine_cs *ring)
  1736. {
  1737.         struct drm_device *dev = ring->dev;
  1738.         struct drm_i915_private *dev_priv = dev->dev_private;
  1739.         unsigned long flags;
  1740.  
  1741.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1742.         if (--ring->irq_refcount == 0) {
  1743.                 I915_WRITE_IMR(ring, ~0);
  1744.                 gen6_disable_pm_irq(dev_priv, ring->irq_enable_mask);
  1745.         }
  1746.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1747. }
  1748.  
  1749. static bool
  1750. gen8_ring_get_irq(struct intel_engine_cs *ring)
  1751. {
  1752.         struct drm_device *dev = ring->dev;
  1753.         struct drm_i915_private *dev_priv = dev->dev_private;
  1754.         unsigned long flags;
  1755.  
  1756.         if (WARN_ON(!intel_irqs_enabled(dev_priv)))
  1757.                 return false;
  1758.  
  1759.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1760.         if (ring->irq_refcount++ == 0) {
  1761.                 if (HAS_L3_DPF(dev) && ring->id == RCS) {
  1762.                         I915_WRITE_IMR(ring,
  1763.                                        ~(ring->irq_enable_mask |
  1764.                                          GT_RENDER_L3_PARITY_ERROR_INTERRUPT));
  1765.                 } else {
  1766.                         I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
  1767.                 }
  1768.                 POSTING_READ(RING_IMR(ring->mmio_base));
  1769.         }
  1770.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1771.  
  1772.         return true;
  1773. }
  1774.  
  1775. static void
  1776. gen8_ring_put_irq(struct intel_engine_cs *ring)
  1777. {
  1778.         struct drm_device *dev = ring->dev;
  1779.         struct drm_i915_private *dev_priv = dev->dev_private;
  1780.         unsigned long flags;
  1781.  
  1782.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1783.         if (--ring->irq_refcount == 0) {
  1784.                 if (HAS_L3_DPF(dev) && ring->id == RCS) {
  1785.                         I915_WRITE_IMR(ring,
  1786.                                        ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
  1787.                 } else {
  1788.                         I915_WRITE_IMR(ring, ~0);
  1789.                 }
  1790.                 POSTING_READ(RING_IMR(ring->mmio_base));
  1791.         }
  1792.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1793. }
  1794.  
  1795. static int
  1796. i965_dispatch_execbuffer(struct drm_i915_gem_request *req,
  1797.                          u64 offset, u32 length,
  1798.                          unsigned dispatch_flags)
  1799. {
  1800.         struct intel_engine_cs *ring = req->ring;
  1801.         int ret;
  1802.  
  1803.         ret = intel_ring_begin(req, 2);
  1804.         if (ret)
  1805.                 return ret;
  1806.  
  1807.         intel_ring_emit(ring,
  1808.                         MI_BATCH_BUFFER_START |
  1809.                         MI_BATCH_GTT |
  1810.                         (dispatch_flags & I915_DISPATCH_SECURE ?
  1811.                          0 : MI_BATCH_NON_SECURE_I965));
  1812.         intel_ring_emit(ring, offset);
  1813.         intel_ring_advance(ring);
  1814.  
  1815.         return 0;
  1816. }
  1817.  
  1818. /* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
  1819. #define I830_BATCH_LIMIT (256*1024)
  1820. #define I830_TLB_ENTRIES (2)
  1821. #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
  1822. static int
  1823. i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
  1824.                          u64 offset, u32 len,
  1825.                          unsigned dispatch_flags)
  1826. {
  1827.         struct intel_engine_cs *ring = req->ring;
  1828.         u32 cs_offset = ring->scratch.gtt_offset;
  1829.         int ret;
  1830.  
  1831.         ret = intel_ring_begin(req, 6);
  1832.         if (ret)
  1833.                 return ret;
  1834.  
  1835.         /* Evict the invalid PTE TLBs */
  1836.         intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA);
  1837.         intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096);
  1838.         intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */
  1839.         intel_ring_emit(ring, cs_offset);
  1840.         intel_ring_emit(ring, 0xdeadbeef);
  1841.         intel_ring_emit(ring, MI_NOOP);
  1842.         intel_ring_advance(ring);
  1843.  
  1844.         if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
  1845.                 if (len > I830_BATCH_LIMIT)
  1846.                         return -ENOSPC;
  1847.  
  1848.                 ret = intel_ring_begin(req, 6 + 2);
  1849.                 if (ret)
  1850.                         return ret;
  1851.  
  1852.                 /* Blit the batch (which now has all relocs applied) to the
  1853.                  * stable batch scratch bo area (so that the CS never
  1854.                  * stumbles over its tlb invalidation bug) ...
  1855.                  */
  1856.                 intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA);
  1857.                 intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096);
  1858.                 intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096);
  1859.                 intel_ring_emit(ring, cs_offset);
  1860.                 intel_ring_emit(ring, 4096);
  1861.                 intel_ring_emit(ring, offset);
  1862.  
  1863.                 intel_ring_emit(ring, MI_FLUSH);
  1864.                 intel_ring_emit(ring, MI_NOOP);
  1865.                 intel_ring_advance(ring);
  1866.  
  1867.                 /* ... and execute it. */
  1868.                 offset = cs_offset;
  1869.         }
  1870.  
  1871.         ret = intel_ring_begin(req, 4);
  1872.         if (ret)
  1873.                 return ret;
  1874.  
  1875.         intel_ring_emit(ring, MI_BATCH_BUFFER);
  1876.         intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
  1877.                                         0 : MI_BATCH_NON_SECURE));
  1878.         intel_ring_emit(ring, offset + len - 8);
  1879.         intel_ring_emit(ring, MI_NOOP);
  1880.         intel_ring_advance(ring);
  1881.  
  1882.         return 0;
  1883. }
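
/*
 * Editor's sketch (not part of the driver): how i830_dispatch_execbuffer
 * above sizes the blit that copies the batch into the stable scratch bo.
 * The copy is treated as a 4096-byte-wide image with one row per page,
 * the row count packed into the upper 16 bits of the size dword.
 * Illustrative name only.
 */
static unsigned int example_i830_copy_size(unsigned int len)
{
        unsigned int pages = (len + 4095) / 4096;       /* DIV_ROUND_UP(len, 4096) */

        /* upper 16 bits: rows (pages), lower 16 bits: row width in bytes */
        return (pages << 16) | 4096;
}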
  1884.  
  1885. static int
  1886. i915_dispatch_execbuffer(struct drm_i915_gem_request *req,
  1887.                          u64 offset, u32 len,
  1888.                          unsigned dispatch_flags)
  1889. {
  1890.         struct intel_engine_cs *ring = req->ring;
  1891.         int ret;
  1892.  
  1893.         ret = intel_ring_begin(req, 2);
  1894.         if (ret)
  1895.                 return ret;
  1896.  
  1897.         intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
  1898.         intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
  1899.                                         0 : MI_BATCH_NON_SECURE));
  1900.         intel_ring_advance(ring);
  1901.  
  1902.         return 0;
  1903. }
  1904.  
  1905. static void cleanup_phys_status_page(struct intel_engine_cs *ring)
  1906. {
  1907.         struct drm_i915_private *dev_priv = to_i915(ring->dev);
  1908.  
  1909.         if (!dev_priv->status_page_dmah)
  1910.                 return;
  1911.  
  1912.         drm_pci_free(ring->dev, dev_priv->status_page_dmah);
  1913.         ring->status_page.page_addr = NULL;
  1914. }
  1915.  
  1916. static void cleanup_status_page(struct intel_engine_cs *ring)
  1917. {
  1918.         struct drm_i915_gem_object *obj;
  1919.  
  1920.         obj = ring->status_page.obj;
  1921.         if (obj == NULL)
  1922.                 return;
  1923.  
  1924.         kunmap(sg_page(obj->pages->sgl));
  1925.         i915_gem_object_ggtt_unpin(obj);
  1926.         drm_gem_object_unreference(&obj->base);
  1927.         ring->status_page.obj = NULL;
  1928. }
  1929.  
  1930. static int init_status_page(struct intel_engine_cs *ring)
  1931. {
  1932.         struct drm_i915_gem_object *obj = ring->status_page.obj;
  1933.  
  1934.         if (obj == NULL) {
  1935.                 unsigned flags;
  1936.                 int ret;
  1937.  
  1938.                 obj = i915_gem_alloc_object(ring->dev, 4096);
  1939.                 if (obj == NULL) {
  1940.                         DRM_ERROR("Failed to allocate status page\n");
  1941.                         return -ENOMEM;
  1942.                 }
  1943.  
  1944.                 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
  1945.                 if (ret)
  1946.                         goto err_unref;
  1947.  
  1948.                 flags = 0;
  1949.                 if (!HAS_LLC(ring->dev))
  1950.                         /* On g33, we cannot place HWS above 256MiB, so
  1951.                          * restrict its pinning to the low mappable arena.
  1952.                          * Though this restriction is not documented for
  1953.                          * gen4, gen5, or byt, they also behave similarly
  1954.                          * and hang if the HWS is placed at the top of the
  1955.                          * GTT. To generalise, it appears that all !llc
  1956.                          * platforms have issues with us placing the HWS
  1957.                          * above the mappable region (even though we never
  1958.                          * actually map it).
  1959.                          */
  1960.                         flags |= PIN_MAPPABLE;
  1961.                 ret = i915_gem_obj_ggtt_pin(obj, 4096, flags);
  1962.                 if (ret) {
  1963. err_unref:
  1964.                         drm_gem_object_unreference(&obj->base);
  1965.                         return ret;
  1966.                 }
  1967.  
  1968.                 ring->status_page.obj = obj;
  1969.         }
  1970.  
  1971.         ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
  1972.         ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
  1973.         memset(ring->status_page.page_addr, 0, PAGE_SIZE);
  1974.  
  1975.         DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
  1976.                         ring->name, ring->status_page.gfx_addr);
  1977.  
  1978.         return 0;
  1979. }
  1980.  
  1981. static int init_phys_status_page(struct intel_engine_cs *ring)
  1982. {
  1983.         struct drm_i915_private *dev_priv = ring->dev->dev_private;
  1984.  
  1985.         if (!dev_priv->status_page_dmah) {
  1986.                 dev_priv->status_page_dmah =
  1987.                         drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE);
  1988.                 if (!dev_priv->status_page_dmah)
  1989.                         return -ENOMEM;
  1990.         }
  1991.  
  1992.         ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
  1993.         memset(ring->status_page.page_addr, 0, PAGE_SIZE);
  1994.  
  1995.         return 0;
  1996. }
  1997.  
  1998. void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
  1999. {
  2000.         iounmap(ringbuf->virtual_start);
  2001.         ringbuf->virtual_start = NULL;
  2002.         i915_gem_object_ggtt_unpin(ringbuf->obj);
  2003. }
  2004.  
  2005. int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
  2006.                                      struct intel_ringbuffer *ringbuf)
  2007. {
  2008.         struct drm_i915_private *dev_priv = to_i915(dev);
  2009.         struct drm_i915_gem_object *obj = ringbuf->obj;
  2010.         /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
  2011.         unsigned flags = PIN_OFFSET_BIAS | 4096;
  2012.         int ret;
  2013.  
  2014.         ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
  2015.         if (ret)
  2016.                 return ret;
  2017.  
  2018.         ret = i915_gem_object_set_to_gtt_domain(obj, true);
  2019.         if (ret) {
  2020.                 i915_gem_object_ggtt_unpin(obj);
  2021.                 return ret;
  2022.         }
  2023.  
  2024.         ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
  2025.                         i915_gem_obj_ggtt_offset(obj), ringbuf->size);
  2026.         if (ringbuf->virtual_start == NULL) {
  2027.                 i915_gem_object_ggtt_unpin(obj);
  2028.                 return -EINVAL;
  2029.         }
  2030.  
  2031.         return 0;
  2032. }
  2033.  
  2034. static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
  2035. {
  2036.         drm_gem_object_unreference(&ringbuf->obj->base);
  2037.         ringbuf->obj = NULL;
  2038. }
  2039.  
  2040. static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
  2041.                                       struct intel_ringbuffer *ringbuf)
  2042. {
  2043.         struct drm_i915_gem_object *obj;
  2044.  
  2045.         obj = NULL;
  2046.         if (!HAS_LLC(dev))
  2047.                 obj = i915_gem_object_create_stolen(dev, ringbuf->size);
  2048.         if (obj == NULL)
  2049.                 obj = i915_gem_alloc_object(dev, ringbuf->size);
  2050.         if (obj == NULL)
  2051.                 return -ENOMEM;
  2052.  
  2053.         /* mark ring buffers as read-only from GPU side by default */
  2054.         obj->gt_ro = 1;
  2055.  
  2056.         ringbuf->obj = obj;
  2057.  
  2058.         return 0;
  2059. }
  2060.  
  2061. struct intel_ringbuffer *
  2062. intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size)
  2063. {
  2064.         struct intel_ringbuffer *ring;
  2065.         int ret;
  2066.  
  2067.         ring = kzalloc(sizeof(*ring), GFP_KERNEL);
  2068.         if (ring == NULL) {
  2069.                 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
  2070.                                  engine->name);
  2071.                 return ERR_PTR(-ENOMEM);
  2072.         }
  2073.  
  2074.         ring->ring = engine;
  2075.         list_add(&ring->link, &engine->buffers);
  2076.  
  2077.         ring->size = size;
  2078.         /* Workaround an erratum on the i830 which causes a hang if
  2079.          * the TAIL pointer points to within the last 2 cachelines
  2080.          * of the buffer.
  2081.          */
  2082.         ring->effective_size = size;
  2083.         if (IS_I830(engine->dev) || IS_845G(engine->dev))
  2084.                 ring->effective_size -= 2 * CACHELINE_BYTES;
  2085.  
  2086.         ring->last_retired_head = -1;
  2087.         intel_ring_update_space(ring);
  2088.  
  2089.         ret = intel_alloc_ringbuffer_obj(engine->dev, ring);
  2090.         if (ret) {
  2091.                 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n",
  2092.                           engine->name, ret);
  2093.                 list_del(&ring->link);
  2094.                 kfree(ring);
  2095.                 return ERR_PTR(ret);
  2096.         }
  2097.  
  2098.         return ring;
  2099. }
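
/*
 * Editor's sketch (not part of the driver): the i830/845 erratum handling
 * in intel_engine_create_ringbuffer above.  The usable ("effective") size
 * is shrunk by two cachelines so the TAIL pointer can never land inside
 * the last two cachelines of the buffer.  Assumes 64-byte cachelines;
 * names are illustrative.
 */
static int example_effective_size(int size, int needs_i830_workaround)
{
        const int cacheline = 64;

        return needs_i830_workaround ? size - 2 * cacheline : size;
}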
  2100.  
  2101. void
  2102. intel_ringbuffer_free(struct intel_ringbuffer *ring)
  2103. {
  2104.         intel_destroy_ringbuffer_obj(ring);
  2105.         list_del(&ring->link);
  2106.         kfree(ring);
  2107. }
  2108.  
  2109. static int intel_init_ring_buffer(struct drm_device *dev,
  2110.                                   struct intel_engine_cs *ring)
  2111. {
  2112.         struct intel_ringbuffer *ringbuf;
  2113.         int ret;
  2114.  
  2115.         WARN_ON(ring->buffer);
  2116.  
  2117.         ring->dev = dev;
  2118.         INIT_LIST_HEAD(&ring->active_list);
  2119.         INIT_LIST_HEAD(&ring->request_list);
  2120.         INIT_LIST_HEAD(&ring->execlist_queue);
  2121.         INIT_LIST_HEAD(&ring->buffers);
  2122.         i915_gem_batch_pool_init(dev, &ring->batch_pool);
  2123.         memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
  2124.  
  2125.         init_waitqueue_head(&ring->irq_queue);
  2126.  
  2127.         ringbuf = intel_engine_create_ringbuffer(ring, 32 * PAGE_SIZE);
  2128.         if (IS_ERR(ringbuf)) {
  2129.                 ret = PTR_ERR(ringbuf);
  2130.                 goto error;
  2131.         }
  2132.         ring->buffer = ringbuf;
  2133.  
  2134.         if (I915_NEED_GFX_HWS(dev)) {
  2135.                 ret = init_status_page(ring);
  2136.                 if (ret)
  2137.                         goto error;
  2138.         } else {
  2139.                 WARN_ON(ring->id != RCS);
  2140.                 ret = init_phys_status_page(ring);
  2141.                 if (ret)
  2142.                         goto error;
  2143.         }
  2144.  
  2145.         ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
  2146.         if (ret) {
  2147.                 DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
  2148.                                 ring->name, ret);
  2149.                 intel_destroy_ringbuffer_obj(ringbuf);
  2150.                 goto error;
  2151.         }
  2152.  
  2153.         ret = i915_cmd_parser_init_ring(ring);
  2154.         if (ret)
  2155.                 goto error;
  2156.  
  2157.         return 0;
  2158.  
  2159. error:
  2160.         intel_cleanup_ring_buffer(ring);
  2161.         return ret;
  2162. }
  2163.  
  2164. void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
  2165. {
  2166.         struct drm_i915_private *dev_priv;
  2167.  
  2168.         if (!intel_ring_initialized(ring))
  2169.                 return;
  2170.  
  2171.         dev_priv = to_i915(ring->dev);
  2172.  
  2173.         if (ring->buffer) {
  2174.                 intel_stop_ring_buffer(ring);
  2175.                 WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
  2176.  
  2177.                 intel_unpin_ringbuffer_obj(ring->buffer);
  2178.                 intel_ringbuffer_free(ring->buffer);
  2179.                 ring->buffer = NULL;
  2180.         }
  2181.  
  2182.         if (ring->cleanup)
  2183.                 ring->cleanup(ring);
  2184.  
  2185.         if (I915_NEED_GFX_HWS(ring->dev)) {
  2186.                 cleanup_status_page(ring);
  2187.         } else {
  2188.                 WARN_ON(ring->id != RCS);
  2189.                 cleanup_phys_status_page(ring);
  2190.         }
  2191.  
  2192.         i915_cmd_parser_fini_ring(ring);
  2193.         i915_gem_batch_pool_fini(&ring->batch_pool);
  2194.         ring->dev = NULL;
  2195. }
  2196.  
  2197. static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
  2198. {
  2199.         struct intel_ringbuffer *ringbuf = ring->buffer;
  2200.         struct drm_i915_gem_request *request;
  2201.         unsigned space;
  2202.         int ret;
  2203.  
  2204.         if (intel_ring_space(ringbuf) >= n)
  2205.                 return 0;
  2206.  
  2207.         /* The whole point of reserving space is to not wait! */
  2208.         WARN_ON(ringbuf->reserved_in_use);
  2209.  
  2210.         list_for_each_entry(request, &ring->request_list, list) {
  2211.                 space = __intel_ring_space(request->postfix, ringbuf->tail,
  2212.                                            ringbuf->size);
  2213.                 if (space >= n)
  2214.                         break;
  2215.         }
  2216.  
  2217.         if (WARN_ON(&request->list == &ring->request_list))
  2218.                 return -ENOSPC;
  2219.  
  2220.         ret = i915_wait_request(request);
  2221.         if (ret)
  2222.                 return ret;
  2223.  
  2224.         ringbuf->space = space;
  2225.         return 0;
  2226. }
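
/*
 * Editor's sketch (not part of the driver): the search performed by
 * ring_wait_for_space above.  Given the space that would be free once each
 * outstanding request retires (oldest first), pick the first request whose
 * completion yields at least n bytes; waiting on anything earlier would not
 * help, and waiting on anything later wastes time.  Illustrative names;
 * space_after[] stands in for the per-request computation done against
 * request->postfix.
 */
static int example_pick_request(const unsigned int *space_after,
                                int num_requests, unsigned int n)
{
        int i;

        for (i = 0; i < num_requests; i++) {
                if (space_after[i] >= n)
                        return i;       /* wait on this request, then reuse its space */
        }
        return -1;                      /* nothing helps: caller reports -ENOSPC */
}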
  2227.  
  2228. static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
  2229. {
  2230.         uint32_t __iomem *virt;
  2231.         int rem = ringbuf->size - ringbuf->tail;
  2232.  
  2233.         virt = ringbuf->virtual_start + ringbuf->tail;
  2234.         rem /= 4;
  2235.         while (rem--)
  2236.                 iowrite32(MI_NOOP, virt++);
  2237.  
  2238.         ringbuf->tail = 0;
  2239.         intel_ring_update_space(ringbuf);
  2240. }
  2241.  
  2242. int intel_ring_idle(struct intel_engine_cs *ring)
  2243. {
  2244.         struct drm_i915_gem_request *req;
  2245.  
  2246.         /* Wait upon the last request to be completed */
  2247.         if (list_empty(&ring->request_list))
  2248.                 return 0;
  2249.  
  2250.         req = list_entry(ring->request_list.prev,
  2251.                         struct drm_i915_gem_request,
  2252.                         list);
  2253.  
  2254.         /* Make sure we do not trigger any retires */
  2255.         return __i915_wait_request(req,
  2256.                                    atomic_read(&to_i915(ring->dev)->gpu_error.reset_counter),
  2257.                                    to_i915(ring->dev)->mm.interruptible,
  2258.                                    NULL, NULL);
  2259. }
  2260.  
  2261. int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
  2262. {
  2263.         request->ringbuf = request->ring->buffer;
  2264.         return 0;
  2265. }
  2266.  
  2267. int intel_ring_reserve_space(struct drm_i915_gem_request *request)
  2268. {
  2269.         /*
  2270.          * The first call merely notes the reserve request and is common for
  2271.          * all back ends. The subsequent localised _begin() call actually
  2272.          * ensures that the reservation is available. Without the begin, if
  2273.          * the request creator immediately submitted the request without
  2274.          * adding any commands to it then there might not actually be
  2275.          * sufficient room for the submission commands.
  2276.          */
  2277.         intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
  2278.  
  2279.         return intel_ring_begin(request, 0);
  2280. }
  2281.  
  2282. void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size)
  2283. {
  2284.         WARN_ON(ringbuf->reserved_size);
  2285.         WARN_ON(ringbuf->reserved_in_use);
  2286.  
  2287.         ringbuf->reserved_size = size;
  2288. }
  2289.  
  2290. void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf)
  2291. {
  2292.         WARN_ON(ringbuf->reserved_in_use);
  2293.  
  2294.         ringbuf->reserved_size   = 0;
  2295.         ringbuf->reserved_in_use = false;
  2296. }
  2297.  
  2298. void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf)
  2299. {
  2300.         WARN_ON(ringbuf->reserved_in_use);
  2301.  
  2302.         ringbuf->reserved_in_use = true;
  2303.         ringbuf->reserved_tail   = ringbuf->tail;
  2304. }
  2305.  
  2306. void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
  2307. {
  2308.         WARN_ON(!ringbuf->reserved_in_use);
  2309.         if (ringbuf->tail > ringbuf->reserved_tail) {
  2310.                 WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
  2311.                      "request reserved size too small: %d vs %d!\n",
  2312.                      ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
  2313.         } else {
  2314.                 /*
  2315.                  * The ring was wrapped while the reserved space was in use.
  2316.                  * That means that some unknown amount of the ring tail was
  2317.                  * no-op filled and skipped. Thus simply adding the ring size
  2318.                  * to the tail and doing the above space check will not work.
  2319.                  * Rather than attempt to track how much tail was skipped,
  2320.                  * it is much simpler to say that also skipping the sanity
  2321.                  * check every once in a while is not a big issue.
  2322.                  */
  2323.         }
  2324.  
  2325.         ringbuf->reserved_size   = 0;
  2326.         ringbuf->reserved_in_use = false;
  2327. }
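
/*
 * Editor's sketch (not part of the driver): the sanity check in
 * intel_ring_reserved_space_end above.  When the tail has not wrapped, the
 * bytes consumed since _use() must fit inside the reservation; after a
 * wrap an unknown amount of no-op filler was skipped, so the check is
 * simply not attempted.  Illustrative names only.
 */
static int example_reservation_ok(unsigned int tail, unsigned int reserved_tail,
                                  unsigned int reserved_size)
{
        if (tail <= reserved_tail)
                return 1;       /* wrapped: consumption unknown, skip the check */

        return (tail - reserved_tail) <= reserved_size;
}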
  2328.  
  2329. static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)
  2330. {
  2331.         struct intel_ringbuffer *ringbuf = ring->buffer;
  2332.         int remain_usable = ringbuf->effective_size - ringbuf->tail;
  2333.         int remain_actual = ringbuf->size - ringbuf->tail;
  2334.         int ret, total_bytes, wait_bytes = 0;
  2335.         bool need_wrap = false;
  2336.  
  2337.         if (ringbuf->reserved_in_use)
  2338.                 total_bytes = bytes;
  2339.         else
  2340.                 total_bytes = bytes + ringbuf->reserved_size;
  2341.  
  2342.         if (unlikely(bytes > remain_usable)) {
  2343.                 /*
  2344.                  * Not enough space for the basic request. So need to flush
  2345.                  * out the remainder and then wait for base + reserved.
  2346.                  */
  2347.                 wait_bytes = remain_actual + total_bytes;
  2348.                 need_wrap = true;
  2349.         } else {
  2350.                 if (unlikely(total_bytes > remain_usable)) {
  2351.                         /*
  2352.                          * The base request will fit but the reserved space
  2353.                          * falls off the end. So don't need an immediate wrap
  2354.                          * and only need to effectively wait for the reserved
  2355.                          * size space from the start of ringbuffer.
  2356.                          */
  2357.                         wait_bytes = remain_actual + ringbuf->reserved_size;
  2358.                 } else if (total_bytes > ringbuf->space) {
  2359.                         /* No wrapping required, just waiting. */
  2360.                         wait_bytes = total_bytes;
  2361.                 }
  2362.         }
  2363.  
  2364.         if (wait_bytes) {
  2365.                 ret = ring_wait_for_space(ring, wait_bytes);
  2366.                 if (unlikely(ret))
  2367.                         return ret;
  2368.  
  2369.                 if (need_wrap)
  2370.                         __wrap_ring_buffer(ringbuf);
  2371.         }
  2372.  
  2373.         return 0;
  2374. }
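
/*
 * Editor's sketch (not part of the driver): the three cases decided by
 * __intel_ring_prepare above.  "usable" excludes the i830 no-go area at the
 * end of the ring, "actual" does not, and "reserved" is the space set aside
 * for the eventual request submission.  Illustrative names only.
 */
struct example_prepare {
        unsigned int wait_bytes;        /* how much free space to wait for */
        int need_wrap;                  /* fill the tail with MI_NOOPs first? */
};

static struct example_prepare
example_prepare(unsigned int bytes, unsigned int total_bytes,
                unsigned int remain_usable, unsigned int remain_actual,
                unsigned int reserved_size, unsigned int space)
{
        struct example_prepare p = { 0, 0 };

        if (bytes > remain_usable) {
                /* The request itself does not fit before the end: wrap, then wait. */
                p.wait_bytes = remain_actual + total_bytes;
                p.need_wrap = 1;
        } else if (total_bytes > remain_usable) {
                /* The request fits; only the reservation spills past the end. */
                p.wait_bytes = remain_actual + reserved_size;
        } else if (total_bytes > space) {
                /* Everything fits before the end, just not enough free space yet. */
                p.wait_bytes = total_bytes;
        }
        return p;
}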
  2375.  
  2376. int intel_ring_begin(struct drm_i915_gem_request *req,
  2377.                      int num_dwords)
  2378. {
  2379.         struct intel_engine_cs *ring;
  2380.         struct drm_i915_private *dev_priv;
  2381.         int ret;
  2382.  
  2383.         WARN_ON(req == NULL);
  2384.         ring = req->ring;
  2385.         dev_priv = ring->dev->dev_private;
  2386.  
  2387.         ret = i915_gem_check_wedge(&dev_priv->gpu_error,
  2388.                                    dev_priv->mm.interruptible);
  2389.         if (ret)
  2390.                 return ret;
  2391.  
  2392.         ret = __intel_ring_prepare(ring, num_dwords * sizeof(uint32_t));
  2393.         if (ret)
  2394.                 return ret;
  2395.  
  2396.         ring->buffer->space -= num_dwords * sizeof(uint32_t);
  2397.         return 0;
  2398. }
  2399.  
  2400. /* Align the ring tail to a cacheline boundary */
  2401. int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
  2402. {
  2403.         struct intel_engine_cs *ring = req->ring;
  2404.         int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
  2405.         int ret;
  2406.  
  2407.         if (num_dwords == 0)
  2408.                 return 0;
  2409.  
  2410.         num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
  2411.         ret = intel_ring_begin(req, num_dwords);
  2412.         if (ret)
  2413.                 return ret;
  2414.  
  2415.         while (num_dwords--)
  2416.                 intel_ring_emit(ring, MI_NOOP);
  2417.  
  2418.         intel_ring_advance(ring);
  2419.  
  2420.         return 0;
  2421. }
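
/*
 * Editor's sketch (not part of the driver): the padding computed by
 * intel_ring_cacheline_align above.  With 64-byte cachelines and 4-byte
 * dwords, the tail's offset into its cacheline is turned into the number
 * of MI_NOOPs needed to reach the next boundary.  Illustrative name only.
 */
static unsigned int example_align_noops(unsigned int tail_bytes)
{
        const unsigned int cacheline = 64, dword = 4;
        unsigned int rem = (tail_bytes & (cacheline - 1)) / dword;

        return rem ? (cacheline / dword) - rem : 0;     /* e.g. tail at +0x28 -> 6 NOOPs */
}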
  2422.  
  2423. void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
  2424. {
  2425.         struct drm_device *dev = ring->dev;
  2426.         struct drm_i915_private *dev_priv = dev->dev_private;
  2427.  
  2428.         if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
  2429.                 I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
  2430.                 I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
  2431.                 if (HAS_VEBOX(dev))
  2432.                         I915_WRITE(RING_SYNC_2(ring->mmio_base), 0);
  2433.         }
  2434.  
  2435.         ring->set_seqno(ring, seqno);
  2436.         ring->hangcheck.seqno = seqno;
  2437. }
  2438.  
  2439. static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring,
  2440.                                      u32 value)
  2441. {
  2442.         struct drm_i915_private *dev_priv = ring->dev->dev_private;
  2443.  
  2444.         /* Every tail move must follow the sequence below */
  2445.  
  2446.         /* Disable notification that the ring is IDLE. The GT
  2447.          * will then assume that it is busy and bring it out of rc6.
  2448.          */
  2449.         I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
  2450.                    _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
  2451.  
  2452.         /* Clear the context id. Here be magic! */
  2453.         I915_WRITE64(GEN6_BSD_RNCID, 0x0);
  2454.  
  2455.         /* Wait for the ring not to be idle, i.e. for it to wake up. */
  2456.         if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
  2457.                       GEN6_BSD_SLEEP_INDICATOR) == 0,
  2458.                      50))
  2459.                 DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
  2460.  
  2461.         /* Now that the ring is fully powered up, update the tail */
  2462.         I915_WRITE_TAIL(ring, value);
  2463.         POSTING_READ(RING_TAIL(ring->mmio_base));
  2464.  
  2465.         /* Let the ring send IDLE messages to the GT again,
  2466.          * and so let it sleep to conserve power when idle.
  2467.          */
  2468.         I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
  2469.                    _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
  2470. }
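
/*
 * Editor's sketch (not part of the driver): the masked-register writes used
 * by gen6_bsd_ring_write_tail above.  For this class of register the upper
 * 16 bits of the written value select which of the lower 16 bits actually
 * change, so a single write can flip one bit without a read-modify-write.
 * Illustrative helpers; the driver's own macros are _MASKED_BIT_ENABLE()
 * and _MASKED_BIT_DISABLE().
 */
static unsigned int example_masked_bit_enable(unsigned int bit)
{
        return (bit << 16) | bit;       /* unmask the bit and set it */
}

static unsigned int example_masked_bit_disable(unsigned int bit)
{
        return bit << 16;               /* unmask the bit and clear it */
}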
  2471.  
  2472. static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
  2473.                                u32 invalidate, u32 flush)
  2474. {
  2475.         struct intel_engine_cs *ring = req->ring;
  2476.         uint32_t cmd;
  2477.         int ret;
  2478.  
  2479.         ret = intel_ring_begin(req, 4);
  2480.         if (ret)
  2481.                 return ret;
  2482.  
  2483.         cmd = MI_FLUSH_DW;
  2484.         if (INTEL_INFO(ring->dev)->gen >= 8)
  2485.                 cmd += 1;
  2486.  
  2487.         /* We always require a command barrier so that subsequent
  2488.          * commands, such as breadcrumb interrupts, are strictly ordered
  2489.          * wrt the contents of the write cache being flushed to memory
  2490.          * (and thus being coherent from the CPU).
  2491.          */
  2492.         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
  2493.  
  2494.         /*
  2495.          * Bspec vol 1c.5 - video engine command streamer:
  2496.          * "If ENABLED, all TLBs will be invalidated once the flush
  2497.          * operation is complete. This bit is only valid when the
  2498.          * Post-Sync Operation field is a value of 1h or 3h."
  2499.          */
  2500.         if (invalidate & I915_GEM_GPU_DOMAINS)
  2501.                 cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
  2502.  
  2503.         intel_ring_emit(ring, cmd);
  2504.         intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
  2505.         if (INTEL_INFO(ring->dev)->gen >= 8) {
  2506.                 intel_ring_emit(ring, 0); /* upper addr */
  2507.                 intel_ring_emit(ring, 0); /* value */
  2508.         } else  {
  2509.                 intel_ring_emit(ring, 0);
  2510.                 intel_ring_emit(ring, MI_NOOP);
  2511.         }
  2512.         intel_ring_advance(ring);
  2513.         return 0;
  2514. }
  2515.  
  2516. static int
  2517. gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
  2518.                               u64 offset, u32 len,
  2519.                               unsigned dispatch_flags)
  2520. {
  2521.         struct intel_engine_cs *ring = req->ring;
  2522.         bool ppgtt = USES_PPGTT(ring->dev) &&
  2523.                         !(dispatch_flags & I915_DISPATCH_SECURE);
  2524.         int ret;
  2525.  
  2526.         ret = intel_ring_begin(req, 4);
  2527.         if (ret)
  2528.                 return ret;
  2529.  
  2530.         /* FIXME(BDW): Address space and security selectors. */
  2531.         intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
  2532.                         (dispatch_flags & I915_DISPATCH_RS ?
  2533.                          MI_BATCH_RESOURCE_STREAMER : 0));
  2534.         intel_ring_emit(ring, lower_32_bits(offset));
  2535.         intel_ring_emit(ring, upper_32_bits(offset));
  2536.         intel_ring_emit(ring, MI_NOOP);
  2537.         intel_ring_advance(ring);
  2538.  
  2539.         return 0;
  2540. }
  2541.  
  2542. static int
  2543. hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
  2544.                              u64 offset, u32 len,
  2545.                              unsigned dispatch_flags)
  2546. {
  2547.         struct intel_engine_cs *ring = req->ring;
  2548.         int ret;
  2549.  
  2550.         ret = intel_ring_begin(req, 2);
  2551.         if (ret)
  2552.                 return ret;
  2553.  
  2554.         intel_ring_emit(ring,
  2555.                         MI_BATCH_BUFFER_START |
  2556.                         (dispatch_flags & I915_DISPATCH_SECURE ?
  2557.                          0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
  2558.                         (dispatch_flags & I915_DISPATCH_RS ?
  2559.                          MI_BATCH_RESOURCE_STREAMER : 0));
  2560.         /* bits 0-7 are the length on GEN6+ */
  2561.         intel_ring_emit(ring, offset);
  2562.         intel_ring_advance(ring);
  2563.  
  2564.         return 0;
  2565. }
  2566.  
  2567. static int
  2568. gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
  2569.                               u64 offset, u32 len,
  2570.                               unsigned dispatch_flags)
  2571. {
  2572.         struct intel_engine_cs *ring = req->ring;
  2573.         int ret;
  2574.  
  2575.         ret = intel_ring_begin(req, 2);
  2576.         if (ret)
  2577.                 return ret;
  2578.  
  2579.         intel_ring_emit(ring,
  2580.                         MI_BATCH_BUFFER_START |
  2581.                         (dispatch_flags & I915_DISPATCH_SECURE ?
  2582.                          0 : MI_BATCH_NON_SECURE_I965));
  2583.         /* bits 0-7 are the length on GEN6+ */
  2584.         intel_ring_emit(ring, offset);
  2585.         intel_ring_advance(ring);
  2586.  
  2587.         return 0;
  2588. }
  2589.  
  2590. /* Blitter support (SandyBridge+) */
  2591.  
  2592. static int gen6_ring_flush(struct drm_i915_gem_request *req,
  2593.                            u32 invalidate, u32 flush)
  2594. {
  2595.         struct intel_engine_cs *ring = req->ring;
  2596.         struct drm_device *dev = ring->dev;
  2597.         uint32_t cmd;
  2598.         int ret;
  2599.  
  2600.         ret = intel_ring_begin(req, 4);
  2601.         if (ret)
  2602.                 return ret;
  2603.  
  2604.         cmd = MI_FLUSH_DW;
  2605.         if (INTEL_INFO(dev)->gen >= 8)
  2606.                 cmd += 1;
  2607.  
  2608.         /* We always require a command barrier so that subsequent
  2609.          * commands, such as breadcrumb interrupts, are strictly ordered
  2610.          * wrt the contents of the write cache being flushed to memory
  2611.          * (and thus being coherent from the CPU).
  2612.          */
  2613.         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
  2614.  
  2615.         /*
  2616.          * Bspec vol 1c.3 - blitter engine command streamer:
  2617.          * "If ENABLED, all TLBs will be invalidated once the flush
  2618.          * operation is complete. This bit is only valid when the
  2619.          * Post-Sync Operation field is a value of 1h or 3h."
  2620.          */
  2621.         if (invalidate & I915_GEM_DOMAIN_RENDER)
  2622.                 cmd |= MI_INVALIDATE_TLB;
  2623.         intel_ring_emit(ring, cmd);
  2624.         intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
  2625.         if (INTEL_INFO(dev)->gen >= 8) {
  2626.                 intel_ring_emit(ring, 0); /* upper addr */
  2627.                 intel_ring_emit(ring, 0); /* value */
  2628.         } else  {
  2629.                 intel_ring_emit(ring, 0);
  2630.                 intel_ring_emit(ring, MI_NOOP);
  2631.         }
  2632.         intel_ring_advance(ring);
  2633.  
  2634.         return 0;
  2635. }
  2636.  
  2637. int intel_init_render_ring_buffer(struct drm_device *dev)
  2638. {
  2639.         struct drm_i915_private *dev_priv = dev->dev_private;
  2640.         struct intel_engine_cs *ring = &dev_priv->ring[RCS];
  2641.         struct drm_i915_gem_object *obj;
  2642.         int ret;
  2643.  
  2644.         ring->name = "render ring";
  2645.         ring->id = RCS;
  2646.         ring->mmio_base = RENDER_RING_BASE;
  2647.  
  2648.         if (INTEL_INFO(dev)->gen >= 8) {
  2649.                 if (i915_semaphore_is_enabled(dev)) {
  2650.                         obj = i915_gem_alloc_object(dev, 4096);
  2651.                         if (obj == NULL) {
  2652.                                 DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n");
  2653.                                 i915.semaphores = 0;
  2654.                         } else {
  2655.                                 i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
  2656.                                 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK);
  2657.                                 if (ret != 0) {
  2658.                                         drm_gem_object_unreference(&obj->base);
  2659.                                         DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
  2660.                                         i915.semaphores = 0;
  2661.                                 } else
  2662.                                         dev_priv->semaphore_obj = obj;
  2663.                         }
  2664.                 }
  2665.  
  2666.                 ring->init_context = intel_rcs_ctx_init;
  2667.                 ring->add_request = gen6_add_request;
  2668.                 ring->flush = gen8_render_ring_flush;
  2669.                 ring->irq_get = gen8_ring_get_irq;
  2670.                 ring->irq_put = gen8_ring_put_irq;
  2671.                 ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
  2672.                 ring->get_seqno = gen6_ring_get_seqno;
  2673.                 ring->set_seqno = ring_set_seqno;
  2674.                 if (i915_semaphore_is_enabled(dev)) {
  2675.                         WARN_ON(!dev_priv->semaphore_obj);
  2676.                         ring->semaphore.sync_to = gen8_ring_sync;
  2677.                         ring->semaphore.signal = gen8_rcs_signal;
  2678.                         GEN8_RING_SEMAPHORE_INIT;
  2679.                 }
  2680.         } else if (INTEL_INFO(dev)->gen >= 6) {
  2681.                 ring->init_context = intel_rcs_ctx_init;
  2682.                 ring->add_request = gen6_add_request;
  2683.                 ring->flush = gen7_render_ring_flush;
  2684.                 if (INTEL_INFO(dev)->gen == 6)
  2685.                         ring->flush = gen6_render_ring_flush;
  2686.                 ring->irq_get = gen6_ring_get_irq;
  2687.                 ring->irq_put = gen6_ring_put_irq;
  2688.                 ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
  2689.                 ring->get_seqno = gen6_ring_get_seqno;
  2690.                 ring->set_seqno = ring_set_seqno;
  2691.                 if (i915_semaphore_is_enabled(dev)) {
  2692.                         ring->semaphore.sync_to = gen6_ring_sync;
  2693.                         ring->semaphore.signal = gen6_signal;
  2694.                         /*
  2695.                          * These mailbox semaphores are only used on pre-gen8
  2696.                          * platforms, which have no VCS2 ring, so the RCS/VCS2
  2697.                          * slot is initialized as INVALID; gen8 wires up the
  2698.                          * VCS2/RCS semaphore through its own path.  Each
  2699.                          * wait[other] below pairs with that ring's signal[RCS].
  2700.                          */
  2701.                         ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID;
  2702.                         ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_RV;
  2703.                         ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_RB;
  2704.                         ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_RVE;
  2705.                         ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
  2706.                         ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC;
  2707.                         ring->semaphore.mbox.signal[VCS] = GEN6_VRSYNC;
  2708.                         ring->semaphore.mbox.signal[BCS] = GEN6_BRSYNC;
  2709.                         ring->semaphore.mbox.signal[VECS] = GEN6_VERSYNC;
  2710.                         ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
  2711.                 }
  2712.         } else if (IS_GEN5(dev)) {
  2713.                 ring->add_request = pc_render_add_request;
  2714.                 ring->flush = gen4_render_ring_flush;
  2715.                 ring->get_seqno = pc_render_get_seqno;
  2716.                 ring->set_seqno = pc_render_set_seqno;
  2717.                 ring->irq_get = gen5_ring_get_irq;
  2718.                 ring->irq_put = gen5_ring_put_irq;
  2719.                 ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT |
  2720.                                         GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
  2721.         } else {
  2722.                 ring->add_request = i9xx_add_request;
  2723.                 if (INTEL_INFO(dev)->gen < 4)
  2724.                         ring->flush = gen2_render_ring_flush;
  2725.                 else
  2726.                         ring->flush = gen4_render_ring_flush;
  2727.                 ring->get_seqno = ring_get_seqno;
  2728.                 ring->set_seqno = ring_set_seqno;
  2729.                 if (IS_GEN2(dev)) {
  2730.                         ring->irq_get = i8xx_ring_get_irq;
  2731.                         ring->irq_put = i8xx_ring_put_irq;
  2732.                 } else {
  2733.                         ring->irq_get = i9xx_ring_get_irq;
  2734.                         ring->irq_put = i9xx_ring_put_irq;
  2735.                 }
  2736.                 ring->irq_enable_mask = I915_USER_INTERRUPT;
  2737.         }
  2738.         ring->write_tail = ring_write_tail;
  2739.  
  2740.         if (IS_HASWELL(dev))
  2741.                 ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
  2742.         else if (IS_GEN8(dev))
  2743.                 ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
  2744.         else if (INTEL_INFO(dev)->gen >= 6)
  2745.                 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
  2746.         else if (INTEL_INFO(dev)->gen >= 4)
  2747.                 ring->dispatch_execbuffer = i965_dispatch_execbuffer;
  2748.         else if (IS_I830(dev) || IS_845G(dev))
  2749.                 ring->dispatch_execbuffer = i830_dispatch_execbuffer;
  2750.         else
  2751.                 ring->dispatch_execbuffer = i915_dispatch_execbuffer;
  2752.         ring->init_hw = init_render_ring;
  2753.         ring->cleanup = render_ring_cleanup;
  2754.  
  2755.         /* Workaround batchbuffer to combat the CS TLB bug. */
  2756.         if (HAS_BROKEN_CS_TLB(dev)) {
  2757.                 obj = i915_gem_alloc_object(dev, I830_WA_SIZE);
  2758.                 if (obj == NULL) {
  2759.                         DRM_ERROR("Failed to allocate batch bo\n");
  2760.                         return -ENOMEM;
  2761.                 }
  2762.  
  2763.                 ret = i915_gem_obj_ggtt_pin(obj, 0, 0);
  2764.                 if (ret != 0) {
  2765.                         drm_gem_object_unreference(&obj->base);
  2766.                         DRM_ERROR("Failed to pin batch bo\n");
  2767.                         return ret;
  2768.                 }
  2769.  
  2770.                 ring->scratch.obj = obj;
  2771.                 ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
  2772.         }
  2773.  
  2774.         ret = intel_init_ring_buffer(dev, ring);
  2775.         if (ret)
  2776.                 return ret;
  2777.  
  2778.         if (INTEL_INFO(dev)->gen >= 5) {
  2779.                 ret = intel_init_pipe_control(ring);
  2780.                 if (ret)
  2781.                         return ret;
  2782.         }
  2783.  
  2784.         return 0;
  2785. }
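
/*
 * Sketch only (helper name invented here): once the per-gen vfuncs above are
 * installed, generation differences hide behind the engine vtable, so
 * emitting a breadcrumb and reading the seqno back looks the same on every
 * platform.  The add_request/get_seqno prototypes are assumed to match the
 * vfuncs assigned in this file.
 */
static inline int example_emit_breadcrumb(struct drm_i915_gem_request *req,
                                          u32 *seqno)
{
        struct intel_engine_cs *ring = req->ring;
        int ret;

        ret = ring->add_request(req);
        if (ret)
                return ret;

        /* false == no lazy coherency; force a coherent seqno read */
        *seqno = ring->get_seqno(ring, false);
        return 0;
}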
  2786.  
  2787. int intel_init_bsd_ring_buffer(struct drm_device *dev)
  2788. {
  2789.         struct drm_i915_private *dev_priv = dev->dev_private;
  2790.         struct intel_engine_cs *ring = &dev_priv->ring[VCS];
  2791.  
  2792.         ring->name = "bsd ring";
  2793.         ring->id = VCS;
  2794.  
  2795.         ring->write_tail = ring_write_tail;
  2796.         if (INTEL_INFO(dev)->gen >= 6) {
  2797.                 ring->mmio_base = GEN6_BSD_RING_BASE;
  2798.                 /* Gen6 BSD needs a workaround for tail pointer updates */
  2799.                 if (IS_GEN6(dev))
  2800.                         ring->write_tail = gen6_bsd_ring_write_tail;
  2801.                 ring->flush = gen6_bsd_ring_flush;
  2802.                 ring->add_request = gen6_add_request;
  2803.                 ring->get_seqno = gen6_ring_get_seqno;
  2804.                 ring->set_seqno = ring_set_seqno;
  2805.                 if (INTEL_INFO(dev)->gen >= 8) {
  2806.                         ring->irq_enable_mask =
  2807.                                 GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
  2808.                         ring->irq_get = gen8_ring_get_irq;
  2809.                         ring->irq_put = gen8_ring_put_irq;
  2810.                         ring->dispatch_execbuffer =
  2811.                                 gen8_ring_dispatch_execbuffer;
  2812.                         if (i915_semaphore_is_enabled(dev)) {
  2813.                                 ring->semaphore.sync_to = gen8_ring_sync;
  2814.                                 ring->semaphore.signal = gen8_xcs_signal;
  2815.                                 GEN8_RING_SEMAPHORE_INIT;
  2816.                         }
  2817.                 } else {
  2818.                         ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
  2819.                         ring->irq_get = gen6_ring_get_irq;
  2820.                         ring->irq_put = gen6_ring_put_irq;
  2821.                         ring->dispatch_execbuffer =
  2822.                                 gen6_ring_dispatch_execbuffer;
  2823.                         if (i915_semaphore_is_enabled(dev)) {
  2824.                                 ring->semaphore.sync_to = gen6_ring_sync;
  2825.                                 ring->semaphore.signal = gen6_signal;
  2826.                                 ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VR;
  2827.                                 ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID;
  2828.                                 ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VB;
  2829.                                 ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_VVE;
  2830.                                 ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
  2831.                                 ring->semaphore.mbox.signal[RCS] = GEN6_RVSYNC;
  2832.                                 ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC;
  2833.                                 ring->semaphore.mbox.signal[BCS] = GEN6_BVSYNC;
  2834.                                 ring->semaphore.mbox.signal[VECS] = GEN6_VEVSYNC;
  2835.                                 ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
  2836.                         }
  2837.                 }
  2838.         } else {
  2839.                 ring->mmio_base = BSD_RING_BASE;
  2840.                 ring->flush = bsd_ring_flush;
  2841.                 ring->add_request = i9xx_add_request;
  2842.                 ring->get_seqno = ring_get_seqno;
  2843.                 ring->set_seqno = ring_set_seqno;
  2844.                 if (IS_GEN5(dev)) {
  2845.                         ring->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
  2846.                         ring->irq_get = gen5_ring_get_irq;
  2847.                         ring->irq_put = gen5_ring_put_irq;
  2848.                 } else {
  2849.                         ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
  2850.                         ring->irq_get = i9xx_ring_get_irq;
  2851.                         ring->irq_put = i9xx_ring_put_irq;
  2852.                 }
  2853.                 ring->dispatch_execbuffer = i965_dispatch_execbuffer;
  2854.         }
  2855.         ring->init_hw = init_ring_common;
  2856.  
  2857.         return intel_init_ring_buffer(dev, ring);
  2858. }
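
/*
 * Note on the gen8 irq_enable_mask values used above and below: every engine
 * reuses the same GT_RENDER_USER_INTERRUPT bit, shifted into its own field of
 * the GT interrupt registers by the per-engine GEN8_*_IRQ_SHIFT constants
 * (VCS1, VCS2, BCS, VECS).
 */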
  2859.  
  2860. /**
  2861.  * Initialize the second BSD ring (e.g. Broadwell GT3, Skylake GT3)
  2862.  */
  2863. int intel_init_bsd2_ring_buffer(struct drm_device *dev)
  2864. {
  2865.         struct drm_i915_private *dev_priv = dev->dev_private;
  2866.         struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
  2867.  
  2868.         ring->name = "bsd2 ring";
  2869.         ring->id = VCS2;
  2870.  
  2871.         ring->write_tail = ring_write_tail;
  2872.         ring->mmio_base = GEN8_BSD2_RING_BASE;
  2873.         ring->flush = gen6_bsd_ring_flush;
  2874.         ring->add_request = gen6_add_request;
  2875.         ring->get_seqno = gen6_ring_get_seqno;
  2876.         ring->set_seqno = ring_set_seqno;
  2877.         ring->irq_enable_mask =
  2878.                         GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
  2879.         ring->irq_get = gen8_ring_get_irq;
  2880.         ring->irq_put = gen8_ring_put_irq;
  2881.         ring->dispatch_execbuffer =
  2882.                         gen8_ring_dispatch_execbuffer;
  2883.         if (i915_semaphore_is_enabled(dev)) {
  2884.                 ring->semaphore.sync_to = gen8_ring_sync;
  2885.                 ring->semaphore.signal = gen8_xcs_signal;
  2886.                 GEN8_RING_SEMAPHORE_INIT;
  2887.         }
  2888.         ring->init_hw = init_ring_common;
  2889.  
  2890.         return intel_init_ring_buffer(dev, ring);
  2891. }
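
/*
 * Unlike the other init helpers, intel_init_bsd2_ring_buffer() above has no
 * per-generation branching: VCS2 only exists on gen8+ parts, so the gen8
 * irq, dispatch and semaphore paths are installed unconditionally.
 */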
  2892.  
  2893. int intel_init_blt_ring_buffer(struct drm_device *dev)
  2894. {
  2895.         struct drm_i915_private *dev_priv = dev->dev_private;
  2896.         struct intel_engine_cs *ring = &dev_priv->ring[BCS];
  2897.  
  2898.         ring->name = "blitter ring";
  2899.         ring->id = BCS;
  2900.  
  2901.         ring->mmio_base = BLT_RING_BASE;
  2902.         ring->write_tail = ring_write_tail;
  2903.         ring->flush = gen6_ring_flush;
  2904.         ring->add_request = gen6_add_request;
  2905.         ring->get_seqno = gen6_ring_get_seqno;
  2906.         ring->set_seqno = ring_set_seqno;
  2907.         if (INTEL_INFO(dev)->gen >= 8) {
  2908.                 ring->irq_enable_mask =
  2909.                         GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
  2910.                 ring->irq_get = gen8_ring_get_irq;
  2911.                 ring->irq_put = gen8_ring_put_irq;
  2912.                 ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
  2913.                 if (i915_semaphore_is_enabled(dev)) {
  2914.                         ring->semaphore.sync_to = gen8_ring_sync;
  2915.                         ring->semaphore.signal = gen8_xcs_signal;
  2916.                         GEN8_RING_SEMAPHORE_INIT;
  2917.                 }
  2918.         } else {
  2919.                 ring->irq_enable_mask = GT_BLT_USER_INTERRUPT;
  2920.                 ring->irq_get = gen6_ring_get_irq;
  2921.                 ring->irq_put = gen6_ring_put_irq;
  2922.                 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
  2923.                 if (i915_semaphore_is_enabled(dev)) {
  2924.                         ring->semaphore.signal = gen6_signal;
  2925.                         ring->semaphore.sync_to = gen6_ring_sync;
  2926.                         /*
  2927.                          * These mailbox semaphores are only used on pre-gen8
  2928.                          * platforms, which have no VCS2 ring, so the BCS/VCS2
  2929.                          * slot is initialized as INVALID; gen8 wires up the
  2930.                          * BCS/VCS2 semaphore through its own path.  Each
  2931.                          * wait[other] below pairs with that ring's signal[BCS].
  2932.                          */
  2933.                         ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_BR;
  2934.                         ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_BV;
  2935.                         ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID;
  2936.                         ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_BVE;
  2937.                         ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
  2938.                         ring->semaphore.mbox.signal[RCS] = GEN6_RBSYNC;
  2939.                         ring->semaphore.mbox.signal[VCS] = GEN6_VBSYNC;
  2940.                         ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC;
  2941.                         ring->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC;
  2942.                         ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
  2943.                 }
  2944.         }
  2945.         ring->init_hw = init_ring_common;
  2946.  
  2947.         return intel_init_ring_buffer(dev, ring);
  2948. }
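
/*
 * The blitter and video-enhancement rings share the same MI_FLUSH_DW based
 * gen6_ring_flush(); only the mmio base, interrupt plumbing and semaphore
 * mailboxes differ between intel_init_blt_ring_buffer() above and
 * intel_init_vebox_ring_buffer() below.
 */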
  2949.  
  2950. int intel_init_vebox_ring_buffer(struct drm_device *dev)
  2951. {
  2952.         struct drm_i915_private *dev_priv = dev->dev_private;
  2953.         struct intel_engine_cs *ring = &dev_priv->ring[VECS];
  2954.  
  2955.         ring->name = "video enhancement ring";
  2956.         ring->id = VECS;
  2957.  
  2958.         ring->mmio_base = VEBOX_RING_BASE;
  2959.         ring->write_tail = ring_write_tail;
  2960.         ring->flush = gen6_ring_flush;
  2961.         ring->add_request = gen6_add_request;
  2962.         ring->get_seqno = gen6_ring_get_seqno;
  2963.         ring->set_seqno = ring_set_seqno;
  2964.  
  2965.         if (INTEL_INFO(dev)->gen >= 8) {
  2966.                 ring->irq_enable_mask =
  2967.                         GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
  2968.                 ring->irq_get = gen8_ring_get_irq;
  2969.                 ring->irq_put = gen8_ring_put_irq;
  2970.                 ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
  2971.                 if (i915_semaphore_is_enabled(dev)) {
  2972.                         ring->semaphore.sync_to = gen8_ring_sync;
  2973.                         ring->semaphore.signal = gen8_xcs_signal;
  2974.                         GEN8_RING_SEMAPHORE_INIT;
  2975.                 }
  2976.         } else {
  2977.                 ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
  2978.                 ring->irq_get = hsw_vebox_get_irq;
  2979.                 ring->irq_put = hsw_vebox_put_irq;
  2980.                 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
  2981.                 if (i915_semaphore_is_enabled(dev)) {
  2982.                         ring->semaphore.sync_to = gen6_ring_sync;
  2983.                         ring->semaphore.signal = gen6_signal;
  2984.                         ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VER;
  2985.                         ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_VEV;
  2986.                         ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VEB;
  2987.                         ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID;
  2988.                         ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
  2989.                         ring->semaphore.mbox.signal[RCS] = GEN6_RVESYNC;
  2990.                         ring->semaphore.mbox.signal[VCS] = GEN6_VVESYNC;
  2991.                         ring->semaphore.mbox.signal[BCS] = GEN6_BVESYNC;
  2992.                         ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC;
  2993.                         ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
  2994.                 }
  2995.         }
  2996.         ring->init_hw = init_ring_common;
  2997.  
  2998.         return intel_init_ring_buffer(dev, ring);
  2999. }
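
/*
 * Bring-up sketch (an assumption about the caller, which lives outside this
 * file): the legacy submission path initialises the engines roughly in this
 * order, each helper above only filling in per-gen vfuncs before the common
 * intel_init_ring_buffer() allocates the ring itself.  The HAS_BSD()/
 * HAS_BLT()/HAS_VEBOX() feature checks are assumed from i915_drv.h; error
 * unwinding of already-initialised rings is omitted for brevity.
 */
static int example_init_legacy_rings(struct drm_device *dev)
{
        int ret;

        ret = intel_init_render_ring_buffer(dev);
        if (ret)
                return ret;

        if (HAS_BSD(dev)) {
                ret = intel_init_bsd_ring_buffer(dev);
                if (ret)
                        return ret;
        }

        if (HAS_BLT(dev)) {
                ret = intel_init_blt_ring_buffer(dev);
                if (ret)
                        return ret;
        }

        if (HAS_VEBOX(dev)) {
                ret = intel_init_vebox_ring_buffer(dev);
                if (ret)
                        return ret;
        }

        return 0;
}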
  3000.  
  3001. int
  3002. intel_ring_flush_all_caches(struct drm_i915_gem_request *req)
  3003. {
  3004.         struct intel_engine_cs *ring = req->ring;
  3005.         int ret;
  3006.  
  3007.         if (!ring->gpu_caches_dirty)
  3008.                 return 0;
  3009.  
  3010.         ret = ring->flush(req, 0, I915_GEM_GPU_DOMAINS);
  3011.         if (ret)
  3012.                 return ret;
  3013.  
  3014.         trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS);
  3015.  
  3016.         ring->gpu_caches_dirty = false;
  3017.         return 0;
  3018. }
  3019.  
  3020. int
  3021. intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
  3022. {
  3023.         struct intel_engine_cs *ring = req->ring;
  3024.         uint32_t flush_domains;
  3025.         int ret;
  3026.  
  3027.         flush_domains = 0;
  3028.         if (ring->gpu_caches_dirty)
  3029.                 flush_domains = I915_GEM_GPU_DOMAINS;
  3030.  
  3031.         ret = ring->flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
  3032.         if (ret)
  3033.                 return ret;
  3034.  
  3035.         trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
  3036.  
  3037.         ring->gpu_caches_dirty = false;
  3038.         return 0;
  3039. }
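
/*
 * Usage sketch (the real callers live in the execbuffer and request code,
 * outside this file): a batch is typically bracketed by the two helpers
 * above - caches are invalidated before the batch samples its objects and
 * flushed again when the request is emitted, so nothing stale is read and
 * nothing dirty is left behind.  The dispatch_execbuffer prototype is
 * assumed to match the vfuncs assigned earlier; the trailing 0 stands in
 * for the dispatch flags.
 */
static int example_submit_batch(struct drm_i915_gem_request *req,
                                u64 batch_start, u32 batch_len)
{
        int ret;

        ret = intel_ring_invalidate_all_caches(req);
        if (ret)
                return ret;

        ret = req->ring->dispatch_execbuffer(req, batch_start, batch_len, 0);
        if (ret)
                return ret;

        return intel_ring_flush_all_caches(req);
}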
  3040.  
  3041. void
  3042. intel_stop_ring_buffer(struct intel_engine_cs *ring)
  3043. {
  3044.         int ret;
  3045.  
  3046.         if (!intel_ring_initialized(ring))
  3047.                 return;
  3048.  
  3049.         ret = intel_ring_idle(ring);
  3050.         if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
  3051.                 DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
  3052.                           ring->name, ret);
  3053.  
  3054.         stop_ring(ring);
  3055. }
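
/*
 * Teardown sketch (assumption): suspend and unload paths quiesce every
 * initialised engine with intel_stop_ring_buffer() before the rings are
 * released.  The for_each_ring() iterator is assumed from i915_drv.h.
 */
static void example_stop_all_rings(struct drm_i915_private *dev_priv)
{
        struct intel_engine_cs *ring;
        int i;

        for_each_ring(ring, dev_priv, i)
                intel_stop_ring_buffer(ring);
}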
  3056.