Subversion Repositories Kolibri OS

Rev 6937

  1. /*
  2.  * Copyright © 2008-2010 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  *
  23.  * Authors:
  24.  *    Eric Anholt <eric@anholt.net>
  25.  *    Zou Nan hai <nanhai.zou@intel.com>
  26.  *    Xiang Hai hao <haihao.xiang@intel.com>
  27.  *
  28.  */
  29.  
  30. #include <linux/log2.h>
  31. #include <drm/drmP.h>
  32. #include "i915_drv.h"
  33. #include <drm/i915_drm.h>
  34. #include "i915_trace.h"
  35. #include "intel_drv.h"
  36.  
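/*
 * Editor's note: a worked example of the space computation below (the
 * numbers are illustrative, not taken from the driver).  With a 4096-byte
 * ring, head = 512 and tail = 3584:
 *
 *     space = 512 - 3584 = -3072;      negative, so the ring has wrapped
 *     space += 4096;                   -> 1024 bytes between tail and head
 *     return 1024 - I915_RING_FREE_SPACE;
 *
 * The I915_RING_FREE_SPACE headroom keeps the reported space slightly
 * smaller than the true gap, so the tail can never fully catch up to head.
 */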
  37. int __intel_ring_space(int head, int tail, int size)
  38. {
  39.         int space = head - tail;
  40.         if (space <= 0)
  41.                 space += size;
  42.         return space - I915_RING_FREE_SPACE;
  43. }
  44.  
  45. void intel_ring_update_space(struct intel_ringbuffer *ringbuf)
  46. {
  47.         if (ringbuf->last_retired_head != -1) {
  48.                 ringbuf->head = ringbuf->last_retired_head;
  49.                 ringbuf->last_retired_head = -1;
  50.         }
  51.  
  52.         ringbuf->space = __intel_ring_space(ringbuf->head & HEAD_ADDR,
  53.                                             ringbuf->tail, ringbuf->size);
  54. }
  55.  
  56. int intel_ring_space(struct intel_ringbuffer *ringbuf)
  57. {
  58.         intel_ring_update_space(ringbuf);
  59.         return ringbuf->space;
  60. }
  61.  
  62. bool intel_ring_stopped(struct intel_engine_cs *ring)
  63. {
  64.         struct drm_i915_private *dev_priv = ring->dev->dev_private;
  65.         return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
  66. }
  67.  
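/*
 * Editor's note (illustrative): the mask below relies on ringbuf->size
 * being a power of two, so "tail &= size - 1" is a cheap modulo that wraps
 * the software tail.  For example, on a 4096-byte ring a tail of 4104
 * wraps to 4104 & 4095 = 8.
 */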
  68. static void __intel_ring_advance(struct intel_engine_cs *ring)
  69. {
  70.         struct intel_ringbuffer *ringbuf = ring->buffer;
  71.         ringbuf->tail &= ringbuf->size - 1;
  72.         if (intel_ring_stopped(ring))
  73.                 return;
  74.         ring->write_tail(ring, ringbuf->tail);
  75. }
  76.  
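/*
 * Editor's note: the flush emitters below all follow the same
 * reserve/emit/advance pattern (sketch only; n stands for the dword count
 * each function actually reserves):
 *
 *     ret = intel_ring_begin(req, n);   /\* reserve n dwords, may wait *\/
 *     if (ret)
 *             return ret;
 *     intel_ring_emit(ring, ...);       /\* write the reserved dwords *\/
 *     intel_ring_advance(ring);         /\* publish the new tail *\/
 *
 * MI_NOOP padding is emitted where a function would otherwise write an odd
 * number of useful dwords, as in gen2_render_ring_flush() below.
 */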
  77. static int
  78. gen2_render_ring_flush(struct drm_i915_gem_request *req,
  79.                        u32      invalidate_domains,
  80.                        u32      flush_domains)
  81. {
  82.         struct intel_engine_cs *ring = req->ring;
  83.         u32 cmd;
  84.         int ret;
  85.  
  86.         cmd = MI_FLUSH;
  87.         if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
  88.                 cmd |= MI_NO_WRITE_FLUSH;
  89.  
  90.         if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
  91.                 cmd |= MI_READ_FLUSH;
  92.  
  93.         ret = intel_ring_begin(req, 2);
  94.         if (ret)
  95.                 return ret;
  96.  
  97.         intel_ring_emit(ring, cmd);
  98.         intel_ring_emit(ring, MI_NOOP);
  99.         intel_ring_advance(ring);
  100.  
  101.         return 0;
  102. }
  103.  
  104. static int
  105. gen4_render_ring_flush(struct drm_i915_gem_request *req,
  106.                        u32      invalidate_domains,
  107.                        u32      flush_domains)
  108. {
  109.         struct intel_engine_cs *ring = req->ring;
  110.         struct drm_device *dev = ring->dev;
  111.         u32 cmd;
  112.         int ret;
  113.  
  114.         /*
  115.          * read/write caches:
  116.          *
  117.          * I915_GEM_DOMAIN_RENDER is always invalidated, but is
  118.          * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
  119.          * also flushed at 2d versus 3d pipeline switches.
  120.          *
  121.          * read-only caches:
  122.          *
  123.          * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
  124.          * MI_READ_FLUSH is set, and is always flushed on 965.
  125.          *
  126.          * I915_GEM_DOMAIN_COMMAND may not exist?
  127.          *
  128.          * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
  129.          * invalidated when MI_EXE_FLUSH is set.
  130.          *
  131.          * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
  132.          * invalidated with every MI_FLUSH.
  133.          *
  134.          * TLBs:
  135.          *
  136.          * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
  137.          * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
  138.          * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
  139.          * are flushed at any MI_FLUSH.
  140.          */
  141.  
  142.         cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
  143.         if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
  144.                 cmd &= ~MI_NO_WRITE_FLUSH;
  145.         if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
  146.                 cmd |= MI_EXE_FLUSH;
  147.  
  148.         if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
  149.             (IS_G4X(dev) || IS_GEN5(dev)))
  150.                 cmd |= MI_INVALIDATE_ISP;
  151.  
  152.         ret = intel_ring_begin(req, 2);
  153.         if (ret)
  154.                 return ret;
  155.  
  156.         intel_ring_emit(ring, cmd);
  157.         intel_ring_emit(ring, MI_NOOP);
  158.         intel_ring_advance(ring);
  159.  
  160.         return 0;
  161. }
  162.  
  163. /**
  164.  * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
  165.  * implementing two workarounds on gen6.  From section 1.4.7.1
  166.  * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
  167.  *
  168.  * [DevSNB-C+{W/A}] Before any depth stall flush (including those
  169.  * produced by non-pipelined state commands), software needs to first
  170.  * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
  171.  * 0.
  172.  *
  173.  * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
  174.  * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
  175.  *
  176.  * And the workaround for these two requires this workaround first:
  177.  *
  178.  * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
  179.  * BEFORE the pipe-control with a post-sync op and no write-cache
  180.  * flushes.
  181.  *
  182.  * And this last workaround is tricky because of the requirements on
  183.  * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
  184.  * volume 2 part 1:
  185.  *
  186.  *     "1 of the following must also be set:
  187.  *      - Render Target Cache Flush Enable ([12] of DW1)
  188.  *      - Depth Cache Flush Enable ([0] of DW1)
  189.  *      - Stall at Pixel Scoreboard ([1] of DW1)
  190.  *      - Depth Stall ([13] of DW1)
  191.  *      - Post-Sync Operation ([13] of DW1)
  192.  *      - Notify Enable ([8] of DW1)"
  193.  *
  194.  * The cache flushes require the workaround flush that triggered this
  195.  * one, so we can't use it.  Depth stall would trigger the same.
  196.  * Post-sync nonzero is what triggered this second workaround, so we
  197.  * can't use that one either.  Notify enable is IRQs, which aren't
  198.  * really our business.  That leaves only stall at scoreboard.
  199.  */
  200. static int
  201. intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
  202. {
  203.         struct intel_engine_cs *ring = req->ring;
  204.         u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
  205.         int ret;
  206.  
  207.         ret = intel_ring_begin(req, 6);
  208.         if (ret)
  209.                 return ret;
  210.  
  211.         intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
  212.         intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
  213.                         PIPE_CONTROL_STALL_AT_SCOREBOARD);
  214.         intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
  215.         intel_ring_emit(ring, 0); /* low dword */
  216.         intel_ring_emit(ring, 0); /* high dword */
  217.         intel_ring_emit(ring, MI_NOOP);
  218.         intel_ring_advance(ring);
  219.  
  220.         ret = intel_ring_begin(req, 6);
  221.         if (ret)
  222.                 return ret;
  223.  
  224.         intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
  225.         intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
  226.         intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
  227.         intel_ring_emit(ring, 0);
  228.         intel_ring_emit(ring, 0);
  229.         intel_ring_emit(ring, MI_NOOP);
  230.         intel_ring_advance(ring);
  231.  
  232.         return 0;
  233. }
  234.  
  235. static int
  236. gen6_render_ring_flush(struct drm_i915_gem_request *req,
  237.                        u32 invalidate_domains, u32 flush_domains)
  238. {
  239.         struct intel_engine_cs *ring = req->ring;
  240.         u32 flags = 0;
  241.         u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
  242.         int ret;
  243.  
  244.         /* Force SNB workarounds for PIPE_CONTROL flushes */
  245.         ret = intel_emit_post_sync_nonzero_flush(req);
  246.         if (ret)
  247.                 return ret;
  248.  
  249.         /* Just flush everything.  Experiments have shown that reducing the
  250.          * number of bits based on the write domains has little performance
  251.          * impact.
  252.          */
  253.         if (flush_domains) {
  254.                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
  255.                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
  256.                 /*
  257.                  * Ensure that any following seqno writes only happen
  258.                  * when the render cache is indeed flushed.
  259.                  */
  260.                 flags |= PIPE_CONTROL_CS_STALL;
  261.         }
  262.         if (invalidate_domains) {
  263.                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
  264.                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
  265.                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
  266.                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
  267.                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
  268.                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
  269.                 /*
  270.                  * TLB invalidate requires a post-sync write.
  271.                  */
  272.                 flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
  273.         }
  274.  
  275.         ret = intel_ring_begin(req, 4);
  276.         if (ret)
  277.                 return ret;
  278.  
  279.         intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
  280.         intel_ring_emit(ring, flags);
  281.         intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
  282.         intel_ring_emit(ring, 0);
  283.         intel_ring_advance(ring);
  284.  
  285.         return 0;
  286. }
  287.  
  288. static int
  289. gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
  290. {
  291.         struct intel_engine_cs *ring = req->ring;
  292.         int ret;
  293.  
  294.         ret = intel_ring_begin(req, 4);
  295.         if (ret)
  296.                 return ret;
  297.  
  298.         intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
  299.         intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
  300.                               PIPE_CONTROL_STALL_AT_SCOREBOARD);
  301.         intel_ring_emit(ring, 0);
  302.         intel_ring_emit(ring, 0);
  303.         intel_ring_advance(ring);
  304.  
  305.         return 0;
  306. }
  307.  
  308. static int
  309. gen7_render_ring_flush(struct drm_i915_gem_request *req,
  310.                        u32 invalidate_domains, u32 flush_domains)
  311. {
  312.         struct intel_engine_cs *ring = req->ring;
  313.         u32 flags = 0;
  314.         u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
  315.         int ret;
  316.  
  317.         /*
  318.          * Ensure that any following seqno writes only happen when the render
  319.          * cache is indeed flushed.
  320.          *
  321.          * Workaround: 4th PIPE_CONTROL command (except the ones with only
  322.          * read-cache invalidate bits set) must have the CS_STALL bit set. We
  323.          * don't try to be clever and just set it unconditionally.
  324.          */
  325.         flags |= PIPE_CONTROL_CS_STALL;
  326.  
  327.         /* Just flush everything.  Experiments have shown that reducing the
  328.          * number of bits based on the write domains has little performance
  329.          * impact.
  330.          */
  331.         if (flush_domains) {
  332.                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
  333.                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
  334.                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
  335.                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
  336.         }
  337.         if (invalidate_domains) {
  338.                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
  339.                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
  340.                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
  341.                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
  342.                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
  343.                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
  344.                 flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
  345.                 /*
  346.                  * TLB invalidate requires a post-sync write.
  347.                  */
  348.                 flags |= PIPE_CONTROL_QW_WRITE;
  349.                 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
  350.  
  351.                 flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
  352.  
  353.                 /* Workaround: we must issue a pipe_control with CS-stall bit
  354.                  * set before a pipe_control command that has the state cache
  355.                  * invalidate bit set. */
  356.                 gen7_render_ring_cs_stall_wa(req);
  357.         }
  358.  
  359.         ret = intel_ring_begin(req, 4);
  360.         if (ret)
  361.                 return ret;
  362.  
  363.         intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
  364.         intel_ring_emit(ring, flags);
  365.         intel_ring_emit(ring, scratch_addr);
  366.         intel_ring_emit(ring, 0);
  367.         intel_ring_advance(ring);
  368.  
  369.         return 0;
  370. }
  371.  
  372. static int
  373. gen8_emit_pipe_control(struct drm_i915_gem_request *req,
  374.                        u32 flags, u32 scratch_addr)
  375. {
  376.         struct intel_engine_cs *ring = req->ring;
  377.         int ret;
  378.  
  379.         ret = intel_ring_begin(req, 6);
  380.         if (ret)
  381.                 return ret;
  382.  
  383.         intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
  384.         intel_ring_emit(ring, flags);
  385.         intel_ring_emit(ring, scratch_addr);
  386.         intel_ring_emit(ring, 0);
  387.         intel_ring_emit(ring, 0);
  388.         intel_ring_emit(ring, 0);
  389.         intel_ring_advance(ring);
  390.  
  391.         return 0;
  392. }
  393.  
  394. static int
  395. gen8_render_ring_flush(struct drm_i915_gem_request *req,
  396.                        u32 invalidate_domains, u32 flush_domains)
  397. {
  398.         u32 flags = 0;
  399.         u32 scratch_addr = req->ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
  400.         int ret;
  401.  
  402.         flags |= PIPE_CONTROL_CS_STALL;
  403.  
  404.         if (flush_domains) {
  405.                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
  406.                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
  407.                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
  408.                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
  409.         }
  410.         if (invalidate_domains) {
  411.                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
  412.                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
  413.                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
  414.                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
  415.                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
  416.                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
  417.                 flags |= PIPE_CONTROL_QW_WRITE;
  418.                 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
  419.  
  420.                 /* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
  421.                 ret = gen8_emit_pipe_control(req,
  422.                                              PIPE_CONTROL_CS_STALL |
  423.                                              PIPE_CONTROL_STALL_AT_SCOREBOARD,
  424.                                              0);
  425.                 if (ret)
  426.                         return ret;
  427.         }
  428.  
  429.         return gen8_emit_pipe_control(req, flags, scratch_addr);
  430. }
  431.  
  432. static void ring_write_tail(struct intel_engine_cs *ring,
  433.                             u32 value)
  434. {
  435.         struct drm_i915_private *dev_priv = ring->dev->dev_private;
  436.         I915_WRITE_TAIL(ring, value);
  437. }
  438.  
  439. u64 intel_ring_get_active_head(struct intel_engine_cs *ring)
  440. {
  441.         struct drm_i915_private *dev_priv = ring->dev->dev_private;
  442.         u64 acthd;
  443.  
  444.         if (INTEL_INFO(ring->dev)->gen >= 8)
  445.                 acthd = I915_READ64_2x32(RING_ACTHD(ring->mmio_base),
  446.                                          RING_ACTHD_UDW(ring->mmio_base));
  447.         else if (INTEL_INFO(ring->dev)->gen >= 4)
  448.                 acthd = I915_READ(RING_ACTHD(ring->mmio_base));
  449.         else
  450.                 acthd = I915_READ(ACTHD);
  451.  
  452.         return acthd;
  453. }
  454.  
  455. static void ring_setup_phys_status_page(struct intel_engine_cs *ring)
  456. {
  457.         struct drm_i915_private *dev_priv = ring->dev->dev_private;
  458.         u32 addr;
  459.  
  460.         addr = dev_priv->status_page_dmah->busaddr;
  461.         if (INTEL_INFO(ring->dev)->gen >= 4)
  462.                 addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
  463.         I915_WRITE(HWS_PGA, addr);
  464. }
  465.  
  466. static void intel_ring_setup_status_page(struct intel_engine_cs *ring)
  467. {
  468.         struct drm_device *dev = ring->dev;
  469.         struct drm_i915_private *dev_priv = ring->dev->dev_private;
  470.         i915_reg_t mmio;
  471.  
  472.         /* The ring status page addresses are no longer next to the rest of
  473.          * the ring registers as of gen7.
  474.          */
  475.         if (IS_GEN7(dev)) {
  476.                 switch (ring->id) {
  477.                 case RCS:
  478.                         mmio = RENDER_HWS_PGA_GEN7;
  479.                         break;
  480.                 case BCS:
  481.                         mmio = BLT_HWS_PGA_GEN7;
  482.                         break;
  483.                 /*
  484.                  * VCS2 doesn't actually exist on Gen7; it is listed
  485.                  * here only to silence gcc's switch-check warning.
  486.                  */
  487.                 case VCS2:
  488.                 case VCS:
  489.                         mmio = BSD_HWS_PGA_GEN7;
  490.                         break;
  491.                 case VECS:
  492.                         mmio = VEBOX_HWS_PGA_GEN7;
  493.                         break;
  494.                 }
  495.         } else if (IS_GEN6(ring->dev)) {
  496.                 mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
  497.         } else {
  498.                 /* XXX: gen8 returns to sanity */
  499.                 mmio = RING_HWS_PGA(ring->mmio_base);
  500.         }
  501.  
  502.         I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
  503.         POSTING_READ(mmio);
  504.  
  505.         /*
  506.          * Flush the TLB for this page
  507.          *
  508.          * FIXME: These two bits have disappeared on gen8, so a question
  509.          * arises: do we still need this and if so how should we go about
  510.          * invalidating the TLB?
  511.          */
  512.         if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
  513.                 i915_reg_t reg = RING_INSTPM(ring->mmio_base);
  514.  
  515.                 /* ring should be idle before issuing a sync flush */
  516.                 WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
  517.  
  518.                 I915_WRITE(reg,
  519.                            _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
  520.                                               INSTPM_SYNC_FLUSH));
  521.                 if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
  522.                              1000))
  523.                         DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
  524.                                   ring->name);
  525.         }
  526. }
  527.  
  528. static bool stop_ring(struct intel_engine_cs *ring)
  529. {
  530.         struct drm_i915_private *dev_priv = to_i915(ring->dev);
  531.  
  532.         if (!IS_GEN2(ring->dev)) {
  533.                 I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
  534.                 if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
  535.                         DRM_ERROR("%s : timed out trying to stop ring\n", ring->name);
  536.                         /* Sometimes we observe that the idle flag is not
  537.                          * set even though the ring is empty. So double
  538.                          * check before giving up.
  539.                          */
  540.                         if (I915_READ_HEAD(ring) != I915_READ_TAIL(ring))
  541.                                 return false;
  542.                 }
  543.         }
  544.  
  545.         I915_WRITE_CTL(ring, 0);
  546.         I915_WRITE_HEAD(ring, 0);
  547.         ring->write_tail(ring, 0);
  548.  
  549.         if (!IS_GEN2(ring->dev)) {
  550.                 (void)I915_READ_CTL(ring);
  551.                 I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
  552.         }
  553.  
  554.         return (I915_READ_HEAD(ring) & HEAD_ADDR) == 0;
  555. }
  556.  
  557. static int init_ring_common(struct intel_engine_cs *ring)
  558. {
  559.         struct drm_device *dev = ring->dev;
  560.         struct drm_i915_private *dev_priv = dev->dev_private;
  561.         struct intel_ringbuffer *ringbuf = ring->buffer;
  562.         struct drm_i915_gem_object *obj = ringbuf->obj;
  563.         int ret = 0;
  564.  
  565.         intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
  566.  
  567.         if (!stop_ring(ring)) {
  568.                 /* G45 ring initialization often fails to reset head to zero */
  569.                 DRM_DEBUG_KMS("%s head not reset to zero "
  570.                               "ctl %08x head %08x tail %08x start %08x\n",
  571.                               ring->name,
  572.                               I915_READ_CTL(ring),
  573.                               I915_READ_HEAD(ring),
  574.                               I915_READ_TAIL(ring),
  575.                               I915_READ_START(ring));
  576.  
  577.                 if (!stop_ring(ring)) {
  578.                         DRM_ERROR("failed to set %s head to zero "
  579.                                   "ctl %08x head %08x tail %08x start %08x\n",
  580.                                   ring->name,
  581.                                   I915_READ_CTL(ring),
  582.                                   I915_READ_HEAD(ring),
  583.                                   I915_READ_TAIL(ring),
  584.                                   I915_READ_START(ring));
  585.                         ret = -EIO;
  586.                         goto out;
  587.                 }
  588.         }
  589.  
  590.         if (I915_NEED_GFX_HWS(dev))
  591.                 intel_ring_setup_status_page(ring);
  592.         else
  593.                 ring_setup_phys_status_page(ring);
  594.  
  595.         /* Enforce ordering by reading HEAD register back */
  596.         I915_READ_HEAD(ring);
  597.  
  598.         /* Initialize the ring. This must happen _after_ we've cleared the ring
  599.          * registers with the above sequence (the readback of the HEAD registers
  600.          * also enforces ordering), otherwise the hw might lose the new ring
  601.          * register values. */
  602.         I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj));
  603.  
  604.         /* WaClearRingBufHeadRegAtInit:ctg,elk */
  605.         if (I915_READ_HEAD(ring))
  606.                 DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
  607.                           ring->name, I915_READ_HEAD(ring));
  608.         I915_WRITE_HEAD(ring, 0);
  609.         (void)I915_READ_HEAD(ring);
  610.  
  611.         I915_WRITE_CTL(ring,
  612.                         ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES)
  613.                         | RING_VALID);
  614.  
  615.         /* If the head is still not zero, the ring is dead */
  616.         if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
  617.                      I915_READ_START(ring) == i915_gem_obj_ggtt_offset(obj) &&
  618.                      (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
  619.                 DRM_ERROR("%s initialization failed "
  620.                           "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
  621.                           ring->name,
  622.                           I915_READ_CTL(ring), I915_READ_CTL(ring) & RING_VALID,
  623.                           I915_READ_HEAD(ring), I915_READ_TAIL(ring),
  624.                           I915_READ_START(ring), (unsigned long)i915_gem_obj_ggtt_offset(obj));
  625.                 ret = -EIO;
  626.                 goto out;
  627.         }
  628.  
  629.         ringbuf->last_retired_head = -1;
  630.         ringbuf->head = I915_READ_HEAD(ring);
  631.         ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
  632.         intel_ring_update_space(ringbuf);
  633.  
  634.         memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
  635.  
  636. out:
  637.         intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
  638.  
  639.         return ret;
  640. }
  641.  
  642. void
  643. intel_fini_pipe_control(struct intel_engine_cs *ring)
  644. {
  645.         struct drm_device *dev = ring->dev;
  646.  
  647.         if (ring->scratch.obj == NULL)
  648.                 return;
  649.  
  650.         if (INTEL_INFO(dev)->gen >= 5) {
  651.                 kunmap(sg_page(ring->scratch.obj->pages->sgl));
  652.                 i915_gem_object_ggtt_unpin(ring->scratch.obj);
  653.         }
  654.  
  655.         drm_gem_object_unreference(&ring->scratch.obj->base);
  656.         ring->scratch.obj = NULL;
  657. }
  658.  
  659. int
  660. intel_init_pipe_control(struct intel_engine_cs *ring)
  661. {
  662.         int ret;
  663.  
  664.         WARN_ON(ring->scratch.obj);
  665.  
  666.         ring->scratch.obj = i915_gem_alloc_object(ring->dev, 4096);
  667.         if (ring->scratch.obj == NULL) {
  668.                 DRM_ERROR("Failed to allocate seqno page\n");
  669.                 ret = -ENOMEM;
  670.                 goto err;
  671.         }
  672.  
  673.         ret = i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
  674.         if (ret)
  675.                 goto err_unref;
  676.  
  677.         ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0);
  678.         if (ret)
  679.                 goto err_unref;
  680.  
  681.         ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(ring->scratch.obj);
  682.         ring->scratch.cpu_page = kmap(sg_page(ring->scratch.obj->pages->sgl));
  683.         if (ring->scratch.cpu_page == NULL) {
  684.                 ret = -ENOMEM;
  685.                 goto err_unpin;
  686.         }
  687.  
  688.         DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
  689.                          ring->name, ring->scratch.gtt_offset);
  690.         return 0;
  691.  
  692. err_unpin:
  693.         i915_gem_object_ggtt_unpin(ring->scratch.obj);
  694. err_unref:
  695.         drm_gem_object_unreference(&ring->scratch.obj->base);
  696. err:
  697.         return ret;
  698. }
  699.  
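/*
 * Editor's sketch of the command stream built by the function below for a
 * workaround list of "count" registers; the dword total matches the
 * intel_ring_begin(req, count * 2 + 2) reservation:
 *
 *     MI_LOAD_REGISTER_IMM(count)
 *     reg[0].addr, reg[0].value
 *     ...
 *     reg[count-1].addr, reg[count-1].value
 *     MI_NOOP
 */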
  700. static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
  701. {
  702.         int ret, i;
  703.         struct intel_engine_cs *ring = req->ring;
  704.         struct drm_device *dev = ring->dev;
  705.         struct drm_i915_private *dev_priv = dev->dev_private;
  706.         struct i915_workarounds *w = &dev_priv->workarounds;
  707.  
  708.         if (w->count == 0)
  709.                 return 0;
  710.  
  711.         ring->gpu_caches_dirty = true;
  712.         ret = intel_ring_flush_all_caches(req);
  713.         if (ret)
  714.                 return ret;
  715.  
  716.         ret = intel_ring_begin(req, (w->count * 2 + 2));
  717.         if (ret)
  718.                 return ret;
  719.  
  720.         intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count));
  721.         for (i = 0; i < w->count; i++) {
  722.                 intel_ring_emit_reg(ring, w->reg[i].addr);
  723.                 intel_ring_emit(ring, w->reg[i].value);
  724.         }
  725.         intel_ring_emit(ring, MI_NOOP);
  726.  
  727.         intel_ring_advance(ring);
  728.  
  729.         ring->gpu_caches_dirty = true;
  730.         ret = intel_ring_flush_all_caches(req);
  731.         if (ret)
  732.                 return ret;
  733.  
  734.         DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);
  735.  
  736.         return 0;
  737. }
  738.  
  739. static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
  740. {
  741.         int ret;
  742.  
  743.         ret = intel_ring_workarounds_emit(req);
  744.         if (ret != 0)
  745.                 return ret;
  746.  
  747.         ret = i915_gem_render_state_init(req);
  748.         if (ret)
  749.                 return ret;
  750.  
  751.         return 0;
  752. }
  753.  
  754. static int wa_add(struct drm_i915_private *dev_priv,
  755.                   i915_reg_t addr,
  756.                   const u32 mask, const u32 val)
  757. {
  758.         const u32 idx = dev_priv->workarounds.count;
  759.  
  760.         if (WARN_ON(idx >= I915_MAX_WA_REGS))
  761.                 return -ENOSPC;
  762.  
  763.         dev_priv->workarounds.reg[idx].addr = addr;
  764.         dev_priv->workarounds.reg[idx].value = val;
  765.         dev_priv->workarounds.reg[idx].mask = mask;
  766.  
  767.         dev_priv->workarounds.count++;
  768.  
  769.         return 0;
  770. }
  771.  
  772. #define WA_REG(addr, mask, val) do { \
  773.                 const int r = wa_add(dev_priv, (addr), (mask), (val)); \
  774.                 if (r) \
  775.                         return r; \
  776.         } while (0)
  777.  
  778. #define WA_SET_BIT_MASKED(addr, mask) \
  779.         WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
  780.  
  781. #define WA_CLR_BIT_MASKED(addr, mask) \
  782.         WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))
  783.  
  784. #define WA_SET_FIELD_MASKED(addr, mask, value) \
  785.         WA_REG(addr, mask, _MASKED_FIELD(mask, value))
  786.  
  787. #define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask))
  788. #define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask))
  789.  
  790. #define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)
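/*
 * Editor's note (assumed expansion, based on the masked-register helpers
 * used elsewhere in i915): registers driven through WA_SET_BIT_MASKED /
 * WA_CLR_BIT_MASKED carry a write-mask in their upper 16 bits, so e.g.
 *
 *     WA_SET_BIT_MASKED(HDC_CHICKEN0, HDC_FORCE_NON_COHERENT);
 *
 * records the value (HDC_FORCE_NON_COHERENT << 16) | HDC_FORCE_NON_COHERENT,
 * touching only that bit when the list is replayed by
 * intel_ring_workarounds_emit().
 */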
  791.  
  792. static int wa_ring_whitelist_reg(struct intel_engine_cs *ring, i915_reg_t reg)
  793. {
  794.         struct drm_i915_private *dev_priv = ring->dev->dev_private;
  795.         struct i915_workarounds *wa = &dev_priv->workarounds;
  796.         const uint32_t index = wa->hw_whitelist_count[ring->id];
  797.  
  798.         if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS))
  799.                 return -EINVAL;
  800.  
  801.         WA_WRITE(RING_FORCE_TO_NONPRIV(ring->mmio_base, index),
  802.                  i915_mmio_reg_offset(reg));
  803.         wa->hw_whitelist_count[ring->id]++;
  804.  
  805.         return 0;
  806. }
  807.  
  808. static int gen8_init_workarounds(struct intel_engine_cs *ring)
  809. {
  810.         struct drm_device *dev = ring->dev;
  811.         struct drm_i915_private *dev_priv = dev->dev_private;
  812.  
  813.         WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
  814.  
  815.         /* WaDisableAsyncFlipPerfMode:bdw,chv */
  816.         WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
  817.  
  818.         /* WaDisablePartialInstShootdown:bdw,chv */
  819.         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
  820.                           PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
  821.  
  822.         /* Use Force Non-Coherent whenever executing a 3D context. This is a
  823.          * workaround for a possible hang in the unlikely event a TLB
  824.          * invalidation occurs during a PSD flush.
  825.          */
  826.         /* WaForceEnableNonCoherent:bdw,chv */
  827.         /* WaHdcDisableFetchWhenMasked:bdw,chv */
  828.         WA_SET_BIT_MASKED(HDC_CHICKEN0,
  829.                           HDC_DONOT_FETCH_MEM_WHEN_MASKED |
  830.                           HDC_FORCE_NON_COHERENT);
  831.  
  832.         /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
  833.          * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
  834.          *  polygons in the same 8x4 pixel/sample area to be processed without
  835.          *  stalling waiting for the earlier ones to write to Hierarchical Z
  836.          *  buffer."
  837.          *
  838.          * This optimization is off by default for BDW and CHV; turn it on.
  839.          */
  840.         WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
  841.  
  842.         /* Wa4x4STCOptimizationDisable:bdw,chv */
  843.         WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
  844.  
  845.         /*
  846.          * BSpec recommends 8x4 when MSAA is used,
  847.          * however in practice 16x4 seems fastest.
  848.          *
  849.          * Note that PS/WM thread counts depend on the WIZ hashing
  850.          * disable bit, which we don't touch here, but it's good
  851.          * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
  852.          */
  853.         WA_SET_FIELD_MASKED(GEN7_GT_MODE,
  854.                             GEN6_WIZ_HASHING_MASK,
  855.                             GEN6_WIZ_HASHING_16x4);
  856.  
  857.         return 0;
  858. }
  859.  
  860. static int bdw_init_workarounds(struct intel_engine_cs *ring)
  861. {
  862.         int ret;
  863.         struct drm_device *dev = ring->dev;
  864.         struct drm_i915_private *dev_priv = dev->dev_private;
  865.  
  866.         ret = gen8_init_workarounds(ring);
  867.         if (ret)
  868.                 return ret;
  869.  
  870.         /* WaDisableThreadStallDopClockGating:bdw (pre-production) */
  871.         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
  872.  
  873.         /* WaDisableDopClockGating:bdw */
  874.         WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
  875.                           DOP_CLOCK_GATING_DISABLE);
  876.  
  877.         WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
  878.                           GEN8_SAMPLER_POWER_BYPASS_DIS);
  879.  
  880.         WA_SET_BIT_MASKED(HDC_CHICKEN0,
  881.                           /* WaForceContextSaveRestoreNonCoherent:bdw */
  882.                           HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
  883.                           /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
  884.                           (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
  885.  
  886.         return 0;
  887. }
  888.  
  889. static int chv_init_workarounds(struct intel_engine_cs *ring)
  890. {
  891.         int ret;
  892.         struct drm_device *dev = ring->dev;
  893.         struct drm_i915_private *dev_priv = dev->dev_private;
  894.  
  895.         ret = gen8_init_workarounds(ring);
  896.         if (ret)
  897.                 return ret;
  898.  
  899.         /* WaDisableThreadStallDopClockGating:chv */
  900.         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
  901.  
  902.         /* Improve HiZ throughput on CHV. */
  903.         WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
  904.  
  905.         return 0;
  906. }
  907.  
  908. static int gen9_init_workarounds(struct intel_engine_cs *ring)
  909. {
  910.         struct drm_device *dev = ring->dev;
  911.         struct drm_i915_private *dev_priv = dev->dev_private;
  912.         uint32_t tmp;
  913.         int ret;
  914.  
  915.         /* WaEnableLbsSlaRetryTimerDecrement:skl */
  916.         I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
  917.                    GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
  918.  
  919.         /* WaDisableKillLogic:bxt,skl */
  920.         I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
  921.                    ECOCHK_DIS_TLB);
  922.  
  923.         /* WaDisablePartialInstShootdown:skl,bxt */
  924.         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
  925.                           PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
  926.  
  927.         /* Syncing dependencies between camera and graphics:skl,bxt */
  928.         WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
  929.                           GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
  930.  
  931.         /* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */
  932.         if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
  933.             IS_BXT_REVID(dev, 0, BXT_REVID_A1))
  934.                 WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
  935.                                   GEN9_DG_MIRROR_FIX_ENABLE);
  936.  
  937.         /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
  938.         if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
  939.             IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
  940.                 WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
  941.                                   GEN9_RHWO_OPTIMIZATION_DISABLE);
  942.                 /*
  943.                  * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set
  944.                  * but we do that in per ctx batchbuffer as there is an issue
  945.                  * with this register not getting restored on ctx restore
  946.                  */
  947.         }
  948.  
  949.         /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
  950.         if (IS_SKL_REVID(dev, SKL_REVID_C0, REVID_FOREVER) || IS_BROXTON(dev))
  951.                 WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
  952.                                   GEN9_ENABLE_YV12_BUGFIX);
  953.  
  954.         /* Wa4x4STCOptimizationDisable:skl,bxt */
  955.         /* WaDisablePartialResolveInVc:skl,bxt */
  956.         WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
  957.                                          GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
  958.  
  959.         /* WaCcsTlbPrefetchDisable:skl,bxt */
  960.         WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
  961.                           GEN9_CCS_TLB_PREFETCH_ENABLE);
  962.  
  963.         /* WaDisableMaskBasedCammingInRCC:skl,bxt */
  964.         if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_C0) ||
  965.             IS_BXT_REVID(dev, 0, BXT_REVID_A1))
  966.                 WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
  967.                                   PIXEL_MASK_CAMMING_DISABLE);
  968.  
  969.         /* WaForceContextSaveRestoreNonCoherent:skl,bxt */
  970.         tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT;
  971.         if (IS_SKL_REVID(dev, SKL_REVID_F0, REVID_FOREVER) ||
  972.             IS_BXT_REVID(dev, BXT_REVID_B0, REVID_FOREVER))
  973.                 tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE;
  974.         WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp);
  975.  
  976.         /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt */
  977.         if (IS_SKYLAKE(dev) || IS_BXT_REVID(dev, 0, BXT_REVID_B0))
  978.                 WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
  979.                                   GEN8_SAMPLER_POWER_BYPASS_DIS);
  980.  
  981.         /* WaDisableSTUnitPowerOptimization:skl,bxt */
  982.         WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
  983.  
  984.         /* WaOCLCoherentLineFlush:skl,bxt */
  985.         I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) |
  986.                                     GEN8_LQSC_FLUSH_COHERENT_LINES));
  987.  
  988.         /* WaEnablePreemptionGranularityControlByUMD:skl,bxt */
  989.         ret = wa_ring_whitelist_reg(ring, GEN8_CS_CHICKEN1);
  990.         if (ret)
  991.                 return ret;
  992.  
  993.         /* WaAllowUMDToModifyHDCChicken1:skl,bxt */
  994.         ret = wa_ring_whitelist_reg(ring, GEN8_HDC_CHICKEN1);
  995.         if (ret)
  996.                 return ret;
  997.  
  998.         return 0;
  999. }
  1000.  
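/*
 * Editor's note: a worked example for the hashing tuning below (values are
 * illustrative).  If subslice_7eu[0] == 0x4, exactly one subslice (bit 2)
 * has 7 EUs, so ss = ffs(0x4) - 1 = 2 and vals[0] = 3 - ss = 1; a slice
 * whose mask is not a power of two (zero, or more than one such subslice)
 * is skipped and keeps vals[i] == 0.
 */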
  1001. static int skl_tune_iz_hashing(struct intel_engine_cs *ring)
  1002. {
  1003.         struct drm_device *dev = ring->dev;
  1004.         struct drm_i915_private *dev_priv = dev->dev_private;
  1005.         u8 vals[3] = { 0, 0, 0 };
  1006.         unsigned int i;
  1007.  
  1008.         for (i = 0; i < 3; i++) {
  1009.                 u8 ss;
  1010.  
  1011.                 /*
  1012.                  * Only consider slices where one, and only one, subslice has 7
  1013.                  * EUs
  1014.                  */
  1015.                 if (!is_power_of_2(dev_priv->info.subslice_7eu[i]))
  1016.                         continue;
  1017.  
  1018.                 /*
  1019.                  * subslice_7eu[i] != 0 (because of the check above) and
  1020.                  * ss_max == 4 (maximum number of subslices possible per slice)
  1021.                  *
  1022.                  * ->    0 <= ss <= 3;
  1023.                  */
  1024.                 ss = ffs(dev_priv->info.subslice_7eu[i]) - 1;
  1025.                 vals[i] = 3 - ss;
  1026.         }
  1027.  
  1028.         if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
  1029.                 return 0;
  1030.  
  1031.         /* Tune IZ hashing. See intel_device_info_runtime_init() */
  1032.         WA_SET_FIELD_MASKED(GEN7_GT_MODE,
  1033.                             GEN9_IZ_HASHING_MASK(2) |
  1034.                             GEN9_IZ_HASHING_MASK(1) |
  1035.                             GEN9_IZ_HASHING_MASK(0),
  1036.                             GEN9_IZ_HASHING(2, vals[2]) |
  1037.                             GEN9_IZ_HASHING(1, vals[1]) |
  1038.                             GEN9_IZ_HASHING(0, vals[0]));
  1039.  
  1040.         return 0;
  1041. }
  1042.  
  1043. static int skl_init_workarounds(struct intel_engine_cs *ring)
  1044. {
  1045.         int ret;
  1046.         struct drm_device *dev = ring->dev;
  1047.         struct drm_i915_private *dev_priv = dev->dev_private;
  1048.  
  1049.         ret = gen9_init_workarounds(ring);
  1050.         if (ret)
  1051.                 return ret;
  1052.  
  1053.         /*
  1054.          * The actual WA is to disable per-context preemption granularity
  1055.          * control until D0, which is the default case, so this is equivalent to
  1056.          * !WaDisablePerCtxtPreemptionGranularityControl:skl
  1057.          */
  1058.         if (IS_SKL_REVID(dev, SKL_REVID_E0, REVID_FOREVER)) {
  1059.                 I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1,
  1060.                            _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL));
  1061.         }
  1062.  
  1063.         if (IS_SKL_REVID(dev, 0, SKL_REVID_D0)) {
  1064.                 /* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
  1065.                 I915_WRITE(FF_SLICE_CS_CHICKEN2,
  1066.                            _MASKED_BIT_ENABLE(GEN9_TSG_BARRIER_ACK_DISABLE));
  1067.         }
  1068.  
  1069.         /* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes
  1070.          * involving this register should also be added to WA batch as required.
  1071.          */
  1072.         if (IS_SKL_REVID(dev, 0, SKL_REVID_E0))
  1073.                 /* WaDisableLSQCROPERFforOCL:skl */
  1074.                 I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
  1075.                            GEN8_LQSC_RO_PERF_DIS);
  1076.  
  1077.         /* WaEnableGapsTsvCreditFix:skl */
  1078.         if (IS_SKL_REVID(dev, SKL_REVID_C0, REVID_FOREVER)) {
  1079.                 I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
  1080.                                            GEN9_GAPS_TSV_CREDIT_DISABLE));
  1081.         }
  1082.  
  1083.         /* WaDisablePowerCompilerClockGating:skl */
  1084.         if (IS_SKL_REVID(dev, SKL_REVID_B0, SKL_REVID_B0))
  1085.                 WA_SET_BIT_MASKED(HIZ_CHICKEN,
  1086.                                   BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
  1087.  
  1088.         /* This is tied to WaForceContextSaveRestoreNonCoherent */
  1089.         if (IS_SKL_REVID(dev, 0, REVID_FOREVER)) {
  1090.                 /*
  1091.                  * Use Force Non-Coherent whenever executing a 3D context. This
  1092.                  * is a workaround for a possible hang in the unlikely event
  1093.                  * a TLB invalidation occurs during a PSD flush.
  1094.                  */
  1095.                 /* WaForceEnableNonCoherent:skl */
  1096.                 WA_SET_BIT_MASKED(HDC_CHICKEN0,
  1097.                                   HDC_FORCE_NON_COHERENT);
  1098.  
  1099.                 /* WaDisableHDCInvalidation:skl */
  1100.                 I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
  1101.                            BDW_DISABLE_HDC_INVALIDATION);
  1102.         }
  1103.  
  1104.         /* WaBarrierPerformanceFixDisable:skl */
  1105.         if (IS_SKL_REVID(dev, SKL_REVID_C0, SKL_REVID_D0))
  1106.                 WA_SET_BIT_MASKED(HDC_CHICKEN0,
  1107.                                   HDC_FENCE_DEST_SLM_DISABLE |
  1108.                                   HDC_BARRIER_PERFORMANCE_DISABLE);
  1109.  
  1110.         /* WaDisableSbeCacheDispatchPortSharing:skl */
  1111.         if (IS_SKL_REVID(dev, 0, SKL_REVID_F0))
  1112.                 WA_SET_BIT_MASKED(
  1113.                         GEN7_HALF_SLICE_CHICKEN1,
  1114.                         GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
  1115.  
  1116.         /* WaDisableLSQCROPERFforOCL:skl */
  1117.         ret = wa_ring_whitelist_reg(ring, GEN8_L3SQCREG4);
  1118.         if (ret)
  1119.                 return ret;
  1120.  
  1121.         return skl_tune_iz_hashing(ring);
  1122. }
  1123.  
  1124. static int bxt_init_workarounds(struct intel_engine_cs *ring)
  1125. {
  1126.         int ret;
  1127.         struct drm_device *dev = ring->dev;
  1128.         struct drm_i915_private *dev_priv = dev->dev_private;
  1129.  
  1130.         ret = gen9_init_workarounds(ring);
  1131.         if (ret)
  1132.                 return ret;
  1133.  
  1134.         /* WaStoreMultiplePTEenable:bxt */
  1135.         /* This is a requirement according to the hardware specification. */
  1136.         if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
  1137.                 I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
  1138.  
  1139.         /* WaSetClckGatingDisableMedia:bxt */
  1140.         if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
  1141.                 I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
  1142.                                             ~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
  1143.         }
  1144.  
  1145.         /* WaDisableThreadStallDopClockGating:bxt */
  1146.         WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
  1147.                           STALL_DOP_GATING_DISABLE);
  1148.  
  1149.         /* WaDisableSbeCacheDispatchPortSharing:bxt */
  1150.         if (IS_BXT_REVID(dev, 0, BXT_REVID_B0)) {
  1151.                 WA_SET_BIT_MASKED(
  1152.                         GEN7_HALF_SLICE_CHICKEN1,
  1153.                         GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
  1154.         }
  1155.  
  1156.         /* WaDisableObjectLevelPreemptionForTrifanOrPolygon:bxt */
  1157.         /* WaDisableObjectLevelPreemptionForInstancedDraw:bxt */
  1158.         /* WaDisableObjectLevelPreemtionForInstanceId:bxt */
  1159.         /* WaDisableLSQCROPERFforOCL:bxt */
  1160.         if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
  1161.                 ret = wa_ring_whitelist_reg(ring, GEN9_CS_DEBUG_MODE1);
  1162.                 if (ret)
  1163.                         return ret;
  1164.  
  1165.                 ret = wa_ring_whitelist_reg(ring, GEN8_L3SQCREG4);
  1166.                 if (ret)
  1167.                         return ret;
  1168.         }
  1169.  
  1170.         return 0;
  1171. }
  1172.  
  1173. int init_workarounds_ring(struct intel_engine_cs *ring)
  1174. {
  1175.         struct drm_device *dev = ring->dev;
  1176.         struct drm_i915_private *dev_priv = dev->dev_private;
  1177.  
  1178.         WARN_ON(ring->id != RCS);
  1179.  
  1180.         dev_priv->workarounds.count = 0;
  1181.         dev_priv->workarounds.hw_whitelist_count[RCS] = 0;
  1182.  
  1183.         if (IS_BROADWELL(dev))
  1184.                 return bdw_init_workarounds(ring);
  1185.  
  1186.         if (IS_CHERRYVIEW(dev))
  1187.                 return chv_init_workarounds(ring);
  1188.  
  1189.         if (IS_SKYLAKE(dev))
  1190.                 return skl_init_workarounds(ring);
  1191.  
  1192.         if (IS_BROXTON(dev))
  1193.                 return bxt_init_workarounds(ring);
  1194.  
  1195.         return 0;
  1196. }
  1197.  
  1198. static int init_render_ring(struct intel_engine_cs *ring)
  1199. {
  1200.         struct drm_device *dev = ring->dev;
  1201.         struct drm_i915_private *dev_priv = dev->dev_private;
  1202.         int ret = init_ring_common(ring);
  1203.         if (ret)
  1204.                 return ret;
  1205.  
  1206.         /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
  1207.         if (INTEL_INFO(dev)->gen >= 4 && INTEL_INFO(dev)->gen < 7)
  1208.                 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));
  1209.  
  1210.         /* We need to disable the AsyncFlip performance optimisations in order
  1211.          * to use MI_WAIT_FOR_EVENT within the CS. It should already be
  1212.          * programmed to '1' on all products.
  1213.          *
  1214.          * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
  1215.          */
  1216.         if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
  1217.                 I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
  1218.  
  1219.         /* Required for the hardware to program scanline values for waiting */
  1220.         /* WaEnableFlushTlbInvalidationMode:snb */
  1221.         if (INTEL_INFO(dev)->gen == 6)
  1222.                 I915_WRITE(GFX_MODE,
  1223.                            _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));
  1224.  
  1225.         /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
  1226.         if (IS_GEN7(dev))
  1227.                 I915_WRITE(GFX_MODE_GEN7,
  1228.                            _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
  1229.                            _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
  1230.  
  1231.         if (IS_GEN6(dev)) {
  1232.                 /* From the Sandybridge PRM, volume 1 part 3, page 24:
  1233.                  * "If this bit is set, STCunit will have LRA as replacement
  1234.                  *  policy. [...] This bit must be reset.  LRA replacement
  1235.                  *  policy is not supported."
  1236.                  */
  1237.                 I915_WRITE(CACHE_MODE_0,
  1238.                            _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
  1239.         }
  1240.  
  1241.         if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
  1242.                 I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
  1243.  
  1244.         if (HAS_L3_DPF(dev))
  1245.                 I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
  1246.  
  1247.         return init_workarounds_ring(ring);
  1248. }
  1249.  
  1250. static void render_ring_cleanup(struct intel_engine_cs *ring)
  1251. {
  1252.         struct drm_device *dev = ring->dev;
  1253.         struct drm_i915_private *dev_priv = dev->dev_private;
  1254.  
  1255.         if (dev_priv->semaphore_obj) {
  1256.                 i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
  1257.                 drm_gem_object_unreference(&dev_priv->semaphore_obj->base);
  1258.                 dev_priv->semaphore_obj = NULL;
  1259.         }
  1260.  
  1261.         intel_fini_pipe_control(ring);
  1262. }
  1263.  
  1264. static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
  1265.                            unsigned int num_dwords)
  1266. {
  1267. #define MBOX_UPDATE_DWORDS 8
  1268.         struct intel_engine_cs *signaller = signaller_req->ring;
  1269.         struct drm_device *dev = signaller->dev;
  1270.         struct drm_i915_private *dev_priv = dev->dev_private;
  1271.         struct intel_engine_cs *waiter;
  1272.         int i, ret, num_rings;
  1273.  
  1274.         num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
  1275.         num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
  1276. #undef MBOX_UPDATE_DWORDS
  1277.  
  1278.         ret = intel_ring_begin(signaller_req, num_dwords);
  1279.         if (ret)
  1280.                 return ret;
  1281.  
  1282.         for_each_ring(waiter, dev_priv, i) {
  1283.                 u32 seqno;
  1284.                 u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
  1285.                 if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
  1286.                         continue;
  1287.  
  1288.                 seqno = i915_gem_request_get_seqno(signaller_req);
  1289.                 intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
  1290.                 intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
  1291.                                            PIPE_CONTROL_QW_WRITE |
  1292.                                            PIPE_CONTROL_FLUSH_ENABLE);
  1293.                 intel_ring_emit(signaller, lower_32_bits(gtt_offset));
  1294.                 intel_ring_emit(signaller, upper_32_bits(gtt_offset));
  1295.                 intel_ring_emit(signaller, seqno);
  1296.                 intel_ring_emit(signaller, 0);
  1297.                 intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
  1298.                                            MI_SEMAPHORE_TARGET(waiter->id));
  1299.                 intel_ring_emit(signaller, 0);
  1300.         }
  1301.  
  1302.         return 0;
  1303. }
  1304.  
  1305. static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
  1306.                            unsigned int num_dwords)
  1307. {
  1308. #define MBOX_UPDATE_DWORDS 6
  1309.         struct intel_engine_cs *signaller = signaller_req->ring;
  1310.         struct drm_device *dev = signaller->dev;
  1311.         struct drm_i915_private *dev_priv = dev->dev_private;
  1312.         struct intel_engine_cs *waiter;
  1313.         int i, ret, num_rings;
  1314.  
  1315.         num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
  1316.         num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
  1317. #undef MBOX_UPDATE_DWORDS
  1318.  
  1319.         ret = intel_ring_begin(signaller_req, num_dwords);
  1320.         if (ret)
  1321.                 return ret;
  1322.  
  1323.         for_each_ring(waiter, dev_priv, i) {
  1324.                 u32 seqno;
  1325.                 u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
  1326.                 if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
  1327.                         continue;
  1328.  
  1329.                 seqno = i915_gem_request_get_seqno(signaller_req);
  1330.                 intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
  1331.                                            MI_FLUSH_DW_OP_STOREDW);
  1332.                 intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
  1333.                                            MI_FLUSH_DW_USE_GTT);
  1334.                 intel_ring_emit(signaller, upper_32_bits(gtt_offset));
  1335.                 intel_ring_emit(signaller, seqno);
  1336.                 intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
  1337.                                            MI_SEMAPHORE_TARGET(waiter->id));
  1338.                 intel_ring_emit(signaller, 0);
  1339.         }
  1340.  
  1341.         return 0;
  1342. }
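
/*
 * Per-waiter dword accounting for the two gen8 signal paths above, spelled
 * out for reference (counted from the emits, not quoted from the bspec):
 * gen8_rcs_signal() writes the seqno with a 6-dword PIPE_CONTROL QW write
 * followed by a 2-dword MI_SEMAPHORE_SIGNAL, hence MBOX_UPDATE_DWORDS == 8,
 * while gen8_xcs_signal() uses a 4-dword MI_FLUSH_DW store plus the same
 * 2-dword signal, hence MBOX_UPDATE_DWORDS == 6. Both scale that budget by
 * (num_rings - 1) because every ring other than the signaller gets its own
 * semaphore slot.
 */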
  1343.  
  1344. static int gen6_signal(struct drm_i915_gem_request *signaller_req,
  1345.                        unsigned int num_dwords)
  1346. {
  1347.         struct intel_engine_cs *signaller = signaller_req->ring;
  1348.         struct drm_device *dev = signaller->dev;
  1349.         struct drm_i915_private *dev_priv = dev->dev_private;
  1350.         struct intel_engine_cs *useless;
  1351.         int i, ret, num_rings;
  1352.  
  1353. #define MBOX_UPDATE_DWORDS 3
  1354.         num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
  1355.         num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
  1356. #undef MBOX_UPDATE_DWORDS
  1357.  
  1358.         ret = intel_ring_begin(signaller_req, num_dwords);
  1359.         if (ret)
  1360.                 return ret;
  1361.  
  1362.         for_each_ring(useless, dev_priv, i) {
  1363.                 i915_reg_t mbox_reg = signaller->semaphore.mbox.signal[i];
  1364.  
  1365.                 if (i915_mmio_reg_valid(mbox_reg)) {
  1366.                         u32 seqno = i915_gem_request_get_seqno(signaller_req);
  1367.  
  1368.                         intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
  1369.                         intel_ring_emit_reg(signaller, mbox_reg);
  1370.                         intel_ring_emit(signaller, seqno);
  1371.                 }
  1372.         }
  1373.  
  1374.         /* If num_dwords was rounded, make sure the tail pointer is correct */
  1375.         if (num_rings % 2 == 0)
  1376.                 intel_ring_emit(signaller, MI_NOOP);
  1377.  
  1378.         return 0;
  1379. }
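
/*
 * A worked sizing example for gen6_signal() above (illustrative numbers,
 * assuming a device exposing four rings): each other ring costs 3 dwords
 * (MI_LOAD_REGISTER_IMM(1), the mailbox register, the seqno), so the extra
 * space is (4 - 1) * 3 == 9 dwords, rounded up to 10 to keep the total emit
 * count even. The loop then emits the 9 mailbox dwords, and because
 * num_rings is even the trailing MI_NOOP consumes the one padding dword,
 * so the dwords actually emitted match the dwords reserved.
 */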
  1380.  
  1381. /**
  1382.  * gen6_add_request - Update the semaphore mailbox registers
  1383.  *
  1384.  * @req: request to write to the ring
  1385.  *
  1386.  * Update the mailbox registers in the *other* rings with the current seqno.
  1387.  * This acts like a signal in the canonical semaphore.
  1388.  */
  1389. static int
  1390. gen6_add_request(struct drm_i915_gem_request *req)
  1391. {
  1392.         struct intel_engine_cs *ring = req->ring;
  1393.         int ret;
  1394.  
  1395.         if (ring->semaphore.signal)
  1396.                 ret = ring->semaphore.signal(req, 4);
  1397.         else
  1398.                 ret = intel_ring_begin(req, 4);
  1399.  
  1400.         if (ret)
  1401.                 return ret;
  1402.  
  1403.         intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
  1404.         intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
  1405.         intel_ring_emit(ring, i915_gem_request_get_seqno(req));
  1406.         intel_ring_emit(ring, MI_USER_INTERRUPT);
  1407.         __intel_ring_advance(ring);
  1408.  
  1409.         return 0;
  1410. }
  1411.  
  1412. static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
  1413.                                               u32 seqno)
  1414. {
  1415.         struct drm_i915_private *dev_priv = dev->dev_private;
  1416.         return dev_priv->last_seqno < seqno;
  1417. }
  1418.  
  1419. /**
  1420.  * intel_ring_sync - sync the waiter to the signaller on seqno
  1421.  *
  1422.  * @waiter_req: request on the ring that is waiting
  1423.  * @signaller: ring which has signalled, or will signal
  1424.  * @seqno: seqno which the waiter will block on
  1425.  */
  1426.  
  1427. static int
  1428. gen8_ring_sync(struct drm_i915_gem_request *waiter_req,
  1429.                struct intel_engine_cs *signaller,
  1430.                u32 seqno)
  1431. {
  1432.         struct intel_engine_cs *waiter = waiter_req->ring;
  1433.         struct drm_i915_private *dev_priv = waiter->dev->dev_private;
  1434.         int ret;
  1435.  
  1436.         ret = intel_ring_begin(waiter_req, 4);
  1437.         if (ret)
  1438.                 return ret;
  1439.  
  1440.         intel_ring_emit(waiter, MI_SEMAPHORE_WAIT |
  1441.                                 MI_SEMAPHORE_GLOBAL_GTT |
  1442.                                 MI_SEMAPHORE_POLL |
  1443.                                 MI_SEMAPHORE_SAD_GTE_SDD);
  1444.         intel_ring_emit(waiter, seqno);
  1445.         intel_ring_emit(waiter,
  1446.                         lower_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
  1447.         intel_ring_emit(waiter,
  1448.                         upper_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
  1449.         intel_ring_advance(waiter);
  1450.         return 0;
  1451. }
  1452.  
  1453. static int
  1454. gen6_ring_sync(struct drm_i915_gem_request *waiter_req,
  1455.                struct intel_engine_cs *signaller,
  1456.                u32 seqno)
  1457. {
  1458.         struct intel_engine_cs *waiter = waiter_req->ring;
  1459.         u32 dw1 = MI_SEMAPHORE_MBOX |
  1460.                   MI_SEMAPHORE_COMPARE |
  1461.                   MI_SEMAPHORE_REGISTER;
  1462.         u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id];
  1463.         int ret;
  1464.  
  1465.         /* Throughout all of the GEM code, seqno passed implies our current
  1466.          * seqno is >= the last seqno executed. However for hardware the
  1467.          * comparison is strictly greater than.
  1468.          */
  1469.         seqno -= 1;
  1470.  
  1471.         WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
  1472.  
  1473.         ret = intel_ring_begin(waiter_req, 4);
  1474.         if (ret)
  1475.                 return ret;
  1476.  
  1477.         /* If seqno wrap happened, omit the wait with no-ops */
  1478.         if (likely(!i915_gem_has_seqno_wrapped(waiter->dev, seqno))) {
  1479.                 intel_ring_emit(waiter, dw1 | wait_mbox);
  1480.                 intel_ring_emit(waiter, seqno);
  1481.                 intel_ring_emit(waiter, 0);
  1482.                 intel_ring_emit(waiter, MI_NOOP);
  1483.         } else {
  1484.                 intel_ring_emit(waiter, MI_NOOP);
  1485.                 intel_ring_emit(waiter, MI_NOOP);
  1486.                 intel_ring_emit(waiter, MI_NOOP);
  1487.                 intel_ring_emit(waiter, MI_NOOP);
  1488.         }
  1489.         intel_ring_advance(waiter);
  1490.  
  1491.         return 0;
  1492. }
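
/*
 * A worked example of the seqno adjustment in gen6_ring_sync() above
 * (illustrative numbers): to wait until GEM seqno 100 has been signalled,
 * the ring emits a semaphore wait on 99, because the hardware compare is
 * strictly greater-than; the waiter is released only once the mailbox holds
 * 100 or more. If the adjusted seqno is ahead of the last seqno ever
 * assigned (a wrap), the wait is replaced by four MI_NOOPs instead of
 * risking a wait that would never complete.
 */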
  1493.  
  1494. #define PIPE_CONTROL_FLUSH(ring__, addr__)                                      \
  1495. do {                                                                    \
  1496.         intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |                \
  1497.                  PIPE_CONTROL_DEPTH_STALL);                             \
  1498.         intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);                    \
  1499.         intel_ring_emit(ring__, 0);                                                     \
  1500.         intel_ring_emit(ring__, 0);                                                     \
  1501. } while (0)
  1502.  
  1503. static int
  1504. pc_render_add_request(struct drm_i915_gem_request *req)
  1505. {
  1506.         struct intel_engine_cs *ring = req->ring;
  1507.         u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
  1508.         int ret;
  1509.  
  1510.         /* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
  1511.          * incoherent with writes to memory, i.e. completely fubar,
  1512.          * so we need to use PIPE_NOTIFY instead.
  1513.          *
  1514.          * However, we also need to workaround the qword write
  1515.          * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
  1516.          * memory before requesting an interrupt.
  1517.          */
  1518.         ret = intel_ring_begin(req, 32);
  1519.         if (ret)
  1520.                 return ret;
  1521.  
  1522.         intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
  1523.                         PIPE_CONTROL_WRITE_FLUSH |
  1524.                         PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
  1525.         intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
  1526.         intel_ring_emit(ring, i915_gem_request_get_seqno(req));
  1527.         intel_ring_emit(ring, 0);
  1528.         PIPE_CONTROL_FLUSH(ring, scratch_addr);
  1529.         scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
  1530.         PIPE_CONTROL_FLUSH(ring, scratch_addr);
  1531.         scratch_addr += 2 * CACHELINE_BYTES;
  1532.         PIPE_CONTROL_FLUSH(ring, scratch_addr);
  1533.         scratch_addr += 2 * CACHELINE_BYTES;
  1534.         PIPE_CONTROL_FLUSH(ring, scratch_addr);
  1535.         scratch_addr += 2 * CACHELINE_BYTES;
  1536.         PIPE_CONTROL_FLUSH(ring, scratch_addr);
  1537.         scratch_addr += 2 * CACHELINE_BYTES;
  1538.         PIPE_CONTROL_FLUSH(ring, scratch_addr);
  1539.  
  1540.         intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
  1541.                         PIPE_CONTROL_WRITE_FLUSH |
  1542.                         PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
  1543.                         PIPE_CONTROL_NOTIFY);
  1544.         intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
  1545.         intel_ring_emit(ring, i915_gem_request_get_seqno(req));
  1546.         intel_ring_emit(ring, 0);
  1547.         __intel_ring_advance(ring);
  1548.  
  1549.         return 0;
  1550. }
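
/*
 * Scratch layout used by pc_render_add_request() above, spelled out
 * (simple arithmetic from the code, with CACHELINE_BYTES == 64):
 * scratch_addr starts 128 bytes past scratch.gtt_offset and each
 * PIPE_CONTROL_FLUSH advances it by another 128, so the six flushes land at
 * offsets +128, +256, +384, +512, +640 and +768 of the scratch object, each
 * qword write in a separate cacheline as the workaround requires. The dword
 * budget also adds up: 4 for the first PIPE_CONTROL, 6 * 4 for the flushes
 * and 4 for the final PIPE_CONTROL_NOTIFY, i.e. the 32 dwords passed to
 * intel_ring_begin().
 */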
  1551.  
  1552. static u32
  1553. gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
  1554. {
  1555.         /* Workaround to force correct ordering between irq and seqno writes on
  1556.          * ivb (and maybe also on snb) by reading from a CS register (like
  1557.          * ACTHD) before reading the status page. */
  1558.         if (!lazy_coherency) {
  1559.                 struct drm_i915_private *dev_priv = ring->dev->dev_private;
  1560.                 POSTING_READ(RING_ACTHD(ring->mmio_base));
  1561.         }
  1562.  
  1563.         return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
  1564. }
  1565.  
  1566. static u32
  1567. ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
  1568. {
  1569.         return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
  1570. }
  1571.  
  1572. static void
  1573. ring_set_seqno(struct intel_engine_cs *ring, u32 seqno)
  1574. {
  1575.         intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
  1576. }
  1577.  
  1578. static u32
  1579. pc_render_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
  1580. {
  1581.         return ring->scratch.cpu_page[0];
  1582. }
  1583.  
  1584. static void
  1585. pc_render_set_seqno(struct intel_engine_cs *ring, u32 seqno)
  1586. {
  1587.         ring->scratch.cpu_page[0] = seqno;
  1588. }
  1589.  
  1590. static bool
  1591. gen5_ring_get_irq(struct intel_engine_cs *ring)
  1592. {
  1593.         struct drm_device *dev = ring->dev;
  1594.         struct drm_i915_private *dev_priv = dev->dev_private;
  1595.         unsigned long flags;
  1596.  
  1597.         if (WARN_ON(!intel_irqs_enabled(dev_priv)))
  1598.                 return false;
  1599.  
  1600.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1601.         if (ring->irq_refcount++ == 0)
  1602.                 gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
  1603.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1604.  
  1605.         return true;
  1606. }
  1607.  
  1608. static void
  1609. gen5_ring_put_irq(struct intel_engine_cs *ring)
  1610. {
  1611.         struct drm_device *dev = ring->dev;
  1612.         struct drm_i915_private *dev_priv = dev->dev_private;
  1613.         unsigned long flags;
  1614.  
  1615.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1616.         if (--ring->irq_refcount == 0)
  1617.                 gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
  1618.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1619. }
  1620.  
  1621. static bool
  1622. i9xx_ring_get_irq(struct intel_engine_cs *ring)
  1623. {
  1624.         struct drm_device *dev = ring->dev;
  1625.         struct drm_i915_private *dev_priv = dev->dev_private;
  1626.         unsigned long flags;
  1627.  
  1628.         if (!intel_irqs_enabled(dev_priv))
  1629.                 return false;
  1630.  
  1631.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1632.         if (ring->irq_refcount++ == 0) {
  1633.                 dev_priv->irq_mask &= ~ring->irq_enable_mask;
  1634.                 I915_WRITE(IMR, dev_priv->irq_mask);
  1635.                 POSTING_READ(IMR);
  1636.         }
  1637.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1638.  
  1639.         return true;
  1640. }
  1641.  
  1642. static void
  1643. i9xx_ring_put_irq(struct intel_engine_cs *ring)
  1644. {
  1645.         struct drm_device *dev = ring->dev;
  1646.         struct drm_i915_private *dev_priv = dev->dev_private;
  1647.         unsigned long flags;
  1648.  
  1649.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1650.         if (--ring->irq_refcount == 0) {
  1651.                 dev_priv->irq_mask |= ring->irq_enable_mask;
  1652.                 I915_WRITE(IMR, dev_priv->irq_mask);
  1653.                 POSTING_READ(IMR);
  1654.         }
  1655.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1656. }
  1657.  
  1658. static bool
  1659. i8xx_ring_get_irq(struct intel_engine_cs *ring)
  1660. {
  1661.         struct drm_device *dev = ring->dev;
  1662.         struct drm_i915_private *dev_priv = dev->dev_private;
  1663.         unsigned long flags;
  1664.  
  1665.         if (!intel_irqs_enabled(dev_priv))
  1666.                 return false;
  1667.  
  1668.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1669.         if (ring->irq_refcount++ == 0) {
  1670.                 dev_priv->irq_mask &= ~ring->irq_enable_mask;
  1671.                 I915_WRITE16(IMR, dev_priv->irq_mask);
  1672.                 POSTING_READ16(IMR);
  1673.         }
  1674.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1675.  
  1676.         return true;
  1677. }
  1678.  
  1679. static void
  1680. i8xx_ring_put_irq(struct intel_engine_cs *ring)
  1681. {
  1682.         struct drm_device *dev = ring->dev;
  1683.         struct drm_i915_private *dev_priv = dev->dev_private;
  1684.         unsigned long flags;
  1685.  
  1686.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1687.         if (--ring->irq_refcount == 0) {
  1688.                 dev_priv->irq_mask |= ring->irq_enable_mask;
  1689.                 I915_WRITE16(IMR, dev_priv->irq_mask);
  1690.                 POSTING_READ16(IMR);
  1691.         }
  1692.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1693. }
  1694.  
  1695. static int
  1696. bsd_ring_flush(struct drm_i915_gem_request *req,
  1697.                u32     invalidate_domains,
  1698.                u32     flush_domains)
  1699. {
  1700.         struct intel_engine_cs *ring = req->ring;
  1701.         int ret;
  1702.  
  1703.         ret = intel_ring_begin(req, 2);
  1704.         if (ret)
  1705.                 return ret;
  1706.  
  1707.         intel_ring_emit(ring, MI_FLUSH);
  1708.         intel_ring_emit(ring, MI_NOOP);
  1709.         intel_ring_advance(ring);
  1710.         return 0;
  1711. }
  1712.  
  1713. static int
  1714. i9xx_add_request(struct drm_i915_gem_request *req)
  1715. {
  1716.         struct intel_engine_cs *ring = req->ring;
  1717.         int ret;
  1718.  
  1719.         ret = intel_ring_begin(req, 4);
  1720.         if (ret)
  1721.                 return ret;
  1722.  
  1723.         intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
  1724.         intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
  1725.         intel_ring_emit(ring, i915_gem_request_get_seqno(req));
  1726.         intel_ring_emit(ring, MI_USER_INTERRUPT);
  1727.         __intel_ring_advance(ring);
  1728.  
  1729.         return 0;
  1730. }
  1731.  
  1732. static bool
  1733. gen6_ring_get_irq(struct intel_engine_cs *ring)
  1734. {
  1735.         struct drm_device *dev = ring->dev;
  1736.         struct drm_i915_private *dev_priv = dev->dev_private;
  1737.         unsigned long flags;
  1738.  
  1739.         if (WARN_ON(!intel_irqs_enabled(dev_priv)))
  1740.                 return false;
  1741.  
  1742.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1743.         if (ring->irq_refcount++ == 0) {
  1744.                 if (HAS_L3_DPF(dev) && ring->id == RCS)
  1745.                         I915_WRITE_IMR(ring,
  1746.                                        ~(ring->irq_enable_mask |
  1747.                                          GT_PARITY_ERROR(dev)));
  1748.                 else
  1749.                         I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
  1750.                 gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
  1751.         }
  1752.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1753.  
  1754.         return true;
  1755. }
  1756.  
  1757. static void
  1758. gen6_ring_put_irq(struct intel_engine_cs *ring)
  1759. {
  1760.         struct drm_device *dev = ring->dev;
  1761.         struct drm_i915_private *dev_priv = dev->dev_private;
  1762.         unsigned long flags;
  1763.  
  1764.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1765.         if (--ring->irq_refcount == 0) {
  1766.                 if (HAS_L3_DPF(dev) && ring->id == RCS)
  1767.                         I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
  1768.                 else
  1769.                         I915_WRITE_IMR(ring, ~0);
  1770.                 gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
  1771.         }
  1772.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1773. }
  1774.  
  1775. static bool
  1776. hsw_vebox_get_irq(struct intel_engine_cs *ring)
  1777. {
  1778.         struct drm_device *dev = ring->dev;
  1779.         struct drm_i915_private *dev_priv = dev->dev_private;
  1780.         unsigned long flags;
  1781.  
  1782.         if (WARN_ON(!intel_irqs_enabled(dev_priv)))
  1783.                 return false;
  1784.  
  1785.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1786.         if (ring->irq_refcount++ == 0) {
  1787.                 I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
  1788.                 gen6_enable_pm_irq(dev_priv, ring->irq_enable_mask);
  1789.         }
  1790.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1791.  
  1792.         return true;
  1793. }
  1794.  
  1795. static void
  1796. hsw_vebox_put_irq(struct intel_engine_cs *ring)
  1797. {
  1798.         struct drm_device *dev = ring->dev;
  1799.         struct drm_i915_private *dev_priv = dev->dev_private;
  1800.         unsigned long flags;
  1801.  
  1802.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1803.         if (--ring->irq_refcount == 0) {
  1804.                 I915_WRITE_IMR(ring, ~0);
  1805.                 gen6_disable_pm_irq(dev_priv, ring->irq_enable_mask);
  1806.         }
  1807.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1808. }
  1809.  
  1810. static bool
  1811. gen8_ring_get_irq(struct intel_engine_cs *ring)
  1812. {
  1813.         struct drm_device *dev = ring->dev;
  1814.         struct drm_i915_private *dev_priv = dev->dev_private;
  1815.         unsigned long flags;
  1816.  
  1817.         if (WARN_ON(!intel_irqs_enabled(dev_priv)))
  1818.                 return false;
  1819.  
  1820.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1821.         if (ring->irq_refcount++ == 0) {
  1822.                 if (HAS_L3_DPF(dev) && ring->id == RCS) {
  1823.                         I915_WRITE_IMR(ring,
  1824.                                        ~(ring->irq_enable_mask |
  1825.                                          GT_RENDER_L3_PARITY_ERROR_INTERRUPT));
  1826.                 } else {
  1827.                         I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
  1828.                 }
  1829.                 POSTING_READ(RING_IMR(ring->mmio_base));
  1830.         }
  1831.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1832.  
  1833.         return true;
  1834. }
  1835.  
  1836. static void
  1837. gen8_ring_put_irq(struct intel_engine_cs *ring)
  1838. {
  1839.         struct drm_device *dev = ring->dev;
  1840.         struct drm_i915_private *dev_priv = dev->dev_private;
  1841.         unsigned long flags;
  1842.  
  1843.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1844.         if (--ring->irq_refcount == 0) {
  1845.                 if (HAS_L3_DPF(dev) && ring->id == RCS) {
  1846.                         I915_WRITE_IMR(ring,
  1847.                                        ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
  1848.                 } else {
  1849.                         I915_WRITE_IMR(ring, ~0);
  1850.                 }
  1851.                 POSTING_READ(RING_IMR(ring->mmio_base));
  1852.         }
  1853.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1854. }
  1855.  
  1856. static int
  1857. i965_dispatch_execbuffer(struct drm_i915_gem_request *req,
  1858.                          u64 offset, u32 length,
  1859.                          unsigned dispatch_flags)
  1860. {
  1861.         struct intel_engine_cs *ring = req->ring;
  1862.         int ret;
  1863.  
  1864.         ret = intel_ring_begin(req, 2);
  1865.         if (ret)
  1866.                 return ret;
  1867.  
  1868.         intel_ring_emit(ring,
  1869.                         MI_BATCH_BUFFER_START |
  1870.                         MI_BATCH_GTT |
  1871.                         (dispatch_flags & I915_DISPATCH_SECURE ?
  1872.                          0 : MI_BATCH_NON_SECURE_I965));
  1873.         intel_ring_emit(ring, offset);
  1874.         intel_ring_advance(ring);
  1875.  
  1876.         return 0;
  1877. }
  1878.  
  1879. /* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
  1880. #define I830_BATCH_LIMIT (256*1024)
  1881. #define I830_TLB_ENTRIES (2)
  1882. #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
  1883. static int
  1884. i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
  1885.                          u64 offset, u32 len,
  1886.                          unsigned dispatch_flags)
  1887. {
  1888.         struct intel_engine_cs *ring = req->ring;
  1889.         u32 cs_offset = ring->scratch.gtt_offset;
  1890.         int ret;
  1891.  
  1892.         ret = intel_ring_begin(req, 6);
  1893.         if (ret)
  1894.                 return ret;
  1895.  
  1896.         /* Evict the invalid PTE TLBs */
  1897.         intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA);
  1898.         intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096);
  1899.         intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */
  1900.         intel_ring_emit(ring, cs_offset);
  1901.         intel_ring_emit(ring, 0xdeadbeef);
  1902.         intel_ring_emit(ring, MI_NOOP);
  1903.         intel_ring_advance(ring);
  1904.  
  1905.         if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
  1906.                 if (len > I830_BATCH_LIMIT)
  1907.                         return -ENOSPC;
  1908.  
  1909.                 ret = intel_ring_begin(req, 6 + 2);
  1910.                 if (ret)
  1911.                         return ret;
  1912.  
  1913.                 /* Blit the batch (which now has all relocs applied) to the
  1914.                  * stable batch scratch bo area (so that the CS never
  1915.                  * stumbles over its tlb invalidation bug) ...
  1916.                  */
  1917.                 intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA);
  1918.                 intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096);
  1919.                 intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096);
  1920.                 intel_ring_emit(ring, cs_offset);
  1921.                 intel_ring_emit(ring, 4096);
  1922.                 intel_ring_emit(ring, offset);
  1923.  
  1924.                 intel_ring_emit(ring, MI_FLUSH);
  1925.                 intel_ring_emit(ring, MI_NOOP);
  1926.                 intel_ring_advance(ring);
  1927.  
  1928.                 /* ... and execute it. */
  1929.                 offset = cs_offset;
  1930.         }
  1931.  
  1932.         ret = intel_ring_begin(req, 2);
  1933.         if (ret)
  1934.                 return ret;
  1935.  
  1936.         intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
  1937.         intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
  1938.                                         0 : MI_BATCH_NON_SECURE));
  1939.         intel_ring_advance(ring);
  1940.  
  1941.         return 0;
  1942. }
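
/*
 * Rough accounting for the i830 workaround above (a reading of the code,
 * not of the erratum text itself): the first 6-dword blit writes a tiny
 * pattern into each of I830_TLB_ENTRIES (2) pages of the scratch area,
 * which is what reloads the stale PTE TLB entries. If the batch is not
 * already pinned somewhere safe, a second blit copies it page by page,
 * DIV_ROUND_UP(len, 4096) rows of 4096 bytes, into the scratch area, which
 * is why batches are capped at I830_BATCH_LIMIT (256 KiB); execution then
 * starts from the stable copy at cs_offset instead of the original batch.
 */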
  1943.  
  1944. static int
  1945. i915_dispatch_execbuffer(struct drm_i915_gem_request *req,
  1946.                          u64 offset, u32 len,
  1947.                          unsigned dispatch_flags)
  1948. {
  1949.         struct intel_engine_cs *ring = req->ring;
  1950.         int ret;
  1951.  
  1952.         ret = intel_ring_begin(req, 2);
  1953.         if (ret)
  1954.                 return ret;
  1955.  
  1956.         intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
  1957.         intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
  1958.                                         0 : MI_BATCH_NON_SECURE));
  1959.         intel_ring_advance(ring);
  1960.  
  1961.         return 0;
  1962. }
  1963.  
  1964. static void cleanup_phys_status_page(struct intel_engine_cs *ring)
  1965. {
  1966.         struct drm_i915_private *dev_priv = to_i915(ring->dev);
  1967.  
  1968.         if (!dev_priv->status_page_dmah)
  1969.                 return;
  1970.  
  1971.         drm_pci_free(ring->dev, dev_priv->status_page_dmah);
  1972.         ring->status_page.page_addr = NULL;
  1973. }
  1974.  
  1975. static void cleanup_status_page(struct intel_engine_cs *ring)
  1976. {
  1977.         struct drm_i915_gem_object *obj;
  1978.  
  1979.         obj = ring->status_page.obj;
  1980.         if (obj == NULL)
  1981.                 return;
  1982.  
  1983.         kunmap(sg_page(obj->pages->sgl));
  1984.         i915_gem_object_ggtt_unpin(obj);
  1985.         drm_gem_object_unreference(&obj->base);
  1986.         ring->status_page.obj = NULL;
  1987. }
  1988.  
  1989. static int init_status_page(struct intel_engine_cs *ring)
  1990. {
  1991.         struct drm_i915_gem_object *obj = ring->status_page.obj;
  1992.  
  1993.         if (obj == NULL) {
  1994.                 unsigned flags;
  1995.                 int ret;
  1996.  
  1997.                 obj = i915_gem_alloc_object(ring->dev, 4096);
  1998.                 if (obj == NULL) {
  1999.                         DRM_ERROR("Failed to allocate status page\n");
  2000.                         return -ENOMEM;
  2001.                 }
  2002.  
  2003.                 ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
  2004.                 if (ret)
  2005.                         goto err_unref;
  2006.  
  2007.                 flags = 0;
  2008.                 if (!HAS_LLC(ring->dev))
  2009.                         /* On g33, we cannot place HWS above 256MiB, so
  2010.                          * restrict its pinning to the low mappable arena.
  2011.                          * Though this restriction is not documented for
  2012.                          * gen4, gen5, or byt, they also behave similarly
  2013.                          * and hang if the HWS is placed at the top of the
  2014.                          * GTT. To generalise, it appears that all !llc
  2015.                          * platforms have issues with us placing the HWS
  2016.                          * above the mappable region (even though we never
  2017.                          * actually map it).
  2018.                          */
  2019.                         flags |= PIN_MAPPABLE;
  2020.                 ret = i915_gem_obj_ggtt_pin(obj, 4096, flags);
  2021.                 if (ret) {
  2022. err_unref:
  2023.                         drm_gem_object_unreference(&obj->base);
  2024.                         return ret;
  2025.                 }
  2026.  
  2027.                 ring->status_page.obj = obj;
  2028.         }
  2029.  
  2030.         ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
  2031.         ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
  2032.         memset(ring->status_page.page_addr, 0, PAGE_SIZE);
  2033.  
  2034.         DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
  2035.                         ring->name, ring->status_page.gfx_addr);
  2036.  
  2037.         return 0;
  2038. }
  2039.  
  2040. static int init_phys_status_page(struct intel_engine_cs *ring)
  2041. {
  2042.         struct drm_i915_private *dev_priv = ring->dev->dev_private;
  2043.  
  2044.         if (!dev_priv->status_page_dmah) {
  2045.                 dev_priv->status_page_dmah =
  2046.                         drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE);
  2047.                 if (!dev_priv->status_page_dmah)
  2048.                         return -ENOMEM;
  2049.         }
  2050.  
  2051.         ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
  2052.         memset(ring->status_page.page_addr, 0, PAGE_SIZE);
  2053.  
  2054.         return 0;
  2055. }
  2056.  
  2057. void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
  2058. {
  2059.         if (HAS_LLC(ringbuf->obj->base.dev) && !ringbuf->obj->stolen)
  2060.                 vunmap(ringbuf->virtual_start);
  2061.         else
  2062.                 iounmap(ringbuf->virtual_start);
  2063.         ringbuf->virtual_start = NULL;
  2064.         ringbuf->vma = NULL;
  2065.         i915_gem_object_ggtt_unpin(ringbuf->obj);
  2066. }
  2067.  
  2068. static u32 *vmap_obj(struct drm_i915_gem_object *obj)
  2069. {
  2070.         struct sg_page_iter sg_iter;
  2071.         struct page **pages;
  2072.         void *addr;
  2073.         int i;
  2074.  
  2075.         pages = drm_malloc_ab(obj->base.size >> PAGE_SHIFT, sizeof(*pages));
  2076.         if (pages == NULL)
  2077.                 return NULL;
  2078.  
  2079.         i = 0;
  2080.         for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0)
  2081.                 pages[i++] = sg_page_iter_page(&sg_iter);
  2082.  
  2083.         addr = vmap(pages, i, 0, PAGE_KERNEL);
  2084.         drm_free_large(pages);
  2085.  
  2086.         return addr;
  2087. }
  2088.  
  2089. int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
  2090.                                      struct intel_ringbuffer *ringbuf)
  2091. {
  2092.         struct drm_i915_private *dev_priv = to_i915(dev);
  2093.         struct drm_i915_gem_object *obj = ringbuf->obj;
  2094.         /* Ring wraparound at offset 0 sometimes hangs. No idea why. */
  2095.         unsigned flags = PIN_OFFSET_BIAS | 4096;
  2096.         int ret;
  2097.  
  2098.         if (HAS_LLC(dev_priv) && !obj->stolen) {
  2099.                 ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, flags);
  2100.                 if (ret)
  2101.                         return ret;
  2102.  
  2103.                 ret = i915_gem_object_set_to_cpu_domain(obj, true);
  2104.                 if (ret) {
  2105.                         i915_gem_object_ggtt_unpin(obj);
  2106.                         return ret;
  2107.                 }
  2108.  
  2109.                 ringbuf->virtual_start = vmap_obj(obj);
  2110.                 if (ringbuf->virtual_start == NULL) {
  2111.                         i915_gem_object_ggtt_unpin(obj);
  2112.                         return -ENOMEM;
  2113.                 }
  2114.         } else {
  2115.                 ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE,
  2116.                                             flags | PIN_MAPPABLE);
  2117.                 if (ret)
  2118.                         return ret;
  2119.  
  2120.                 ret = i915_gem_object_set_to_gtt_domain(obj, true);
  2121.                 if (ret) {
  2122.                         i915_gem_object_ggtt_unpin(obj);
  2123.                         return ret;
  2124.                 }
  2125.  
  2126.                 /* Access through the GTT requires the device to be awake. */
  2127.                 assert_rpm_wakelock_held(dev_priv);
  2128.  
  2129.                 ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
  2130.                                                     i915_gem_obj_ggtt_offset(obj), ringbuf->size);
  2131.                 if (ringbuf->virtual_start == NULL) {
  2132.                         i915_gem_object_ggtt_unpin(obj);
  2133.                         return -EINVAL;
  2134.                 }
  2135.         }
  2136.  
  2137.         ringbuf->vma = i915_gem_obj_to_ggtt(obj);
  2138.  
  2139.         return 0;
  2140. }
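
/*
 * A note on the pinning above (an interpretation of the flags used there):
 * PIN_OFFSET_BIAS | 4096 asks the GGTT allocator for a placement at offset
 * 4096 or above, i.e. never in the first page, which is how the
 * "wraparound at offset 0" hang mentioned above is avoided. LLC platforms
 * then map the ring through a kernel vmap of its backing pages and keep it
 * in the CPU domain; !LLC platforms additionally need PIN_MAPPABLE and a
 * write-combined ioremap of the GTT aperture, which is why the runtime-PM
 * wakelock assertion sits on that path only.
 */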
  2141.  
  2142. static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
  2143. {
  2144.         drm_gem_object_unreference(&ringbuf->obj->base);
  2145.         ringbuf->obj = NULL;
  2146. }
  2147.  
  2148. static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
  2149.                                       struct intel_ringbuffer *ringbuf)
  2150. {
  2151.         struct drm_i915_gem_object *obj;
  2152.  
  2153.         obj = NULL;
  2154.         if (!HAS_LLC(dev))
  2155.                 obj = i915_gem_object_create_stolen(dev, ringbuf->size);
  2156.         if (obj == NULL)
  2157.                 obj = i915_gem_alloc_object(dev, ringbuf->size);
  2158.         if (obj == NULL)
  2159.                 return -ENOMEM;
  2160.  
  2161.         /* mark ring buffers as read-only from GPU side by default */
  2162.         obj->gt_ro = 1;
  2163.  
  2164.         ringbuf->obj = obj;
  2165.  
  2166.         return 0;
  2167. }
  2168.  
  2169. struct intel_ringbuffer *
  2170. intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size)
  2171. {
  2172.         struct intel_ringbuffer *ring;
  2173.         int ret;
  2174.  
  2175.         ring = kzalloc(sizeof(*ring), GFP_KERNEL);
  2176.         if (ring == NULL) {
  2177.                 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
  2178.                                  engine->name);
  2179.                 return ERR_PTR(-ENOMEM);
  2180.         }
  2181.  
  2182.         ring->ring = engine;
  2183.         list_add(&ring->link, &engine->buffers);
  2184.  
  2185.         ring->size = size;
  2186.         /* Workaround an erratum on the i830 which causes a hang if
  2187.          * the TAIL pointer points to within the last 2 cachelines
  2188.          * of the buffer.
  2189.          */
  2190.         ring->effective_size = size;
  2191.         if (IS_I830(engine->dev) || IS_845G(engine->dev))
  2192.                 ring->effective_size -= 2 * CACHELINE_BYTES;
  2193.  
  2194.         ring->last_retired_head = -1;
  2195.         intel_ring_update_space(ring);
  2196.  
  2197.         ret = intel_alloc_ringbuffer_obj(engine->dev, ring);
  2198.         if (ret) {
  2199.                 DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n",
  2200.                                  engine->name, ret);
  2201.                 list_del(&ring->link);
  2202.                 kfree(ring);
  2203.                 return ERR_PTR(ret);
  2204.         }
  2205.  
  2206.         return ring;
  2207. }
  2208.  
  2209. void
  2210. intel_ringbuffer_free(struct intel_ringbuffer *ring)
  2211. {
  2212.         intel_destroy_ringbuffer_obj(ring);
  2213.         list_del(&ring->link);
  2214.         kfree(ring);
  2215. }
  2216.  
  2217. static int intel_init_ring_buffer(struct drm_device *dev,
  2218.                                   struct intel_engine_cs *ring)
  2219. {
  2220.         struct intel_ringbuffer *ringbuf;
  2221.         int ret;
  2222.  
  2223.         WARN_ON(ring->buffer);
  2224.  
  2225.         ring->dev = dev;
  2226.         INIT_LIST_HEAD(&ring->active_list);
  2227.         INIT_LIST_HEAD(&ring->request_list);
  2228.         INIT_LIST_HEAD(&ring->execlist_queue);
  2229.         INIT_LIST_HEAD(&ring->buffers);
  2230.         i915_gem_batch_pool_init(dev, &ring->batch_pool);
  2231.         memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
  2232.  
  2233.         init_waitqueue_head(&ring->irq_queue);
  2234.  
  2235.         ringbuf = intel_engine_create_ringbuffer(ring, 32 * PAGE_SIZE);
  2236.         if (IS_ERR(ringbuf)) {
  2237.                 ret = PTR_ERR(ringbuf);
  2238.                 goto error;
  2239.         }
  2240.         ring->buffer = ringbuf;
  2241.  
  2242.         if (I915_NEED_GFX_HWS(dev)) {
  2243.                 ret = init_status_page(ring);
  2244.                 if (ret)
  2245.                         goto error;
  2246.         } else {
  2247.                 WARN_ON(ring->id != RCS);
  2248.                 ret = init_phys_status_page(ring);
  2249.                 if (ret)
  2250.                         goto error;
  2251.         }
  2252.  
  2253.         ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
  2254.         if (ret) {
  2255.                 DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
  2256.                                 ring->name, ret);
  2257.                 intel_destroy_ringbuffer_obj(ringbuf);
  2258.                 goto error;
  2259.         }
  2260.  
  2261.         ret = i915_cmd_parser_init_ring(ring);
  2262.         if (ret)
  2263.                 goto error;
  2264.  
  2265.         return 0;
  2266.  
  2267. error:
  2268.         intel_cleanup_ring_buffer(ring);
  2269.         return ret;
  2270. }
  2271.  
  2272. void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
  2273. {
  2274.         struct drm_i915_private *dev_priv;
  2275.  
  2276.         if (!intel_ring_initialized(ring))
  2277.                 return;
  2278.  
  2279.         dev_priv = to_i915(ring->dev);
  2280.  
  2281.         if (ring->buffer) {
  2282.                 intel_stop_ring_buffer(ring);
  2283.                 WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
  2284.  
  2285.                 intel_unpin_ringbuffer_obj(ring->buffer);
  2286.                 intel_ringbuffer_free(ring->buffer);
  2287.                 ring->buffer = NULL;
  2288.         }
  2289.  
  2290.         if (ring->cleanup)
  2291.                 ring->cleanup(ring);
  2292.  
  2293.         if (I915_NEED_GFX_HWS(ring->dev)) {
  2294.                 cleanup_status_page(ring);
  2295.         } else {
  2296.                 WARN_ON(ring->id != RCS);
  2297.                 cleanup_phys_status_page(ring);
  2298.         }
  2299.  
  2300.         i915_cmd_parser_fini_ring(ring);
  2301.         i915_gem_batch_pool_fini(&ring->batch_pool);
  2302.         ring->dev = NULL;
  2303. }
  2304.  
  2305. static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
  2306. {
  2307.         struct intel_ringbuffer *ringbuf = ring->buffer;
  2308.         struct drm_i915_gem_request *request;
  2309.         unsigned space;
  2310.         int ret;
  2311.  
  2312.         if (intel_ring_space(ringbuf) >= n)
  2313.                 return 0;
  2314.  
  2315.         /* The whole point of reserving space is to not wait! */
  2316.         WARN_ON(ringbuf->reserved_in_use);
  2317.  
  2318.         list_for_each_entry(request, &ring->request_list, list) {
  2319.                 space = __intel_ring_space(request->postfix, ringbuf->tail,
  2320.                                            ringbuf->size);
  2321.                 if (space >= n)
  2322.                         break;
  2323.         }
  2324.  
  2325.         if (WARN_ON(&request->list == &ring->request_list))
  2326.                 return -ENOSPC;
  2327.  
  2328.         ret = i915_wait_request(request);
  2329.         if (ret)
  2330.                 return ret;
  2331.  
  2332.         ringbuf->space = space;
  2333.         return 0;
  2334. }
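
/*
 * An illustrative walk of ring_wait_for_space() above (made-up numbers):
 * with a 32768-byte ring, tail == 32000 and n == 1024, the free space is
 * insufficient, so the loop scans the request list oldest first. A request
 * whose postfix sits at offset 4096 would, once retired, leave
 * __intel_ring_space(4096, 32000, 32768) == 4864 - I915_RING_FREE_SPACE
 * bytes available, enough for n, so that is the request we wait for before
 * updating ringbuf->space.
 */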
  2335.  
  2336. static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
  2337. {
  2338.         uint32_t __iomem *virt;
  2339.         int rem = ringbuf->size - ringbuf->tail;
  2340.  
  2341.         virt = ringbuf->virtual_start + ringbuf->tail;
  2342.         rem /= 4;
  2343.         while (rem--)
  2344.                 iowrite32(MI_NOOP, virt++);
  2345.  
  2346.         ringbuf->tail = 0;
  2347.         intel_ring_update_space(ringbuf);
  2348. }
  2349.  
  2350. int intel_ring_idle(struct intel_engine_cs *ring)
  2351. {
  2352.         struct drm_i915_gem_request *req;
  2353.  
  2354.         /* Wait upon the last request to be completed */
  2355.         if (list_empty(&ring->request_list))
  2356.                 return 0;
  2357.  
  2358.         req = list_entry(ring->request_list.prev,
  2359.                         struct drm_i915_gem_request,
  2360.                         list);
  2361.  
  2362.         /* Make sure we do not trigger any retires */
  2363.         return __i915_wait_request(req,
  2364.                                    atomic_read(&to_i915(ring->dev)->gpu_error.reset_counter),
  2365.                                    to_i915(ring->dev)->mm.interruptible,
  2366.                                    NULL, NULL);
  2367. }
  2368.  
  2369. int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
  2370. {
  2371.         request->ringbuf = request->ring->buffer;
  2372.         return 0;
  2373. }
  2374.  
  2375. int intel_ring_reserve_space(struct drm_i915_gem_request *request)
  2376. {
  2377.         /*
  2378.          * The first call merely notes the reserve request and is common for
  2379.          * all back ends. The subsequent localised _begin() call actually
  2380.          * ensures that the reservation is available. Without the begin, if
  2381.          * the request creator immediately submitted the request without
  2382.          * adding any commands to it, there might not actually be
  2383.          * sufficient room for the submission commands.
  2384.          */
  2385.         intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
  2386.  
  2387.         return intel_ring_begin(request, 0);
  2388. }
  2389.  
  2390. void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size)
  2391. {
  2392.         WARN_ON(ringbuf->reserved_size);
  2393.         WARN_ON(ringbuf->reserved_in_use);
  2394.  
  2395.         ringbuf->reserved_size = size;
  2396. }
  2397.  
  2398. void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf)
  2399. {
  2400.         WARN_ON(ringbuf->reserved_in_use);
  2401.  
  2402.         ringbuf->reserved_size   = 0;
  2403.         ringbuf->reserved_in_use = false;
  2404. }
  2405.  
  2406. void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf)
  2407. {
  2408.         WARN_ON(ringbuf->reserved_in_use);
  2409.  
  2410.         ringbuf->reserved_in_use = true;
  2411.         ringbuf->reserved_tail   = ringbuf->tail;
  2412. }
  2413.  
  2414. void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
  2415. {
  2416.         WARN_ON(!ringbuf->reserved_in_use);
  2417.         if (ringbuf->tail > ringbuf->reserved_tail) {
  2418.                 WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
  2419.                      "request reserved size too small: %d vs %d!\n",
  2420.                      ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
  2421.         } else {
  2422.                 /*
  2423.                  * The ring was wrapped while the reserved space was in use.
  2424.                  * That means that some unknown amount of the ring tail was
  2425.                  * no-op filled and skipped. Thus simply adding the ring size
  2426.                  * to the tail and doing the above space check will not work.
  2427.                  * Rather than attempt to track how much tail was skipped,
  2428.                  * it is much simpler to accept that the sanity check is
  2429.                  * also skipped every once in a while; that is not a big issue.
  2430.                  */
  2431.         }
  2432.  
  2433.         ringbuf->reserved_size   = 0;
  2434.         ringbuf->reserved_in_use = false;
  2435. }
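
/*
 * A sketch of how the four reserved-space helpers above fit together. The
 * call sites live in the request code elsewhere in the driver, so treat the
 * ordering below as an illustration of the intended lifecycle rather than a
 * quotation of those callers:
 *
 *      intel_ring_reserve_space(req);           when the request is created
 *      ... caller emits its own commands ...
 *      intel_ring_reserved_space_use(ringbuf);  just before the add-request
 *                                               commands are emitted
 *      ... emit the request-completion commands ...
 *      intel_ring_reserved_space_end(ringbuf);  verify the estimate held
 *
 * with intel_ring_reserved_space_cancel() taking the place of use/end when
 * a request is abandoned before submission.
 */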
  2436.  
  2437. static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)
  2438. {
  2439.         struct intel_ringbuffer *ringbuf = ring->buffer;
  2440.         int remain_usable = ringbuf->effective_size - ringbuf->tail;
  2441.         int remain_actual = ringbuf->size - ringbuf->tail;
  2442.         int ret, total_bytes, wait_bytes = 0;
  2443.         bool need_wrap = false;
  2444.  
  2445.         if (ringbuf->reserved_in_use)
  2446.                 total_bytes = bytes;
  2447.         else
  2448.                 total_bytes = bytes + ringbuf->reserved_size;
  2449.  
  2450.         if (unlikely(bytes > remain_usable)) {
  2451.                 /*
  2452.                  * Not enough space for the basic request. So need to flush
  2453.                  * out the remainder and then wait for base + reserved.
  2454.                  */
  2455.                 wait_bytes = remain_actual + total_bytes;
  2456.                 need_wrap = true;
  2457.         } else {
  2458.                 if (unlikely(total_bytes > remain_usable)) {
  2459.                         /*
  2460.                          * The base request will fit but the reserved space
  2461.                          * falls off the end. So don't need an immediate wrap
  2462.                          * and only need to effectively wait for the reserved
  2463.                          * size space from the start of ringbuffer.
  2464.                          */
  2465.                         wait_bytes = remain_actual + ringbuf->reserved_size;
  2466.                 } else if (total_bytes > ringbuf->space) {
  2467.                         /* No wrapping required, just waiting. */
  2468.                         wait_bytes = total_bytes;
  2469.                 }
  2470.         }
  2471.  
  2472.         if (wait_bytes) {
  2473.                 ret = ring_wait_for_space(ring, wait_bytes);
  2474.                 if (unlikely(ret))
  2475.                         return ret;
  2476.  
  2477.                 if (need_wrap)
  2478.                         __wrap_ring_buffer(ringbuf);
  2479.         }
  2480.  
  2481.         return 0;
  2482. }
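
/*
 * A worked example of the wrap/wait decision in __intel_ring_prepare()
 * above (illustrative numbers, ignoring the i830 effective_size trim):
 * take a 4096-byte ring with tail == 4000 and no reservation in use, so
 * remain_usable == remain_actual == 96. A request of bytes == 64 with
 * reserved_size == 64 gives total_bytes == 128: the basic request still
 * fits (64 <= 96), so no wrap is forced, but since total_bytes exceeds the
 * usable remainder we wait for remain_actual + reserved_size == 160 bytes
 * and let the reserved part spill past the wrap later. Had bytes been 128,
 * bytes > remain_usable would force an immediate wrap and a wait for
 * remain_actual + total_bytes == 288 bytes.
 */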
  2483.  
  2484. int intel_ring_begin(struct drm_i915_gem_request *req,
  2485.                      int num_dwords)
  2486. {
  2487.         struct intel_engine_cs *ring;
  2488.         struct drm_i915_private *dev_priv;
  2489.         int ret;
  2490.  
  2491.         WARN_ON(req == NULL);
  2492.         ring = req->ring;
  2493.         dev_priv = ring->dev->dev_private;
  2494.  
  2495.         ret = i915_gem_check_wedge(&dev_priv->gpu_error,
  2496.                                    dev_priv->mm.interruptible);
  2497.         if (ret)
  2498.                 return ret;
  2499.  
  2500.         ret = __intel_ring_prepare(ring, num_dwords * sizeof(uint32_t));
  2501.         if (ret)
  2502.                 return ret;
  2503.  
  2504.         ring->buffer->space -= num_dwords * sizeof(uint32_t);
  2505.         return 0;
  2506. }
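
/*
 * The canonical emission pattern built on intel_ring_begin(), as used
 * throughout this file (a minimal sketch; "req" is a request obtained by
 * the caller):
 *
 *      ret = intel_ring_begin(req, 2);
 *      if (ret)
 *              return ret;
 *      intel_ring_emit(req->ring, MI_NOOP);
 *      intel_ring_emit(req->ring, MI_NOOP);
 *      intel_ring_advance(req->ring);
 *
 * intel_ring_begin() reserves the dwords (waiting or wrapping if needed),
 * each intel_ring_emit() writes one dword at the current software tail,
 * and intel_ring_advance() wraps the tail modulo the ring size once the
 * batch of emits is complete.
 */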
  2507.  
  2508. /* Align the ring tail to a cacheline boundary */
  2509. int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
  2510. {
  2511.         struct intel_engine_cs *ring = req->ring;
  2512.         int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
  2513.         int ret;
  2514.  
  2515.         if (num_dwords == 0)
  2516.                 return 0;
  2517.  
  2518.         num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
  2519.         ret = intel_ring_begin(req, num_dwords);
  2520.         if (ret)
  2521.                 return ret;
  2522.  
  2523.         while (num_dwords--)
  2524.                 intel_ring_emit(ring, MI_NOOP);
  2525.  
  2526.         intel_ring_advance(ring);
  2527.  
  2528.         return 0;
  2529. }
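
/*
 * Alignment arithmetic for intel_ring_cacheline_align() above, assuming
 * CACHELINE_BYTES == 64 (illustrative): a tail 40 bytes into a cacheline
 * yields num_dwords == 10, which the helper rewrites to 16 - 10 == 6, so
 * six MI_NOOPs (24 bytes) are emitted and the tail lands exactly on the
 * next 64-byte boundary; a tail that is already aligned emits nothing.
 */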
  2530.  
  2531. void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
  2532. {
  2533.         struct drm_device *dev = ring->dev;
  2534.         struct drm_i915_private *dev_priv = dev->dev_private;
  2535.  
  2536.         if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
  2537.                 I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
  2538.                 I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
  2539.                 if (HAS_VEBOX(dev))
  2540.                         I915_WRITE(RING_SYNC_2(ring->mmio_base), 0);
  2541.         }
  2542.  
  2543.         ring->set_seqno(ring, seqno);
  2544.         ring->hangcheck.seqno = seqno;
  2545. }
  2546.  
  2547. static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring,
  2548.                                      u32 value)
  2549. {
  2550.         struct drm_i915_private *dev_priv = ring->dev->dev_private;
  2551.  
  2552.         /* Every tail move must follow the sequence below */
  2553.  
  2554.         /* Disable notification that the ring is IDLE. The GT
  2555.          * will then assume that it is busy and bring it out of rc6.
  2556.          */
  2557.         I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
  2558.                    _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
  2559.  
  2560.         /* Clear the context id. Here be magic! */
  2561.         I915_WRITE64(GEN6_BSD_RNCID, 0x0);
  2562.  
  2563.         /* Wait for the ring not to be idle, i.e. for it to wake up. */
  2564.         if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
  2565.                       GEN6_BSD_SLEEP_INDICATOR) == 0,
  2566.                      50))
  2567.                 DRM_ERROR("timed out waiting for the BSD ring to wake up\n");
  2568.  
  2569.         /* Now that the ring is fully powered up, update the tail */
  2570.         I915_WRITE_TAIL(ring, value);
  2571.         POSTING_READ(RING_TAIL(ring->mmio_base));
  2572.  
  2573.         /* Let the ring send IDLE messages to the GT again,
  2574.          * and so let it sleep to conserve power when idle.
  2575.          */
  2576.         I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
  2577.                    _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
  2578. }
  2579.  
  2580. static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
  2581.                                u32 invalidate, u32 flush)
  2582. {
  2583.         struct intel_engine_cs *ring = req->ring;
  2584.         uint32_t cmd;
  2585.         int ret;
  2586.  
  2587.         ret = intel_ring_begin(req, 4);
  2588.         if (ret)
  2589.                 return ret;
  2590.  
  2591.         cmd = MI_FLUSH_DW;
  2592.         if (INTEL_INFO(ring->dev)->gen >= 8)
  2593.                 cmd += 1;
  2594.  
  2595.         /* We always require a command barrier so that subsequent
  2596.          * commands, such as breadcrumb interrupts, are strictly ordered
  2597.          * wrt the contents of the write cache being flushed to memory
  2598.          * (and thus being coherent from the CPU).
  2599.          */
  2600.         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
  2601.  
  2602.         /*
  2603.          * Bspec vol 1c.5 - video engine command streamer:
  2604.          * "If ENABLED, all TLBs will be invalidated once the flush
  2605.          * operation is complete. This bit is only valid when the
  2606.          * Post-Sync Operation field is a value of 1h or 3h."
  2607.          */
  2608.         if (invalidate & I915_GEM_GPU_DOMAINS)
  2609.                 cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
  2610.  
  2611.         intel_ring_emit(ring, cmd);
  2612.         intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
  2613.         if (INTEL_INFO(ring->dev)->gen >= 8) {
  2614.                 intel_ring_emit(ring, 0); /* upper addr */
  2615.                 intel_ring_emit(ring, 0); /* value */
  2616.         } else {
  2617.                 intel_ring_emit(ring, 0);
  2618.                 intel_ring_emit(ring, MI_NOOP);
  2619.         }
  2620.         intel_ring_advance(ring);
  2621.         return 0;
  2622. }
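
/*
 * Dword layout of the MI_FLUSH_DW emitted above, as read from the emits:
 * dword 0 is the command (with MI_FLUSH_DW_STORE_INDEX and
 * MI_FLUSH_DW_OP_STOREDW, plus the TLB/BSD invalidate bits when requested),
 * dword 1 is the HWS scratch address tagged MI_FLUSH_DW_USE_GTT, and the
 * last two dwords are the upper address and the immediate value on gen8
 * (hence cmd += 1 for the longer command) or zero plus MI_NOOP padding on
 * gen6/7.
 */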
  2623.  
  2624. static int
  2625. gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
  2626.                               u64 offset, u32 len,
  2627.                               unsigned dispatch_flags)
  2628. {
  2629.         struct intel_engine_cs *ring = req->ring;
  2630.         bool ppgtt = USES_PPGTT(ring->dev) &&
  2631.                         !(dispatch_flags & I915_DISPATCH_SECURE);
  2632.         int ret;
  2633.  
  2634.         ret = intel_ring_begin(req, 4);
  2635.         if (ret)
  2636.                 return ret;
  2637.  
  2638.         /* FIXME(BDW): Address space and security selectors. */
  2639.         intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
  2640.                         (dispatch_flags & I915_DISPATCH_RS ?
  2641.                          MI_BATCH_RESOURCE_STREAMER : 0));
  2642.         intel_ring_emit(ring, lower_32_bits(offset));
  2643.         intel_ring_emit(ring, upper_32_bits(offset));
  2644.         intel_ring_emit(ring, MI_NOOP);
  2645.         intel_ring_advance(ring);
  2646.  
  2647.         return 0;
  2648. }
  2649.  
  2650. static int
  2651. hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
  2652.                              u64 offset, u32 len,
  2653.                              unsigned dispatch_flags)
  2654. {
  2655.         struct intel_engine_cs *ring = req->ring;
  2656.         int ret;
  2657.  
  2658.         ret = intel_ring_begin(req, 2);
  2659.         if (ret)
  2660.                 return ret;
  2661.  
  2662.         intel_ring_emit(ring,
  2663.                         MI_BATCH_BUFFER_START |
  2664.                         (dispatch_flags & I915_DISPATCH_SECURE ?
  2665.                          0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
  2666.                         (dispatch_flags & I915_DISPATCH_RS ?
  2667.                          MI_BATCH_RESOURCE_STREAMER : 0));
  2668.         /* bits 0-7 hold the length on GEN6+ */
  2669.         intel_ring_emit(ring, offset);
  2670.         intel_ring_advance(ring);
  2671.  
  2672.         return 0;
  2673. }
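/*
 * Editor's note: the nested ternaries above, restated as a small standalone
 * helper.  The flag and bit values below stand in for I915_DISPATCH_SECURE,
 * I915_DISPATCH_RS, MI_BATCH_PPGTT_HSW, MI_BATCH_NON_SECURE_HSW and
 * MI_BATCH_RESOURCE_STREAMER and are placeholders; only the selection logic
 * is taken from the code: a secure batch adds no extra bits, a normal batch
 * gets the PPGTT + non-secure bits, and the resource-streamer bit is
 * selected independently.
 */
#include <stdint.h>

#define SKETCH_DISPATCH_SECURE    (1u << 0)   /* assumed flag layout */
#define SKETCH_DISPATCH_RS        (1u << 1)
#define SKETCH_PPGTT_HSW          (1u << 8)   /* placeholder bit positions */
#define SKETCH_NON_SECURE_HSW     (1u << 13)
#define SKETCH_RESOURCE_STREAMER  (1u << 10)

static uint32_t sketch_hsw_bb_start_flags(unsigned int dispatch_flags)
{
        uint32_t bits = 0;

        if (!(dispatch_flags & SKETCH_DISPATCH_SECURE))
                bits |= SKETCH_PPGTT_HSW | SKETCH_NON_SECURE_HSW;
        if (dispatch_flags & SKETCH_DISPATCH_RS)
                bits |= SKETCH_RESOURCE_STREAMER;

        return bits;
}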
  2674.  
  2675. static int
  2676. gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
  2677.                               u64 offset, u32 len,
  2678.                               unsigned dispatch_flags)
  2679. {
  2680.         struct intel_engine_cs *ring = req->ring;
  2681.         int ret;
  2682.  
  2683.         ret = intel_ring_begin(req, 2);
  2684.         if (ret)
  2685.                 return ret;
  2686.  
  2687.         intel_ring_emit(ring,
  2688.                         MI_BATCH_BUFFER_START |
  2689.                         (dispatch_flags & I915_DISPATCH_SECURE ?
  2690.                          0 : MI_BATCH_NON_SECURE_I965));
  2691.         /* bits 0-7 are the length on GEN6+ */
  2692.         intel_ring_emit(ring, offset);
  2693.         intel_ring_advance(ring);
  2694.  
  2695.         return 0;
  2696. }
  2697.  
  2698. /* Blitter support (SandyBridge+) */
  2699.  
  2700. static int gen6_ring_flush(struct drm_i915_gem_request *req,
  2701.                            u32 invalidate, u32 flush)
  2702. {
  2703.         struct intel_engine_cs *ring = req->ring;
  2704.         struct drm_device *dev = ring->dev;
  2705.         uint32_t cmd;
  2706.         int ret;
  2707.  
  2708.         ret = intel_ring_begin(req, 4);
  2709.         if (ret)
  2710.                 return ret;
  2711.  
  2712.         cmd = MI_FLUSH_DW;
  2713.         if (INTEL_INFO(dev)->gen >= 8)
  2714.                 cmd += 1;
  2715.  
  2716.         /* We always require a command barrier so that subsequent
  2717.          * commands, such as breadcrumb interrupts, are strictly ordered
  2718.          * wrt the contents of the write cache being flushed to memory
  2719.          * (and thus being coherent from the CPU).
  2720.          */
  2721.         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
  2722.  
  2723.         /*
  2724.          * Bspec vol 1c.3 - blitter engine command streamer:
  2725.          * "If ENABLED, all TLBs will be invalidated once the flush
  2726.          * operation is complete. This bit is only valid when the
  2727.          * Post-Sync Operation field is a value of 1h or 3h."
  2728.          */
  2729.         if (invalidate & I915_GEM_DOMAIN_RENDER)
  2730.                 cmd |= MI_INVALIDATE_TLB;
  2731.         intel_ring_emit(ring, cmd);
  2732.         intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
  2733.         if (INTEL_INFO(dev)->gen >= 8) {
  2734.                 intel_ring_emit(ring, 0); /* upper addr */
  2735.                 intel_ring_emit(ring, 0); /* value */
  2736.         } else {
  2737.                 intel_ring_emit(ring, 0);
  2738.                 intel_ring_emit(ring, MI_NOOP);
  2739.         }
  2740.         intel_ring_advance(ring);
  2741.  
  2742.         return 0;
  2743. }
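/*
 * Editor's note: a standalone restatement of when the two MI_FLUSH_DW
 * variants above set MI_INVALIDATE_TLB.  The domain masks are placeholders
 * standing in for I915_GEM_GPU_DOMAINS and I915_GEM_DOMAIN_RENDER; the
 * decision itself is taken from the code above, and in both cases the
 * invalidate bit is only valid because the post-sync store-dword operation
 * is always selected (per the Bspec note quoted above).
 */
#include <stdint.h>
#include <stdbool.h>

#define SKETCH_DOMAIN_RENDER  (1u << 1)   /* assumed domain encoding */
#define SKETCH_GPU_DOMAINS    0x3eu

enum sketch_flush_engine { SKETCH_FLUSH_BSD, SKETCH_FLUSH_BLT };

static bool sketch_needs_tlb_invalidate(enum sketch_flush_engine engine,
                                        uint32_t invalidate)
{
        if (engine == SKETCH_FLUSH_BSD)
                return invalidate & SKETCH_GPU_DOMAINS;    /* any GPU domain */
        return invalidate & SKETCH_DOMAIN_RENDER;          /* blitter: render only */
}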
  2744.  
  2745. int intel_init_render_ring_buffer(struct drm_device *dev)
  2746. {
  2747.         struct drm_i915_private *dev_priv = dev->dev_private;
  2748.         struct intel_engine_cs *ring = &dev_priv->ring[RCS];
  2749.         struct drm_i915_gem_object *obj;
  2750.         int ret;
  2751.  
  2752.         ring->name = "render ring";
  2753.         ring->id = RCS;
  2754.         ring->exec_id = I915_EXEC_RENDER;
  2755.         ring->mmio_base = RENDER_RING_BASE;
  2756.  
  2757.         if (INTEL_INFO(dev)->gen >= 8) {
  2758.                 if (i915_semaphore_is_enabled(dev)) {
  2759.                         obj = i915_gem_alloc_object(dev, 4096);
  2760.                         if (obj == NULL) {
  2761.                                 DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n");
  2762.                                 i915.semaphores = 0;
  2763.                         } else {
  2764.                                 i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
  2765.                                 ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK);
  2766.                                 if (ret != 0) {
  2767.                                         drm_gem_object_unreference(&obj->base);
  2768.                                         DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
  2769.                                         i915.semaphores = 0;
  2770.                                 } else
  2771.                                         dev_priv->semaphore_obj = obj;
  2772.                         }
  2773.                 }
  2774.  
  2775.                 ring->init_context = intel_rcs_ctx_init;
  2776.                 ring->add_request = gen6_add_request;
  2777.                 ring->flush = gen8_render_ring_flush;
  2778.                 ring->irq_get = gen8_ring_get_irq;
  2779.                 ring->irq_put = gen8_ring_put_irq;
  2780.                 ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
  2781.                 ring->get_seqno = gen6_ring_get_seqno;
  2782.                 ring->set_seqno = ring_set_seqno;
  2783.                 if (i915_semaphore_is_enabled(dev)) {
  2784.                         WARN_ON(!dev_priv->semaphore_obj);
  2785.                         ring->semaphore.sync_to = gen8_ring_sync;
  2786.                         ring->semaphore.signal = gen8_rcs_signal;
  2787.                         GEN8_RING_SEMAPHORE_INIT;
  2788.                 }
  2789.         } else if (INTEL_INFO(dev)->gen >= 6) {
  2790.                 ring->init_context = intel_rcs_ctx_init;
  2791.                 ring->add_request = gen6_add_request;
  2792.                 ring->flush = gen7_render_ring_flush;
  2793.                 if (INTEL_INFO(dev)->gen == 6)
  2794.                         ring->flush = gen6_render_ring_flush;
  2795.                 ring->irq_get = gen6_ring_get_irq;
  2796.                 ring->irq_put = gen6_ring_put_irq;
  2797.                 ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
  2798.                 ring->get_seqno = gen6_ring_get_seqno;
  2799.                 ring->set_seqno = ring_set_seqno;
  2800.                 if (i915_semaphore_is_enabled(dev)) {
  2801.                         ring->semaphore.sync_to = gen6_ring_sync;
  2802.                         ring->semaphore.signal = gen6_signal;
  2803.                         /*
  2804.                          * This semaphore scheme is only used on pre-gen8
  2805.                          * platforms, and there is no VCS2 ring on pre-gen8
  2806.                          * hardware, so the semaphore between RCS and VCS2 is
  2807.                          * initialized as INVALID.  Gen8 initializes the
  2808.                          * semaphore between VCS2 and RCS separately.
  2809.                          */
  2810.                         ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID;
  2811.                         ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_RV;
  2812.                         ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_RB;
  2813.                         ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_RVE;
  2814.                         ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
  2815.                         ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC;
  2816.                         ring->semaphore.mbox.signal[VCS] = GEN6_VRSYNC;
  2817.                         ring->semaphore.mbox.signal[BCS] = GEN6_BRSYNC;
  2818.                         ring->semaphore.mbox.signal[VECS] = GEN6_VERSYNC;
  2819.                         ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
  2820.                 }
  2821.         } else if (IS_GEN5(dev)) {
  2822.                 ring->add_request = pc_render_add_request;
  2823.                 ring->flush = gen4_render_ring_flush;
  2824.                 ring->get_seqno = pc_render_get_seqno;
  2825.                 ring->set_seqno = pc_render_set_seqno;
  2826.                 ring->irq_get = gen5_ring_get_irq;
  2827.                 ring->irq_put = gen5_ring_put_irq;
  2828.                 ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT |
  2829.                                         GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
  2830.         } else {
  2831.                 ring->add_request = i9xx_add_request;
  2832.                 if (INTEL_INFO(dev)->gen < 4)
  2833.                         ring->flush = gen2_render_ring_flush;
  2834.                 else
  2835.                         ring->flush = gen4_render_ring_flush;
  2836.                 ring->get_seqno = ring_get_seqno;
  2837.                 ring->set_seqno = ring_set_seqno;
  2838.                 if (IS_GEN2(dev)) {
  2839.                         ring->irq_get = i8xx_ring_get_irq;
  2840.                         ring->irq_put = i8xx_ring_put_irq;
  2841.                 } else {
  2842.                         ring->irq_get = i9xx_ring_get_irq;
  2843.                         ring->irq_put = i9xx_ring_put_irq;
  2844.                 }
  2845.                 ring->irq_enable_mask = I915_USER_INTERRUPT;
  2846.         }
  2847.         ring->write_tail = ring_write_tail;
  2848.  
  2849.         if (IS_HASWELL(dev))
  2850.                 ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
  2851.         else if (IS_GEN8(dev))
  2852.                 ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
  2853.         else if (INTEL_INFO(dev)->gen >= 6)
  2854.                 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
  2855.         else if (INTEL_INFO(dev)->gen >= 4)
  2856.                 ring->dispatch_execbuffer = i965_dispatch_execbuffer;
  2857.         else if (IS_I830(dev) || IS_845G(dev))
  2858.                 ring->dispatch_execbuffer = i830_dispatch_execbuffer;
  2859.         else
  2860.                 ring->dispatch_execbuffer = i915_dispatch_execbuffer;
  2861.         ring->init_hw = init_render_ring;
  2862.         ring->cleanup = render_ring_cleanup;
  2863.  
  2864.         /* Workaround batchbuffer to combat the CS TLB bug. */
  2865.         if (HAS_BROKEN_CS_TLB(dev)) {
  2866.                 obj = i915_gem_alloc_object(dev, I830_WA_SIZE);
  2867.                 if (obj == NULL) {
  2868.                         DRM_ERROR("Failed to allocate batch bo\n");
  2869.                         return -ENOMEM;
  2870.                 }
  2871.  
  2872.                 ret = i915_gem_obj_ggtt_pin(obj, 0, 0);
  2873.                 if (ret != 0) {
  2874.                         drm_gem_object_unreference(&obj->base);
  2875.                         DRM_ERROR("Failed to pin batch bo\n");
  2876.                         return ret;
  2877.                 }
  2878.  
  2879.                 ring->scratch.obj = obj;
  2880.                 ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
  2881.         }
  2882.  
  2883.         ret = intel_init_ring_buffer(dev, ring);
  2884.         if (ret)
  2885.                 return ret;
  2886.  
  2887.         if (INTEL_INFO(dev)->gen >= 5) {
  2888.                 ret = intel_init_pipe_control(ring);
  2889.                 if (ret)
  2890.                         return ret;
  2891.         }
  2892.  
  2893.         return 0;
  2894. }
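/*
 * Editor's note: the per-engine mailbox tables above form a matrix with an
 * invalid diagonal (an engine never waits on itself) and invalid VCS2
 * entries on pre-gen8 parts, which have no second BSD ring.  A standalone
 * sketch of that shape, using hypothetical register ids in place of the
 * MI_SEMAPHORE_SYNC_* selectors; the RCS table above corresponds to
 * self == S_RCS with regs[] holding the RV/RB/RVE values.
 */
#include <stdint.h>

enum sketch_ring { S_RCS, S_VCS, S_BCS, S_VECS, S_VCS2, S_NUM_RINGS };

#define SKETCH_SYNC_INVALID  0xffffffffu   /* stands in for MI_SEMAPHORE_SYNC_INVALID */

static void sketch_init_wait_mbox(uint32_t wait[S_NUM_RINGS],
                                  enum sketch_ring self,
                                  const uint32_t regs[S_NUM_RINGS])
{
        int other;

        for (other = 0; other < S_NUM_RINGS; other++) {
                if (other == self || other == S_VCS2)
                        wait[other] = SKETCH_SYNC_INVALID;   /* self or missing ring */
                else
                        wait[other] = regs[other];           /* hypothetical per-pair reg */
        }
}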
  2895.  
  2896. int intel_init_bsd_ring_buffer(struct drm_device *dev)
  2897. {
  2898.         struct drm_i915_private *dev_priv = dev->dev_private;
  2899.         struct intel_engine_cs *ring = &dev_priv->ring[VCS];
  2900.  
  2901.         ring->name = "bsd ring";
  2902.         ring->id = VCS;
  2903.         ring->exec_id = I915_EXEC_BSD;
  2904.  
  2905.         ring->write_tail = ring_write_tail;
  2906.         if (INTEL_INFO(dev)->gen >= 6) {
  2907.                 ring->mmio_base = GEN6_BSD_RING_BASE;
  2908.                 /* gen6 bsd needs a special workaround for tail updates */
  2909.                 if (IS_GEN6(dev))
  2910.                         ring->write_tail = gen6_bsd_ring_write_tail;
  2911.                 ring->flush = gen6_bsd_ring_flush;
  2912.                 ring->add_request = gen6_add_request;
  2913.                 ring->get_seqno = gen6_ring_get_seqno;
  2914.                 ring->set_seqno = ring_set_seqno;
  2915.                 if (INTEL_INFO(dev)->gen >= 8) {
  2916.                         ring->irq_enable_mask =
  2917.                                 GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
  2918.                         ring->irq_get = gen8_ring_get_irq;
  2919.                         ring->irq_put = gen8_ring_put_irq;
  2920.                         ring->dispatch_execbuffer =
  2921.                                 gen8_ring_dispatch_execbuffer;
  2922.                         if (i915_semaphore_is_enabled(dev)) {
  2923.                                 ring->semaphore.sync_to = gen8_ring_sync;
  2924.                                 ring->semaphore.signal = gen8_xcs_signal;
  2925.                                 GEN8_RING_SEMAPHORE_INIT;
  2926.                         }
  2927.                 } else {
  2928.                         ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
  2929.                         ring->irq_get = gen6_ring_get_irq;
  2930.                         ring->irq_put = gen6_ring_put_irq;
  2931.                         ring->dispatch_execbuffer =
  2932.                                 gen6_ring_dispatch_execbuffer;
  2933.                         if (i915_semaphore_is_enabled(dev)) {
  2934.                                 ring->semaphore.sync_to = gen6_ring_sync;
  2935.                                 ring->semaphore.signal = gen6_signal;
  2936.                                 ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VR;
  2937.                                 ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID;
  2938.                                 ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VB;
  2939.                                 ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_VVE;
  2940.                                 ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
  2941.                                 ring->semaphore.mbox.signal[RCS] = GEN6_RVSYNC;
  2942.                                 ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC;
  2943.                                 ring->semaphore.mbox.signal[BCS] = GEN6_BVSYNC;
  2944.                                 ring->semaphore.mbox.signal[VECS] = GEN6_VEVSYNC;
  2945.                                 ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
  2946.                         }
  2947.                 }
  2948.         } else {
  2949.                 ring->mmio_base = BSD_RING_BASE;
  2950.                 ring->flush = bsd_ring_flush;
  2951.                 ring->add_request = i9xx_add_request;
  2952.                 ring->get_seqno = ring_get_seqno;
  2953.                 ring->set_seqno = ring_set_seqno;
  2954.                 if (IS_GEN5(dev)) {
  2955.                         ring->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
  2956.                         ring->irq_get = gen5_ring_get_irq;
  2957.                         ring->irq_put = gen5_ring_put_irq;
  2958.                 } else {
  2959.                         ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
  2960.                         ring->irq_get = i9xx_ring_get_irq;
  2961.                         ring->irq_put = i9xx_ring_put_irq;
  2962.                 }
  2963.                 ring->dispatch_execbuffer = i965_dispatch_execbuffer;
  2964.         }
  2965.         ring->init_hw = init_ring_common;
  2966.  
  2967.         return intel_init_ring_buffer(dev, ring);
  2968. }
  2969.  
  2970. /**
  2971.  * Initialize the second BSD ring (e.g. Broadwell GT3, Skylake GT3)
  2972.  */
  2973. int intel_init_bsd2_ring_buffer(struct drm_device *dev)
  2974. {
  2975.         struct drm_i915_private *dev_priv = dev->dev_private;
  2976.         struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
  2977.  
  2978.         ring->name = "bsd2 ring";
  2979.         ring->id = VCS2;
  2980.         ring->exec_id = I915_EXEC_BSD;
  2981.  
  2982.         ring->write_tail = ring_write_tail;
  2983.         ring->mmio_base = GEN8_BSD2_RING_BASE;
  2984.         ring->flush = gen6_bsd_ring_flush;
  2985.         ring->add_request = gen6_add_request;
  2986.         ring->get_seqno = gen6_ring_get_seqno;
  2987.         ring->set_seqno = ring_set_seqno;
  2988.         ring->irq_enable_mask =
  2989.                         GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
  2990.         ring->irq_get = gen8_ring_get_irq;
  2991.         ring->irq_put = gen8_ring_put_irq;
  2992.         ring->dispatch_execbuffer =
  2993.                         gen8_ring_dispatch_execbuffer;
  2994.         if (i915_semaphore_is_enabled(dev)) {
  2995.                 ring->semaphore.sync_to = gen8_ring_sync;
  2996.                 ring->semaphore.signal = gen8_xcs_signal;
  2997.                 GEN8_RING_SEMAPHORE_INIT;
  2998.         }
  2999.         ring->init_hw = init_ring_common;
  3000.  
  3001.         return intel_init_ring_buffer(dev, ring);
  3002. }
  3003.  
  3004. int intel_init_blt_ring_buffer(struct drm_device *dev)
  3005. {
  3006.         struct drm_i915_private *dev_priv = dev->dev_private;
  3007.         struct intel_engine_cs *ring = &dev_priv->ring[BCS];
  3008.  
  3009.         ring->name = "blitter ring";
  3010.         ring->id = BCS;
  3011.         ring->exec_id = I915_EXEC_BLT;
  3012.  
  3013.         ring->mmio_base = BLT_RING_BASE;
  3014.         ring->write_tail = ring_write_tail;
  3015.         ring->flush = gen6_ring_flush;
  3016.         ring->add_request = gen6_add_request;
  3017.         ring->get_seqno = gen6_ring_get_seqno;
  3018.         ring->set_seqno = ring_set_seqno;
  3019.         if (INTEL_INFO(dev)->gen >= 8) {
  3020.                 ring->irq_enable_mask =
  3021.                         GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
  3022.                 ring->irq_get = gen8_ring_get_irq;
  3023.                 ring->irq_put = gen8_ring_put_irq;
  3024.                 ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
  3025.                 if (i915_semaphore_is_enabled(dev)) {
  3026.                         ring->semaphore.sync_to = gen8_ring_sync;
  3027.                         ring->semaphore.signal = gen8_xcs_signal;
  3028.                         GEN8_RING_SEMAPHORE_INIT;
  3029.                 }
  3030.         } else {
  3031.                 ring->irq_enable_mask = GT_BLT_USER_INTERRUPT;
  3032.                 ring->irq_get = gen6_ring_get_irq;
  3033.                 ring->irq_put = gen6_ring_put_irq;
  3034.                 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
  3035.                 if (i915_semaphore_is_enabled(dev)) {
  3036.                         ring->semaphore.signal = gen6_signal;
  3037.                         ring->semaphore.sync_to = gen6_ring_sync;
  3038.                         /*
  3039.                          * This semaphore scheme is only used on pre-gen8
  3040.                          * platforms, and there is no VCS2 ring on pre-gen8
  3041.                          * hardware, so the semaphore between BCS and VCS2 is
  3042.                          * initialized as INVALID.  Gen8 initializes the
  3043.                          * semaphore between BCS and VCS2 separately.
  3044.                          */
  3045.                         ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_BR;
  3046.                         ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_BV;
  3047.                         ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID;
  3048.                         ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_BVE;
  3049.                         ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
  3050.                         ring->semaphore.mbox.signal[RCS] = GEN6_RBSYNC;
  3051.                         ring->semaphore.mbox.signal[VCS] = GEN6_VBSYNC;
  3052.                         ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC;
  3053.                         ring->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC;
  3054.                         ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
  3055.                 }
  3056.         }
  3057.         ring->init_hw = init_ring_common;
  3058.  
  3059.         return intel_init_ring_buffer(dev, ring);
  3060. }
  3061.  
  3062. int intel_init_vebox_ring_buffer(struct drm_device *dev)
  3063. {
  3064.         struct drm_i915_private *dev_priv = dev->dev_private;
  3065.         struct intel_engine_cs *ring = &dev_priv->ring[VECS];
  3066.  
  3067.         ring->name = "video enhancement ring";
  3068.         ring->id = VECS;
  3069.         ring->exec_id = I915_EXEC_VEBOX;
  3070.  
  3071.         ring->mmio_base = VEBOX_RING_BASE;
  3072.         ring->write_tail = ring_write_tail;
  3073.         ring->flush = gen6_ring_flush;
  3074.         ring->add_request = gen6_add_request;
  3075.         ring->get_seqno = gen6_ring_get_seqno;
  3076.         ring->set_seqno = ring_set_seqno;
  3077.  
  3078.         if (INTEL_INFO(dev)->gen >= 8) {
  3079.                 ring->irq_enable_mask =
  3080.                         GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
  3081.                 ring->irq_get = gen8_ring_get_irq;
  3082.                 ring->irq_put = gen8_ring_put_irq;
  3083.                 ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
  3084.                 if (i915_semaphore_is_enabled(dev)) {
  3085.                         ring->semaphore.sync_to = gen8_ring_sync;
  3086.                         ring->semaphore.signal = gen8_xcs_signal;
  3087.                         GEN8_RING_SEMAPHORE_INIT;
  3088.                 }
  3089.         } else {
  3090.                 ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
  3091.                 ring->irq_get = hsw_vebox_get_irq;
  3092.                 ring->irq_put = hsw_vebox_put_irq;
  3093.                 ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
  3094.                 if (i915_semaphore_is_enabled(dev)) {
  3095.                         ring->semaphore.sync_to = gen6_ring_sync;
  3096.                         ring->semaphore.signal = gen6_signal;
  3097.                         ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VER;
  3098.                         ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_VEV;
  3099.                         ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VEB;
  3100.                         ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID;
  3101.                         ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
  3102.                         ring->semaphore.mbox.signal[RCS] = GEN6_RVESYNC;
  3103.                         ring->semaphore.mbox.signal[VCS] = GEN6_VVESYNC;
  3104.                         ring->semaphore.mbox.signal[BCS] = GEN6_BVESYNC;
  3105.                         ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC;
  3106.                         ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
  3107.                 }
  3108.         }
  3109.         ring->init_hw = init_ring_common;
  3110.  
  3111.         return intel_init_ring_buffer(dev, ring);
  3112. }
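/*
 * Editor's note: each intel_init_*_ring_buffer() above follows the same
 * shape: fill in the engine's function pointers according to the hardware
 * generation, then hand the engine to the common intel_init_ring_buffer()
 * setup.  A condensed, standalone sketch of that dispatch pattern with
 * hypothetical callback types and names:
 */
struct sketch_ring_vfuncs {
        const char *name;
        int (*dispatch_execbuffer)(void);   /* placeholder signatures */
        int (*flush)(void);
};

static int sketch_dispatch_gen8(void) { return 0; }
static int sketch_dispatch_gen6(void) { return 0; }
static int sketch_flush_gen6(void)    { return 0; }

/* stands in for the shared intel_init_ring_buffer() step */
static int sketch_init_common(struct sketch_ring_vfuncs *e)
{
        return e->name ? 0 : -1;
}

static int sketch_init_engine(struct sketch_ring_vfuncs *e, int gen)
{
        e->name = "sketch ring";
        e->flush = sketch_flush_gen6;
        e->dispatch_execbuffer = gen >= 8 ? sketch_dispatch_gen8
                                          : sketch_dispatch_gen6;
        return sketch_init_common(e);
}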
  3113.  
  3114. int
  3115. intel_ring_flush_all_caches(struct drm_i915_gem_request *req)
  3116. {
  3117.         struct intel_engine_cs *ring = req->ring;
  3118.         int ret;
  3119.  
  3120.         if (!ring->gpu_caches_dirty)
  3121.                 return 0;
  3122.  
  3123.         ret = ring->flush(req, 0, I915_GEM_GPU_DOMAINS);
  3124.         if (ret)
  3125.                 return ret;
  3126.  
  3127.         trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS);
  3128.  
  3129.         ring->gpu_caches_dirty = false;
  3130.         return 0;
  3131. }
  3132.  
  3133. int
  3134. intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
  3135. {
  3136.         struct intel_engine_cs *ring = req->ring;
  3137.         uint32_t flush_domains;
  3138.         int ret;
  3139.  
  3140.         flush_domains = 0;
  3141.         if (ring->gpu_caches_dirty)
  3142.                 flush_domains = I915_GEM_GPU_DOMAINS;
  3143.  
  3144.         ret = ring->flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
  3145.         if (ret)
  3146.                 return ret;
  3147.  
  3148.         trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
  3149.  
  3150.         ring->gpu_caches_dirty = false;
  3151.         return 0;
  3152. }
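/*
 * Editor's note: a hedged sketch of how the two helpers above are commonly
 * paired around a batch.  The request type and the submit/emit helpers are
 * hypothetical stand-ins; only the ordering is the point: read caches are
 * invalidated before new commands run, and dirty write caches are flushed
 * before the breadcrumb that signals completion.
 */
struct sketch_request;   /* stands in for struct drm_i915_gem_request */

extern int sketch_invalidate_all_caches(struct sketch_request *req);
extern int sketch_flush_all_caches(struct sketch_request *req);
extern int sketch_emit_batch(struct sketch_request *req);

static int sketch_submit(struct sketch_request *req)
{
        int ret;

        ret = sketch_invalidate_all_caches(req);   /* before the batch */
        if (ret)
                return ret;

        ret = sketch_emit_batch(req);
        if (ret)
                return ret;

        return sketch_flush_all_caches(req);       /* before the breadcrumb */
}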
  3153.  
  3154. void
  3155. intel_stop_ring_buffer(struct intel_engine_cs *ring)
  3156. {
  3157.         int ret;
  3158.  
  3159.         if (!intel_ring_initialized(ring))
  3160.                 return;
  3161.  
  3162.         ret = intel_ring_idle(ring);
  3163.         if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
  3164.                 DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
  3165.                           ring->name, ret);
  3166.  
  3167.         stop_ring(ring);
  3168. }
  3169.