
  1. /*
  2.  * Copyright © 2014 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  *
  23.  * Authors:
  24.  *    Ben Widawsky <ben@bwidawsk.net>
  25.  *    Michel Thierry <michel.thierry@intel.com>
  26.  *    Thomas Daniel <thomas.daniel@intel.com>
  27.  *    Oscar Mateo <oscar.mateo@intel.com>
  28.  *
  29.  */
  30.  
  31. /**
  32.  * DOC: Logical Rings, Logical Ring Contexts and Execlists
  33.  *
  34.  * Motivation:
  35.  * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts".
  36.  * These expanded contexts enable a number of new abilities, especially
  37.  * "Execlists" (also implemented in this file).
  38.  *
  39.  * One of the main differences from the legacy HW contexts is that logical
  40.  * ring contexts incorporate many more things into the context's state, like
  41.  * PDPs or ringbuffer control registers:
  42.  *
  43.  * The reason why PDPs are included in the context is straightforward: as
  44.  * PPGTTs (per-process GTTs) are actually per-context, having the PDPs
  45.  * contained there means you don't need to do a ppgtt->switch_mm yourself;
  46.  * instead, the GPU will do it for you on the context switch.
  47.  *
  48.  * But what about the ringbuffer control registers (head, tail, etc.)?
  49.  * Shouldn't one set of those per engine command streamer be enough? This is
  50.  * where the name "Logical Rings" starts to make sense: by virtualizing the
  51.  * rings, the engine cs shifts to a new "ring buffer" with every context
  52.  * switch. When you want to submit a workload to the GPU you: A) choose your
  53.  * context, B) find its appropriate virtualized ring, C) write commands to it
  54.  * and then, finally, D) tell the GPU to switch to that context.
  55.  *
  56.  * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch
  57.  * to a context is via a context execution list, ergo "Execlists".
  58.  *
  59.  * LRC implementation:
  60.  * Regarding the creation of contexts, we have:
  61.  *
  62.  * - One global default context.
  63.  * - One local default context for each opened fd.
  64.  * - One local extra context for each context create ioctl call.
  65.  *
  66.  * Now that ringbuffers are per-context (and not per-engine, like before)
  67.  * and that contexts are uniquely tied to a given engine (and not reusable,
  68.  * like before), we need:
  69.  *
  70.  * - One ringbuffer per-engine inside each context.
  71.  * - One backing object per-engine inside each context.
  72.  *
  73.  * The global default context starts its life with these new objects fully
  74.  * allocated and populated. The local default context for each opened fd is
  75.  * more complex, because we don't know at creation time which engine is going
  76.  * to use it. To handle this, we have implemented a deferred creation of LR
  77.  * contexts:
  78.  *
  79.  * The local context starts its life as a hollow or blank holder that only
  80.  * gets populated for a given engine once we receive an execbuffer. If later
  81.  * on we receive another execbuffer ioctl for the same context but a different
  82.  * engine, we allocate/populate a new ringbuffer and context backing object and
  83.  * so on.
  84.  *
  85.  * Finally, regarding local contexts created using the ioctl call: as they are
  86.  * only allowed with the render ring, we can allocate & populate them right
  87.  * away (no need to defer anything, at least for now).
  88.  *
  89.  * Execlists implementation:
  90.  * Execlists are the new method by which, on gen8+ hardware, workloads are
  91.  * submitted for execution (as opposed to the legacy, ringbuffer-based, method).
  92.  * This method works as follows:
  93.  *
  94.  * When a request is committed, its commands (the BB start and any leading or
  95.  * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer
  96.  * for the appropriate context. The tail pointer in the hardware context is not
  97.  * updated at this time, but instead, kept by the driver in the ringbuffer
  98.  * structure. A structure representing this request is added to a request queue
  99.  * for the appropriate engine: this structure contains a copy of the context's
  100.  * tail after the request was written to the ring buffer and a pointer to the
  101.  * context itself.
  102.  *
  103.  * If the engine's request queue was empty before the request was added, the
  104.  * queue is processed immediately. Otherwise the queue will be processed during
  105.  * a context switch interrupt. In any case, elements on the queue will get sent
  106.  * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a
  107.  * globally unique 20-bit submission ID.
  108.  *
  109.  * When execution of a request completes, the GPU updates the context status
  110.  * buffer with a context complete event and generates a context switch interrupt.
  111.  * During the interrupt handling, the driver examines the events in the buffer:
  112.  * for each context complete event, if the announced ID matches that on the head
  113.  * of the request queue, then that request is retired and removed from the queue.
  114.  *
  115.  * After processing, if any requests were retired and the queue is not empty
  116.  * then a new execution list can be submitted. The two requests at the front of
  117.  * the queue are next to be submitted but since a context may not occur twice in
  118.  * an execution list, if subsequent requests have the same ID as the first then
  119.  * the two requests must be combined. This is done simply by discarding requests
  120.  * at the head of the queue until either only one request is left (in which case
  121.  * we use a NULL second context) or the first two requests have unique IDs.
  122.  *
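 * Example (illustrative): suppose the queue holds requests A1, A2 and B1,
 * where A and B name contexts. A1 is moved to the retired list (A2 advances
 * the same context's tail past A1's workload) and the pair (A2, B1) is
 * submitted to the ELSP. With only A1 queued, the pair would be (A1, NULL).
 *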
  123.  * By always executing the first two requests in the queue the driver ensures
  124.  * that the GPU is kept as busy as possible. In the case where a single context
  125.  * completes but a second context is still executing, the request for this second
  126.  * context will be at the head of the queue when we remove the first one. This
  127.  * request will then be resubmitted along with a new request for a different context,
  128.  * which will cause the hardware to continue executing the second request and queue
  129.  * the new request (the GPU detects the condition of a context getting preempted
  130.  * with the same context and optimizes the context switch flow by not doing
  131.  * preemption, but just sampling the new tail pointer).
  132.  *
  133.  */
  134.  
  135. #include <drm/drmP.h>
  136. #include <drm/i915_drm.h>
  137. #include "i915_drv.h"
  138. #include "intel_mocs.h"
  139.  
  140. #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE)
  141. #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE)
  142. #define GEN8_LR_CONTEXT_OTHER_SIZE (2 * PAGE_SIZE)
  143.  
  144. #define RING_EXECLIST_QFULL             (1 << 0x2)
  145. #define RING_EXECLIST1_VALID            (1 << 0x3)
  146. #define RING_EXECLIST0_VALID            (1 << 0x4)
  147. #define RING_EXECLIST_ACTIVE_STATUS     (3 << 0xE)
  148. #define RING_EXECLIST1_ACTIVE           (1 << 0x11)
  149. #define RING_EXECLIST0_ACTIVE           (1 << 0x12)
  150.  
  151. #define GEN8_CTX_STATUS_IDLE_ACTIVE     (1 << 0)
  152. #define GEN8_CTX_STATUS_PREEMPTED       (1 << 1)
  153. #define GEN8_CTX_STATUS_ELEMENT_SWITCH  (1 << 2)
  154. #define GEN8_CTX_STATUS_ACTIVE_IDLE     (1 << 3)
  155. #define GEN8_CTX_STATUS_COMPLETE        (1 << 4)
  156. #define GEN8_CTX_STATUS_LITE_RESTORE    (1 << 15)
  157.  
  158. #define CTX_LRI_HEADER_0                0x01
  159. #define CTX_CONTEXT_CONTROL             0x02
  160. #define CTX_RING_HEAD                   0x04
  161. #define CTX_RING_TAIL                   0x06
  162. #define CTX_RING_BUFFER_START           0x08
  163. #define CTX_RING_BUFFER_CONTROL         0x0a
  164. #define CTX_BB_HEAD_U                   0x0c
  165. #define CTX_BB_HEAD_L                   0x0e
  166. #define CTX_BB_STATE                    0x10
  167. #define CTX_SECOND_BB_HEAD_U            0x12
  168. #define CTX_SECOND_BB_HEAD_L            0x14
  169. #define CTX_SECOND_BB_STATE             0x16
  170. #define CTX_BB_PER_CTX_PTR              0x18
  171. #define CTX_RCS_INDIRECT_CTX            0x1a
  172. #define CTX_RCS_INDIRECT_CTX_OFFSET     0x1c
  173. #define CTX_LRI_HEADER_1                0x21
  174. #define CTX_CTX_TIMESTAMP               0x22
  175. #define CTX_PDP3_UDW                    0x24
  176. #define CTX_PDP3_LDW                    0x26
  177. #define CTX_PDP2_UDW                    0x28
  178. #define CTX_PDP2_LDW                    0x2a
  179. #define CTX_PDP1_UDW                    0x2c
  180. #define CTX_PDP1_LDW                    0x2e
  181. #define CTX_PDP0_UDW                    0x30
  182. #define CTX_PDP0_LDW                    0x32
  183. #define CTX_LRI_HEADER_2                0x41
  184. #define CTX_R_PWR_CLK_STATE             0x42
  185. #define CTX_GPGPU_CSR_BASE_ADDRESS      0x44
  186.  
  187. #define GEN8_CTX_VALID (1<<0)
  188. #define GEN8_CTX_FORCE_PD_RESTORE (1<<1)
  189. #define GEN8_CTX_FORCE_RESTORE (1<<2)
  190. #define GEN8_CTX_L3LLC_COHERENT (1<<5)
  191. #define GEN8_CTX_PRIVILEGE (1<<8)
  192.  
  193. #define ASSIGN_CTX_REG(reg_state, pos, reg, val) do { \
  194.         (reg_state)[(pos)+0] = i915_mmio_reg_offset(reg); \
  195.         (reg_state)[(pos)+1] = (val); \
  196. } while (0)
  197.  
  198. #define ASSIGN_CTX_PDP(ppgtt, reg_state, n) do {                \
  199.         const u64 _addr = i915_page_dir_dma_addr((ppgtt), (n)); \
  200.         reg_state[CTX_PDP ## n ## _UDW+1] = upper_32_bits(_addr); \
  201.         reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \
  202. } while (0)
  203.  
  204. #define ASSIGN_CTX_PML4(ppgtt, reg_state) do { \
  205.         reg_state[CTX_PDP0_UDW + 1] = upper_32_bits(px_dma(&ppgtt->pml4)); \
  206.         reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \
  207. } while (0)
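
/*
 * Usage sketch (illustrative): when a context image is populated, something
 * like
 *   ASSIGN_CTX_REG(reg_state, CTX_RING_TAIL, RING_TAIL(ring->mmio_base), 0);
 * stores the register offset in reg_state[CTX_RING_TAIL] and the value 0 in
 * reg_state[CTX_RING_TAIL + 1], while ASSIGN_CTX_PDP(ppgtt, reg_state, 3)
 * fills in only the value dwords of the PDP3 pair (see
 * execlists_update_context() below).
 */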
  208.  
  209. enum {
  210.         ADVANCED_CONTEXT = 0,
  211.         LEGACY_32B_CONTEXT,
  212.         ADVANCED_AD_CONTEXT,
  213.         LEGACY_64B_CONTEXT
  214. };
  215. #define GEN8_CTX_ADDRESSING_MODE_SHIFT 3
  216. #define GEN8_CTX_ADDRESSING_MODE(dev)  (USES_FULL_48BIT_PPGTT(dev) ?\
  217.                 LEGACY_64B_CONTEXT :\
  218.                 LEGACY_32B_CONTEXT)
  219. enum {
  220.         FAULT_AND_HANG = 0,
  221.         FAULT_AND_HALT, /* Debug only */
  222.         FAULT_AND_STREAM,
  223.         FAULT_AND_CONTINUE /* Unsupported */
  224. };
  225. #define GEN8_CTX_ID_SHIFT 32
  226. #define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT        0x17
  227. #define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT        0x26
  228.  
  229. static int intel_lr_context_pin(struct intel_context *ctx,
  230.                                 struct intel_engine_cs *engine);
  231. static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
  232.                 struct drm_i915_gem_object *default_ctx_obj);
  233.  
  234.  
  235. /**
  236.  * intel_sanitize_enable_execlists() - sanitize i915.enable_execlists
  237.  * @dev: DRM device.
  238.  * @enable_execlists: value of i915.enable_execlists module parameter.
  239.  *
  240.  * Only certain platforms support Execlists (the prerequisites being
  241.  * support for Logical Ring Contexts and Aliasing PPGTT or better).
  242.  *
  243.  * Return: 1 if Execlists is supported and has to be enabled.
  244.  */
  245. int intel_sanitize_enable_execlists(struct drm_device *dev, int enable_execlists)
  246. {
  247.         WARN_ON(i915.enable_ppgtt == -1);
  248.  
  249.         /* On platforms with execlists available, vGPU will only
  250.          * support execlists mode, not ring buffer mode.
  251.          */
  252.         if (HAS_LOGICAL_RING_CONTEXTS(dev) && intel_vgpu_active(dev))
  253.                 return 1;
  254.  
  255.         if (INTEL_INFO(dev)->gen >= 9)
  256.                 return 1;
  257.  
  258.         if (enable_execlists == 0)
  259.                 return 0;
  260.  
  261.         if (HAS_LOGICAL_RING_CONTEXTS(dev) && USES_PPGTT(dev) &&
  262.             i915.use_mmio_flip >= 0)
  263.                 return 1;
  264.  
  265.         return 0;
  266. }
  267.  
  268. static void
  269. logical_ring_init_platform_invariants(struct intel_engine_cs *ring)
  270. {
  271.         struct drm_device *dev = ring->dev;
  272.  
  273.         ring->disable_lite_restore_wa = (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
  274.                                         IS_BXT_REVID(dev, 0, BXT_REVID_A1)) &&
  275.                                         (ring->id == VCS || ring->id == VCS2);
  276.  
  277.         ring->ctx_desc_template = GEN8_CTX_VALID;
  278.         ring->ctx_desc_template |= GEN8_CTX_ADDRESSING_MODE(dev) <<
  279.                                    GEN8_CTX_ADDRESSING_MODE_SHIFT;
  280.         if (IS_GEN8(dev))
  281.                 ring->ctx_desc_template |= GEN8_CTX_L3LLC_COHERENT;
  282.         ring->ctx_desc_template |= GEN8_CTX_PRIVILEGE;
  283.  
  284.         /* TODO: WaDisableLiteRestore when we start using semaphore
  285.          * signalling between Command Streamers */
  286.         /* ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE; */
  287.  
  288.         /* WaEnableForceRestoreInCtxtDescForVCS:skl */
  289.         /* WaEnableForceRestoreInCtxtDescForVCS:bxt */
  290.         if (ring->disable_lite_restore_wa)
  291.                 ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE;
  292. }
  293.  
  294. /**
  295.  * intel_lr_context_descriptor_update() - calculate & cache the descriptor
  296.  *                                        for a pinned context
  297.  *
  298.  * @ctx: Context to work on
  299.  * @ring: Engine the descriptor will be used with
  300.  *
  301.  * The context descriptor encodes various attributes of a context,
  302.  * including its GTT address and some flags. Because it's fairly
  303.  * expensive to calculate, we'll just do it once and cache the result,
  304.  * which remains valid until the context is unpinned.
  305.  *
  306.  * This is what a descriptor looks like, from LSB to MSB:
  307.  *    bits 0-11:    flags, GEN8_CTX_* (cached in ctx_desc_template)
  308.  *    bits 12-31:    LRCA, GTT address of (the HWSP of) this context
  309.  *    bits 32-51:    ctx ID, a globally unique tag (the LRCA again!)
  310.  *    bits 52-63:    reserved, may encode the engine ID (for GuC)
  311.  */
  312. static void
  313. intel_lr_context_descriptor_update(struct intel_context *ctx,
  314.                                    struct intel_engine_cs *ring)
  315. {
  316.         uint64_t lrca, desc;
  317.  
  318.         lrca = ctx->engine[ring->id].lrc_vma->node.start +
  319.                LRC_PPHWSP_PN * PAGE_SIZE;
  320.  
  321.         desc = ring->ctx_desc_template;                    /* bits  0-11 */
  322.         desc |= lrca;                                      /* bits 12-31 */
  323.         desc |= (lrca >> PAGE_SHIFT) << GEN8_CTX_ID_SHIFT; /* bits 32-51 */
  324.  
  325.         ctx->engine[ring->id].lrc_desc = desc;
  326. }
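
/*
 * Worked example (illustrative, flag bits omitted): with the context's PPHWSP
 * at GGTT address 0x34000, lrca == 0x34000 and
 *   desc = 0x34000 | ((0x34000 >> PAGE_SHIFT) << GEN8_CTX_ID_SHIFT)
 *        = 0x0000003400034000,
 * i.e. the context ID in bits 32-51 is 0x34.
 */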
  327.  
  328. uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
  329.                                      struct intel_engine_cs *ring)
  330. {
  331.         return ctx->engine[ring->id].lrc_desc;
  332. }
  333.  
  334. /**
  335.  * intel_execlists_ctx_id() - get the Execlists Context ID
  336.  * @ctx: Context to get the ID for
  337.  * @ring: Engine to get the ID for
  338.  *
  339.  * Do not confuse with ctx->id! Unfortunately we have a name overload
  340.  * here: the old context ID we pass to userspace as a handle so that
  341.  * they can refer to a context, and the new context ID we pass to the
  342.  * ELSP so that the GPU can inform us of the context status via
  343.  * interrupts.
  344.  *
  345.  * The context ID is a portion of the context descriptor, so we can
  346.  * just extract the required part from the cached descriptor.
  347.  *
  348.  * Return: 20-bit globally unique context ID.
  349.  */
  350. u32 intel_execlists_ctx_id(struct intel_context *ctx,
  351.                            struct intel_engine_cs *ring)
  352. {
  353.         return intel_lr_context_descriptor(ctx, ring) >> GEN8_CTX_ID_SHIFT;
  354. }
  355.  
  356. static void execlists_elsp_write(struct drm_i915_gem_request *rq0,
  357.                                  struct drm_i915_gem_request *rq1)
  358. {
  359.  
  360.         struct intel_engine_cs *ring = rq0->ring;
  361.         struct drm_device *dev = ring->dev;
  362.         struct drm_i915_private *dev_priv = dev->dev_private;
  363.         uint64_t desc[2];
  364.  
  365.         if (rq1) {
  366.                 desc[1] = intel_lr_context_descriptor(rq1->ctx, rq1->ring);
  367.                 rq1->elsp_submitted++;
  368.         } else {
  369.                 desc[1] = 0;
  370.         }
  371.  
  372.         desc[0] = intel_lr_context_descriptor(rq0->ctx, rq0->ring);
  373.         rq0->elsp_submitted++;
  374.  
  375.         /* You must always write both descriptors in the order below. */
  376.         spin_lock(&dev_priv->uncore.lock);
  377.         intel_uncore_forcewake_get__locked(dev_priv, FORCEWAKE_ALL);
  378.         I915_WRITE_FW(RING_ELSP(ring), upper_32_bits(desc[1]));
  379.         I915_WRITE_FW(RING_ELSP(ring), lower_32_bits(desc[1]));
  380.  
  381.         I915_WRITE_FW(RING_ELSP(ring), upper_32_bits(desc[0]));
  382.         /* The context is automatically loaded after the following */
  383.         I915_WRITE_FW(RING_ELSP(ring), lower_32_bits(desc[0]));
  384.  
  385.         /* ELSP is a write-only register, so use another nearby reg for posting */
  386.         POSTING_READ_FW(RING_EXECLIST_STATUS_LO(ring));
  387.         intel_uncore_forcewake_put__locked(dev_priv, FORCEWAKE_ALL);
  388.         spin_unlock(&dev_priv->uncore.lock);
  389. }
  390.  
  391. static int execlists_update_context(struct drm_i915_gem_request *rq)
  392. {
  393.         struct intel_engine_cs *ring = rq->ring;
  394.         struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
  395.         uint32_t *reg_state = rq->ctx->engine[ring->id].lrc_reg_state;
  396.  
  397.         reg_state[CTX_RING_TAIL+1] = rq->tail;
  398.  
  399.         if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
  400.                 /* True 32b PPGTT with dynamic page allocation: update PDP
  401.                  * registers and point the unallocated PDPs to scratch page.
  402.                  * PML4 is allocated during ppgtt init, so this is not needed
  403.                  * in 48-bit mode.
  404.                  */
  405.                 ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
  406.                 ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
  407.                 ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
  408.                 ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
  409.         }
  410.  
  411.         return 0;
  412. }
  413.  
  414. static void execlists_submit_requests(struct drm_i915_gem_request *rq0,
  415.                                       struct drm_i915_gem_request *rq1)
  416. {
  417.         execlists_update_context(rq0);
  418.  
  419.         if (rq1)
  420.                 execlists_update_context(rq1);
  421.  
  422.         execlists_elsp_write(rq0, rq1);
  423. }
  424.  
  425. static void execlists_context_unqueue(struct intel_engine_cs *ring)
  426. {
  427.         struct drm_i915_gem_request *req0 = NULL, *req1 = NULL;
  428.         struct drm_i915_gem_request *cursor = NULL, *tmp = NULL;
  429.  
  430.         assert_spin_locked(&ring->execlist_lock);
  431.  
  432.         /*
  433.          * If irqs are not enabled, generate a warning, as batches that finish
  434.          * without them may get lost and a GPU hang may occur.
  435.          */
  436.         WARN_ON(!intel_irqs_enabled(ring->dev->dev_private));
  437.  
  438.         if (list_empty(&ring->execlist_queue))
  439.                 return;
  440.  
  441.         /* Try to read in pairs */
  442.         list_for_each_entry_safe(cursor, tmp, &ring->execlist_queue,
  443.                                  execlist_link) {
  444.                 if (!req0) {
  445.                         req0 = cursor;
  446.                 } else if (req0->ctx == cursor->ctx) {
  447.                         /* Same ctx: ignore first request, as second request
  448.                          * will update tail past first request's workload */
  449.                         cursor->elsp_submitted = req0->elsp_submitted;
  450.                         list_move_tail(&req0->execlist_link,
  451.                                        &ring->execlist_retired_req_list);
  452.                         req0 = cursor;
  453.                 } else {
  454.                         req1 = cursor;
  455.                         break;
  456.                 }
  457.         }
  458.  
  459.         if (IS_GEN8(ring->dev) || IS_GEN9(ring->dev)) {
  460.                 /*
  461.                  * WaIdleLiteRestore: make sure we never cause a lite
  462.                  * restore with HEAD==TAIL
  463.                  */
  464.                 if (req0->elsp_submitted) {
  465.                         /*
  466.                          * Apply the wa NOOPS to prevent ring:HEAD == req:TAIL
  467.                          * as we resubmit the request. See gen8_emit_request()
  468.                          * for where we prepare the padding after the end of the
  469.                          * request.
  470.                          */
  471.                         struct intel_ringbuffer *ringbuf;
  472.  
  473.                         ringbuf = req0->ctx->engine[ring->id].ringbuf;
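                        /* 8 bytes == the two MI_NOOP dwords of padding
                         * emitted after every request; see
                         * intel_logical_ring_advance_and_submit(). */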
  474.                         req0->tail += 8;
  475.                         req0->tail &= ringbuf->size - 1;
  476.                 }
  477.         }
  478.  
  479.         WARN_ON(req1 && req1->elsp_submitted);
  480.  
  481.         execlists_submit_requests(req0, req1);
  482. }
  483.  
  484. static bool execlists_check_remove_request(struct intel_engine_cs *ring,
  485.                                            u32 request_id)
  486. {
  487.         struct drm_i915_gem_request *head_req;
  488.  
  489.         assert_spin_locked(&ring->execlist_lock);
  490.  
  491.         head_req = list_first_entry_or_null(&ring->execlist_queue,
  492.                                             struct drm_i915_gem_request,
  493.                                             execlist_link);
  494.  
  495.         if (head_req != NULL) {
  496.                 if (intel_execlists_ctx_id(head_req->ctx, ring) == request_id) {
  497.                         WARN(head_req->elsp_submitted == 0,
  498.                              "Never submitted head request\n");
  499.  
  500.                         if (--head_req->elsp_submitted <= 0) {
  501.                                 list_move_tail(&head_req->execlist_link,
  502.                                                &ring->execlist_retired_req_list);
  503.                                 return true;
  504.                         }
  505.                 }
  506.         }
  507.  
  508.         return false;
  509. }
  510.  
  511. static void get_context_status(struct intel_engine_cs *ring,
  512.                                u8 read_pointer,
  513.                                u32 *status, u32 *context_id)
  514. {
  515.         struct drm_i915_private *dev_priv = ring->dev->dev_private;
  516.  
  517.         if (WARN_ON(read_pointer >= GEN8_CSB_ENTRIES))
  518.                 return;
  519.  
  520.         *status = I915_READ(RING_CONTEXT_STATUS_BUF_LO(ring, read_pointer));
  521.         *context_id = I915_READ(RING_CONTEXT_STATUS_BUF_HI(ring, read_pointer));
  522. }
  523.  
  524. /**
  525.  * intel_lrc_irq_handler() - handle Context Switch interrupts
  526.  * @ring: Engine Command Streamer to handle.
  527.  *
  528.  * Check the unread Context Status Buffers and manage the submission of new
  529.  * contexts to the ELSP accordingly.
  530.  */
  531. void intel_lrc_irq_handler(struct intel_engine_cs *ring)
  532. {
  533.         struct drm_i915_private *dev_priv = ring->dev->dev_private;
  534.         u32 status_pointer;
  535.         u8 read_pointer;
  536.         u8 write_pointer;
  537.         u32 status = 0;
  538.         u32 status_id;
  539.         u32 submit_contexts = 0;
  540.  
  541.         status_pointer = I915_READ(RING_CONTEXT_STATUS_PTR(ring));
  542.  
  543.         read_pointer = ring->next_context_status_buffer;
  544.         write_pointer = GEN8_CSB_WRITE_PTR(status_pointer);
  545.         if (read_pointer > write_pointer)
  546.                 write_pointer += GEN8_CSB_ENTRIES;
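        /* Example (assuming GEN8_CSB_ENTRIES == 6): read_pointer == 5 with a
         * HW write pointer of 1 gives write_pointer == 7, so CSB entries 0
         * and 1 (++read_pointer % GEN8_CSB_ENTRIES) are consumed below, and
         * next_context_status_buffer ends up as 7 % 6 == 1. */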
  547.  
  548.         spin_lock(&ring->execlist_lock);
  549.  
  550.         while (read_pointer < write_pointer) {
  551.  
  552.                 get_context_status(ring, ++read_pointer % GEN8_CSB_ENTRIES,
  553.                                    &status, &status_id);
  554.  
  555.                 if (status & GEN8_CTX_STATUS_IDLE_ACTIVE)
  556.                         continue;
  557.  
  558.                 if (status & GEN8_CTX_STATUS_PREEMPTED) {
  559.                         if (status & GEN8_CTX_STATUS_LITE_RESTORE) {
  560.                                 if (execlists_check_remove_request(ring, status_id))
  561.                                         WARN(1, "Lite Restored request removed from queue\n");
  562.                         } else
  563.                                 WARN(1, "Preemption without Lite Restore\n");
  564.                 }
  565.  
  566.                 if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) ||
  567.                     (status & GEN8_CTX_STATUS_ELEMENT_SWITCH)) {
  568.                         if (execlists_check_remove_request(ring, status_id))
  569.                                 submit_contexts++;
  570.                 }
  571.         }
  572.  
  573.         if (ring->disable_lite_restore_wa) {
  574.                 /* Prevent a ctx from preempting itself */
  575.                 if ((status & GEN8_CTX_STATUS_ACTIVE_IDLE) &&
  576.                     (submit_contexts != 0))
  577.                         execlists_context_unqueue(ring);
  578.         } else if (submit_contexts != 0) {
  579.                 execlists_context_unqueue(ring);
  580.         }
  581.  
  582.         spin_unlock(&ring->execlist_lock);
  583.  
  584.         if (unlikely(submit_contexts > 2))
  585.                 DRM_ERROR("More than two context complete events?\n");
  586.  
  587.         ring->next_context_status_buffer = write_pointer % GEN8_CSB_ENTRIES;
  588.  
  589.         /* Update the read pointer to the old write pointer. Manual ringbuffer
  590.          * management ftw </sarcasm> */
  591.         I915_WRITE(RING_CONTEXT_STATUS_PTR(ring),
  592.                    _MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
  593.                                  ring->next_context_status_buffer << 8));
  594. }
  595.  
  596. static int execlists_context_queue(struct drm_i915_gem_request *request)
  597. {
  598.         struct intel_engine_cs *ring = request->ring;
  599.         struct drm_i915_gem_request *cursor;
  600.         int num_elements = 0;
  601.  
  602.         if (request->ctx != request->i915->kernel_context)
  603.                 intel_lr_context_pin(request->ctx, ring);
  604.  
  605.         i915_gem_request_reference(request);
  606.  
  607.         spin_lock_irq(&ring->execlist_lock);
  608.  
  609.         list_for_each_entry(cursor, &ring->execlist_queue, execlist_link)
  610.                 if (++num_elements > 2)
  611.                         break;
  612.  
  613.         if (num_elements > 2) {
  614.                 struct drm_i915_gem_request *tail_req;
  615.  
  616.                 tail_req = list_last_entry(&ring->execlist_queue,
  617.                                            struct drm_i915_gem_request,
  618.                                            execlist_link);
  619.  
  620.                 if (request->ctx == tail_req->ctx) {
  621.                         WARN(tail_req->elsp_submitted != 0,
  622.                                 "More than 2 already-submitted reqs queued\n");
  623.                         list_move_tail(&tail_req->execlist_link,
  624.                                        &ring->execlist_retired_req_list);
  625.                 }
  626.         }
  627.  
  628.         list_add_tail(&request->execlist_link, &ring->execlist_queue);
  629.         if (num_elements == 0)
  630.                 execlists_context_unqueue(ring);
  631.  
  632.         spin_unlock_irq(&ring->execlist_lock);
  633.  
  634.         return 0;
  635. }
  636.  
  637. static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
  638. {
  639.         struct intel_engine_cs *ring = req->ring;
  640.         uint32_t flush_domains;
  641.         int ret;
  642.  
  643.         flush_domains = 0;
  644.         if (ring->gpu_caches_dirty)
  645.                 flush_domains = I915_GEM_GPU_DOMAINS;
  646.  
  647.         ret = ring->emit_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
  648.         if (ret)
  649.                 return ret;
  650.  
  651.         ring->gpu_caches_dirty = false;
  652.         return 0;
  653. }
  654.  
  655. static int execlists_move_to_gpu(struct drm_i915_gem_request *req,
  656.                                  struct list_head *vmas)
  657. {
  658.         const unsigned other_rings = ~intel_ring_flag(req->ring);
  659.         struct i915_vma *vma;
  660.         uint32_t flush_domains = 0;
  661.         bool flush_chipset = false;
  662.         int ret;
  663.  
  664.         list_for_each_entry(vma, vmas, exec_list) {
  665.                 struct drm_i915_gem_object *obj = vma->obj;
  666.  
  667.                 if (obj->active & other_rings) {
  668.                         ret = i915_gem_object_sync(obj, req->ring, &req);
  669.                         if (ret)
  670.                                 return ret;
  671.                 }
  672.  
  673.                 if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
  674.                         flush_chipset |= i915_gem_clflush_object(obj, false);
  675.  
  676.                 flush_domains |= obj->base.write_domain;
  677.         }
  678.  
  679.         if (flush_domains & I915_GEM_DOMAIN_GTT)
  680.                 wmb();
  681.  
  682.         /* Unconditionally invalidate gpu caches and ensure that we do flush
  683.          * any residual writes from the previous batch.
  684.          */
  685.         return logical_ring_invalidate_all_caches(req);
  686. }
  687.  
  688. int intel_logical_ring_alloc_request_extras(struct drm_i915_gem_request *request)
  689. {
  690.         int ret = 0;
  691.  
  692.         request->ringbuf = request->ctx->engine[request->ring->id].ringbuf;
  693.  
  694.         if (i915.enable_guc_submission) {
  695.                 /*
  696.                  * Check that the GuC has space for the request before
  697.                  * going any further, as the i915_add_request() call
  698.                  * later on mustn't fail ...
  699.                  */
  700.                 struct intel_guc *guc = &request->i915->guc;
  701.  
  702.                 ret = i915_guc_wq_check_space(guc->execbuf_client);
  703.                 if (ret)
  704.                         return ret;
  705.         }
  706.  
  707.         if (request->ctx != request->i915->kernel_context)
  708.                 ret = intel_lr_context_pin(request->ctx, request->ring);
  709.  
  710.         return ret;
  711. }
  712.  
  713. static int logical_ring_wait_for_space(struct drm_i915_gem_request *req,
  714.                                        int bytes)
  715. {
  716.         struct intel_ringbuffer *ringbuf = req->ringbuf;
  717.         struct intel_engine_cs *ring = req->ring;
  718.         struct drm_i915_gem_request *target;
  719.         unsigned space;
  720.         int ret;
  721.  
  722.         if (intel_ring_space(ringbuf) >= bytes)
  723.                 return 0;
  724.  
  725.         /* The whole point of reserving space is to not wait! */
  726.         WARN_ON(ringbuf->reserved_in_use);
  727.  
  728.         list_for_each_entry(target, &ring->request_list, list) {
  729.                 /*
  730.                  * The request queue is per-engine, so can contain requests
  731.                  * from multiple ringbuffers. Here, we must ignore any that
  732.                  * aren't from the ringbuffer we're considering.
  733.                  */
  734.                 if (target->ringbuf != ringbuf)
  735.                         continue;
  736.  
  737.                 /* Would completion of this request free enough space? */
  738.                 space = __intel_ring_space(target->postfix, ringbuf->tail,
  739.                                            ringbuf->size);
  740.                 if (space >= bytes)
  741.                         break;
  742.         }
  743.  
  744.         if (WARN_ON(&target->list == &ring->request_list))
  745.                 return -ENOSPC;
  746.  
  747.         ret = i915_wait_request(target);
  748.         if (ret)
  749.                 return ret;
  750.  
  751.         ringbuf->space = space;
  752.         return 0;
  753. }
  754.  
  755. /*
  756.  * intel_logical_ring_advance_and_submit() - advance the tail and submit the workload
  757.  * @request: Request to advance the logical ringbuffer of.
  758.  *
  759.  * The tail is updated in our logical ringbuffer struct, not in the actual context. What
  760.  * really happens during submission is that the context and current tail will be placed
  761.  * on a queue waiting for the ELSP to be ready to accept a new context submission. At that
  762.  * point, the tail *inside* the context is updated and the ELSP written to.
  763.  */
  764. static int
  765. intel_logical_ring_advance_and_submit(struct drm_i915_gem_request *request)
  766. {
  767.         struct intel_ringbuffer *ringbuf = request->ringbuf;
  768.         struct drm_i915_private *dev_priv = request->i915;
  769.         struct intel_engine_cs *engine = request->ring;
  770.  
  771.         intel_logical_ring_advance(ringbuf);
  772.         request->tail = ringbuf->tail;
  773.  
  774.         /*
  775.          * Here we add two extra NOOPs as padding to avoid
  776.          * lite restore of a context with HEAD==TAIL.
  777.          *
  778.          * Caller must reserve WA_TAIL_DWORDS for us!
  779.          */
  780.         intel_logical_ring_emit(ringbuf, MI_NOOP);
  781.         intel_logical_ring_emit(ringbuf, MI_NOOP);
  782.         intel_logical_ring_advance(ringbuf);
  783.  
  784.         if (intel_ring_stopped(engine))
  785.                 return 0;
  786.  
  787.         if (engine->last_context != request->ctx) {
  788.                 if (engine->last_context)
  789.                         intel_lr_context_unpin(engine->last_context, engine);
  790.                 if (request->ctx != request->i915->kernel_context) {
  791.                         intel_lr_context_pin(request->ctx, engine);
  792.                         engine->last_context = request->ctx;
  793.                 } else {
  794.                         engine->last_context = NULL;
  795.                 }
  796.         }
  797.  
  798.         if (dev_priv->guc.execbuf_client)
  799.                 i915_guc_submit(dev_priv->guc.execbuf_client, request);
  800.         else
  801.                 execlists_context_queue(request);
  802.  
  803.         return 0;
  804. }
  805.  
  806. static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
  807. {
  808.         uint32_t __iomem *virt;
  809.         int rem = ringbuf->size - ringbuf->tail;
  810.  
  811.         virt = ringbuf->virtual_start + ringbuf->tail;
  812.         rem /= 4;
  813.         while (rem--)
  814.                 iowrite32(MI_NOOP, virt++);
  815.  
  816.         ringbuf->tail = 0;
  817.         intel_ring_update_space(ringbuf);
  818. }
  819.  
  820. static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes)
  821. {
  822.         struct intel_ringbuffer *ringbuf = req->ringbuf;
  823.         int remain_usable = ringbuf->effective_size - ringbuf->tail;
  824.         int remain_actual = ringbuf->size - ringbuf->tail;
  825.         int ret, total_bytes, wait_bytes = 0;
  826.         bool need_wrap = false;
  827.  
  828.         if (ringbuf->reserved_in_use)
  829.                 total_bytes = bytes;
  830.         else
  831.                 total_bytes = bytes + ringbuf->reserved_size;
  832.  
  833.         if (unlikely(bytes > remain_usable)) {
  834.                 /*
  835.                  * Not enough space for the basic request. So need to flush
  836.                  * out the remainder and then wait for base + reserved.
  837.                  */
  838.                 wait_bytes = remain_actual + total_bytes;
  839.                 need_wrap = true;
  840.         } else {
  841.                 if (unlikely(total_bytes > remain_usable)) {
  842.                         /*
  843.                          * The base request will fit but the reserved space
  844.                          * falls off the end. So don't need an immediate wrap
  845.                          * and only need to effectively wait for the reserved
  846.                          * size space from the start of ringbuffer.
  847.                          */
  848.                         wait_bytes = remain_actual + ringbuf->reserved_size;
  849.                 } else if (total_bytes > ringbuf->space) {
  850.                         /* No wrapping required, just waiting. */
  851.                         wait_bytes = total_bytes;
  852.                 }
  853.         }
  854.  
  855.         if (wait_bytes) {
  856.                 ret = logical_ring_wait_for_space(req, wait_bytes);
  857.                 if (unlikely(ret))
  858.                         return ret;
  859.  
  860.                 if (need_wrap)
  861.                         __wrap_ring_buffer(ringbuf);
  862.         }
  863.  
  864.         return 0;
  865. }
  866.  
  867. /**
  868.  * intel_logical_ring_begin() - prepare the logical ringbuffer to accept some commands
  869.  *
  870.  * @req: The request to start some new work for
  871.  * @num_dwords: number of DWORDs that we plan to write to the ringbuffer.
  872.  *
  873.  * The ringbuffer might not be ready to accept the commands right away (maybe it needs to
  874.  * be wrapped, or wait a bit for the tail to be updated). This function takes care of that
  875.  * and also preallocates a request (every workload submission is still mediated through
  876.  * requests, same as it did with legacy ringbuffer submission).
  877.  *
  878.  * Return: non-zero if the ringbuffer is not ready to be written to.
  879.  */
  880. int intel_logical_ring_begin(struct drm_i915_gem_request *req, int num_dwords)
  881. {
  882.         struct drm_i915_private *dev_priv;
  883.         int ret;
  884.  
  885.         WARN_ON(req == NULL);
  886.         dev_priv = req->ring->dev->dev_private;
  887.  
  888.         ret = i915_gem_check_wedge(&dev_priv->gpu_error,
  889.                                    dev_priv->mm.interruptible);
  890.         if (ret)
  891.                 return ret;
  892.  
  893.         ret = logical_ring_prepare(req, num_dwords * sizeof(uint32_t));
  894.         if (ret)
  895.                 return ret;
  896.  
  897.         req->ringbuf->space -= num_dwords * sizeof(uint32_t);
  898.         return 0;
  899. }
  900.  
  901. int intel_logical_ring_reserve_space(struct drm_i915_gem_request *request)
  902. {
  903.         /*
  904.          * The first call merely notes the reserve request and is common for
  905.          * all back ends. The subsequent localised _begin() call actually
  906.          * ensures that the reservation is available. Without the begin, if
  907.          * the request creator immediately submitted the request without
  908.          * adding any commands to it then there might not actually be
  909.          * sufficient room for the submission commands.
  910.          */
  911.         intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
  912.  
  913.         return intel_logical_ring_begin(request, 0);
  914. }
  915.  
  916. /**
  917.  * intel_execlists_submission() - submit a batchbuffer for execution, Execlists style
  918.  * @params: execbuffer call parameters (dev, engine, context, request, dispatch flags, etc.).
  919.  * @args: execbuffer call arguments.
  920.  * @vmas: list of vmas.
  927.  *
  928.  * This is the evil twin version of i915_gem_ringbuffer_submission. It abstracts
  929.  * away the submission details of the execbuffer ioctl call.
  930.  *
  931.  * Return: non-zero if the submission fails.
  932.  */
  933. int intel_execlists_submission(struct i915_execbuffer_params *params,
  934.                                struct drm_i915_gem_execbuffer2 *args,
  935.                                struct list_head *vmas)
  936. {
  937.         struct drm_device       *dev = params->dev;
  938.         struct intel_engine_cs  *ring = params->ring;
  939.         struct drm_i915_private *dev_priv = dev->dev_private;
  940.         struct intel_ringbuffer *ringbuf = params->ctx->engine[ring->id].ringbuf;
  941.         u64 exec_start;
  942.         int instp_mode;
  943.         u32 instp_mask;
  944.         int ret;
  945.  
  946.         instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
  947.         instp_mask = I915_EXEC_CONSTANTS_MASK;
  948.         switch (instp_mode) {
  949.         case I915_EXEC_CONSTANTS_REL_GENERAL:
  950.         case I915_EXEC_CONSTANTS_ABSOLUTE:
  951.         case I915_EXEC_CONSTANTS_REL_SURFACE:
  952.                 if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
  953.                         DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
  954.                         return -EINVAL;
  955.                 }
  956.  
  957.                 if (instp_mode != dev_priv->relative_constants_mode) {
  958.                         if (instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
  959.                                 DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
  960.                                 return -EINVAL;
  961.                         }
  962.  
  963.                         /* The HW changed the meaning on this bit on gen6 */
  964.                         instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
  965.                 }
  966.                 break;
  967.         default:
  968.                 DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
  969.                 return -EINVAL;
  970.         }
  971.  
  972.         if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
  973.                 DRM_DEBUG("sol reset is gen7 only\n");
  974.                 return -EINVAL;
  975.         }
  976.  
  977.         ret = execlists_move_to_gpu(params->request, vmas);
  978.         if (ret)
  979.                 return ret;
  980.  
  981.         if (ring == &dev_priv->ring[RCS] &&
  982.             instp_mode != dev_priv->relative_constants_mode) {
  983.                 ret = intel_logical_ring_begin(params->request, 4);
  984.                 if (ret)
  985.                         return ret;
  986.  
  987.                 intel_logical_ring_emit(ringbuf, MI_NOOP);
  988.                 intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(1));
  989.                 intel_logical_ring_emit_reg(ringbuf, INSTPM);
  990.                 intel_logical_ring_emit(ringbuf, instp_mask << 16 | instp_mode);
  991.                 intel_logical_ring_advance(ringbuf);
  992.  
  993.                 dev_priv->relative_constants_mode = instp_mode;
  994.         }
  995.  
  996.         exec_start = params->batch_obj_vm_offset +
  997.                      args->batch_start_offset;
  998.  
  999.         ret = ring->emit_bb_start(params->request, exec_start, params->dispatch_flags);
  1000.         if (ret)
  1001.                 return ret;
  1002.  
  1003.         trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
  1004.  
  1005.         i915_gem_execbuffer_move_to_active(vmas, params->request);
  1006.         i915_gem_execbuffer_retire_commands(params);
  1007.  
  1008.         return 0;
  1009. }
  1010.  
  1011. void intel_execlists_retire_requests(struct intel_engine_cs *ring)
  1012. {
  1013.         struct drm_i915_gem_request *req, *tmp;
  1014.         struct list_head retired_list;
  1015.  
  1016.         WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
  1017.         if (list_empty(&ring->execlist_retired_req_list))
  1018.                 return;
  1019.  
  1020.         INIT_LIST_HEAD(&retired_list);
  1021.         spin_lock_irq(&ring->execlist_lock);
  1022.         list_replace_init(&ring->execlist_retired_req_list, &retired_list);
  1023.         spin_unlock_irq(&ring->execlist_lock);
  1024.  
  1025.         list_for_each_entry_safe(req, tmp, &retired_list, execlist_link) {
  1026.                 struct intel_context *ctx = req->ctx;
  1027.                 struct drm_i915_gem_object *ctx_obj =
  1028.                                 ctx->engine[ring->id].state;
  1029.  
  1030.                 if (ctx_obj && (ctx != req->i915->kernel_context))
  1031.                         intel_lr_context_unpin(ctx, ring);
  1032.  
  1033.                 list_del(&req->execlist_link);
  1034.                 i915_gem_request_unreference(req);
  1035.         }
  1036. }
  1037.  
  1038. void intel_logical_ring_stop(struct intel_engine_cs *ring)
  1039. {
  1040.         struct drm_i915_private *dev_priv = ring->dev->dev_private;
  1041.         int ret;
  1042.  
  1043.         if (!intel_ring_initialized(ring))
  1044.                 return;
  1045.  
  1046.         ret = intel_ring_idle(ring);
  1047.         if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
  1048.                 DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
  1049.                           ring->name, ret);
  1050.  
  1051.         /* TODO: Is this correct with Execlists enabled? */
  1052.         I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
  1053.         if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
  1054.                 DRM_ERROR("%s :timed out trying to stop ring\n", ring->name);
  1055.                 return;
  1056.         }
  1057.         I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
  1058. }
  1059.  
  1060. int logical_ring_flush_all_caches(struct drm_i915_gem_request *req)
  1061. {
  1062.         struct intel_engine_cs *ring = req->ring;
  1063.         int ret;
  1064.  
  1065.         if (!ring->gpu_caches_dirty)
  1066.                 return 0;
  1067.  
  1068.         ret = ring->emit_flush(req, 0, I915_GEM_GPU_DOMAINS);
  1069.         if (ret)
  1070.                 return ret;
  1071.  
  1072.         ring->gpu_caches_dirty = false;
  1073.         return 0;
  1074. }
  1075.  
  1076. static int intel_lr_context_do_pin(struct intel_context *ctx,
  1077.                                    struct intel_engine_cs *ring)
  1078. {
  1079.         struct drm_device *dev = ring->dev;
  1080.         struct drm_i915_private *dev_priv = dev->dev_private;
  1081.         struct drm_i915_gem_object *ctx_obj = ctx->engine[ring->id].state;
  1082.         struct intel_ringbuffer *ringbuf = ctx->engine[ring->id].ringbuf;
  1083.         struct page *lrc_state_page;
  1084.         uint32_t *lrc_reg_state;
  1085.         int ret;
  1086.  
  1087.         WARN_ON(!mutex_is_locked(&ring->dev->struct_mutex));
  1088.  
  1089.         ret = i915_gem_obj_ggtt_pin(ctx_obj, GEN8_LR_CONTEXT_ALIGN,
  1090.                         PIN_OFFSET_BIAS | GUC_WOPCM_TOP);
  1091.         if (ret)
  1092.                 return ret;
  1093.  
  1094.         lrc_state_page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
  1095.         if (WARN_ON(!lrc_state_page)) {
  1096.                 ret = -ENODEV;
  1097.                 goto unpin_ctx_obj;
  1098.         }
  1099.  
  1100.         ret = intel_pin_and_map_ringbuffer_obj(ring->dev, ringbuf);
  1101.         if (ret)
  1102.                 goto unpin_ctx_obj;
  1103.  
  1104.         ctx->engine[ring->id].lrc_vma = i915_gem_obj_to_ggtt(ctx_obj);
  1105.         intel_lr_context_descriptor_update(ctx, ring);
  1106.         lrc_reg_state = kmap(lrc_state_page);
  1107.         lrc_reg_state[CTX_RING_BUFFER_START+1] = ringbuf->vma->node.start;
  1108.         ctx->engine[ring->id].lrc_reg_state = lrc_reg_state;
  1109.         ctx_obj->dirty = true;
  1110.  
  1111.         /* Invalidate GuC TLB. */
  1112.         if (i915.enable_guc_submission)
  1113.                 I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
  1114.  
  1115.         return ret;
  1116.  
  1117. unpin_ctx_obj:
  1118.         i915_gem_object_ggtt_unpin(ctx_obj);
  1119.  
  1120.         return ret;
  1121. }
  1122.  
  1123. static int intel_lr_context_pin(struct intel_context *ctx,
  1124.                                 struct intel_engine_cs *engine)
  1125. {
  1126.         int ret = 0;
  1127.  
  1128.         if (ctx->engine[engine->id].pin_count++ == 0) {
  1129.                 ret = intel_lr_context_do_pin(ctx, engine);
  1130.                 if (ret)
  1131.                         goto reset_pin_count;
  1132.  
  1133.                 i915_gem_context_reference(ctx);
  1134.         }
  1135.         return ret;
  1136.  
  1137. reset_pin_count:
  1138.         ctx->engine[engine->id].pin_count = 0;
  1139.         return ret;
  1140. }
  1141.  
  1142. void intel_lr_context_unpin(struct intel_context *ctx,
  1143.                             struct intel_engine_cs *engine)
  1144. {
  1145.         struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state;
  1146.  
  1147.         WARN_ON(!mutex_is_locked(&ctx->i915->dev->struct_mutex));
  1148.         if (--ctx->engine[engine->id].pin_count == 0) {
  1149. //              kunmap(kmap_to_page(ctx->engine[engine->id].lrc_reg_state));
  1150.                 intel_unpin_ringbuffer_obj(ctx->engine[engine->id].ringbuf);
  1151.                 i915_gem_object_ggtt_unpin(ctx_obj);
  1152.                 ctx->engine[engine->id].lrc_vma = NULL;
  1153.                 ctx->engine[engine->id].lrc_desc = 0;
  1154.                 ctx->engine[engine->id].lrc_reg_state = NULL;
  1155.  
  1156.                 i915_gem_context_unreference(ctx);
  1157.         }
  1158. }
  1159.  
  1160. static int intel_logical_ring_workarounds_emit(struct drm_i915_gem_request *req)
  1161. {
  1162.         int ret, i;
  1163.         struct intel_engine_cs *ring = req->ring;
  1164.         struct intel_ringbuffer *ringbuf = req->ringbuf;
  1165.         struct drm_device *dev = ring->dev;
  1166.         struct drm_i915_private *dev_priv = dev->dev_private;
  1167.         struct i915_workarounds *w = &dev_priv->workarounds;
  1168.  
  1169.         if (w->count == 0)
  1170.                 return 0;
  1171.  
  1172.         ring->gpu_caches_dirty = true;
  1173.         ret = logical_ring_flush_all_caches(req);
  1174.         if (ret)
  1175.                 return ret;
  1176.  
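        /* Room for one MI_LOAD_REGISTER_IMM header, w->count (register,
         * value) pairs and a trailing MI_NOOP. */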
  1177.         ret = intel_logical_ring_begin(req, w->count * 2 + 2);
  1178.         if (ret)
  1179.                 return ret;
  1180.  
  1181.         intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(w->count));
  1182.         for (i = 0; i < w->count; i++) {
  1183.                 intel_logical_ring_emit_reg(ringbuf, w->reg[i].addr);
  1184.                 intel_logical_ring_emit(ringbuf, w->reg[i].value);
  1185.         }
  1186.         intel_logical_ring_emit(ringbuf, MI_NOOP);
  1187.  
  1188.         intel_logical_ring_advance(ringbuf);
  1189.  
  1190.         ring->gpu_caches_dirty = true;
  1191.         ret = logical_ring_flush_all_caches(req);
  1192.         if (ret)
  1193.                 return ret;
  1194.  
  1195.         return 0;
  1196. }
  1197.  
  1198. #define wa_ctx_emit(batch, index, cmd)                                  \
  1199.         do {                                                            \
  1200.                 int __index = (index)++;                                \
  1201.                 if (WARN_ON(__index >= (PAGE_SIZE / sizeof(uint32_t)))) { \
  1202.                         return -ENOSPC;                                 \
  1203.                 }                                                       \
  1204.                 batch[__index] = (cmd);                                 \
  1205.         } while (0)
  1206.  
  1207. #define wa_ctx_emit_reg(batch, index, reg) \
  1208.         wa_ctx_emit((batch), (index), i915_mmio_reg_offset(reg))
  1209.  
  1210. /*
  1211.  * In this WA we need to set GEN8_L3SQCREG4[21:21] and reset it after
  1212.  * PIPE_CONTROL instruction. This is required for the flush to happen correctly
  1213.  * but there is a slight complication, as this is applied in a WA batch where the
  1214.  * values are only initialized once, so we cannot read the register value at the
  1215.  * beginning and reuse it further; hence we save its value to memory, upload a
  1216.  * constant value with bit 21 set and then restore the saved value afterwards.
  1217.  * To simplify the WA, a constant value is formed by using the default value
  1218.  * of this register. This shouldn't be a problem because we are only modifying
  1219.  * it for a short period and this batch is non-preemptible. We can of course
  1220.  * use additional instructions that read the actual value of the register
  1221.  * at that time and set our bit of interest but it makes the WA complicated.
  1222.  *
  1223.  * This WA is also required for Gen9 so extracting as a function avoids
  1224.  * code duplication.
  1225.  */
  1226. static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *ring,
  1227.                                                 uint32_t *const batch,
  1228.                                                 uint32_t index)
  1229. {
  1230.         uint32_t l3sqc4_flush = (0x40400000 | GEN8_LQSC_FLUSH_COHERENT_LINES);
  1231.  
  1232.         /*
  1233.          * WaDisableLSQCROPERFforOCL:skl
  1234.          * This WA is implemented in skl_init_clock_gating() but since
  1235.          * this batch updates GEN8_L3SQCREG4 with default value we need to
  1236.          * set this bit here to retain the WA during flush.
  1237.          */
  1238.         if (IS_SKL_REVID(ring->dev, 0, SKL_REVID_E0))
  1239.                 l3sqc4_flush |= GEN8_LQSC_RO_PERF_DIS;
  1240.  
  1241.         wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 |
  1242.                                    MI_SRM_LRM_GLOBAL_GTT));
  1243.         wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
  1244.         wa_ctx_emit(batch, index, ring->scratch.gtt_offset + 256);
  1245.         wa_ctx_emit(batch, index, 0);
  1246.  
  1247.         wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
  1248.         wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
  1249.         wa_ctx_emit(batch, index, l3sqc4_flush);
  1250.  
  1251.         wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
  1252.         wa_ctx_emit(batch, index, (PIPE_CONTROL_CS_STALL |
  1253.                                    PIPE_CONTROL_DC_FLUSH_ENABLE));
  1254.         wa_ctx_emit(batch, index, 0);
  1255.         wa_ctx_emit(batch, index, 0);
  1256.         wa_ctx_emit(batch, index, 0);
  1257.         wa_ctx_emit(batch, index, 0);
  1258.  
  1259.         wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 |
  1260.                                    MI_SRM_LRM_GLOBAL_GTT));
  1261.         wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4);
  1262.         wa_ctx_emit(batch, index, ring->scratch.gtt_offset + 256);
  1263.         wa_ctx_emit(batch, index, 0);
  1264.  
  1265.         return index;
  1266. }
  1267.  
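/*
 * For contrast, a minimal sketch (not part of the driver; the helper name is
 * made up) of the plain read-modify-write this WA would be if it could run on
 * the CPU via MMIO instead of from the build-once, non-preemptible WA batch:
 */
#if 0
static void l3sqc4_flush_cpu_sketch(struct drm_i915_private *dev_priv)
{
        u32 saved = I915_READ(GEN8_L3SQCREG4);

        I915_WRITE(GEN8_L3SQCREG4, saved | GEN8_LQSC_FLUSH_COHERENT_LINES);
        /* ... the PIPE_CONTROL DC flush would go here ... */
        I915_WRITE(GEN8_L3SQCREG4, saved);
}
#endif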
  1268. static inline uint32_t wa_ctx_start(struct i915_wa_ctx_bb *wa_ctx,
  1269.                                     uint32_t offset,
  1270.                                     uint32_t start_alignment)
  1271. {
  1272.         return wa_ctx->offset = ALIGN(offset, start_alignment);
  1273. }
  1274.  
  1275. static inline int wa_ctx_end(struct i915_wa_ctx_bb *wa_ctx,
  1276.                              uint32_t offset,
  1277.                              uint32_t size_alignment)
  1278. {
  1279.         wa_ctx->size = offset - wa_ctx->offset;
  1280.  
  1281.         WARN(wa_ctx->size % size_alignment,
  1282.              "wa_ctx_bb failed sanity checks: size %d is not aligned to %d\n",
  1283.              wa_ctx->size, size_alignment);
  1284.         return 0;
  1285. }
  1286.  
  1287. /**
  1288.  * gen8_init_indirectctx_bb() - initialize indirect ctx batch with WA
  1289.  *
  1290.  * @ring: only applicable for RCS
  1291.  * @wa_ctx: structure representing wa_ctx
  1292.  *  offset: specifies the start of the batch, should be cache-aligned. This is set
  1293.  *    from the offset value received as input.
  1294.  *  size: size of the batch in DWORDS (the HW expects it in terms of cachelines)
  1295.  * @batch: page in which the WA are loaded
  1296.  * @offset: This field specifies the start of the batch; it should be
  1297.  *  cache-aligned, otherwise it is adjusted accordingly.
  1298.  *  Typically we only have one indirect_ctx and one per_ctx batch buffer which are
  1299.  *  initialized at the beginning and shared across all contexts, but this field
  1300.  *  helps us to have multiple batches at different offsets and select them based
  1301.  *  on some criteria. At the moment this batch always starts at the beginning of the page
  1302.  *  and at this point we don't have multiple wa_ctx batch buffers.
  1303.  *
  1304.  *  The number of WA applied is not known at the beginning; we use this field
  1305.  *  to return the number of DWORDS written.
  1306.  *
  1307.  *  Note that this batch does not contain MI_BATCH_BUFFER_END,
  1308.  *  so it adds NOOPs as padding to make it cacheline aligned.
  1309.  *  MI_BATCH_BUFFER_END will be added to the perctx batch and the two together
  1310.  *  make a complete batch buffer.
  1311.  *
  1312.  * Return: non-zero if we exceed the PAGE_SIZE limit.
  1313.  */
  1314.  
  1315. static int gen8_init_indirectctx_bb(struct intel_engine_cs *ring,
  1316.                                     struct i915_wa_ctx_bb *wa_ctx,
  1317.                                     uint32_t *const batch,
  1318.                                     uint32_t *offset)
  1319. {
  1320.         uint32_t scratch_addr;
  1321.         uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
  1322.  
  1323.         /* WaDisableCtxRestoreArbitration:bdw,chv */
  1324.         wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
  1325.  
  1326.         /* WaFlushCoherentL3CacheLinesAtContextSwitch:bdw */
  1327.         if (IS_BROADWELL(ring->dev)) {
  1328.                 int rc = gen8_emit_flush_coherentl3_wa(ring, batch, index);
  1329.                 if (rc < 0)
  1330.                         return rc;
  1331.                 index = rc;
  1332.         }
  1333.  
  1334.         /* WaClearSlmSpaceAtContextSwitch:bdw,chv */
  1335.         /* Actual scratch location is at 128 bytes offset */
  1336.         scratch_addr = ring->scratch.gtt_offset + 2*CACHELINE_BYTES;
  1337.  
  1338.         wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6));
  1339.         wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 |
  1340.                                    PIPE_CONTROL_GLOBAL_GTT_IVB |
  1341.                                    PIPE_CONTROL_CS_STALL |
  1342.                                    PIPE_CONTROL_QW_WRITE));
  1343.         wa_ctx_emit(batch, index, scratch_addr);
  1344.         wa_ctx_emit(batch, index, 0);
  1345.         wa_ctx_emit(batch, index, 0);
  1346.         wa_ctx_emit(batch, index, 0);
  1347.  
  1348.         /* Pad to end of cacheline */
  1349.         while (index % CACHELINE_DWORDS)
  1350.                 wa_ctx_emit(batch, index, MI_NOOP);
  1351.  
  1352.         /*
  1353.          * MI_BATCH_BUFFER_END is not required in Indirect ctx BB because
  1354.          * execution depends on the length specified in terms of cache lines
  1355.          * in the register CTX_RCS_INDIRECT_CTX
  1356.          */
  1357.  
  1358.         return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
  1359. }
  1360.  
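/*
 * A minimal sketch (not part of the driver; the helper name is made up) of how
 * the dword offset/size recorded above are later folded into the
 * CTX_RCS_INDIRECT_CTX value when the context image is populated (see
 * populate_lr_context() below): the offset becomes a byte address and the size
 * a cacheline count.
 */
#if 0
static u32 indirect_ctx_reg_sketch(struct intel_engine_cs *ring)
{
        struct i915_ctx_workarounds *wa_ctx = &ring->wa_ctx;
        uint32_t ggtt_offset = i915_gem_obj_ggtt_offset(wa_ctx->obj);

        return (ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) |
               (wa_ctx->indirect_ctx.size / CACHELINE_DWORDS);
}
#endif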
  1361. /**
  1362.  * gen8_init_perctx_bb() - initialize per ctx batch with WA
  1363.  *
  1364.  * @ring: only applicable for RCS
  1365.  * @wa_ctx: structure representing wa_ctx
  1366.  *  offset: specifies start of the batch, should be cache-aligned.
  1367.  *  size: size of the batch in DWORDS (the HW expects it in terms of cachelines)
  1368.  * @batch: page in which the WA are loaded
  1369.  * @offset: This field specifies the start of this batch.
  1370.  *   This batch is started immediately after the indirect_ctx batch. Since we ensure
  1371.  *   that indirect_ctx ends on a cacheline, this batch is aligned automatically.
  1372.  *
  1373.  *   The number of DWORDS written is returned using this field.
  1374.  *
  1375.  *  This batch is terminated with MI_BATCH_BUFFER_END, so we need not add padding
  1376.  *  to align it to a cacheline: padding after MI_BATCH_BUFFER_END is redundant.
  1377.  */
  1378. static int gen8_init_perctx_bb(struct intel_engine_cs *ring,
  1379.                                struct i915_wa_ctx_bb *wa_ctx,
  1380.                                uint32_t *const batch,
  1381.                                uint32_t *offset)
  1382. {
  1383.         uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
  1384.  
  1385.         /* WaDisableCtxRestoreArbitration:bdw,chv */
  1386.         wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);
  1387.  
  1388.         wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
  1389.  
  1390.         return wa_ctx_end(wa_ctx, *offset = index, 1);
  1391. }
  1392.  
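/*
 * A minimal sketch (not part of the driver; the helper name is made up) of the
 * invariants the two batches above establish: both share one backing page,
 * indirect_ctx is cacheline aligned and padded to whole cachelines, and
 * per_ctx starts immediately after it.
 */
#if 0
static void wa_ctx_layout_sketch(const struct i915_ctx_workarounds *wa_ctx)
{
        WARN_ON(wa_ctx->indirect_ctx.offset % CACHELINE_DWORDS);
        WARN_ON(wa_ctx->indirect_ctx.size % CACHELINE_DWORDS);
        WARN_ON(wa_ctx->per_ctx.offset !=
                wa_ctx->indirect_ctx.offset + wa_ctx->indirect_ctx.size);
}
#endif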
  1393. static int gen9_init_indirectctx_bb(struct intel_engine_cs *ring,
  1394.                                     struct i915_wa_ctx_bb *wa_ctx,
  1395.                                     uint32_t *const batch,
  1396.                                     uint32_t *offset)
  1397. {
  1398.         int ret;
  1399.         struct drm_device *dev = ring->dev;
  1400.         uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
  1401.  
  1402.         /* WaDisableCtxRestoreArbitration:skl,bxt */
  1403.         if (IS_SKL_REVID(dev, 0, SKL_REVID_D0) ||
  1404.             IS_BXT_REVID(dev, 0, BXT_REVID_A1))
  1405.                 wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_DISABLE);
  1406.  
  1407.         /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt */
  1408.         ret = gen8_emit_flush_coherentl3_wa(ring, batch, index);
  1409.         if (ret < 0)
  1410.                 return ret;
  1411.         index = ret;
  1412.  
  1413.         /* Pad to end of cacheline */
  1414.         while (index % CACHELINE_DWORDS)
  1415.                 wa_ctx_emit(batch, index, MI_NOOP);
  1416.  
  1417.         return wa_ctx_end(wa_ctx, *offset = index, CACHELINE_DWORDS);
  1418. }
  1419.  
  1420. static int gen9_init_perctx_bb(struct intel_engine_cs *ring,
  1421.                                struct i915_wa_ctx_bb *wa_ctx,
  1422.                                uint32_t *const batch,
  1423.                                uint32_t *offset)
  1424. {
  1425.         struct drm_device *dev = ring->dev;
  1426.         uint32_t index = wa_ctx_start(wa_ctx, *offset, CACHELINE_DWORDS);
  1427.  
  1428.         /* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
  1429.         if (IS_SKL_REVID(dev, 0, SKL_REVID_B0) ||
  1430.             IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
  1431.                 wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1));
  1432.                 wa_ctx_emit_reg(batch, index, GEN9_SLICE_COMMON_ECO_CHICKEN0);
  1433.                 wa_ctx_emit(batch, index,
  1434.                             _MASKED_BIT_ENABLE(DISABLE_PIXEL_MASK_CAMMING));
  1435.                 wa_ctx_emit(batch, index, MI_NOOP);
  1436.         }
  1437.  
  1438.         /* WaDisableCtxRestoreArbitration:skl,bxt */
  1439.         if (IS_SKL_REVID(dev, 0, SKL_REVID_D0) ||
  1440.             IS_BXT_REVID(dev, 0, BXT_REVID_A1))
  1441.                 wa_ctx_emit(batch, index, MI_ARB_ON_OFF | MI_ARB_ENABLE);
  1442.  
  1443.         wa_ctx_emit(batch, index, MI_BATCH_BUFFER_END);
  1444.  
  1445.         return wa_ctx_end(wa_ctx, *offset = index, 1);
  1446. }
  1447.  
  1448. static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *ring, u32 size)
  1449. {
  1450.         int ret;
  1451.  
  1452.         ring->wa_ctx.obj = i915_gem_alloc_object(ring->dev, PAGE_ALIGN(size));
  1453.         if (!ring->wa_ctx.obj) {
  1454.                 DRM_DEBUG_DRIVER("alloc LRC WA ctx backing obj failed.\n");
  1455.                 return -ENOMEM;
  1456.         }
  1457.  
  1458.         ret = i915_gem_obj_ggtt_pin(ring->wa_ctx.obj, PAGE_SIZE, 0);
  1459.         if (ret) {
  1460.                 DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n",
  1461.                                  ret);
  1462.                 drm_gem_object_unreference(&ring->wa_ctx.obj->base);
  1463.                 return ret;
  1464.         }
  1465.  
  1466.         return 0;
  1467. }
  1468.  
  1469. static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *ring)
  1470. {
  1471.         if (ring->wa_ctx.obj) {
  1472.                 i915_gem_object_ggtt_unpin(ring->wa_ctx.obj);
  1473.                 drm_gem_object_unreference(&ring->wa_ctx.obj->base);
  1474.                 ring->wa_ctx.obj = NULL;
  1475.         }
  1476. }
  1477.  
  1478. static int intel_init_workaround_bb(struct intel_engine_cs *ring)
  1479. {
  1480.         int ret;
  1481.         uint32_t *batch;
  1482.         uint32_t offset;
  1483.         struct page *page;
  1484.         struct i915_ctx_workarounds *wa_ctx = &ring->wa_ctx;
  1485.  
  1486.         WARN_ON(ring->id != RCS);
  1487.  
  1488.         /* update this when WA for higher Gen are added */
  1489.         if (INTEL_INFO(ring->dev)->gen > 9) {
  1490.                 DRM_ERROR("WA batch buffer is not initialized for Gen%d\n",
  1491.                           INTEL_INFO(ring->dev)->gen);
  1492.                 return 0;
  1493.         }
  1494.  
  1495.         /* some WA perform writes to scratch page, ensure it is valid */
  1496.         if (ring->scratch.obj == NULL) {
  1497.                 DRM_ERROR("scratch page not allocated for %s\n", ring->name);
  1498.                 return -EINVAL;
  1499.         }
  1500.  
  1501.         ret = lrc_setup_wa_ctx_obj(ring, PAGE_SIZE);
  1502.         if (ret) {
  1503.                 DRM_DEBUG_DRIVER("Failed to setup context WA page: %d\n", ret);
  1504.                 return ret;
  1505.         }
  1506.  
  1507.         page = i915_gem_object_get_dirty_page(wa_ctx->obj, 0);
  1508.         batch = kmap_atomic(page);
  1509.         offset = 0;
  1510.  
  1511.         if (INTEL_INFO(ring->dev)->gen == 8) {
  1512.                 ret = gen8_init_indirectctx_bb(ring,
  1513.                                                &wa_ctx->indirect_ctx,
  1514.                                                batch,
  1515.                                                &offset);
  1516.                 if (ret)
  1517.                         goto out;
  1518.  
  1519.                 ret = gen8_init_perctx_bb(ring,
  1520.                                           &wa_ctx->per_ctx,
  1521.                                           batch,
  1522.                                           &offset);
  1523.                 if (ret)
  1524.                         goto out;
  1525.         } else if (INTEL_INFO(ring->dev)->gen == 9) {
  1526.                 ret = gen9_init_indirectctx_bb(ring,
  1527.                                                &wa_ctx->indirect_ctx,
  1528.                                                batch,
  1529.                                                &offset);
  1530.                 if (ret)
  1531.                         goto out;
  1532.  
  1533.                 ret = gen9_init_perctx_bb(ring,
  1534.                                           &wa_ctx->per_ctx,
  1535.                                           batch,
  1536.                                           &offset);
  1537.                 if (ret)
  1538.                         goto out;
  1539.         }
  1540.  
  1541. out:
  1542.         kunmap_atomic(batch);
  1543.         if (ret)
  1544.                 lrc_destroy_wa_ctx_obj(ring);
  1545.  
  1546.         return ret;
  1547. }
  1548.  
  1549. static int gen8_init_common_ring(struct intel_engine_cs *ring)
  1550. {
  1551.         struct drm_device *dev = ring->dev;
  1552.         struct drm_i915_private *dev_priv = dev->dev_private;
  1553.         u8 next_context_status_buffer_hw;
  1554.  
  1555.         lrc_setup_hardware_status_page(ring,
  1556.                                 dev_priv->kernel_context->engine[ring->id].state);
  1557.  
  1558.         I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
  1559.         I915_WRITE(RING_HWSTAM(ring->mmio_base), 0xffffffff);
  1560.  
  1561.         I915_WRITE(RING_MODE_GEN7(ring),
  1562.                    _MASKED_BIT_DISABLE(GFX_REPLAY_MODE) |
  1563.                    _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE));
  1564.         POSTING_READ(RING_MODE_GEN7(ring));
  1565.  
  1566.         /*
  1567.          * Instead of resetting the Context Status Buffer (CSB) read pointer to
  1568.          * zero, we need to read the write pointer from hardware and use its
  1569.          * value because "this register is power context save restored".
  1570.          * Effectively, these states have been observed:
  1571.          *
  1572.          *      | Suspend-to-idle (freeze) | Suspend-to-RAM (mem) |
  1573.          * BDW  | CSB regs not reset       | CSB regs reset       |
  1574.          * CHT  | CSB regs not reset       | CSB regs not reset   |
  1575.          * SKL  |         ?                |         ?            |
  1576.          * BXT  |         ?                |         ?            |
  1577.          */
  1578.         next_context_status_buffer_hw =
  1579.                 GEN8_CSB_WRITE_PTR(I915_READ(RING_CONTEXT_STATUS_PTR(ring)));
  1580.  
  1581.         /*
  1582.          * When the CSB registers are reset (also after power-up / gpu reset),
  1583.          * the CSB write pointer is set to all 1's, which is not valid; use
  1584.          * GEN8_CSB_ENTRIES - 1 (5) in this special case so the first element read is CSB[0].
  1585.          */
  1586.         if (next_context_status_buffer_hw == GEN8_CSB_PTR_MASK)
  1587.                 next_context_status_buffer_hw = (GEN8_CSB_ENTRIES - 1);
  1588.  
  1589.         ring->next_context_status_buffer = next_context_status_buffer_hw;
  1590.         DRM_DEBUG_DRIVER("Execlists enabled for %s\n", ring->name);
  1591.  
  1592.         memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
  1593.  
  1594.         return 0;
  1595. }
  1596.  
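/*
 * A minimal sketch (not part of the driver; the helper name is made up) of why
 * GEN8_CSB_ENTRIES - 1 is used as the reset value above: the next CSB element
 * consumed is the one after next_context_status_buffer, modulo the number of
 * entries, so a value of 5 makes the first element read CSB[0].
 */
#if 0
static u8 first_csb_entry_sketch(u8 next_context_status_buffer)
{
        return (next_context_status_buffer + 1) % GEN8_CSB_ENTRIES;
}
#endif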
  1597. static int gen8_init_render_ring(struct intel_engine_cs *ring)
  1598. {
  1599.         struct drm_device *dev = ring->dev;
  1600.         struct drm_i915_private *dev_priv = dev->dev_private;
  1601.         int ret;
  1602.  
  1603.         ret = gen8_init_common_ring(ring);
  1604.         if (ret)
  1605.                 return ret;
  1606.  
  1607.         /* We need to disable the AsyncFlip performance optimisations in order
  1608.          * to use MI_WAIT_FOR_EVENT within the CS. It should already be
  1609.          * programmed to '1' on all products.
  1610.          *
  1611.          * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
  1612.          */
  1613.         I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
  1614.  
  1615.         I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
  1616.  
  1617.         return init_workarounds_ring(ring);
  1618. }
  1619.  
  1620. static int gen9_init_render_ring(struct intel_engine_cs *ring)
  1621. {
  1622.         int ret;
  1623.  
  1624.         ret = gen8_init_common_ring(ring);
  1625.         if (ret)
  1626.                 return ret;
  1627.  
  1628.         return init_workarounds_ring(ring);
  1629. }
  1630.  
  1631. static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
  1632. {
  1633.         struct i915_hw_ppgtt *ppgtt = req->ctx->ppgtt;
  1634.         struct intel_engine_cs *ring = req->ring;
  1635.         struct intel_ringbuffer *ringbuf = req->ringbuf;
  1636.         const int num_lri_cmds = GEN8_LEGACY_PDPES * 2;
  1637.         int i, ret;
  1638.  
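        /* 1 dword LRI header + 2 dwords (reg, value) per PDP UDW/LDW register + 1 MI_NOOP */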
  1639.         ret = intel_logical_ring_begin(req, num_lri_cmds * 2 + 2);
  1640.         if (ret)
  1641.                 return ret;
  1642.  
  1643.         intel_logical_ring_emit(ringbuf, MI_LOAD_REGISTER_IMM(num_lri_cmds));
  1644.         for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
  1645.                 const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
  1646.  
  1647.                 intel_logical_ring_emit_reg(ringbuf, GEN8_RING_PDP_UDW(ring, i));
  1648.                 intel_logical_ring_emit(ringbuf, upper_32_bits(pd_daddr));
  1649.                 intel_logical_ring_emit_reg(ringbuf, GEN8_RING_PDP_LDW(ring, i));
  1650.                 intel_logical_ring_emit(ringbuf, lower_32_bits(pd_daddr));
  1651.         }
  1652.  
  1653.         intel_logical_ring_emit(ringbuf, MI_NOOP);
  1654.         intel_logical_ring_advance(ringbuf);
  1655.  
  1656.         return 0;
  1657. }
  1658.  
  1659. static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
  1660.                               u64 offset, unsigned dispatch_flags)
  1661. {
  1662.         struct intel_ringbuffer *ringbuf = req->ringbuf;
  1663.         bool ppgtt = !(dispatch_flags & I915_DISPATCH_SECURE);
  1664.         int ret;
  1665.  
  1666.         /* Don't rely on the HW updating the PDPs, especially in lite-restore.
  1667.          * Ideally, we should set Force PD Restore in the ctx descriptor,
  1668.          * but we can't. Force Restore would be a second option, but
  1669.          * it is unsafe in case of lite-restore (because the ctx is
  1670.          * not idle). PML4 is allocated during ppgtt init, so this is
  1671.          * not needed in 48-bit mode. */
  1672.         if (req->ctx->ppgtt &&
  1673.             (intel_ring_flag(req->ring) & req->ctx->ppgtt->pd_dirty_rings)) {
  1674.                 if (!USES_FULL_48BIT_PPGTT(req->i915) &&
  1675.                     !intel_vgpu_active(req->i915->dev)) {
  1676.                         ret = intel_logical_ring_emit_pdps(req);
  1677.                         if (ret)
  1678.                                 return ret;
  1679.                 }
  1680.  
  1681.                 req->ctx->ppgtt->pd_dirty_rings &= ~intel_ring_flag(req->ring);
  1682.         }
  1683.  
  1684.         ret = intel_logical_ring_begin(req, 4);
  1685.         if (ret)
  1686.                 return ret;
  1687.  
  1688.         /* FIXME(BDW): Address space and security selectors. */
  1689.         intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 |
  1690.                                 (ppgtt<<8) |
  1691.                                 (dispatch_flags & I915_DISPATCH_RS ?
  1692.                                  MI_BATCH_RESOURCE_STREAMER : 0));
  1693.         intel_logical_ring_emit(ringbuf, lower_32_bits(offset));
  1694.         intel_logical_ring_emit(ringbuf, upper_32_bits(offset));
  1695.         intel_logical_ring_emit(ringbuf, MI_NOOP);
  1696.         intel_logical_ring_advance(ringbuf);
  1697.  
  1698.         return 0;
  1699. }
  1700.  
  1701. static bool gen8_logical_ring_get_irq(struct intel_engine_cs *ring)
  1702. {
  1703.         struct drm_device *dev = ring->dev;
  1704.         struct drm_i915_private *dev_priv = dev->dev_private;
  1705.         unsigned long flags;
  1706.  
  1707.         if (WARN_ON(!intel_irqs_enabled(dev_priv)))
  1708.                 return false;
  1709.  
  1710.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1711.         if (ring->irq_refcount++ == 0) {
  1712.                 I915_WRITE_IMR(ring, ~(ring->irq_enable_mask | ring->irq_keep_mask));
  1713.                 POSTING_READ(RING_IMR(ring->mmio_base));
  1714.         }
  1715.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1716.  
  1717.         return true;
  1718. }
  1719.  
  1720. static void gen8_logical_ring_put_irq(struct intel_engine_cs *ring)
  1721. {
  1722.         struct drm_device *dev = ring->dev;
  1723.         struct drm_i915_private *dev_priv = dev->dev_private;
  1724.         unsigned long flags;
  1725.  
  1726.         spin_lock_irqsave(&dev_priv->irq_lock, flags);
  1727.         if (--ring->irq_refcount == 0) {
  1728.                 I915_WRITE_IMR(ring, ~ring->irq_keep_mask);
  1729.                 POSTING_READ(RING_IMR(ring->mmio_base));
  1730.         }
  1731.         spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
  1732. }
  1733.  
  1734. static int gen8_emit_flush(struct drm_i915_gem_request *request,
  1735.                            u32 invalidate_domains,
  1736.                            u32 unused)
  1737. {
  1738.         struct intel_ringbuffer *ringbuf = request->ringbuf;
  1739.         struct intel_engine_cs *ring = ringbuf->ring;
  1740.         struct drm_device *dev = ring->dev;
  1741.         struct drm_i915_private *dev_priv = dev->dev_private;
  1742.         uint32_t cmd;
  1743.         int ret;
  1744.  
  1745.         ret = intel_logical_ring_begin(request, 4);
  1746.         if (ret)
  1747.                 return ret;
  1748.  
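        /* the "+ 1" lengthens MI_FLUSH_DW for the extra upper-address dword of the gen8 form */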
  1749.         cmd = MI_FLUSH_DW + 1;
  1750.  
  1751.         /* We always require a command barrier so that subsequent
  1752.          * commands, such as breadcrumb interrupts, are strictly ordered
  1753.          * wrt the contents of the write cache being flushed to memory
  1754.          * (and thus being coherent from the CPU).
  1755.          */
  1756.         cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
  1757.  
  1758.         if (invalidate_domains & I915_GEM_GPU_DOMAINS) {
  1759.                 cmd |= MI_INVALIDATE_TLB;
  1760.                 if (ring == &dev_priv->ring[VCS])
  1761.                         cmd |= MI_INVALIDATE_BSD;
  1762.         }
  1763.  
  1764.         intel_logical_ring_emit(ringbuf, cmd);
  1765.         intel_logical_ring_emit(ringbuf,
  1766.                                 I915_GEM_HWS_SCRATCH_ADDR |
  1767.                                 MI_FLUSH_DW_USE_GTT);
  1768.         intel_logical_ring_emit(ringbuf, 0); /* upper addr */
  1769.         intel_logical_ring_emit(ringbuf, 0); /* value */
  1770.         intel_logical_ring_advance(ringbuf);
  1771.  
  1772.         return 0;
  1773. }
  1774.  
  1775. static int gen8_emit_flush_render(struct drm_i915_gem_request *request,
  1776.                                   u32 invalidate_domains,
  1777.                                   u32 flush_domains)
  1778. {
  1779.         struct intel_ringbuffer *ringbuf = request->ringbuf;
  1780.         struct intel_engine_cs *ring = ringbuf->ring;
  1781.         u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
  1782.         bool vf_flush_wa = false;
  1783.         u32 flags = 0;
  1784.         int ret;
  1785.  
  1786.         flags |= PIPE_CONTROL_CS_STALL;
  1787.  
  1788.         if (flush_domains) {
  1789.                 flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
  1790.                 flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
  1791.                 flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
  1792.                 flags |= PIPE_CONTROL_FLUSH_ENABLE;
  1793.         }
  1794.  
  1795.         if (invalidate_domains) {
  1796.                 flags |= PIPE_CONTROL_TLB_INVALIDATE;
  1797.                 flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
  1798.                 flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
  1799.                 flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
  1800.                 flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
  1801.                 flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
  1802.                 flags |= PIPE_CONTROL_QW_WRITE;
  1803.                 flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
  1804.  
  1805.                 /*
  1806.                  * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL
  1807.                  * pipe control.
  1808.                  */
  1809.                 if (IS_GEN9(ring->dev))
  1810.                         vf_flush_wa = true;
  1811.         }
  1812.  
  1813.         ret = intel_logical_ring_begin(request, vf_flush_wa ? 12 : 6);
  1814.         if (ret)
  1815.                 return ret;
  1816.  
  1817.         if (vf_flush_wa) {
  1818.                 intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
  1819.                 intel_logical_ring_emit(ringbuf, 0);
  1820.                 intel_logical_ring_emit(ringbuf, 0);
  1821.                 intel_logical_ring_emit(ringbuf, 0);
  1822.                 intel_logical_ring_emit(ringbuf, 0);
  1823.                 intel_logical_ring_emit(ringbuf, 0);
  1824.         }
  1825.  
  1826.         intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
  1827.         intel_logical_ring_emit(ringbuf, flags);
  1828.         intel_logical_ring_emit(ringbuf, scratch_addr);
  1829.         intel_logical_ring_emit(ringbuf, 0);
  1830.         intel_logical_ring_emit(ringbuf, 0);
  1831.         intel_logical_ring_emit(ringbuf, 0);
  1832.         intel_logical_ring_advance(ringbuf);
  1833.  
  1834.         return 0;
  1835. }
  1836.  
  1837. static u32 gen8_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
  1838. {
  1839.         return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
  1840. }
  1841.  
  1842. static void gen8_set_seqno(struct intel_engine_cs *ring, u32 seqno)
  1843. {
  1844.         intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
  1845. }
  1846.  
  1847. static u32 bxt_a_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
  1848. {
  1849.  
  1850.         /*
  1851.          * On BXT A steppings there is a HW coherency issue whereby the
  1852.          * MI_STORE_DATA_IMM storing the completed request's seqno
  1853.          * occasionally doesn't invalidate the CPU cache. Work around this by
  1854.          * clflushing the corresponding cacheline whenever the caller wants
  1855.          * the coherency to be guaranteed. Note that this cacheline is known
  1856.          * to be clean at this point, since we only write it in
  1857.          * bxt_a_set_seqno(), where we also do a clflush after the write. So
  1858.          * this clflush in practice becomes an invalidate operation.
  1859.          */
  1860.  
  1861.         if (!lazy_coherency)
  1862.                 intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
  1863.  
  1864.         return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
  1865. }
  1866.  
  1867. static void bxt_a_set_seqno(struct intel_engine_cs *ring, u32 seqno)
  1868. {
  1869.         intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
  1870.  
  1871.         /* See bxt_a_get_seqno() explaining the reason for the clflush. */
  1872.         intel_flush_status_page(ring, I915_GEM_HWS_INDEX);
  1873. }
  1874.  
  1875. /*
  1876.  * Reserve space for 2 NOOPs at the end of each request to be
  1877.  * used as a workaround for not being allowed to do lite
  1878.  * restore with HEAD==TAIL (WaIdleLiteRestore).
  1879.  */
  1880. #define WA_TAIL_DWORDS 2
  1881.  
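/*
 * A minimal sketch (not part of the driver; the helper name is made up) of how
 * the dwords reserved by WA_TAIL_DWORDS are typically consumed on submission:
 * the tail handed to the ELSP is recorded first, and the two NOOPs are written
 * past it so that a lite restore of the same context never sees HEAD == TAIL.
 */
#if 0
static void wa_tail_sketch(struct drm_i915_gem_request *request)
{
        struct intel_ringbuffer *ringbuf = request->ringbuf;

        request->tail = ringbuf->tail;          /* tail the ELSP will use */
        intel_logical_ring_emit(ringbuf, MI_NOOP);
        intel_logical_ring_emit(ringbuf, MI_NOOP);
        intel_logical_ring_advance(ringbuf);    /* padding sits past the tail */
}
#endif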
  1882. static inline u32 hws_seqno_address(struct intel_engine_cs *engine)
  1883. {
  1884.         return engine->status_page.gfx_addr + I915_GEM_HWS_INDEX_ADDR;
  1885. }
  1886.  
  1887. static int gen8_emit_request(struct drm_i915_gem_request *request)
  1888. {
  1889.         struct intel_ringbuffer *ringbuf = request->ringbuf;
  1890.         int ret;
  1891.  
  1892.         ret = intel_logical_ring_begin(request, 6 + WA_TAIL_DWORDS);
  1893.         if (ret)
  1894.                 return ret;
  1895.  
  1896.         /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
  1897.         BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5));
  1898.  
  1899.         intel_logical_ring_emit(ringbuf,
  1900.                                 (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW);
  1901.         intel_logical_ring_emit(ringbuf,
  1902.                                 hws_seqno_address(request->ring) |
  1903.                                 MI_FLUSH_DW_USE_GTT);
  1904.         intel_logical_ring_emit(ringbuf, 0);
  1905.         intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request));
  1906.         intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
  1907.         intel_logical_ring_emit(ringbuf, MI_NOOP);
  1908.         return intel_logical_ring_advance_and_submit(request);
  1909. }
  1910.  
  1911. static int gen8_emit_request_render(struct drm_i915_gem_request *request)
  1912. {
  1913.         struct intel_ringbuffer *ringbuf = request->ringbuf;
  1914.         int ret;
  1915.  
  1916.         ret = intel_logical_ring_begin(request, 8 + WA_TAIL_DWORDS);
  1917.         if (ret)
  1918.                 return ret;
  1919.  
  1920.         /* We're using qword write, seqno should be aligned to 8 bytes. */
  1921.         BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
  1922.  
  1923.         /* w/a for post sync ops following a GPGPU operation we
  1924.          * need a prior CS_STALL, which is emitted by the flush
  1925.          * following the batch.
  1926.          */
  1927.         intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
  1928.         intel_logical_ring_emit(ringbuf,
  1929.                                 (PIPE_CONTROL_GLOBAL_GTT_IVB |
  1930.                                  PIPE_CONTROL_CS_STALL |
  1931.                                  PIPE_CONTROL_QW_WRITE));
  1932.         intel_logical_ring_emit(ringbuf, hws_seqno_address(request->ring));
  1933.         intel_logical_ring_emit(ringbuf, 0);
  1934.         intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request));
  1935.         /* We're thrashing one dword of HWS. */
  1936.         intel_logical_ring_emit(ringbuf, 0);
  1937.         intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
  1938.         intel_logical_ring_emit(ringbuf, MI_NOOP);
  1939.         return intel_logical_ring_advance_and_submit(request);
  1940. }
  1941.  
  1942. static int intel_lr_context_render_state_init(struct drm_i915_gem_request *req)
  1943. {
  1944.         struct render_state so;
  1945.         int ret;
  1946.  
  1947.         ret = i915_gem_render_state_prepare(req->ring, &so);
  1948.         if (ret)
  1949.                 return ret;
  1950.  
  1951.         if (so.rodata == NULL)
  1952.                 return 0;
  1953.  
  1954.         ret = req->ring->emit_bb_start(req, so.ggtt_offset,
  1955.                                        I915_DISPATCH_SECURE);
  1956.         if (ret)
  1957.                 goto out;
  1958.  
  1959.         ret = req->ring->emit_bb_start(req,
  1960.                                        (so.ggtt_offset + so.aux_batch_offset),
  1961.                                        I915_DISPATCH_SECURE);
  1962.         if (ret)
  1963.                 goto out;
  1964.  
  1965.         i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
  1966.  
  1967. out:
  1968.         i915_gem_render_state_fini(&so);
  1969.         return ret;
  1970. }
  1971.  
  1972. static int gen8_init_rcs_context(struct drm_i915_gem_request *req)
  1973. {
  1974.         int ret;
  1975.  
  1976.         ret = intel_logical_ring_workarounds_emit(req);
  1977.         if (ret)
  1978.                 return ret;
  1979.  
  1980.         ret = intel_rcs_context_init_mocs(req);
  1981.         /*
  1982.          * Failing to program the MOCS is non-fatal: the system will not
  1983.          * run at peak performance, so log an error and carry on.
  1984.          */
  1985.         if (ret)
  1986.                 DRM_ERROR("MOCS failed to program: expect performance issues.\n");
  1987.  
  1988.         return intel_lr_context_render_state_init(req);
  1989. }
  1990.  
  1991. /**
  1992.  * intel_logical_ring_cleanup() - deallocate the Engine Command Streamer
  1993.  *
  1994.  * @ring: Engine Command Streamer.
  1995.  *
  1996.  */
  1997. void intel_logical_ring_cleanup(struct intel_engine_cs *ring)
  1998. {
  1999.         struct drm_i915_private *dev_priv;
  2000.  
  2001.         if (!intel_ring_initialized(ring))
  2002.                 return;
  2003.  
  2004.         dev_priv = ring->dev->dev_private;
  2005.  
  2006.         if (ring->buffer) {
  2007.                 intel_logical_ring_stop(ring);
  2008.                 WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
  2009.         }
  2010.  
  2011.         if (ring->cleanup)
  2012.                 ring->cleanup(ring);
  2013.  
  2014.         i915_cmd_parser_fini_ring(ring);
  2015.         i915_gem_batch_pool_fini(&ring->batch_pool);
  2016.  
  2017.         if (ring->status_page.obj) {
  2018.                 kunmap(sg_page(ring->status_page.obj->pages->sgl));
  2019.                 ring->status_page.obj = NULL;
  2020.         }
  2021.  
  2022.         ring->disable_lite_restore_wa = false;
  2023.         ring->ctx_desc_template = 0;
  2024.  
  2025.         lrc_destroy_wa_ctx_obj(ring);
  2026.         ring->dev = NULL;
  2027. }
  2028.  
  2029. static void
  2030. logical_ring_default_vfuncs(struct drm_device *dev,
  2031.                             struct intel_engine_cs *ring)
  2032. {
  2033.         /* Default vfuncs which can be overridden by each engine. */
  2034.         ring->init_hw = gen8_init_common_ring;
  2035.         ring->emit_request = gen8_emit_request;
  2036.         ring->emit_flush = gen8_emit_flush;
  2037.         ring->irq_get = gen8_logical_ring_get_irq;
  2038.         ring->irq_put = gen8_logical_ring_put_irq;
  2039.         ring->emit_bb_start = gen8_emit_bb_start;
  2040.         if (IS_BXT_REVID(dev, 0, BXT_REVID_A1)) {
  2041.                 ring->get_seqno = bxt_a_get_seqno;
  2042.                 ring->set_seqno = bxt_a_set_seqno;
  2043.         } else {
  2044.                 ring->get_seqno = gen8_get_seqno;
  2045.                 ring->set_seqno = gen8_set_seqno;
  2046.         }
  2047. }
  2048.  
  2049. static inline void
  2050. logical_ring_default_irqs(struct intel_engine_cs *ring, unsigned shift)
  2051. {
  2052.         ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift;
  2053.         ring->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift;
  2054. }
  2055.  
  2056. static int
  2057. logical_ring_init(struct drm_device *dev, struct intel_engine_cs *ring)
  2058. {
  2059.         struct intel_context *dctx = to_i915(dev)->kernel_context;
  2060.         int ret;
  2061.  
  2062.         /* Intentionally left blank. */
  2063.         ring->buffer = NULL;
  2064.  
  2065.         ring->dev = dev;
  2066.         INIT_LIST_HEAD(&ring->active_list);
  2067.         INIT_LIST_HEAD(&ring->request_list);
  2068.         i915_gem_batch_pool_init(dev, &ring->batch_pool);
  2069.         init_waitqueue_head(&ring->irq_queue);
  2070.  
  2071.         INIT_LIST_HEAD(&ring->buffers);
  2072.         INIT_LIST_HEAD(&ring->execlist_queue);
  2073.         INIT_LIST_HEAD(&ring->execlist_retired_req_list);
  2074.         spin_lock_init(&ring->execlist_lock);
  2075.  
  2076.         logical_ring_init_platform_invariants(ring);
  2077.  
  2078.         ret = i915_cmd_parser_init_ring(ring);
  2079.         if (ret)
  2080.                 goto error;
  2081.  
  2082.         ret = intel_lr_context_deferred_alloc(dctx, ring);
  2083.         if (ret)
  2084.                 goto error;
  2085.  
  2086.         /* As this is the default context, always pin it */
  2087.         ret = intel_lr_context_do_pin(dctx, ring);
  2088.         if (ret) {
  2089.                 DRM_ERROR(
  2090.                         "Failed to pin and map ringbuffer %s: %d\n",
  2091.                         ring->name, ret);
  2092.                 goto error;
  2093.         }
  2094.  
  2095.         return 0;
  2096.  
  2097. error:
  2098.         intel_logical_ring_cleanup(ring);
  2099.         return ret;
  2100. }
  2101.  
  2102. static int logical_render_ring_init(struct drm_device *dev)
  2103. {
  2104.         struct drm_i915_private *dev_priv = dev->dev_private;
  2105.         struct intel_engine_cs *ring = &dev_priv->ring[RCS];
  2106.         int ret;
  2107.  
  2108.         ring->name = "render ring";
  2109.         ring->id = RCS;
  2110.         ring->exec_id = I915_EXEC_RENDER;
  2111.         ring->guc_id = GUC_RENDER_ENGINE;
  2112.         ring->mmio_base = RENDER_RING_BASE;
  2113.  
  2114.         logical_ring_default_irqs(ring, GEN8_RCS_IRQ_SHIFT);
  2115.         if (HAS_L3_DPF(dev))
  2116.                 ring->irq_keep_mask |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT;
  2117.  
  2118.         logical_ring_default_vfuncs(dev, ring);
  2119.  
  2120.         /* Override some for render ring. */
  2121.         if (INTEL_INFO(dev)->gen >= 9)
  2122.                 ring->init_hw = gen9_init_render_ring;
  2123.         else
  2124.                 ring->init_hw = gen8_init_render_ring;
  2125.         ring->init_context = gen8_init_rcs_context;
  2126.         ring->cleanup = intel_fini_pipe_control;
  2127.         ring->emit_flush = gen8_emit_flush_render;
  2128.         ring->emit_request = gen8_emit_request_render;
  2129.  
  2130.         ring->dev = dev;
  2131.  
  2132.         ret = intel_init_pipe_control(ring);
  2133.         if (ret)
  2134.                 return ret;
  2135.  
  2136.         ret = intel_init_workaround_bb(ring);
  2137.         if (ret) {
  2138.                 /*
  2139.                  * We continue even if we fail to initialize the WA batch
  2140.                  * because we only expect rare glitches and nothing
  2141.                  * critical enough to prevent us from using the GPU.
  2142.                  */
  2143.                 DRM_ERROR("WA batch buffer initialization failed: %d\n",
  2144.                           ret);
  2145.         }
  2146.  
  2147.         ret = logical_ring_init(dev, ring);
  2148.         if (ret) {
  2149.                 lrc_destroy_wa_ctx_obj(ring);
  2150.         }
  2151.  
  2152.         return ret;
  2153. }
  2154.  
  2155. static int logical_bsd_ring_init(struct drm_device *dev)
  2156. {
  2157.         struct drm_i915_private *dev_priv = dev->dev_private;
  2158.         struct intel_engine_cs *ring = &dev_priv->ring[VCS];
  2159.  
  2160.         ring->name = "bsd ring";
  2161.         ring->id = VCS;
  2162.         ring->exec_id = I915_EXEC_BSD;
  2163.         ring->guc_id = GUC_VIDEO_ENGINE;
  2164.         ring->mmio_base = GEN6_BSD_RING_BASE;
  2165.  
  2166.         logical_ring_default_irqs(ring, GEN8_VCS1_IRQ_SHIFT);
  2167.         logical_ring_default_vfuncs(dev, ring);
  2168.  
  2169.         return logical_ring_init(dev, ring);
  2170. }
  2171.  
  2172. static int logical_bsd2_ring_init(struct drm_device *dev)
  2173. {
  2174.         struct drm_i915_private *dev_priv = dev->dev_private;
  2175.         struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
  2176.  
  2177.         ring->name = "bsd2 ring";
  2178.         ring->id = VCS2;
  2179.         ring->exec_id = I915_EXEC_BSD;
  2180.         ring->guc_id = GUC_VIDEO_ENGINE2;
  2181.         ring->mmio_base = GEN8_BSD2_RING_BASE;
  2182.  
  2183.         logical_ring_default_irqs(ring, GEN8_VCS2_IRQ_SHIFT);
  2184.         logical_ring_default_vfuncs(dev, ring);
  2185.  
  2186.         return logical_ring_init(dev, ring);
  2187. }
  2188.  
  2189. static int logical_blt_ring_init(struct drm_device *dev)
  2190. {
  2191.         struct drm_i915_private *dev_priv = dev->dev_private;
  2192.         struct intel_engine_cs *ring = &dev_priv->ring[BCS];
  2193.  
  2194.         ring->name = "blitter ring";
  2195.         ring->id = BCS;
  2196.         ring->exec_id = I915_EXEC_BLT;
  2197.         ring->guc_id = GUC_BLITTER_ENGINE;
  2198.         ring->mmio_base = BLT_RING_BASE;
  2199.  
  2200.         logical_ring_default_irqs(ring, GEN8_BCS_IRQ_SHIFT);
  2201.         logical_ring_default_vfuncs(dev, ring);
  2202.  
  2203.         return logical_ring_init(dev, ring);
  2204. }
  2205.  
  2206. static int logical_vebox_ring_init(struct drm_device *dev)
  2207. {
  2208.         struct drm_i915_private *dev_priv = dev->dev_private;
  2209.         struct intel_engine_cs *ring = &dev_priv->ring[VECS];
  2210.  
  2211.         ring->name = "video enhancement ring";
  2212.         ring->id = VECS;
  2213.         ring->exec_id = I915_EXEC_VEBOX;
  2214.         ring->guc_id = GUC_VIDEOENHANCE_ENGINE;
  2215.         ring->mmio_base = VEBOX_RING_BASE;
  2216.  
  2217.         logical_ring_default_irqs(ring, GEN8_VECS_IRQ_SHIFT);
  2218.         logical_ring_default_vfuncs(dev, ring);
  2219.  
  2220.         return logical_ring_init(dev, ring);
  2221. }
  2222.  
  2223. /**
  2224.  * intel_logical_rings_init() - allocate, populate and init the Engine Command Streamers
  2225.  * @dev: DRM device.
  2226.  *
  2227.  * This function inits the engines for Execlists submission (the equivalent in the
  2228.  * legacy ringbuffer submission world would be i915_gem_init_rings). It does so only for
  2229.  * those engines that are present in the hardware.
  2230.  *
  2231.  * Return: non-zero if the initialization failed.
  2232.  */
  2233. int intel_logical_rings_init(struct drm_device *dev)
  2234. {
  2235.         struct drm_i915_private *dev_priv = dev->dev_private;
  2236.         int ret;
  2237.  
  2238.         ret = logical_render_ring_init(dev);
  2239.         if (ret)
  2240.                 return ret;
  2241.  
  2242.         if (HAS_BSD(dev)) {
  2243.                 ret = logical_bsd_ring_init(dev);
  2244.                 if (ret)
  2245.                         goto cleanup_render_ring;
  2246.         }
  2247.  
  2248.         if (HAS_BLT(dev)) {
  2249.                 ret = logical_blt_ring_init(dev);
  2250.                 if (ret)
  2251.                         goto cleanup_bsd_ring;
  2252.         }
  2253.  
  2254.         if (HAS_VEBOX(dev)) {
  2255.                 ret = logical_vebox_ring_init(dev);
  2256.                 if (ret)
  2257.                         goto cleanup_blt_ring;
  2258.         }
  2259.  
  2260.         if (HAS_BSD2(dev)) {
  2261.                 ret = logical_bsd2_ring_init(dev);
  2262.                 if (ret)
  2263.                         goto cleanup_vebox_ring;
  2264.         }
  2265.  
  2266.         return 0;
  2267.  
  2268. cleanup_vebox_ring:
  2269.         intel_logical_ring_cleanup(&dev_priv->ring[VECS]);
  2270. cleanup_blt_ring:
  2271.         intel_logical_ring_cleanup(&dev_priv->ring[BCS]);
  2272. cleanup_bsd_ring:
  2273.         intel_logical_ring_cleanup(&dev_priv->ring[VCS]);
  2274. cleanup_render_ring:
  2275.         intel_logical_ring_cleanup(&dev_priv->ring[RCS]);
  2276.  
  2277.         return ret;
  2278. }
  2279.  
  2280. static u32
  2281. make_rpcs(struct drm_device *dev)
  2282. {
  2283.         u32 rpcs = 0;
  2284.  
  2285.         /*
  2286.          * No explicit RPCS request is needed to ensure full
  2287.          * slice/subslice/EU enablement prior to Gen9.
  2288.          */
  2289.         if (INTEL_INFO(dev)->gen < 9)
  2290.                 return 0;
  2291.  
  2292.         /*
  2293.          * Starting in Gen9, render power gating can leave
  2294.          * slice/subslice/EU in a partially enabled state. We
  2295.          * must make an explicit request through RPCS for full
  2296.          * enablement.
  2297.          */
  2298.         if (INTEL_INFO(dev)->has_slice_pg) {
  2299.                 rpcs |= GEN8_RPCS_S_CNT_ENABLE;
  2300.                 rpcs |= INTEL_INFO(dev)->slice_total <<
  2301.                         GEN8_RPCS_S_CNT_SHIFT;
  2302.                 rpcs |= GEN8_RPCS_ENABLE;
  2303.         }
  2304.  
  2305.         if (INTEL_INFO(dev)->has_subslice_pg) {
  2306.                 rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
  2307.                 rpcs |= INTEL_INFO(dev)->subslice_per_slice <<
  2308.                         GEN8_RPCS_SS_CNT_SHIFT;
  2309.                 rpcs |= GEN8_RPCS_ENABLE;
  2310.         }
  2311.  
  2312.         if (INTEL_INFO(dev)->has_eu_pg) {
  2313.                 rpcs |= INTEL_INFO(dev)->eu_per_subslice <<
  2314.                         GEN8_RPCS_EU_MIN_SHIFT;
  2315.                 rpcs |= INTEL_INFO(dev)->eu_per_subslice <<
  2316.                         GEN8_RPCS_EU_MAX_SHIFT;
  2317.                 rpcs |= GEN8_RPCS_ENABLE;
  2318.         }
  2319.  
  2320.         return rpcs;
  2321. }
  2322.  
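/*
 * A minimal sketch (not part of the driver; the helper name and device values
 * are hypothetical) of the RPCS word make_rpcs() builds for a Gen9 part with
 * 1 slice, 3 subslices per slice and 8 EUs per subslice, assuming slice,
 * subslice and EU power gating are all present:
 */
#if 0
static u32 rpcs_example_sketch(void)
{
        return GEN8_RPCS_ENABLE |
               GEN8_RPCS_S_CNT_ENABLE  | (1 << GEN8_RPCS_S_CNT_SHIFT) |
               GEN8_RPCS_SS_CNT_ENABLE | (3 << GEN8_RPCS_SS_CNT_SHIFT) |
               (8 << GEN8_RPCS_EU_MIN_SHIFT) | (8 << GEN8_RPCS_EU_MAX_SHIFT);
}
#endif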
  2323. static u32 intel_lr_indirect_ctx_offset(struct intel_engine_cs *ring)
  2324. {
  2325.         u32 indirect_ctx_offset;
  2326.  
  2327.         switch (INTEL_INFO(ring->dev)->gen) {
  2328.         default:
  2329.                 MISSING_CASE(INTEL_INFO(ring->dev)->gen);
  2330.                 /* fall through */
  2331.         case 9:
  2332.                 indirect_ctx_offset =
  2333.                         GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
  2334.                 break;
  2335.         case 8:
  2336.                 indirect_ctx_offset =
  2337.                         GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT;
  2338.                 break;
  2339.         }
  2340.  
  2341.         return indirect_ctx_offset;
  2342. }
  2343.  
  2344. static int
  2345. populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_obj,
  2346.                     struct intel_engine_cs *ring, struct intel_ringbuffer *ringbuf)
  2347. {
  2348.         struct drm_device *dev = ring->dev;
  2349.         struct drm_i915_private *dev_priv = dev->dev_private;
  2350.         struct i915_hw_ppgtt *ppgtt = ctx->ppgtt;
  2351.         struct page *page;
  2352.         uint32_t *reg_state;
  2353.         int ret;
  2354.  
  2355.         if (!ppgtt)
  2356.                 ppgtt = dev_priv->mm.aliasing_ppgtt;
  2357.  
  2358.         ret = i915_gem_object_set_to_cpu_domain(ctx_obj, true);
  2359.         if (ret) {
  2360.                 DRM_DEBUG_DRIVER("Could not set to CPU domain\n");
  2361.                 return ret;
  2362.         }
  2363.  
  2364.         ret = i915_gem_object_get_pages(ctx_obj);
  2365.         if (ret) {
  2366.                 DRM_DEBUG_DRIVER("Could not get object pages\n");
  2367.                 return ret;
  2368.         }
  2369.  
  2370.         i915_gem_object_pin_pages(ctx_obj);
  2371.  
  2372.         /* The second page of the context object contains some fields which must
  2373.          * be set up prior to the first execution. */
  2374.         page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
  2375.         reg_state = kmap_atomic(page);
  2376.  
  2377.         /* A context is actually a big batch buffer with several MI_LOAD_REGISTER_IMM
  2378.          * commands followed by (reg, value) pairs. The values we are setting here are
  2379.          * only for the first context restore: on a subsequent save, the GPU will
  2380.          * recreate this batchbuffer with new values (including all the missing
  2381.          * MI_LOAD_REGISTER_IMM commands that we are not initializing here). */
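        /* 11 registers are common to all engines; the render engine adds
         * BB_PER_CTX_PTR and the two INDIRECT_CTX registers, hence 14 below.
         */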
  2382.         reg_state[CTX_LRI_HEADER_0] =
  2383.                 MI_LOAD_REGISTER_IMM(ring->id == RCS ? 14 : 11) | MI_LRI_FORCE_POSTED;
  2384.         ASSIGN_CTX_REG(reg_state, CTX_CONTEXT_CONTROL, RING_CONTEXT_CONTROL(ring),
  2385.                        _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
  2386.                                           CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
  2387.                                           (HAS_RESOURCE_STREAMER(dev) ?
  2388.                                             CTX_CTRL_RS_CTX_ENABLE : 0)));
  2389.         ASSIGN_CTX_REG(reg_state, CTX_RING_HEAD, RING_HEAD(ring->mmio_base), 0);
  2390.         ASSIGN_CTX_REG(reg_state, CTX_RING_TAIL, RING_TAIL(ring->mmio_base), 0);
  2391.         /* Ring buffer start address is not known until the buffer is pinned.
  2392.          * It is written to the context image in execlists_update_context()
  2393.          */
  2394.         ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_START, RING_START(ring->mmio_base), 0);
  2395.         ASSIGN_CTX_REG(reg_state, CTX_RING_BUFFER_CONTROL, RING_CTL(ring->mmio_base),
  2396.                        ((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) | RING_VALID);
  2397.         ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_U, RING_BBADDR_UDW(ring->mmio_base), 0);
  2398.         ASSIGN_CTX_REG(reg_state, CTX_BB_HEAD_L, RING_BBADDR(ring->mmio_base), 0);
  2399.         ASSIGN_CTX_REG(reg_state, CTX_BB_STATE, RING_BBSTATE(ring->mmio_base),
  2400.                        RING_BB_PPGTT);
  2401.         ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_HEAD_U, RING_SBBADDR_UDW(ring->mmio_base), 0);
  2402.         ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_HEAD_L, RING_SBBADDR(ring->mmio_base), 0);
  2403.         ASSIGN_CTX_REG(reg_state, CTX_SECOND_BB_STATE, RING_SBBSTATE(ring->mmio_base), 0);
  2404.         if (ring->id == RCS) {
  2405.                 ASSIGN_CTX_REG(reg_state, CTX_BB_PER_CTX_PTR, RING_BB_PER_CTX_PTR(ring->mmio_base), 0);
  2406.                 ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX, RING_INDIRECT_CTX(ring->mmio_base), 0);
  2407.                 ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX_OFFSET, RING_INDIRECT_CTX_OFFSET(ring->mmio_base), 0);
  2408.                 if (ring->wa_ctx.obj) {
  2409.                         struct i915_ctx_workarounds *wa_ctx = &ring->wa_ctx;
  2410.                         uint32_t ggtt_offset = i915_gem_obj_ggtt_offset(wa_ctx->obj);
  2411.  
  2412.                         reg_state[CTX_RCS_INDIRECT_CTX+1] =
  2413.                                 (ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) |
  2414.                                 (wa_ctx->indirect_ctx.size / CACHELINE_DWORDS);
  2415.  
  2416.                         reg_state[CTX_RCS_INDIRECT_CTX_OFFSET+1] =
  2417.                                 intel_lr_indirect_ctx_offset(ring) << 6;
  2418.  
  2419.                         reg_state[CTX_BB_PER_CTX_PTR+1] =
  2420.                                 (ggtt_offset + wa_ctx->per_ctx.offset * sizeof(uint32_t)) |
  2421.                                 0x01;
  2422.                 }
  2423.         }
  2424.         reg_state[CTX_LRI_HEADER_1] = MI_LOAD_REGISTER_IMM(9) | MI_LRI_FORCE_POSTED;
  2425.         ASSIGN_CTX_REG(reg_state, CTX_CTX_TIMESTAMP, RING_CTX_TIMESTAMP(ring->mmio_base), 0);
  2426.         /* PDP values will be assigned later if needed */
  2427.         ASSIGN_CTX_REG(reg_state, CTX_PDP3_UDW, GEN8_RING_PDP_UDW(ring, 3), 0);
  2428.         ASSIGN_CTX_REG(reg_state, CTX_PDP3_LDW, GEN8_RING_PDP_LDW(ring, 3), 0);
  2429.         ASSIGN_CTX_REG(reg_state, CTX_PDP2_UDW, GEN8_RING_PDP_UDW(ring, 2), 0);
  2430.         ASSIGN_CTX_REG(reg_state, CTX_PDP2_LDW, GEN8_RING_PDP_LDW(ring, 2), 0);
  2431.         ASSIGN_CTX_REG(reg_state, CTX_PDP1_UDW, GEN8_RING_PDP_UDW(ring, 1), 0);
  2432.         ASSIGN_CTX_REG(reg_state, CTX_PDP1_LDW, GEN8_RING_PDP_LDW(ring, 1), 0);
  2433.         ASSIGN_CTX_REG(reg_state, CTX_PDP0_UDW, GEN8_RING_PDP_UDW(ring, 0), 0);
  2434.         ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(ring, 0), 0);
  2435.  
  2436.         if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
  2437.                 /* 64b PPGTT (48bit canonical)
  2438.                  * PDP0_DESCRIPTOR contains the base address to PML4 and
  2439.                  * other PDP Descriptors are ignored.
  2440.                  */
  2441.                 ASSIGN_CTX_PML4(ppgtt, reg_state);
  2442.         } else {
  2443.                 /* 32b PPGTT
  2444.                  * PDP*_DESCRIPTOR contains the base address of space supported.
  2445.                  * With dynamic page allocation, PDPs may not be allocated at
  2446.                  * this point. Point the unallocated PDPs to the scratch page
  2447.                  */
  2448.                 ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
  2449.                 ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
  2450.                 ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
  2451.                 ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
  2452.         }
  2453.  
  2454.         if (ring->id == RCS) {
  2455.                 reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
  2456.                 ASSIGN_CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
  2457.                                make_rpcs(dev));
  2458.         }
  2459.  
  2460.         kunmap_atomic(reg_state);
  2461.         i915_gem_object_unpin_pages(ctx_obj);
  2462.  
  2463.         return 0;
  2464. }
  2465.  
  2466. /**
  2467.  * intel_lr_context_free() - free the LRC specific bits of a context
  2468.  * @ctx: the LR context to free.
  2469.  *
  2470.  * The real context freeing is done in i915_gem_context_free: this only
  2471.  * takes care of the bits that are LRC related: the per-engine backing
  2472.  * objects and the logical ringbuffer.
  2473.  */
  2474. void intel_lr_context_free(struct intel_context *ctx)
  2475. {
  2476.         int i;
  2477.  
  2478.         for (i = I915_NUM_RINGS; --i >= 0; ) {
  2479.                 struct intel_ringbuffer *ringbuf = ctx->engine[i].ringbuf;
  2480.                 struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
  2481.  
  2482.                 if (!ctx_obj)
  2483.                         continue;
  2484.  
  2485.                 if (ctx == ctx->i915->kernel_context) {
  2486.                         intel_unpin_ringbuffer_obj(ringbuf);
  2487.                         i915_gem_object_ggtt_unpin(ctx_obj);
  2488.                 }
  2489.  
  2490.                 WARN_ON(ctx->engine[i].pin_count);
  2491.                 intel_ringbuffer_free(ringbuf);
  2492.                 drm_gem_object_unreference(&ctx_obj->base);
  2493.         }
  2494. }
  2495.  
  2496. /**
  2497.  * intel_lr_context_size() - return the size of the context for an engine
  2498.  * @ring: which engine to find the context size for
  2499.  *
  2500.  * Each engine may require a different amount of space for a context image,
  2501.  * so when allocating (or copying) an image, this function can be used to
  2502.  * find the right size for the specific engine.
  2503.  *
  2504.  * Return: size (in bytes) of an engine-specific context image
  2505.  *
  2506.  * Note: this size includes the HWSP, which is part of the context image
  2507.  * in LRC mode, but does not include the "shared data page" used with
  2508.  * GuC submission. The caller should account for this if using the GuC.
  2509.  */
  2510. uint32_t intel_lr_context_size(struct intel_engine_cs *ring)
  2511. {
  2512.         int ret = 0;
  2513.  
  2514.         WARN_ON(INTEL_INFO(ring->dev)->gen < 8);
  2515.  
  2516.         switch (ring->id) {
  2517.         case RCS:
  2518.                 if (INTEL_INFO(ring->dev)->gen >= 9)
  2519.                         ret = GEN9_LR_CONTEXT_RENDER_SIZE;
  2520.                 else
  2521.                         ret = GEN8_LR_CONTEXT_RENDER_SIZE;
  2522.                 break;
  2523.         case VCS:
  2524.         case BCS:
  2525.         case VECS:
  2526.         case VCS2:
  2527.                 ret = GEN8_LR_CONTEXT_OTHER_SIZE;
  2528.                 break;
  2529.         }
  2530.  
  2531.         return ret;
  2532. }
  2533.  
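/*
 * Editorial sketch, not part of the original driver: a caller allocating a
 * backing object is expected to round this size up to a page and add the extra
 * GuC shared-data page, mirroring what intel_lr_context_deferred_alloc() does
 * below.  The helper name lr_context_total_size_sketch() is hypothetical.
 */
#if 0
static uint32_t lr_context_total_size_sketch(struct intel_engine_cs *ring)
{
	uint32_t size = round_up(intel_lr_context_size(ring), 4096);

	/* One extra page shared between the driver and the GuC firmware */
	size += PAGE_SIZE * LRC_PPHWSP_PN;

	return size;
}
#endif
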
  2534. static void lrc_setup_hardware_status_page(struct intel_engine_cs *ring,
  2535.                 struct drm_i915_gem_object *default_ctx_obj)
  2536. {
  2537.         struct drm_i915_private *dev_priv = ring->dev->dev_private;
  2538.         struct page *page;
  2539.  
  2540.         /* The HWSP is part of the default context object in LRC mode. */
  2541.         ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(default_ctx_obj)
  2542.                         + LRC_PPHWSP_PN * PAGE_SIZE;
  2543.         page = i915_gem_object_get_page(default_ctx_obj, LRC_PPHWSP_PN);
  2544.         ring->status_page.page_addr = kmap(page);
  2545.         ring->status_page.obj = default_ctx_obj;
  2546.  
  2547.         I915_WRITE(RING_HWS_PGA(ring->mmio_base),
  2548.                         (u32)ring->status_page.gfx_addr);
  2549.         POSTING_READ(RING_HWS_PGA(ring->mmio_base));
  2550. }
  2551.  
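/*
 * Editorial sketch, not part of the original driver: once the PPHWSP has been
 * kmap()ed above, a status dword can be read straight from the CPU mapping.
 * The helper name hwsp_read_sketch() is hypothetical; the driver has its own
 * status-page accessor for this elsewhere.
 */
#if 0
static inline u32 hwsp_read_sketch(struct intel_engine_cs *ring, int dword)
{
	u32 *hwsp = ring->status_page.page_addr;

	return hwsp[dword];
}
#endif
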
  2552. /**
  2553.  * intel_lr_context_deferred_alloc() - create the LRC-specific bits of a context
  2554.  * @ctx: LR context to create.
  2555.  * @ring: engine to be used with the context.
  2556.  *
  2557.  * This function can be called more than once, with different engines, if we plan
  2558.  * to use the context with them. The context backing objects and the ringbuffers
  2559.  * (especially the ringbuffer backing objects) consume a significant amount of
  2560.  * memory, which is why creation is deferred: it is better to confirm first that a
  2561.  * given engine will actually be used with the context.
  2562.  *
  2563.  * Return: non-zero on error.
  2564.  */
  2565.  
  2566. int intel_lr_context_deferred_alloc(struct intel_context *ctx,
  2567.                                     struct intel_engine_cs *ring)
  2568. {
  2569.         struct drm_device *dev = ring->dev;
  2570.         struct drm_i915_gem_object *ctx_obj;
  2571.         uint32_t context_size;
  2572.         struct intel_ringbuffer *ringbuf;
  2573.         int ret;
  2574.  
  2575.         WARN_ON(ctx->legacy_hw_ctx.rcs_state != NULL);
  2576.         WARN_ON(ctx->engine[ring->id].state);
  2577.  
  2578.         context_size = round_up(intel_lr_context_size(ring), 4096);
  2579.  
  2580.         /* One extra page for the data shared between the driver and the GuC */
  2581.         context_size += PAGE_SIZE * LRC_PPHWSP_PN;
  2582.  
  2583.         ctx_obj = i915_gem_alloc_object(dev, context_size);
  2584.         if (!ctx_obj) {
  2585.                 DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n");
  2586.                 return -ENOMEM;
  2587.         }
  2588.  
  2589.         ringbuf = intel_engine_create_ringbuffer(ring, 4 * PAGE_SIZE);
  2590.         if (IS_ERR(ringbuf)) {
  2591.                 ret = PTR_ERR(ringbuf);
  2592.                 goto error_deref_obj;
  2593.         }
  2594.  
  2595.         ret = populate_lr_context(ctx, ctx_obj, ring, ringbuf);
  2596.         if (ret) {
  2597.                 DRM_DEBUG_DRIVER("Failed to populate LRC: %d\n", ret);
  2598.                 goto error_ringbuf;
  2599.         }
  2600.  
  2601.         ctx->engine[ring->id].ringbuf = ringbuf;
  2602.         ctx->engine[ring->id].state = ctx_obj;
  2603.  
  2604.         if (ctx != ctx->i915->kernel_context && ring->init_context) {
  2605.                 struct drm_i915_gem_request *req;
  2606.  
  2607.                 req = i915_gem_request_alloc(ring, ctx);
  2608.                 if (IS_ERR(req)) {
  2609.                         ret = PTR_ERR(req);
  2610.                         DRM_ERROR("ring create req: %d\n", ret);
  2611.                         goto error_ringbuf;
  2612.                 }
  2613.  
  2614.                 ret = ring->init_context(req);
  2615.                 if (ret) {
  2616.                         DRM_ERROR("ring init context: %d\n",
  2617.                                 ret);
  2618.                         i915_gem_request_cancel(req);
  2619.                         goto error_ringbuf;
  2620.                 }
  2621.                 i915_add_request_no_flush(req);
  2622.         }
  2623.         return 0;
  2624.  
  2625. error_ringbuf:
  2626.         intel_ringbuffer_free(ringbuf);
  2627. error_deref_obj:
  2628.         drm_gem_object_unreference(&ctx_obj->base);
  2629.         ctx->engine[ring->id].ringbuf = NULL;
  2630.         ctx->engine[ring->id].state = NULL;
  2631.         return ret;
  2632. }
  2633.  
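/*
 * Editorial sketch, not part of the original driver: the "deferred" in the
 * function name above means callers allocate the LRC bits lazily, along these
 * lines, before building the first request for a (context, engine) pair.  The
 * helper name ensure_lr_context_sketch() is hypothetical.
 */
#if 0
static int ensure_lr_context_sketch(struct intel_context *ctx,
				    struct intel_engine_cs *ring)
{
	if (ctx->engine[ring->id].state)
		return 0;	/* already allocated for this engine */

	return intel_lr_context_deferred_alloc(ctx, ring);
}
#endif
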
  2634. void intel_lr_context_reset(struct drm_device *dev,
  2635.                         struct intel_context *ctx)
  2636. {
  2637.         struct drm_i915_private *dev_priv = dev->dev_private;
  2638.         struct intel_engine_cs *ring;
  2639.         int i;
  2640.  
  2641.         for_each_ring(ring, dev_priv, i) {
  2642.                 struct drm_i915_gem_object *ctx_obj =
  2643.                                 ctx->engine[ring->id].state;
  2644.                 struct intel_ringbuffer *ringbuf =
  2645.                                 ctx->engine[ring->id].ringbuf;
  2646.                 uint32_t *reg_state;
  2647.                 struct page *page;
  2648.  
  2649.                 if (!ctx_obj)
  2650.                         continue;
  2651.  
  2652.                 if (i915_gem_object_get_pages(ctx_obj)) {
  2653.                         WARN(1, "Failed get_pages for context obj\n");
  2654.                         continue;
  2655.                 }
  2656.                 page = i915_gem_object_get_dirty_page(ctx_obj, LRC_STATE_PN);
  2657.                 reg_state = kmap_atomic(page);
  2658.  
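                /* Each CTX_* index names the register-offset slot of a pair;
                 * the value restored with the context lives in the following
                 * dword, hence the "+1" below. */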
  2659.                 reg_state[CTX_RING_HEAD+1] = 0;
  2660.                 reg_state[CTX_RING_TAIL+1] = 0;
  2661.  
  2662.                 kunmap_atomic(reg_state);
  2663.  
  2664.                 ringbuf->head = 0;
  2665.                 ringbuf->tail = 0;
  2666.         }
  2667. }
  2668.