Subversion Repositories Kolibri OS

Compare Revisions

Rev 6083 → Rev 6084

/drivers/video/drm/i915/intel_ringbuffer.c
52,18 → 52,29
 
int __intel_ring_space(int head, int tail, int size)
{
int space = head - (tail + I915_RING_FREE_SPACE);
if (space < 0)
int space = head - tail;
if (space <= 0)
space += size;
return space;
return space - I915_RING_FREE_SPACE;
}
 
int intel_ring_space(struct intel_ringbuffer *ringbuf)
void intel_ring_update_space(struct intel_ringbuffer *ringbuf)
{
return __intel_ring_space(ringbuf->head & HEAD_ADDR,
if (ringbuf->last_retired_head != -1) {
ringbuf->head = ringbuf->last_retired_head;
ringbuf->last_retired_head = -1;
}
 
ringbuf->space = __intel_ring_space(ringbuf->head & HEAD_ADDR,
ringbuf->tail, ringbuf->size);
}
 
int intel_ring_space(struct intel_ringbuffer *ringbuf)
{
intel_ring_update_space(ringbuf);
return ringbuf->space;
}
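 
For reference, the reworked space accounting above reduces to modular head/tail arithmetic minus a small headroom. A standalone sketch of the same logic (the 64-byte reserve stands in for I915_RING_FREE_SPACE and the sample numbers are illustrative assumptions):

#include <stdio.h>

#define RING_FREE_SPACE 64	/* assumed stand-in for I915_RING_FREE_SPACE */

/* Same arithmetic as __intel_ring_space() after this change. */
static int ring_space(int head, int tail, int size)
{
	int space = head - tail;	/* bytes from tail up to head */
	if (space <= 0)			/* free region wraps past the end */
		space += size;
	return space - RING_FREE_SPACE;	/* keep a little headroom free */
}

int main(void)
{
	printf("%d\n", ring_space(128, 4000, 4096));	/* 224 - 64 = 160 */
	printf("%d\n", ring_space(1024, 256, 4096));	/* 768 - 64 = 704 */
	return 0;
}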
 
bool intel_ring_stopped(struct intel_engine_cs *ring)
{
struct drm_i915_private *dev_priv = ring->dev->dev_private;
70,7 → 81,7
return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
}
 
void __intel_ring_advance(struct intel_engine_cs *ring)
static void __intel_ring_advance(struct intel_engine_cs *ring)
{
struct intel_ringbuffer *ringbuf = ring->buffer;
ringbuf->tail &= ringbuf->size - 1;
80,10 → 91,11
}
 
static int
gen2_render_ring_flush(struct intel_engine_cs *ring,
gen2_render_ring_flush(struct drm_i915_gem_request *req,
u32 invalidate_domains,
u32 flush_domains)
{
struct intel_engine_cs *ring = req->ring;
u32 cmd;
int ret;
 
94,7 → 106,7
if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
cmd |= MI_READ_FLUSH;
 
ret = intel_ring_begin(ring, 2);
ret = intel_ring_begin(req, 2);
if (ret)
return ret;
 
106,10 → 118,11
}
 
static int
gen4_render_ring_flush(struct intel_engine_cs *ring,
gen4_render_ring_flush(struct drm_i915_gem_request *req,
u32 invalidate_domains,
u32 flush_domains)
{
struct intel_engine_cs *ring = req->ring;
struct drm_device *dev = ring->dev;
u32 cmd;
int ret;
152,7 → 165,7
(IS_G4X(dev) || IS_GEN5(dev)))
cmd |= MI_INVALIDATE_ISP;
 
ret = intel_ring_begin(ring, 2);
ret = intel_ring_begin(req, 2);
if (ret)
return ret;
 
201,13 → 214,13
* really our business. That leaves only stall at scoreboard.
*/
static int
intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)
{
struct intel_engine_cs *ring = req->ring;
u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
int ret;
 
 
ret = intel_ring_begin(ring, 6);
ret = intel_ring_begin(req, 6);
if (ret)
return ret;
 
220,7 → 233,7
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
 
ret = intel_ring_begin(ring, 6);
ret = intel_ring_begin(req, 6);
if (ret)
return ret;
 
236,15 → 249,16
}
 
static int
gen6_render_ring_flush(struct intel_engine_cs *ring,
gen6_render_ring_flush(struct drm_i915_gem_request *req,
u32 invalidate_domains, u32 flush_domains)
{
struct intel_engine_cs *ring = req->ring;
u32 flags = 0;
u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
int ret;
 
/* Force SNB workarounds for PIPE_CONTROL flushes */
ret = intel_emit_post_sync_nonzero_flush(ring);
ret = intel_emit_post_sync_nonzero_flush(req);
if (ret)
return ret;
 
274,7 → 288,7
flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
}
 
ret = intel_ring_begin(ring, 4);
ret = intel_ring_begin(req, 4);
if (ret)
return ret;
 
288,11 → 302,12
}
 
static int
gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)
{
struct intel_engine_cs *ring = req->ring;
int ret;
 
ret = intel_ring_begin(ring, 4);
ret = intel_ring_begin(req, 4);
if (ret)
return ret;
 
306,33 → 321,11
return 0;
}
 
static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)
{
int ret;
 
if (!ring->fbc_dirty)
return 0;
 
ret = intel_ring_begin(ring, 6);
if (ret)
return ret;
/* WaFbcNukeOn3DBlt:ivb/hsw */
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
intel_ring_emit(ring, MSG_FBC_REND_STATE);
intel_ring_emit(ring, value);
intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) | MI_SRM_LRM_GLOBAL_GTT);
intel_ring_emit(ring, MSG_FBC_REND_STATE);
intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
intel_ring_advance(ring);
 
ring->fbc_dirty = false;
return 0;
}
 
static int
gen7_render_ring_flush(struct intel_engine_cs *ring,
gen7_render_ring_flush(struct drm_i915_gem_request *req,
u32 invalidate_domains, u32 flush_domains)
{
struct intel_engine_cs *ring = req->ring;
u32 flags = 0;
u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
int ret;
354,6 → 347,7
if (flush_domains) {
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
flags |= PIPE_CONTROL_FLUSH_ENABLE;
}
if (invalidate_domains) {
flags |= PIPE_CONTROL_TLB_INVALIDATE;
374,10 → 368,10
/* Workaround: we must issue a pipe_control with CS-stall bit
* set before a pipe_control command that has the state cache
* invalidate bit set. */
gen7_render_ring_cs_stall_wa(ring);
gen7_render_ring_cs_stall_wa(req);
}
 
ret = intel_ring_begin(ring, 4);
ret = intel_ring_begin(req, 4);
if (ret)
return ret;
 
387,19 → 381,17
intel_ring_emit(ring, 0);
intel_ring_advance(ring);
 
if (!invalidate_domains && flush_domains)
return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
 
return 0;
}
 
static int
gen8_emit_pipe_control(struct intel_engine_cs *ring,
gen8_emit_pipe_control(struct drm_i915_gem_request *req,
u32 flags, u32 scratch_addr)
{
struct intel_engine_cs *ring = req->ring;
int ret;
 
ret = intel_ring_begin(ring, 6);
ret = intel_ring_begin(req, 6);
if (ret)
return ret;
 
415,11 → 407,11
}
 
static int
gen8_render_ring_flush(struct intel_engine_cs *ring,
gen8_render_ring_flush(struct drm_i915_gem_request *req,
u32 invalidate_domains, u32 flush_domains)
{
u32 flags = 0;
u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
u32 scratch_addr = req->ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
int ret;
 
flags |= PIPE_CONTROL_CS_STALL;
427,6 → 419,7
if (flush_domains) {
flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
flags |= PIPE_CONTROL_FLUSH_ENABLE;
}
if (invalidate_domains) {
flags |= PIPE_CONTROL_TLB_INVALIDATE;
439,7 → 432,7
flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
 
/* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
ret = gen8_emit_pipe_control(ring,
ret = gen8_emit_pipe_control(req,
PIPE_CONTROL_CS_STALL |
PIPE_CONTROL_STALL_AT_SCOREBOARD,
0);
447,14 → 440,7
return ret;
}
 
ret = gen8_emit_pipe_control(ring, flags, scratch_addr);
if (ret)
return ret;
 
if (!invalidate_domains && flush_domains)
return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
 
return 0;
return gen8_emit_pipe_control(req, flags, scratch_addr);
}
 
static void ring_write_tail(struct intel_engine_cs *ring,
491,6 → 477,68
I915_WRITE(HWS_PGA, addr);
}
 
static void intel_ring_setup_status_page(struct intel_engine_cs *ring)
{
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = ring->dev->dev_private;
u32 mmio = 0;
 
/* The ring status page addresses are no longer next to the rest of
* the ring registers as of gen7.
*/
if (IS_GEN7(dev)) {
switch (ring->id) {
case RCS:
mmio = RENDER_HWS_PGA_GEN7;
break;
case BCS:
mmio = BLT_HWS_PGA_GEN7;
break;
/*
* VCS2 actually doesn't exist on Gen7. Only shut up
* gcc switch check warning
*/
case VCS2:
case VCS:
mmio = BSD_HWS_PGA_GEN7;
break;
case VECS:
mmio = VEBOX_HWS_PGA_GEN7;
break;
}
} else if (IS_GEN6(ring->dev)) {
mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
} else {
/* XXX: gen8 returns to sanity */
mmio = RING_HWS_PGA(ring->mmio_base);
}
 
I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
POSTING_READ(mmio);
 
/*
* Flush the TLB for this page
*
* FIXME: These two bits have disappeared on gen8, so a question
* arises: do we still need this and if so how should we go about
* invalidating the TLB?
*/
if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
u32 reg = RING_INSTPM(ring->mmio_base);
 
/* ring should be idle before issuing a sync flush*/
WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
 
I915_WRITE(reg,
_MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
INSTPM_SYNC_FLUSH));
if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
1000))
DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
ring->name);
}
}
 
static bool stop_ring(struct intel_engine_cs *ring)
{
struct drm_i915_private *dev_priv = to_i915(ring->dev);
528,7 → 576,7
struct drm_i915_gem_object *obj = ringbuf->obj;
int ret = 0;
 
gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 
if (!stop_ring(ring)) {
/* G45 ring initialization often fails to reset head to zero */
592,15 → 640,15
goto out;
}
 
ringbuf->last_retired_head = -1;
ringbuf->head = I915_READ_HEAD(ring);
ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
ringbuf->space = intel_ring_space(ringbuf);
ringbuf->last_retired_head = -1;
intel_ring_update_space(ringbuf);
 
memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));
 
out:
gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
 
return ret;
}
627,8 → 675,7
{
int ret;
 
if (ring->scratch.obj)
return 0;
WARN_ON(ring->scratch.obj);
 
ring->scratch.obj = i915_gem_alloc_object(ring->dev, 4096);
if (ring->scratch.obj == NULL) {
664,23 → 711,23
return ret;
}
 
static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,
struct intel_context *ctx)
static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
{
int ret, i;
struct intel_engine_cs *ring = req->ring;
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct i915_workarounds *w = &dev_priv->workarounds;
 
if (WARN_ON(w->count == 0))
if (w->count == 0)
return 0;
 
ring->gpu_caches_dirty = true;
ret = intel_ring_flush_all_caches(ring);
ret = intel_ring_flush_all_caches(req);
if (ret)
return ret;
 
ret = intel_ring_begin(ring, (w->count * 2 + 2));
ret = intel_ring_begin(req, (w->count * 2 + 2));
if (ret)
return ret;
 
694,7 → 741,7
intel_ring_advance(ring);
 
ring->gpu_caches_dirty = true;
ret = intel_ring_flush_all_caches(ring);
ret = intel_ring_flush_all_caches(req);
if (ret)
return ret;
 
703,6 → 750,21
return 0;
}
 
static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)
{
int ret;
 
ret = intel_ring_workarounds_emit(req);
if (ret != 0)
return ret;
 
ret = i915_gem_render_state_init(req);
if (ret)
DRM_ERROR("init render state: %d\n", ret);
 
return ret;
}
 
static int wa_add(struct drm_i915_private *dev_priv,
const u32 addr, const u32 mask, const u32 val)
{
720,11 → 782,11
return 0;
}
 
#define WA_REG(addr, mask, val) { \
#define WA_REG(addr, mask, val) do { \
const int r = wa_add(dev_priv, (addr), (mask), (val)); \
if (r) \
return r; \
}
} while (0)
 
#define WA_SET_BIT_MASKED(addr, mask) \
WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))
740,37 → 802,43
 
#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)
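 
The switch of WA_REG from a bare brace block to do { ... } while (0) is the usual C idiom for macros that must behave like a single statement. A minimal illustration of the if/else breakage the old form allows (the TWICE macros are hypothetical, not from the driver):

#include <stdio.h>

/* Bare-brace form: the ';' written after the macro call ends the 'if',
 * so a following 'else' no longer pairs with it and fails to compile. */
#define BAD_TWICE(x)	{ puts(x); puts(x); }

/* do/while(0) form: expands to a single statement, ';' included. */
#define GOOD_TWICE(x)	do { puts(x); puts(x); } while (0)

int main(void)
{
	int hot = 1;

	if (hot)
		GOOD_TWICE("hot");	/* fine with or without an else */
	else
		puts("cold");

	/* With BAD_TWICE the same construct breaks:
	 *	if (hot)
	 *		BAD_TWICE("hot");
	 *	else			<- "else without a previous if"
	 *		puts("cold");
	 */
	return 0;
}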
 
static int bdw_init_workarounds(struct intel_engine_cs *ring)
static int gen8_init_workarounds(struct intel_engine_cs *ring)
{
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
 
/* WaDisablePartialInstShootdown:bdw */
/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE |
STALL_DOP_GATING_DISABLE);
WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
 
/* WaDisableDopClockGating:bdw */
WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
DOP_CLOCK_GATING_DISABLE);
/* WaDisableAsyncFlipPerfMode:bdw,chv */
WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE);
 
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
GEN8_SAMPLER_POWER_BYPASS_DIS);
/* WaDisablePartialInstShootdown:bdw,chv */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 
/* Use Force Non-Coherent whenever executing a 3D context. This is a
* workaround for a possible hang in the unlikely event a TLB
* invalidation occurs during a PSD flush.
*/
/* WaDisableFenceDestinationToSLM:bdw (GT3 pre-production) */
/* WaForceEnableNonCoherent:bdw,chv */
/* WaHdcDisableFetchWhenMasked:bdw,chv */
WA_SET_BIT_MASKED(HDC_CHICKEN0,
HDC_FORCE_NON_COHERENT |
(IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
HDC_DONOT_FETCH_MEM_WHEN_MASKED |
HDC_FORCE_NON_COHERENT);
 
/* Wa4x4STCOptimizationDisable:bdw */
WA_SET_BIT_MASKED(CACHE_MODE_1,
GEN8_4x4_STC_OPTIMIZATION_DISABLE);
/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
* "The Hierarchical Z RAW Stall Optimization allows non-overlapping
* polygons in the same 8x4 pixel/sample area to be processed without
* stalling waiting for the earlier ones to write to Hierarchical Z
* buffer."
*
* This optimization is off by default for BDW and CHV; turn it on.
*/
WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
 
/* Wa4x4STCOptimizationDisable:bdw,chv */
WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);
 
/*
* BSpec recommends 8x4 when MSAA is used,
* however in practice 16x4 seems fastest.
786,27 → 854,279
return 0;
}
 
static int bdw_init_workarounds(struct intel_engine_cs *ring)
{
int ret;
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
 
ret = gen8_init_workarounds(ring);
if (ret)
return ret;
 
/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 
/* WaDisableDopClockGating:bdw */
WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
DOP_CLOCK_GATING_DISABLE);
 
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
GEN8_SAMPLER_POWER_BYPASS_DIS);
 
WA_SET_BIT_MASKED(HDC_CHICKEN0,
/* WaForceContextSaveRestoreNonCoherent:bdw */
HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
/* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
(IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));
 
return 0;
}
 
static int chv_init_workarounds(struct intel_engine_cs *ring)
{
int ret;
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
 
/* WaDisablePartialInstShootdown:chv */
ret = gen8_init_workarounds(ring);
if (ret)
return ret;
 
/* WaDisableThreadStallDopClockGating:chv */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);
 
/* Improve HiZ throughput on CHV. */
WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
 
return 0;
}
 
static int gen9_init_workarounds(struct intel_engine_cs *ring)
{
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
uint32_t tmp;
 
/* WaEnableLbsSlaRetryTimerDecrement:skl */
I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) |
GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
 
/* WaDisableKillLogic:bxt,skl */
I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
ECOCHK_DIS_TLB);
 
/* WaDisablePartialInstShootdown:skl,bxt */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE |
STALL_DOP_GATING_DISABLE);
PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);
 
/* Use Force Non-Coherent whenever executing a 3D context. This is a
* workaround for a possible hang in the unlikely event a TLB
* invalidation occurs during a PSD flush.
/* Syncing dependencies between camera and graphics:skl,bxt */
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC);
 
if ((IS_SKYLAKE(dev) && (INTEL_REVID(dev) == SKL_REVID_A0 ||
INTEL_REVID(dev) == SKL_REVID_B0)) ||
(IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) {
/* WaDisableDgMirrorFixInHalfSliceChicken5:skl,bxt */
WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
GEN9_DG_MIRROR_FIX_ENABLE);
}
 
if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) <= SKL_REVID_B0) ||
(IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)) {
/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */
WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,
GEN9_RHWO_OPTIMIZATION_DISABLE);
/*
* WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set
* but we do that in per ctx batchbuffer as there is an issue
* with this register not getting restored on ctx restore
*/
/* WaForceEnableNonCoherent:chv */
/* WaHdcDisableFetchWhenMasked:chv */
}
 
if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) >= SKL_REVID_C0) ||
IS_BROXTON(dev)) {
/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt */
WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7,
GEN9_ENABLE_YV12_BUGFIX);
}
 
/* Wa4x4STCOptimizationDisable:skl,bxt */
/* WaDisablePartialResolveInVc:skl,bxt */
WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE |
GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE));
 
/* WaCcsTlbPrefetchDisable:skl,bxt */
WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5,
GEN9_CCS_TLB_PREFETCH_ENABLE);
 
/* WaDisableMaskBasedCammingInRCC:skl,bxt */
if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_C0) ||
(IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0))
WA_SET_BIT_MASKED(SLICE_ECO_CHICKEN0,
PIXEL_MASK_CAMMING_DISABLE);
 
/* WaForceContextSaveRestoreNonCoherent:skl,bxt */
tmp = HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT;
if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) == SKL_REVID_F0) ||
(IS_BROXTON(dev) && INTEL_REVID(dev) >= BXT_REVID_B0))
tmp |= HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE;
WA_SET_BIT_MASKED(HDC_CHICKEN0, tmp);
 
/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt */
if (IS_SKYLAKE(dev) ||
(IS_BROXTON(dev) && INTEL_REVID(dev) <= BXT_REVID_B0)) {
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
GEN8_SAMPLER_POWER_BYPASS_DIS);
}
 
/* WaDisableSTUnitPowerOptimization:skl,bxt */
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);
 
return 0;
}
 
static int skl_tune_iz_hashing(struct intel_engine_cs *ring)
{
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
u8 vals[3] = { 0, 0, 0 };
unsigned int i;
 
for (i = 0; i < 3; i++) {
u8 ss;
 
/*
* Only consider slices where one, and only one, subslice has 7
* EUs
*/
if (hweight8(dev_priv->info.subslice_7eu[i]) != 1)
continue;
 
/*
* subslice_7eu[i] != 0 (because of the check above) and
* ss_max == 4 (maximum number of subslices possible per slice)
*
* -> 0 <= ss <= 3;
*/
ss = ffs(dev_priv->info.subslice_7eu[i]) - 1;
vals[i] = 3 - ss;
}
 
if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
return 0;
 
/* Tune IZ hashing. See intel_device_info_runtime_init() */
WA_SET_FIELD_MASKED(GEN7_GT_MODE,
GEN9_IZ_HASHING_MASK(2) |
GEN9_IZ_HASHING_MASK(1) |
GEN9_IZ_HASHING_MASK(0),
GEN9_IZ_HASHING(2, vals[2]) |
GEN9_IZ_HASHING(1, vals[1]) |
GEN9_IZ_HASHING(0, vals[0]));
 
return 0;
}
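 
The hashing setup above is driven by simple bit arithmetic on the per-slice subslice_7eu fuse masks. A standalone sketch with a made-up fuse value (the 0x04 mask and the compiler builtins are illustrative stand-ins for the driver's hweight8()/ffs()):

#include <stdio.h>

int main(void)
{
	/* Hypothetical per-slice masks of subslices that have 7 EUs. */
	unsigned int subslice_7eu[3] = { 0x04, 0x03, 0x00 };
	int vals[3] = { 0, 0, 0 };
	int i;

	for (i = 0; i < 3; i++) {
		int ss;

		/* Only slices where exactly one subslice has 7 EUs count
		 * (hweight8() in the driver, popcount here). */
		if (__builtin_popcount(subslice_7eu[i]) != 1)
			continue;

		/* ffs() is 1-based; with at most 4 subslices per slice
		 * this gives 0 <= ss <= 3. */
		ss = __builtin_ffs((int)subslice_7eu[i]) - 1;
		vals[i] = 3 - ss;
	}

	/* slice 0: 0x04 -> ss = 2 -> value 1; slices 1 and 2 skipped. */
	for (i = 0; i < 3; i++)
		printf("slice %d: IZ hashing value %d\n", i, vals[i]);

	return 0;
}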
 
static int skl_init_workarounds(struct intel_engine_cs *ring)
{
int ret;
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
 
ret = gen9_init_workarounds(ring);
if (ret)
return ret;
 
if (INTEL_REVID(dev) <= SKL_REVID_D0) {
/* WaDisableHDCInvalidation:skl */
I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) |
BDW_DISABLE_HDC_INVALIDATION);
 
/* WaDisableChickenBitTSGBarrierAckForFFSliceCS:skl */
I915_WRITE(FF_SLICE_CS_CHICKEN2,
_MASKED_BIT_ENABLE(GEN9_TSG_BARRIER_ACK_DISABLE));
}
 
/* GEN8_L3SQCREG4 has a dependency with WA batch so any new changes
* involving this register should also be added to WA batch as required.
*/
if (INTEL_REVID(dev) <= SKL_REVID_E0)
/* WaDisableLSQCROPERFforOCL:skl */
I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) |
GEN8_LQSC_RO_PERF_DIS);
 
/* WaEnableGapsTsvCreditFix:skl */
if (IS_SKYLAKE(dev) && (INTEL_REVID(dev) >= SKL_REVID_C0)) {
I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) |
GEN9_GAPS_TSV_CREDIT_DISABLE));
}
 
/* WaDisablePowerCompilerClockGating:skl */
if (INTEL_REVID(dev) == SKL_REVID_B0)
WA_SET_BIT_MASKED(HIZ_CHICKEN,
BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE);
 
if (INTEL_REVID(dev) <= SKL_REVID_D0) {
/*
*Use Force Non-Coherent whenever executing a 3D context. This
* is a workaround for a possible hang in the unlikely event
* a TLB invalidation occurs during a PSD flush.
*/
/* WaForceEnableNonCoherent:skl */
WA_SET_BIT_MASKED(HDC_CHICKEN0,
HDC_FORCE_NON_COHERENT |
HDC_DONOT_FETCH_MEM_WHEN_MASKED);
HDC_FORCE_NON_COHERENT);
}
 
if (INTEL_REVID(dev) == SKL_REVID_C0 ||
INTEL_REVID(dev) == SKL_REVID_D0)
/* WaBarrierPerformanceFixDisable:skl */
WA_SET_BIT_MASKED(HDC_CHICKEN0,
HDC_FENCE_DEST_SLM_DISABLE |
HDC_BARRIER_PERFORMANCE_DISABLE);
 
/* WaDisableSbeCacheDispatchPortSharing:skl */
if (INTEL_REVID(dev) <= SKL_REVID_F0) {
WA_SET_BIT_MASKED(
GEN7_HALF_SLICE_CHICKEN1,
GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}
 
return skl_tune_iz_hashing(ring);
}
 
static int bxt_init_workarounds(struct intel_engine_cs *ring)
{
int ret;
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
 
ret = gen9_init_workarounds(ring);
if (ret)
return ret;
 
/* WaStoreMultiplePTEenable:bxt */
/* This is a requirement according to Hardware specification */
if (INTEL_REVID(dev) == BXT_REVID_A0)
I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_TLBPF);
 
/* WaSetClckGatingDisableMedia:bxt */
if (INTEL_REVID(dev) == BXT_REVID_A0) {
I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) &
~GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE));
}
 
/* WaDisableThreadStallDopClockGating:bxt */
WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
STALL_DOP_GATING_DISABLE);
 
/* WaDisableSbeCacheDispatchPortSharing:bxt */
if (INTEL_REVID(dev) <= BXT_REVID_B0) {
WA_SET_BIT_MASKED(
GEN7_HALF_SLICE_CHICKEN1,
GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}
 
return 0;
}
 
825,6 → 1145,12
if (IS_CHERRYVIEW(dev))
return chv_init_workarounds(ring);
 
if (IS_SKYLAKE(dev))
return skl_init_workarounds(ring);
 
if (IS_BROXTON(dev))
return bxt_init_workarounds(ring);
 
return 0;
}
 
844,9 → 1170,9
* to use MI_WAIT_FOR_EVENT within the CS. It should already be
* programmed to '1' on all products.
*
* WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
* WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
*/
if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 9)
if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));
 
/* Required for the hardware to program scanline values for waiting */
861,12 → 1187,6
_MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
_MASKED_BIT_ENABLE(GFX_REPLAY_MODE));
 
if (INTEL_INFO(dev)->gen >= 5) {
ret = intel_init_pipe_control(ring);
if (ret)
return ret;
}
 
if (IS_GEN6(dev)) {
/* From the Sandybridge PRM, volume 1 part 3, page 24:
* "If this bit is set, STCunit will have LRA as replacement
877,7 → 1197,7
_MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
}
 
if (INTEL_INFO(dev)->gen >= 6)
if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)
I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));
 
if (HAS_L3_DPF(dev))
900,10 → 1220,11
intel_fini_pipe_control(ring);
}
 
static int gen8_rcs_signal(struct intel_engine_cs *signaller,
static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,
unsigned int num_dwords)
{
#define MBOX_UPDATE_DWORDS 8
struct intel_engine_cs *signaller = signaller_req->ring;
struct drm_device *dev = signaller->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *waiter;
913,15 → 1234,17
num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
#undef MBOX_UPDATE_DWORDS
 
ret = intel_ring_begin(signaller, num_dwords);
ret = intel_ring_begin(signaller_req, num_dwords);
if (ret)
return ret;
 
for_each_ring(waiter, dev_priv, i) {
u32 seqno;
u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
continue;
 
seqno = i915_gem_request_get_seqno(signaller_req);
intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
PIPE_CONTROL_QW_WRITE |
928,7 → 1251,7
PIPE_CONTROL_FLUSH_ENABLE);
intel_ring_emit(signaller, lower_32_bits(gtt_offset));
intel_ring_emit(signaller, upper_32_bits(gtt_offset));
intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
intel_ring_emit(signaller, seqno);
intel_ring_emit(signaller, 0);
intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
MI_SEMAPHORE_TARGET(waiter->id));
938,10 → 1261,11
return 0;
}
 
static int gen8_xcs_signal(struct intel_engine_cs *signaller,
static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,
unsigned int num_dwords)
{
#define MBOX_UPDATE_DWORDS 6
struct intel_engine_cs *signaller = signaller_req->ring;
struct drm_device *dev = signaller->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *waiter;
951,21 → 1275,23
num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
#undef MBOX_UPDATE_DWORDS
 
ret = intel_ring_begin(signaller, num_dwords);
ret = intel_ring_begin(signaller_req, num_dwords);
if (ret)
return ret;
 
for_each_ring(waiter, dev_priv, i) {
u32 seqno;
u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
continue;
 
seqno = i915_gem_request_get_seqno(signaller_req);
intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
MI_FLUSH_DW_OP_STOREDW);
intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
MI_FLUSH_DW_USE_GTT);
intel_ring_emit(signaller, upper_32_bits(gtt_offset));
intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
intel_ring_emit(signaller, seqno);
intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
MI_SEMAPHORE_TARGET(waiter->id));
intel_ring_emit(signaller, 0);
974,9 → 1300,10
return 0;
}
 
static int gen6_signal(struct intel_engine_cs *signaller,
static int gen6_signal(struct drm_i915_gem_request *signaller_req,
unsigned int num_dwords)
{
struct intel_engine_cs *signaller = signaller_req->ring;
struct drm_device *dev = signaller->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *useless;
987,7 → 1314,7
num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
#undef MBOX_UPDATE_DWORDS
 
ret = intel_ring_begin(signaller, num_dwords);
ret = intel_ring_begin(signaller_req, num_dwords);
if (ret)
return ret;
 
994,9 → 1321,10
for_each_ring(useless, dev_priv, i) {
u32 mbox_reg = signaller->semaphore.mbox.signal[i];
if (mbox_reg != GEN6_NOSYNC) {
u32 seqno = i915_gem_request_get_seqno(signaller_req);
intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
intel_ring_emit(signaller, mbox_reg);
intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
intel_ring_emit(signaller, seqno);
}
}
 
1010,21 → 1338,21
/**
* gen6_add_request - Update the semaphore mailbox registers
*
* @ring - ring that is adding a request
* @seqno - return seqno stuck into the ring
* @request - request to write to the ring
*
* Update the mailbox registers in the *other* rings with the current seqno.
* This acts like a signal in the canonical semaphore.
*/
static int
gen6_add_request(struct intel_engine_cs *ring)
gen6_add_request(struct drm_i915_gem_request *req)
{
struct intel_engine_cs *ring = req->ring;
int ret;
 
if (ring->semaphore.signal)
ret = ring->semaphore.signal(ring, 4);
ret = ring->semaphore.signal(req, 4);
else
ret = intel_ring_begin(ring, 4);
ret = intel_ring_begin(req, 4);
 
if (ret)
return ret;
1031,7 → 1359,7
 
intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
intel_ring_emit(ring, ring->outstanding_lazy_seqno);
intel_ring_emit(ring, i915_gem_request_get_seqno(req));
intel_ring_emit(ring, MI_USER_INTERRUPT);
__intel_ring_advance(ring);
 
1054,14 → 1382,15
*/
 
static int
gen8_ring_sync(struct intel_engine_cs *waiter,
gen8_ring_sync(struct drm_i915_gem_request *waiter_req,
struct intel_engine_cs *signaller,
u32 seqno)
{
struct intel_engine_cs *waiter = waiter_req->ring;
struct drm_i915_private *dev_priv = waiter->dev->dev_private;
int ret;
 
ret = intel_ring_begin(waiter, 4);
ret = intel_ring_begin(waiter_req, 4);
if (ret)
return ret;
 
1079,10 → 1408,11
}
 
static int
gen6_ring_sync(struct intel_engine_cs *waiter,
gen6_ring_sync(struct drm_i915_gem_request *waiter_req,
struct intel_engine_cs *signaller,
u32 seqno)
{
struct intel_engine_cs *waiter = waiter_req->ring;
u32 dw1 = MI_SEMAPHORE_MBOX |
MI_SEMAPHORE_COMPARE |
MI_SEMAPHORE_REGISTER;
1097,7 → 1427,7
 
WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);
 
ret = intel_ring_begin(waiter, 4);
ret = intel_ring_begin(waiter_req, 4);
if (ret)
return ret;
 
1128,8 → 1458,9
} while (0)
 
static int
pc_render_add_request(struct intel_engine_cs *ring)
pc_render_add_request(struct drm_i915_gem_request *req)
{
struct intel_engine_cs *ring = req->ring;
u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
int ret;
 
1141,7 → 1472,7
* incoherence by flushing the 6 PIPE_NOTIFY buffers out to
* memory before requesting an interrupt.
*/
ret = intel_ring_begin(ring, 32);
ret = intel_ring_begin(req, 32);
if (ret)
return ret;
 
1149,7 → 1480,7
PIPE_CONTROL_WRITE_FLUSH |
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
intel_ring_emit(ring, ring->outstanding_lazy_seqno);
intel_ring_emit(ring, i915_gem_request_get_seqno(req));
intel_ring_emit(ring, 0);
PIPE_CONTROL_FLUSH(ring, scratch_addr);
scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
1168,7 → 1499,7
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
PIPE_CONTROL_NOTIFY);
intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
intel_ring_emit(ring, ring->outstanding_lazy_seqno);
intel_ring_emit(ring, i915_gem_request_get_seqno(req));
intel_ring_emit(ring, 0);
__intel_ring_advance(ring);
 
1318,76 → 1649,15
spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
}
 
void intel_ring_setup_status_page(struct intel_engine_cs *ring)
{
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = ring->dev->dev_private;
u32 mmio = 0;
 
/* The ring status page addresses are no longer next to the rest of
* the ring registers as of gen7.
*/
if (IS_GEN7(dev)) {
switch (ring->id) {
case RCS:
mmio = RENDER_HWS_PGA_GEN7;
break;
case BCS:
mmio = BLT_HWS_PGA_GEN7;
break;
/*
* VCS2 actually doesn't exist on Gen7. Only shut up
* gcc switch check warning
*/
case VCS2:
case VCS:
mmio = BSD_HWS_PGA_GEN7;
break;
case VECS:
mmio = VEBOX_HWS_PGA_GEN7;
break;
}
} else if (IS_GEN6(ring->dev)) {
mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
} else {
/* XXX: gen8 returns to sanity */
mmio = RING_HWS_PGA(ring->mmio_base);
}
 
I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
POSTING_READ(mmio);
 
/*
* Flush the TLB for this page
*
* FIXME: These two bits have disappeared on gen8, so a question
* arises: do we still need this and if so how should we go about
* invalidating the TLB?
*/
if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
u32 reg = RING_INSTPM(ring->mmio_base);
 
/* ring should be idle before issuing a sync flush*/
WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);
 
I915_WRITE(reg,
_MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
INSTPM_SYNC_FLUSH));
if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
1000))
DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
ring->name);
}
}
 
static int
bsd_ring_flush(struct intel_engine_cs *ring,
bsd_ring_flush(struct drm_i915_gem_request *req,
u32 invalidate_domains,
u32 flush_domains)
{
struct intel_engine_cs *ring = req->ring;
int ret;
 
ret = intel_ring_begin(ring, 2);
ret = intel_ring_begin(req, 2);
if (ret)
return ret;
 
1398,17 → 1668,18
}
 
static int
i9xx_add_request(struct intel_engine_cs *ring)
i9xx_add_request(struct drm_i915_gem_request *req)
{
struct intel_engine_cs *ring = req->ring;
int ret;
 
ret = intel_ring_begin(ring, 4);
ret = intel_ring_begin(req, 4);
if (ret)
return ret;
 
intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
intel_ring_emit(ring, ring->outstanding_lazy_seqno);
intel_ring_emit(ring, i915_gem_request_get_seqno(req));
intel_ring_emit(ring, MI_USER_INTERRUPT);
__intel_ring_advance(ring);
 
1540,13 → 1811,14
}
 
static int
i965_dispatch_execbuffer(struct intel_engine_cs *ring,
i965_dispatch_execbuffer(struct drm_i915_gem_request *req,
u64 offset, u32 length,
unsigned flags)
unsigned dispatch_flags)
{
struct intel_engine_cs *ring = req->ring;
int ret;
 
ret = intel_ring_begin(ring, 2);
ret = intel_ring_begin(req, 2);
if (ret)
return ret;
 
1553,7 → 1825,8
intel_ring_emit(ring,
MI_BATCH_BUFFER_START |
MI_BATCH_GTT |
(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
(dispatch_flags & I915_DISPATCH_SECURE ?
0 : MI_BATCH_NON_SECURE_I965));
intel_ring_emit(ring, offset);
intel_ring_advance(ring);
 
1565,14 → 1838,15
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
static int
i830_dispatch_execbuffer(struct intel_engine_cs *ring,
i830_dispatch_execbuffer(struct drm_i915_gem_request *req,
u64 offset, u32 len,
unsigned flags)
unsigned dispatch_flags)
{
struct intel_engine_cs *ring = req->ring;
u32 cs_offset = ring->scratch.gtt_offset;
int ret;
 
ret = intel_ring_begin(ring, 6);
ret = intel_ring_begin(req, 6);
if (ret)
return ret;
 
1585,11 → 1859,11
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
 
if ((flags & I915_DISPATCH_PINNED) == 0) {
if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
if (len > I830_BATCH_LIMIT)
return -ENOSPC;
 
ret = intel_ring_begin(ring, 6 + 2);
ret = intel_ring_begin(req, 6 + 2);
if (ret)
return ret;
 
1612,12 → 1886,13
offset = cs_offset;
}
 
ret = intel_ring_begin(ring, 4);
ret = intel_ring_begin(req, 4);
if (ret)
return ret;
 
intel_ring_emit(ring, MI_BATCH_BUFFER);
intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
0 : MI_BATCH_NON_SECURE));
intel_ring_emit(ring, offset + len - 8);
intel_ring_emit(ring, MI_NOOP);
intel_ring_advance(ring);
1626,18 → 1901,20
}
 
static int
i915_dispatch_execbuffer(struct intel_engine_cs *ring,
i915_dispatch_execbuffer(struct drm_i915_gem_request *req,
u64 offset, u32 len,
unsigned flags)
unsigned dispatch_flags)
{
struct intel_engine_cs *ring = req->ring;
int ret;
 
ret = intel_ring_begin(ring, 2);
ret = intel_ring_begin(req, 2);
if (ret)
return ret;
 
intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
intel_ring_emit(ring, offset | (dispatch_flags & I915_DISPATCH_SECURE ?
0 : MI_BATCH_NON_SECURE));
intel_ring_advance(ring);
 
return 0;
1759,13 → 2036,13
return 0;
}
 
void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
static void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
{
drm_gem_object_unreference(&ringbuf->obj->base);
ringbuf->obj = NULL;
}
 
int intel_alloc_ringbuffer_obj(struct drm_device *dev,
static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
struct intel_ringbuffer *ringbuf)
{
struct drm_i915_gem_object *obj;
1786,29 → 2063,70
return 0;
}
 
struct intel_ringbuffer *
intel_engine_create_ringbuffer(struct intel_engine_cs *engine, int size)
{
struct intel_ringbuffer *ring;
int ret;
 
ring = kzalloc(sizeof(*ring), GFP_KERNEL);
if (ring == NULL)
return ERR_PTR(-ENOMEM);
 
ring->ring = engine;
 
ring->size = size;
/* Workaround an erratum on the i830 which causes a hang if
* the TAIL pointer points to within the last 2 cachelines
* of the buffer.
*/
ring->effective_size = size;
if (IS_I830(engine->dev) || IS_845G(engine->dev))
ring->effective_size -= 2 * CACHELINE_BYTES;
 
ring->last_retired_head = -1;
intel_ring_update_space(ring);
 
ret = intel_alloc_ringbuffer_obj(engine->dev, ring);
if (ret) {
DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
engine->name, ret);
kfree(ring);
return ERR_PTR(ret);
}
 
return ring;
}
 
void
intel_ringbuffer_free(struct intel_ringbuffer *ring)
{
intel_destroy_ringbuffer_obj(ring);
kfree(ring);
}
 
static int intel_init_ring_buffer(struct drm_device *dev,
struct intel_engine_cs *ring)
{
struct intel_ringbuffer *ringbuf = ring->buffer;
struct intel_ringbuffer *ringbuf;
int ret;
 
if (ringbuf == NULL) {
ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
if (!ringbuf)
return -ENOMEM;
ring->buffer = ringbuf;
}
WARN_ON(ring->buffer);
 
ring->dev = dev;
INIT_LIST_HEAD(&ring->active_list);
INIT_LIST_HEAD(&ring->request_list);
INIT_LIST_HEAD(&ring->execlist_queue);
ringbuf->size = 32 * PAGE_SIZE;
ringbuf->ring = ring;
i915_gem_batch_pool_init(dev, &ring->batch_pool);
memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));
 
init_waitqueue_head(&ring->irq_queue);
 
ringbuf = intel_engine_create_ringbuffer(ring, 32 * PAGE_SIZE);
if (IS_ERR(ringbuf))
return PTR_ERR(ringbuf);
ring->buffer = ringbuf;
 
if (I915_NEED_GFX_HWS(dev)) {
ret = init_status_page(ring);
if (ret)
1820,14 → 2138,6
goto error;
}
 
if (ringbuf->obj == NULL) {
ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
if (ret) {
DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
ring->name, ret);
goto error;
}
 
ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
if (ret) {
DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
1835,28 → 2145,15
intel_destroy_ringbuffer_obj(ringbuf);
goto error;
}
}
 
/* Workaround an erratum on the i830 which causes a hang if
* the TAIL pointer points to within the last 2 cachelines
* of the buffer.
*/
ringbuf->effective_size = ringbuf->size;
if (IS_I830(dev) || IS_845G(dev))
ringbuf->effective_size -= 2 * CACHELINE_BYTES;
 
ret = i915_cmd_parser_init_ring(ring);
if (ret)
goto error;
 
ret = ring->init(ring);
if (ret)
goto error;
 
return 0;
 
error:
kfree(ringbuf);
intel_ringbuffer_free(ringbuf);
ring->buffer = NULL;
return ret;
}
1864,131 → 2161,64
void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
{
struct drm_i915_private *dev_priv;
struct intel_ringbuffer *ringbuf;
 
if (!intel_ring_initialized(ring))
return;
 
dev_priv = to_i915(ring->dev);
ringbuf = ring->buffer;
 
intel_stop_ring_buffer(ring);
WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);
 
intel_unpin_ringbuffer_obj(ringbuf);
intel_destroy_ringbuffer_obj(ringbuf);
ring->preallocated_lazy_request = NULL;
ring->outstanding_lazy_seqno = 0;
intel_unpin_ringbuffer_obj(ring->buffer);
intel_ringbuffer_free(ring->buffer);
ring->buffer = NULL;
 
if (ring->cleanup)
ring->cleanup(ring);
 
// cleanup_status_page(ring);
cleanup_status_page(ring);
 
i915_cmd_parser_fini_ring(ring);
 
kfree(ringbuf);
ring->buffer = NULL;
i915_gem_batch_pool_fini(&ring->batch_pool);
}
 
static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
{
struct intel_ringbuffer *ringbuf = ring->buffer;
struct drm_i915_gem_request *request;
u32 seqno = 0;
unsigned space;
int ret;
 
if (ringbuf->last_retired_head != -1) {
ringbuf->head = ringbuf->last_retired_head;
ringbuf->last_retired_head = -1;
 
ringbuf->space = intel_ring_space(ringbuf);
if (ringbuf->space >= n)
if (intel_ring_space(ringbuf) >= n)
return 0;
}
 
/* The whole point of reserving space is to not wait! */
WARN_ON(ringbuf->reserved_in_use);
 
list_for_each_entry(request, &ring->request_list, list) {
if (__intel_ring_space(request->tail, ringbuf->tail,
ringbuf->size) >= n) {
seqno = request->seqno;
space = __intel_ring_space(request->postfix, ringbuf->tail,
ringbuf->size);
if (space >= n)
break;
}
}
 
if (seqno == 0)
if (WARN_ON(&request->list == &ring->request_list))
return -ENOSPC;
 
ret = i915_wait_seqno(ring, seqno);
ret = i915_wait_request(request);
if (ret)
return ret;
 
i915_gem_retire_requests_ring(ring);
ringbuf->head = ringbuf->last_retired_head;
ringbuf->last_retired_head = -1;
 
ringbuf->space = intel_ring_space(ringbuf);
ringbuf->space = space;
return 0;
}
 
static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)
{
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_ringbuffer *ringbuf = ring->buffer;
unsigned long end;
int ret;
 
ret = intel_ring_wait_request(ring, n);
if (ret != -ENOSPC)
return ret;
 
/* force the tail write in case we have been skipping them */
__intel_ring_advance(ring);
 
/* With GEM the hangcheck timer should kick us out of the loop,
* leaving it early runs the risk of corrupting GEM state (due
* to running on almost untested codepaths). But on resume
* timers don't work yet, so prevent a complete hang in that
* case by choosing an insanely large timeout. */
end = jiffies + 60 * HZ;
 
trace_i915_ring_wait_begin(ring);
do {
ringbuf->head = I915_READ_HEAD(ring);
ringbuf->space = intel_ring_space(ringbuf);
if (ringbuf->space >= n) {
ret = 0;
break;
}
 
msleep(1);
 
ret = i915_gem_check_wedge(&dev_priv->gpu_error,
dev_priv->mm.interruptible);
if (ret)
break;
 
if (time_after(jiffies, end)) {
ret = -EBUSY;
break;
}
} while (1);
trace_i915_ring_wait_end(ring);
return ret;
}
 
static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
{
uint32_t __iomem *virt;
struct intel_ringbuffer *ringbuf = ring->buffer;
int rem = ringbuf->size - ringbuf->tail;
 
if (ringbuf->space < rem) {
int ret = ring_wait_for_space(ring, rem);
if (ret)
return ret;
}
 
virt = ringbuf->virtual_start + ringbuf->tail;
rem /= 4;
while (rem--)
1995,80 → 2225,154
iowrite32(MI_NOOP, virt++);
 
ringbuf->tail = 0;
ringbuf->space = intel_ring_space(ringbuf);
 
return 0;
intel_ring_update_space(ringbuf);
}
 
int intel_ring_idle(struct intel_engine_cs *ring)
{
u32 seqno;
int ret;
struct drm_i915_gem_request *req;
 
/* We need to add any requests required to flush the objects and ring */
if (ring->outstanding_lazy_seqno) {
ret = i915_add_request(ring, NULL);
if (ret)
return ret;
}
 
/* Wait upon the last request to be completed */
if (list_empty(&ring->request_list))
return 0;
 
seqno = list_entry(ring->request_list.prev,
req = list_entry(ring->request_list.prev,
struct drm_i915_gem_request,
list)->seqno;
list);
 
return i915_wait_seqno(ring, seqno);
/* Make sure we do not trigger any retires */
return __i915_wait_request(req,
atomic_read(&to_i915(ring->dev)->gpu_error.reset_counter),
to_i915(ring->dev)->mm.interruptible,
NULL, NULL);
}
 
static int
intel_ring_alloc_seqno(struct intel_engine_cs *ring)
int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)
{
if (ring->outstanding_lazy_seqno)
request->ringbuf = request->ring->buffer;
return 0;
}
 
if (ring->preallocated_lazy_request == NULL) {
struct drm_i915_gem_request *request;
int intel_ring_reserve_space(struct drm_i915_gem_request *request)
{
/*
* The first call merely notes the reserve request and is common for
* all back ends. The subsequent localised _begin() call actually
* ensures that the reservation is available. Without the begin, if
* the request creator immediately submitted the request without
* adding any commands to it then there might not actually be
* sufficient room for the submission commands.
*/
intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST);
 
request = kmalloc(sizeof(*request), GFP_KERNEL);
if (request == NULL)
return -ENOMEM;
return intel_ring_begin(request, 0);
}
 
ring->preallocated_lazy_request = request;
void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size)
{
WARN_ON(ringbuf->reserved_size);
WARN_ON(ringbuf->reserved_in_use);
 
ringbuf->reserved_size = size;
}
 
return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf)
{
WARN_ON(ringbuf->reserved_in_use);
 
ringbuf->reserved_size = 0;
ringbuf->reserved_in_use = false;
}
 
static int __intel_ring_prepare(struct intel_engine_cs *ring,
int bytes)
void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf)
{
WARN_ON(ringbuf->reserved_in_use);
 
ringbuf->reserved_in_use = true;
ringbuf->reserved_tail = ringbuf->tail;
}
 
void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf)
{
WARN_ON(!ringbuf->reserved_in_use);
if (ringbuf->tail > ringbuf->reserved_tail) {
WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size,
"request reserved size too small: %d vs %d!\n",
ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size);
} else {
/*
* The ring was wrapped while the reserved space was in use.
* That means that some unknown amount of the ring tail was
* no-op filled and skipped. Thus simply adding the ring size
* to the tail and doing the above space check will not work.
* Rather than attempt to track how much tail was skipped,
* it is much simpler to say that also skipping the sanity
* check every once in a while is not a big issue.
*/
}
 
ringbuf->reserved_size = 0;
ringbuf->reserved_in_use = false;
}
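 
The reservation helpers above implement a simple contract: set space aside when the request is created, mark it in use when the closing commands are emitted, and verify on _end that they fit. A toy analogue of that contract (names, sizes and the missing wrap handling are all simplifications, not driver code):

#include <assert.h>
#include <stdio.h>

struct toy_ring {
	int tail, size;
	int reserved_size, reserved_tail, reserved_in_use;
};

static void reserve_space(struct toy_ring *r, int size) { r->reserved_size = size; }

static void reserve_use(struct toy_ring *r)
{
	r->reserved_in_use = 1;
	r->reserved_tail = r->tail;
}

static void reserve_end(struct toy_ring *r)
{
	/* The closing commands must fit in what was set aside
	 * (the wrapped-tail case handled in the driver is ignored). */
	assert(r->tail - r->reserved_tail <= r->reserved_size);
	r->reserved_size = 0;
	r->reserved_in_use = 0;
}

static void emit(struct toy_ring *r, int dwords) { r->tail += dwords * 4; }

int main(void)
{
	struct toy_ring r = { .size = 4096 };

	reserve_space(&r, 32);	/* room for the "add request" commands */
	emit(&r, 64);		/* the request's own payload */
	reserve_use(&r);
	emit(&r, 6);		/* closing commands: 24 bytes <= 32 reserved */
	reserve_end(&r);
	printf("tail = %d\n", r.tail);
	return 0;
}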
 
static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)
{
struct intel_ringbuffer *ringbuf = ring->buffer;
int ret;
int remain_usable = ringbuf->effective_size - ringbuf->tail;
int remain_actual = ringbuf->size - ringbuf->tail;
int ret, total_bytes, wait_bytes = 0;
bool need_wrap = false;
 
if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
ret = intel_wrap_ring_buffer(ring);
if (unlikely(ret))
return ret;
if (ringbuf->reserved_in_use)
total_bytes = bytes;
else
total_bytes = bytes + ringbuf->reserved_size;
 
if (unlikely(bytes > remain_usable)) {
/*
* Not enough space for the basic request. So need to flush
* out the remainder and then wait for base + reserved.
*/
wait_bytes = remain_actual + total_bytes;
need_wrap = true;
} else {
if (unlikely(total_bytes > remain_usable)) {
/*
* The base request will fit but the reserved space
* falls off the end. So only need to wait for the
* reserved size after flushing out the remainder.
*/
wait_bytes = remain_actual + ringbuf->reserved_size;
need_wrap = true;
} else if (total_bytes > ringbuf->space) {
/* No wrapping required, just waiting. */
wait_bytes = total_bytes;
}
}
 
if (unlikely(ringbuf->space < bytes)) {
ret = ring_wait_for_space(ring, bytes);
if (wait_bytes) {
ret = ring_wait_for_space(ring, wait_bytes);
if (unlikely(ret))
return ret;
 
if (need_wrap)
__wrap_ring_buffer(ringbuf);
}
 
return 0;
}
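 
The wrap/wait decision above is plain arithmetic on the tail position, the effective size and the reserved slack. A worked standalone example with illustrative numbers (4 KiB ring, an assumed two-cacheline guard band of 128 bytes, and the reserved_in_use special case omitted):

#include <stdio.h>
#include <stdbool.h>

int main(void)
{
	/* Illustrative numbers only: 4 KiB ring, 128-byte guard band,
	 * tail near the end of the usable area. */
	int size = 4096, effective_size = 4096 - 128;
	int tail = 3936, space = 512;
	int bytes = 16, reserved_size = 160;

	int remain_usable = effective_size - tail;	/* 32  */
	int remain_actual = size - tail;		/* 160 */
	int total_bytes = bytes + reserved_size;	/* 176 */
	int wait_bytes = 0;
	bool need_wrap = false;

	if (bytes > remain_usable) {
		/* The request itself does not fit before the wrap point. */
		wait_bytes = remain_actual + total_bytes;
		need_wrap = true;
	} else if (total_bytes > remain_usable) {
		/* The request fits, but the reserved slack spills past
		 * the end, so flush the remainder and wrap anyway. */
		wait_bytes = remain_actual + reserved_size;
		need_wrap = true;
	} else if (total_bytes > space) {
		wait_bytes = total_bytes;	/* no wrap, just wait */
	}

	/* Here: 16 <= 32 but 176 > 32, so wait for 160 + 160 = 320
	 * bytes and wrap, as the second branch decides. */
	printf("wait_bytes = %d, need_wrap = %d\n", wait_bytes, need_wrap);
	return 0;
}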
 
int intel_ring_begin(struct intel_engine_cs *ring,
int intel_ring_begin(struct drm_i915_gem_request *req,
int num_dwords)
{
struct drm_i915_private *dev_priv = ring->dev->dev_private;
struct intel_engine_cs *ring;
struct drm_i915_private *dev_priv;
int ret;
 
WARN_ON(req == NULL);
ring = req->ring;
dev_priv = ring->dev->dev_private;
 
ret = i915_gem_check_wedge(&dev_priv->gpu_error,
dev_priv->mm.interruptible);
if (ret)
2078,18 → 2382,14
if (ret)
return ret;
 
/* Preallocate the olr before touching the ring */
ret = intel_ring_alloc_seqno(ring);
if (ret)
return ret;
 
ring->buffer->space -= num_dwords * sizeof(uint32_t);
return 0;
}
 
/* Align the ring tail to a cacheline boundary */
int intel_ring_cacheline_align(struct intel_engine_cs *ring)
int intel_ring_cacheline_align(struct drm_i915_gem_request *req)
{
struct intel_engine_cs *ring = req->ring;
int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
int ret;
 
2097,7 → 2397,7
return 0;
 
num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
ret = intel_ring_begin(ring, num_dwords);
ret = intel_ring_begin(req, num_dwords);
if (ret)
return ret;
 
2114,8 → 2414,6
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
 
BUG_ON(ring->outstanding_lazy_seqno);
 
if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
2160,13 → 2458,14
_MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
}
 
static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,
u32 invalidate, u32 flush)
{
struct intel_engine_cs *ring = req->ring;
uint32_t cmd;
int ret;
 
ret = intel_ring_begin(ring, 4);
ret = intel_ring_begin(req, 4);
if (ret)
return ret;
 
2173,6 → 2472,14
cmd = MI_FLUSH_DW;
if (INTEL_INFO(ring->dev)->gen >= 8)
cmd += 1;
 
/* We always require a command barrier so that subsequent
* commands, such as breadcrumb interrupts, are strictly ordered
* wrt the contents of the write cache being flushed to memory
* (and thus being coherent from the CPU).
*/
cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
 
/*
* Bspec vol 1c.5 - video engine command streamer:
* "If ENABLED, all TLBs will be invalidated once the flush
2180,8 → 2487,8
* Post-Sync Operation field is a value of 1h or 3h."
*/
if (invalidate & I915_GEM_GPU_DOMAINS)
cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD;
 
intel_ring_emit(ring, cmd);
intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
if (INTEL_INFO(ring->dev)->gen >= 8) {
2196,19 → 2503,23
}
 
static int
gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
u64 offset, u32 len,
unsigned flags)
unsigned dispatch_flags)
{
bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE);
struct intel_engine_cs *ring = req->ring;
bool ppgtt = USES_PPGTT(ring->dev) &&
!(dispatch_flags & I915_DISPATCH_SECURE);
int ret;
 
ret = intel_ring_begin(ring, 4);
ret = intel_ring_begin(req, 4);
if (ret)
return ret;
 
/* FIXME(BDW): Address space and security selectors. */
intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
(dispatch_flags & I915_DISPATCH_RS ?
MI_BATCH_RESOURCE_STREAMER : 0));
intel_ring_emit(ring, lower_32_bits(offset));
intel_ring_emit(ring, upper_32_bits(offset));
intel_ring_emit(ring, MI_NOOP);
2218,20 → 2529,23
}
 
static int
hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
u64 offset, u32 len,
unsigned flags)
unsigned dispatch_flags)
{
struct intel_engine_cs *ring = req->ring;
int ret;
 
ret = intel_ring_begin(ring, 2);
ret = intel_ring_begin(req, 2);
if (ret)
return ret;
 
intel_ring_emit(ring,
MI_BATCH_BUFFER_START |
(flags & I915_DISPATCH_SECURE ?
0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW));
(dispatch_flags & I915_DISPATCH_SECURE ?
0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
(dispatch_flags & I915_DISPATCH_RS ?
MI_BATCH_RESOURCE_STREAMER : 0));
/* bit0-7 is the length on GEN6+ */
intel_ring_emit(ring, offset);
intel_ring_advance(ring);
2240,19 → 2554,21
}
 
static int
gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,
u64 offset, u32 len,
unsigned flags)
unsigned dispatch_flags)
{
struct intel_engine_cs *ring = req->ring;
int ret;
 
ret = intel_ring_begin(ring, 2);
ret = intel_ring_begin(req, 2);
if (ret)
return ret;
 
intel_ring_emit(ring,
MI_BATCH_BUFFER_START |
(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
(dispatch_flags & I915_DISPATCH_SECURE ?
0 : MI_BATCH_NON_SECURE_I965));
/* bit0-7 is the length on GEN6+ */
intel_ring_emit(ring, offset);
intel_ring_advance(ring);
2262,21 → 2578,29
 
/* Blitter support (SandyBridge+) */
 
static int gen6_ring_flush(struct intel_engine_cs *ring,
static int gen6_ring_flush(struct drm_i915_gem_request *req,
u32 invalidate, u32 flush)
{
struct intel_engine_cs *ring = req->ring;
struct drm_device *dev = ring->dev;
struct drm_i915_private *dev_priv = dev->dev_private;
uint32_t cmd;
int ret;
 
ret = intel_ring_begin(ring, 4);
ret = intel_ring_begin(req, 4);
if (ret)
return ret;
 
cmd = MI_FLUSH_DW;
if (INTEL_INFO(ring->dev)->gen >= 8)
if (INTEL_INFO(dev)->gen >= 8)
cmd += 1;
 
/* We always require a command barrier so that subsequent
* commands, such as breadcrumb interrupts, are strictly ordered
* wrt the contents of the write cache being flushed to memory
* (and thus being coherent from the CPU).
*/
cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
 
/*
* Bspec vol 1c.3 - blitter engine command streamer:
* "If ENABLED, all TLBs will be invalidated once the flush
2284,11 → 2608,10
* Post-Sync Operation field is a value of 1h or 3h."
*/
if (invalidate & I915_GEM_DOMAIN_RENDER)
cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
MI_FLUSH_DW_OP_STOREDW;
cmd |= MI_INVALIDATE_TLB;
intel_ring_emit(ring, cmd);
intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
if (INTEL_INFO(ring->dev)->gen >= 8) {
if (INTEL_INFO(dev)->gen >= 8) {
intel_ring_emit(ring, 0); /* upper addr */
intel_ring_emit(ring, 0); /* value */
} else {
2297,13 → 2620,6
}
intel_ring_advance(ring);
 
if (!invalidate && flush) {
if (IS_GEN7(dev))
return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);
else if (IS_BROADWELL(dev))
dev_priv->fbc.need_sw_cache_clean = true;
}
 
return 0;
}
 
2336,7 → 2652,7
}
}
 
ring->init_context = intel_ring_workarounds_emit;
ring->init_context = intel_rcs_ctx_init;
ring->add_request = gen6_add_request;
ring->flush = gen8_render_ring_flush;
ring->irq_get = gen8_ring_get_irq;
2351,6 → 2667,7
GEN8_RING_SEMAPHORE_INIT;
}
} else if (INTEL_INFO(dev)->gen >= 6) {
ring->init_context = intel_rcs_ctx_init;
ring->add_request = gen6_add_request;
ring->flush = gen7_render_ring_flush;
if (INTEL_INFO(dev)->gen == 6)
2421,7 → 2738,7
ring->dispatch_execbuffer = i830_dispatch_execbuffer;
else
ring->dispatch_execbuffer = i915_dispatch_execbuffer;
ring->init = init_render_ring;
ring->init_hw = init_render_ring;
ring->cleanup = render_ring_cleanup;
 
/* Workaround batchbuffer to combat CS tlb bug. */
2443,9 → 2760,19
ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
}
 
return intel_init_ring_buffer(dev, ring);
ret = intel_init_ring_buffer(dev, ring);
if (ret)
return ret;
 
if (INTEL_INFO(dev)->gen >= 5) {
ret = intel_init_pipe_control(ring);
if (ret)
return ret;
}
 
return 0;
}
 
int intel_init_bsd_ring_buffer(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
2514,14 → 2841,13
}
ring->dispatch_execbuffer = i965_dispatch_execbuffer;
}
ring->init = init_ring_common;
ring->init_hw = init_ring_common;
 
return intel_init_ring_buffer(dev, ring);
}
 
/**
* Initialize the second BSD ring for Broadwell GT3.
* It is noted that this only exists on Broadwell GT3.
* Initialize the second BSD ring (eg. Broadwell GT3, Skylake GT3)
*/
int intel_init_bsd2_ring_buffer(struct drm_device *dev)
{
2528,11 → 2854,6
struct drm_i915_private *dev_priv = dev->dev_private;
struct intel_engine_cs *ring = &dev_priv->ring[VCS2];
 
if ((INTEL_INFO(dev)->gen != 8)) {
DRM_ERROR("No dual-BSD ring on non-BDW machine\n");
return -EINVAL;
}
 
ring->name = "bsd2 ring";
ring->id = VCS2;
 
2553,7 → 2874,7
ring->semaphore.signal = gen8_xcs_signal;
GEN8_RING_SEMAPHORE_INIT;
}
ring->init = init_ring_common;
ring->init_hw = init_ring_common;
 
return intel_init_ring_buffer(dev, ring);
}
2610,7 → 2931,7
ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
}
}
ring->init = init_ring_common;
ring->init_hw = init_ring_common;
 
return intel_init_ring_buffer(dev, ring);
}
2661,24 → 2982,25
ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
}
}
ring->init = init_ring_common;
ring->init_hw = init_ring_common;
 
return intel_init_ring_buffer(dev, ring);
}
 
int
intel_ring_flush_all_caches(struct intel_engine_cs *ring)
intel_ring_flush_all_caches(struct drm_i915_gem_request *req)
{
struct intel_engine_cs *ring = req->ring;
int ret;
 
if (!ring->gpu_caches_dirty)
return 0;
 
ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
ret = ring->flush(req, 0, I915_GEM_GPU_DOMAINS);
if (ret)
return ret;
 
trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);
trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS);
 
ring->gpu_caches_dirty = false;
return 0;
2685,8 → 3007,9
}
 
int
intel_ring_invalidate_all_caches(struct intel_engine_cs *ring)
intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
{
struct intel_engine_cs *ring = req->ring;
uint32_t flush_domains;
int ret;
 
2694,11 → 3017,11
if (ring->gpu_caches_dirty)
flush_domains = I915_GEM_GPU_DOMAINS;
 
ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
ret = ring->flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
if (ret)
return ret;
 
trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);
 
ring->gpu_caches_dirty = false;
return 0;