31,17 → 31,13 |
#include "i915_drv.h" |
#include "i915_trace.h" |
#include "intel_drv.h" |
#include <linux/dma_remapping.h> |
|
#define __EXEC_OBJECT_HAS_PIN (1<<31) |
#define __EXEC_OBJECT_HAS_FENCE (1<<30) |
#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) |
|
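/* Port-local stubs (assumption): this build appears to run without a |
 * separate userspace address space, so copying to and from "user" |
 * memory reduces to a plain memcpy() that always reports success. |
 */ |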
static unsigned long |
copy_to_user(void __user *to, const void *from, unsigned long n) |
{ |
memcpy(to, from, n); |
return 0; |
} |
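 |
/* Minimum GTT offset for objects pinned with __EXEC_OBJECT_NEEDS_BIAS |
 * (i.e. the batch buffer); see the comment in eb_get_batch() below for |
 * why the batch must not land in the low 256KiB of the address space. |
 */ |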
#define BATCH_OFFSET_BIAS (256*1024) |
|
static unsigned long |
copy_from_user(void *to, const void __user *from, unsigned long n) |
105,6 → 101,7 |
struct i915_address_space *vm, |
struct drm_file *file) |
{ |
struct drm_i915_private *dev_priv = vm->dev->dev_private; |
struct drm_i915_gem_object *obj; |
struct list_head objects; |
int i, ret; |
139,7 → 136,21 |
i = 0; |
while (!list_empty(&objects)) { |
struct i915_vma *vma; |
struct i915_address_space *bind_vm = vm; |
|
if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT && |
USES_FULL_PPGTT(vm->dev)) { |
ret = -EINVAL; |
goto err; |
} |
|
		/* Secure dispatch executes out of the global GTT, so the batch |
		 * buffer (always the last object in the exec list) must be |
		 * bound into the GGTT VM rather than the context's PPGTT. |
		 */ |
		if ((args->flags & I915_EXEC_SECURE) && |
		    (i == (args->buffer_count - 1))) |
			bind_vm = &dev_priv->gtt.base; |
|
obj = list_first_entry(&objects, |
struct drm_i915_gem_object, |
obj_exec_link); |
152,7 → 163,7 |
* from the (obj, vm) we don't run the risk of creating |
* duplicated vmas for the same vm. |
*/ |
		vma = i915_gem_obj_lookup_or_create_vma(obj, bind_vm); |
if (IS_ERR(vma)) { |
DRM_DEBUG("Failed to lookup VMA\n"); |
ret = PTR_ERR(vma); |
231,7 → 242,7 |
i915_gem_object_unpin_fence(obj); |
|
if (entry->flags & __EXEC_OBJECT_HAS_PIN) |
		vma->pin_count--; |
|
entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); |
} |
261,11 → 272,13 |
|
static int |
relocate_entry_cpu(struct drm_i915_gem_object *obj, |
struct drm_i915_gem_relocation_entry *reloc, |
uint64_t target_offset) |
{ |
struct drm_device *dev = obj->base.dev; |
struct drm_i915_private *dev_priv = dev->dev_private; |
uint32_t page_offset = offset_in_page(reloc->offset); |
uint64_t delta = reloc->delta + target_offset; |
char *vaddr; |
int ret; |
|
273,19 → 286,32 |
if (ret) |
return ret; |
|
	vaddr = (char*)dev_priv->gtt.mappable+4096; |
MapPage(vaddr,(addr_t)i915_gem_object_get_page(obj,reloc->offset >> PAGE_SHIFT), PG_SW); |
	*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta); |
|
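	/* Gen8+ relocations are 64 bits wide: write the upper dword as |
	 * well, remapping first when it crosses into the following page. |
	 */ |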
if (INTEL_INFO(dev)->gen >= 8) { |
page_offset = offset_in_page(page_offset + sizeof(uint32_t)); |
|
if (page_offset == 0) { |
MapPage(vaddr,(addr_t)i915_gem_object_get_page(obj, |
(reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT), PG_SW); |
} |
|
*(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta); |
} |
|
return 0; |
} |
|
static int |
relocate_entry_gtt(struct drm_i915_gem_object *obj, |
struct drm_i915_gem_relocation_entry *reloc, |
uint64_t target_offset) |
{ |
struct drm_device *dev = obj->base.dev; |
struct drm_i915_private *dev_priv = dev->dev_private; |
uint64_t delta = reloc->delta + target_offset; |
uint32_t __iomem *reloc_entry; |
void __iomem *reloc_page; |
int ret; |
305,8 → 331,9 |
reloc_page = dev_priv->gtt.mappable; |
reloc_entry = (uint32_t __iomem *) |
(reloc_page + offset_in_page(reloc->offset)); |
	iowrite32(lower_32_bits(delta), reloc_entry); |
 |
return 0; |
} |
|
313,14 → 340,13 |
static int |
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, |
struct eb_vmas *eb, |
struct drm_i915_gem_relocation_entry *reloc) |
{ |
struct drm_device *dev = obj->base.dev; |
struct drm_gem_object *target_obj; |
struct drm_i915_gem_object *target_i915_obj; |
struct i915_vma *target_vma; |
uint64_t target_offset; |
int ret; |
|
	/* we already hold a reference to all valid objects */ |
338,8 → 364,10 |
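	/* Note (upstream rationale, as far as we can tell): gen6 requires |
	 * instruction-domain writes such as PIPE_CONTROL to be addressed |
	 * through the global GTT, hence the forced GGTT binding below. |
	 */ |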
if (unlikely(IS_GEN6(dev) && |
reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && |
!target_i915_obj->has_global_gtt_mapping)) { |
struct i915_vma *vma = |
list_first_entry(&target_i915_obj->vma_list, |
typeof(*vma), vma_link); |
vma->bind_vma(vma, target_i915_obj->cache_level, GLOBAL_BIND); |
} |
|
/* Validate that the target is in a valid r/w GPU domain */ |
394,11 → 422,10 |
|
/* We can't wait for rendering with pagefaults disabled */ |
|
if (use_cpu_reloc(obj)) |
		ret = relocate_entry_cpu(obj, reloc, target_offset); |
else |
		ret = relocate_entry_gtt(obj, reloc, target_offset); |
|
if (ret) |
return ret; |
434,8 → 461,7 |
do { |
u64 offset = r->presumed_offset; |
|
ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r); |
if (ret) |
return ret; |
|
464,8 → 490,7 |
int i, ret; |
|
for (i = 0; i < entry->relocation_count; i++) { |
ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]); |
if (ret) |
return ret; |
} |
507,24 → 532,31 |
|
static int |
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, |
struct intel_engine_cs *ring, |
bool *need_reloc) |
{ |
	struct drm_i915_gem_object *obj = vma->obj; |
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; |
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; |
	bool need_fence; |
	uint64_t flags; |
int ret; |
|
flags = 0; |
|
need_fence = |
has_fenced_gpu_access && |
entry->flags & EXEC_OBJECT_NEEDS_FENCE && |
obj->tiling_mode != I915_TILING_NONE; |
	if (need_fence || need_reloc_mappable(vma)) |
		flags |= PIN_MAPPABLE; |
 |
if (entry->flags & EXEC_OBJECT_NEEDS_GTT) |
flags |= PIN_GLOBAL; |
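	/* PIN_OFFSET_BIAS turns BATCH_OFFSET_BIAS into a minimum start |
	 * offset for the pin (see eb_get_batch()). |
	 */ |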
if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) |
flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; |
|
ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); |
if (ret) |
return ret; |
|
543,14 → 575,6 |
} |
} |
|
if (entry->offset != vma->node.start) { |
entry->offset = vma->node.start; |
*need_reloc = true; |
561,15 → 585,41 |
obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER; |
} |
|
return 0; |
} |
|
static bool |
eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access) |
{ |
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; |
struct drm_i915_gem_object *obj = vma->obj; |
bool need_fence, need_mappable; |
|
need_fence = |
has_fenced_gpu_access && |
entry->flags & EXEC_OBJECT_NEEDS_FENCE && |
obj->tiling_mode != I915_TILING_NONE; |
need_mappable = need_fence || need_reloc_mappable(vma); |
|
WARN_ON((need_mappable || need_fence) && |
!i915_is_ggtt(vma->vm)); |
|
if (entry->alignment && |
vma->node.start & (entry->alignment - 1)) |
return true; |
|
if (need_mappable && !obj->map_and_fenceable) |
return true; |
|
if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && |
vma->node.start < BATCH_OFFSET_BIAS) |
return true; |
|
return false; |
} |
|
static int |
i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, |
struct list_head *vmas, |
bool *need_relocs) |
{ |
583,6 → 633,8 |
if (list_empty(vmas)) |
return 0; |
|
i915_gem_retire_requests_ring(ring); |
|
vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; |
|
INIT_LIST_HEAD(&ordered_vmas); |
629,26 → 681,10 |
|
/* Unbind any ill-fitting objects or pin. */ |
list_for_each_entry(vma, vmas, exec_list) { |
		if (!drm_mm_node_allocated(&vma->node)) |
			continue; |
 |
if (eb_vma_misplaced(vma, has_fenced_gpu_access)) |
ret = i915_vma_unbind(vma); |
else |
ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs); |
674,7 → 710,7 |
list_for_each_entry(vma, vmas, exec_list) |
i915_gem_execbuffer_unreserve_vma(vma); |
|
		ret = i915_gem_evict_vm(vm, true); |
if (ret) |
return ret; |
} while (1); |
684,7 → 720,7 |
i915_gem_execbuffer_relocate_slow(struct drm_device *dev, |
struct drm_i915_gem_execbuffer2 *args, |
struct drm_file *file, |
struct intel_engine_cs *ring, |
struct eb_vmas *eb, |
struct drm_i915_gem_exec_object2 *exec) |
{ |
749,7 → 785,7 |
* relocations were valid. |
*/ |
for (j = 0; j < exec[i].relocation_count; j++) { |
			if (__copy_to_user(&user_relocs[j].presumed_offset, |
&invalid_offset, |
sizeof(invalid_offset))) { |
ret = -EFAULT; |
800,7 → 836,7 |
} |
|
static int |
i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring, |
struct list_head *vmas) |
{ |
struct i915_vma *vma; |
876,27 → 912,32 |
return 0; |
} |
|
static struct intel_context * |
i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, |
struct intel_engine_cs *ring, const u32 ctx_id) |
{ |
struct intel_context *ctx = NULL; |
struct i915_ctx_hang_stats *hs; |
|
if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE) |
return ERR_PTR(-EINVAL); |
|
ctx = i915_gem_context_get(file->driver_priv, ctx_id); |
if (IS_ERR(ctx)) |
return ctx; |
|
hs = &ctx->hang_stats; |
if (hs->banned) { |
DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id); |
		return ERR_PTR(-EIO); |
} |
|
	return ctx; |
} |
|
static void |
i915_gem_execbuffer_move_to_active(struct list_head *vmas, |
struct intel_engine_cs *ring) |
{ |
struct i915_vma *vma; |
|
915,8 → 956,11 |
if (obj->base.write_domain) { |
obj->dirty = 1; |
obj->last_write_seqno = intel_ring_get_seqno(ring); |
|
intel_fb_obj_invalidate(obj, ring); |
|
/* update for the implicit flush after a batch */ |
obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; |
} |
|
trace_i915_gem_object_change_domain(obj, old_read, old_write); |
926,7 → 970,7 |
static void |
i915_gem_execbuffer_retire_commands(struct drm_device *dev, |
struct drm_file *file, |
struct intel_engine_cs *ring, |
struct drm_i915_gem_object *obj) |
{ |
/* Unconditionally force add_request to emit a full flush. */ |
938,13 → 982,15 |
|
static int |
i915_reset_gen7_sol_offsets(struct drm_device *dev, |
struct intel_engine_cs *ring) |
{ |
	struct drm_i915_private *dev_priv = dev->dev_private; |
int ret, i; |
|
if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) { |
DRM_DEBUG("sol reset is gen7/rcs only\n"); |
return -EINVAL; |
} |
|
ret = intel_ring_begin(ring, 4 * 3); |
if (ret) |
962,141 → 1008,273 |
} |
|
static int |
legacy_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, |
			     struct intel_engine_cs *ring, |
			     struct intel_context *ctx, |
			     struct drm_i915_gem_execbuffer2 *args, |
			     struct list_head *vmas, |
			     struct drm_i915_gem_object *batch_obj, |
			     u64 exec_start, u32 flags) |
{ |
	struct drm_clip_rect *cliprects = NULL; |
	struct drm_i915_private *dev_priv = dev->dev_private; |
	u64 exec_len; |
	int instp_mode; |
	u32 instp_mask; |
	int i, ret = 0; |
|
if (args->num_cliprects != 0) { |
if (ring != &dev_priv->ring[RCS]) { |
DRM_DEBUG("clip rectangles are only valid with the render ring\n"); |
return -EINVAL; |
} |
|
if (INTEL_INFO(dev)->gen >= 5) { |
DRM_DEBUG("clip rectangles are only valid on pre-gen5\n"); |
return -EINVAL; |
} |
|
if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) { |
DRM_DEBUG("execbuf with %u cliprects\n", |
args->num_cliprects); |
return -EINVAL; |
} |
|
cliprects = kcalloc(args->num_cliprects, |
sizeof(*cliprects), |
GFP_KERNEL); |
if (cliprects == NULL) { |
ret = -ENOMEM; |
goto error; |
} |
 |
if (copy_from_user(cliprects, |
to_user_ptr(args->cliprects_ptr), |
sizeof(*cliprects)*args->num_cliprects)) { |
ret = -EFAULT; |
goto error; |
} |
} else { |
if (args->DR4 == 0xffffffff) { |
DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); |
args->DR4 = 0; |
} |
 |
if (args->DR1 || args->DR4 || args->cliprects_ptr) { |
DRM_DEBUG("0 cliprects but dirt in cliprects fields\n"); |
return -EINVAL; |
} |
	} |
|
ret = i915_gem_execbuffer_move_to_gpu(ring, vmas); |
if (ret) |
goto error; |
|
ret = i915_switch_context(ring, ctx); |
if (ret) |
goto error; |
|
instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; |
instp_mask = I915_EXEC_CONSTANTS_MASK; |
switch (instp_mode) { |
case I915_EXEC_CONSTANTS_REL_GENERAL: |
case I915_EXEC_CONSTANTS_ABSOLUTE: |
case I915_EXEC_CONSTANTS_REL_SURFACE: |
if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) { |
DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); |
ret = -EINVAL; |
goto error; |
} |
|
if (instp_mode != dev_priv->relative_constants_mode) { |
if (INTEL_INFO(dev)->gen < 4) { |
DRM_DEBUG("no rel constants on pre-gen4\n"); |
ret = -EINVAL; |
goto error; |
} |
|
if (INTEL_INFO(dev)->gen > 5 && |
instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { |
DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); |
ret = -EINVAL; |
goto error; |
} |
|
/* The HW changed the meaning on this bit on gen6 */ |
if (INTEL_INFO(dev)->gen >= 6) |
instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; |
} |
break; |
default: |
DRM_DEBUG("execbuf with unknown constants: %d\n", mode); |
return -EINVAL; |
DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode); |
ret = -EINVAL; |
goto error; |
} |
|
if (ring == &dev_priv->ring[RCS] && |
instp_mode != dev_priv->relative_constants_mode) { |
ret = intel_ring_begin(ring, 4); |
if (ret) |
goto error; |
|
intel_ring_emit(ring, MI_NOOP); |
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); |
intel_ring_emit(ring, INSTPM); |
intel_ring_emit(ring, instp_mask << 16 | instp_mode); |
intel_ring_advance(ring); |
|
dev_priv->relative_constants_mode = instp_mode; |
} |
|
if (args->flags & I915_EXEC_GEN7_SOL_RESET) { |
ret = i915_reset_gen7_sol_offsets(dev, ring); |
if (ret) |
goto error; |
} |
|
exec_len = args->batch_len; |
if (cliprects) { |
for (i = 0; i < args->num_cliprects; i++) { |
ret = i915_emit_box(dev, &cliprects[i], |
args->DR1, args->DR4); |
if (ret) |
goto error; |
|
ret = ring->dispatch_execbuffer(ring, |
exec_start, exec_len, |
flags); |
if (ret) |
goto error; |
} |
} else { |
ret = ring->dispatch_execbuffer(ring, |
exec_start, exec_len, |
flags); |
if (ret) |
return ret; |
} |
|
trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags); |
|
i915_gem_execbuffer_move_to_active(vmas, ring); |
i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj); |
|
error: |
kfree(cliprects); |
return ret; |
} |
|
/** |
 * Find one BSD ring on which to dispatch the corresponding BSD command. |
 * The ring ID is returned, and the selection is cached in file_priv so |
 * all subsequent submissions from this file go to the same ring. |
 */ |
static int gen8_dispatch_bsd_ring(struct drm_device *dev, |
struct drm_file *file) |
{ |
struct drm_i915_private *dev_priv = dev->dev_private; |
struct drm_i915_file_private *file_priv = file->driver_priv; |
|
/* Check whether the file_priv is using one ring */ |
if (file_priv->bsd_ring) |
return file_priv->bsd_ring->id; |
else { |
/* If no, use the ping-pong mechanism to select one ring */ |
int ring_id; |
|
mutex_lock(&dev->struct_mutex); |
if (dev_priv->mm.bsd_ring_dispatch_index == 0) { |
ring_id = VCS; |
dev_priv->mm.bsd_ring_dispatch_index = 1; |
} else { |
ring_id = VCS2; |
dev_priv->mm.bsd_ring_dispatch_index = 0; |
} |
file_priv->bsd_ring = &dev_priv->ring[ring_id]; |
mutex_unlock(&dev->struct_mutex); |
return ring_id; |
} |
} |
|
static struct drm_i915_gem_object * |
eb_get_batch(struct eb_vmas *eb) |
{ |
struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list); |
|
/* |
* SNA is doing fancy tricks with compressing batch buffers, which leads |
* to negative relocation deltas. Usually that works out ok since the |
* relocate address is still positive, except when the batch is placed |
* very low in the GTT. Ensure this doesn't happen. |
* |
* Note that actual hangs have only been observed on gen7, but for |
* paranoia do it everywhere. |
*/ |
vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; |
|
return vma->obj; |
} |
|
static int |
i915_gem_do_execbuffer(struct drm_device *dev, void *data, |
struct drm_file *file, |
struct drm_i915_gem_execbuffer2 *args, |
struct drm_i915_gem_exec_object2 *exec) |
{ |
struct drm_i915_private *dev_priv = dev->dev_private; |
struct eb_vmas *eb; |
struct drm_i915_gem_object *batch_obj; |
struct intel_engine_cs *ring; |
struct intel_context *ctx; |
struct i915_address_space *vm; |
const u32 ctx_id = i915_execbuffer2_get_context_id(*args); |
u64 exec_start = args->batch_start_offset; |
u32 flags; |
int ret; |
bool need_relocs; |
|
if (!i915_gem_check_execbuffer(args)) |
return -EINVAL; |
|
ret = validate_exec_list(exec, args->buffer_count); |
if (ret) |
return ret; |
|
flags = 0; |
	if (args->flags & I915_EXEC_SECURE) |
		flags |= I915_DISPATCH_SECURE; |
if (args->flags & I915_EXEC_IS_PINNED) |
flags |= I915_DISPATCH_PINNED; |
|
if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) { |
DRM_DEBUG("execbuf with unknown ring: %d\n", |
(int)(args->flags & I915_EXEC_RING_MASK)); |
return -EINVAL; |
} |
|
if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT) |
ring = &dev_priv->ring[RCS]; |
else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) { |
if (HAS_BSD2(dev)) { |
int ring_id; |
ring_id = gen8_dispatch_bsd_ring(dev, file); |
ring = &dev_priv->ring[ring_id]; |
} else |
ring = &dev_priv->ring[VCS]; |
} else |
ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1]; |
|
if (!intel_ring_initialized(ring)) { |
DRM_DEBUG("execbuf with invalid ring: %d\n", |
(int)(args->flags & I915_EXEC_RING_MASK)); |
return -EINVAL; |
} |
|
if (args->buffer_count < 1) { |
DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); |
return -EINVAL; |
} |
|
intel_runtime_pm_get(dev_priv); |
|
1110,14 → 1288,22 |
goto pre_mutex_err; |
} |
|
ctx = i915_gem_validate_context(dev, file, ring, ctx_id); |
if (IS_ERR(ctx)) { |
mutex_unlock(&dev->struct_mutex); |
ret = PTR_ERR(ctx); |
goto pre_mutex_err; |
} |
|
i915_gem_context_reference(ctx); |
|
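	/* With full PPGTT the context carries its own address space; |
	 * otherwise everything is bound into the single global GTT. |
	 */ |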
vm = ctx->vm; |
if (!USES_FULL_PPGTT(dev)) |
vm = &dev_priv->gtt.base; |
|
eb = eb_create(args); |
if (eb == NULL) { |
i915_gem_context_unreference(ctx); |
mutex_unlock(&dev->struct_mutex); |
ret = -ENOMEM; |
goto pre_mutex_err; |
1129,7 → 1315,7 |
goto err; |
|
/* take note of the batch buffer before we might reorder the lists */ |
batch_obj = eb_get_batch(eb); |
|
/* Move the objects en-masse into the GTT, evicting if necessary. */ |
need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; |
1157,79 → 1343,55 |
goto err; |
} |
batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; |
#if 0 |
if (i915_needs_cmd_parser(ring)) { |
ret = i915_parse_cmds(ring, |
batch_obj, |
args->batch_start_offset, |
file->is_master); |
if (ret) |
goto err; |
|
/* |
* XXX: Actually do this when enabling batch copy... |
* |
* Set the DISPATCH_SECURE bit to remove the NON_SECURE bit |
* from MI_BATCH_BUFFER_START commands issued in the |
* dispatch_execbuffer implementations. We specifically don't |
* want that set when the command parser is enabled. |
*/ |
} |
#endif |
/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure |
* batch" bit. Hence we need to pin secure batches into the global gtt. |
* hsw should have this fixed, but bdw mucks it up again. */ |
if (flags & I915_DISPATCH_SECURE && |
!batch_obj->has_global_gtt_mapping) { |
/* When we have multiple VMs, we'll need to make sure that we |
* allocate space first */ |
struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj); |
BUG_ON(!vma); |
vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND); |
} |
|
if (flags & I915_DISPATCH_SECURE) |
exec_start += i915_gem_obj_ggtt_offset(batch_obj); |
else |
exec_start += i915_gem_obj_offset(batch_obj, vm); |
|
ret = legacy_ringbuffer_submission(dev, file, ring, ctx, |
args, &eb->vmas, batch_obj, exec_start, flags); |
if (ret) |
goto err; |
|
err: |
/* the request owns the ref now */ |
i915_gem_context_unreference(ctx); |
eb_destroy(eb); |
|
mutex_unlock(&dev->struct_mutex); |
|
pre_mutex_err: |
|
/* intel_gpu_busy should also get a ref, so it will free when the device |
* is really idle. */ |
intel_runtime_pm_put(dev_priv); |
1245,7 → 1407,6 |
i915_gem_execbuffer(struct drm_device *dev, void *data, |
struct drm_file *file) |
{ |
struct drm_i915_gem_execbuffer *args = data; |
struct drm_i915_gem_execbuffer2 exec2; |
struct drm_i915_gem_exec_object *exec_list = NULL; |
1301,23 → 1462,25 |
exec2.flags = I915_EXEC_RENDER; |
i915_execbuffer2_set_context_id(exec2, 0); |
|
ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list); |
if (!ret) { |
struct drm_i915_gem_exec_object __user *user_exec_list = |
to_user_ptr(args->buffers_ptr); |
|
/* Copy the new buffer offsets back to the user's exec list. */ |
for (i = 0; i < args->buffer_count; i++) { |
ret = __copy_to_user(&user_exec_list[i].offset, |
&exec2_list[i].offset, |
sizeof(user_exec_list[i].offset)); |
if (ret) { |
ret = -EFAULT; |
DRM_DEBUG("failed to copy %d exec entries " |
"back to user (%d)\n", |
args->buffer_count, ret); |
break; |
} |
} |
} |
|
drm_free_large(exec_list); |
drm_free_large(exec2_list); |
1329,7 → 1492,6 |
i915_gem_execbuffer2(struct drm_device *dev, void *data, |
struct drm_file *file) |
{ |
struct drm_i915_gem_execbuffer2 *args = data; |
struct drm_i915_gem_exec_object2 *exec2_list = NULL; |
int ret; |
1340,6 → 1502,11 |
return -EINVAL; |
} |
|
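	/* rsvd2 carries no meaning yet; rejecting non-zero values |
	 * presumably keeps the field available for future reuse without |
	 * breaking old userspace. |
	 */ |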
if (args->rsvd2 != 0) { |
DRM_DEBUG("dirty rvsd2 field\n"); |
return -EINVAL; |
} |
|
exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count, |
GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); |
if (exec2_list == NULL) { |
1358,20 → 1525,26 |
return -EFAULT; |
} |
|
ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list); |
if (!ret) { |
/* Copy the new buffer offsets back to the user's exec list. */ |
struct drm_i915_gem_exec_object2 __user *user_exec_list = |
to_user_ptr(args->buffers_ptr); |
int i; |
|
for (i = 0; i < args->buffer_count; i++) { |
ret = __copy_to_user(&user_exec_list[i].offset, |
&exec2_list[i].offset, |
sizeof(user_exec_list[i].offset)); |
if (ret) { |
ret = -EFAULT; |
DRM_DEBUG("failed to copy %d exec entries " |
"back to user (%d)\n", |
args->buffer_count, ret); |
"back to user\n", |
args->buffer_count); |
break; |
} |
} |
} |
|
kfree(exec2_list); |
return ret; |