35,6 → 35,7
|
#define __EXEC_OBJECT_HAS_PIN (1<<31) |
#define __EXEC_OBJECT_HAS_FENCE (1<<30) |
#define __EXEC_OBJECT_NEEDS_MAP (1<<29) |
#define __EXEC_OBJECT_NEEDS_BIAS (1<<28) |
|
#define BATCH_OFFSET_BIAS (256*1024) |
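/*
 * These __EXEC_OBJECT_* flags live in the high bits of
 * drm_i915_gem_exec_object2.flags, above the userspace-visible
 * EXEC_OBJECT_* bits. BATCH_OFFSET_BIAS presumably exists so a batch can
 * be pinned away from the very start of the address space, keeping
 * relocation deltas from wrapping negative.
 */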
101,7 → 102,6
struct i915_address_space *vm, |
struct drm_file *file) |
{ |
struct drm_i915_private *dev_priv = vm->dev->dev_private; |
struct drm_i915_gem_object *obj; |
struct list_head objects; |
int i, ret; |
136,21 → 136,7
i = 0; |
while (!list_empty(&objects)) { |
struct i915_vma *vma; |
struct i915_address_space *bind_vm = vm; |
|
if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT && |
USES_FULL_PPGTT(vm->dev)) { |
ret = -EINVAL; |
goto err; |
} |
|
/* If we have secure dispatch, or userspace assures us that
* it knows what it's doing, use the GGTT VM.
*/
if (((args->flags & I915_EXEC_SECURE) && |
(i == (args->buffer_count - 1)))) |
bind_vm = &dev_priv->gtt.base; |
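/*
 * This secure-dispatch special case goes away: rather than binding the
 * batch into the GGTT VM here, the batch object is pinned into the
 * global GTT at dispatch time (see the I915_DISPATCH_SECURE hunk near
 * the end of the patch).
 */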
|
obj = list_first_entry(&objects, |
struct drm_i915_gem_object, |
obj_exec_link); |
163,7 → 149,7
* from the (obj, vm) we don't run the risk of creating |
* duplicated vmas for the same vm. |
*/ |
vma = i915_gem_obj_lookup_or_create_vma(obj, bind_vm); |
vma = i915_gem_obj_lookup_or_create_vma(obj, vm); |
if (IS_ERR(vma)) { |
DRM_DEBUG("Failed to lookup VMA\n"); |
ret = PTR_ERR(vma); |
276,7 → 262,6
uint64_t target_offset) |
{ |
struct drm_device *dev = obj->base.dev; |
struct drm_i915_private *dev_priv = dev->dev_private; |
uint32_t page_offset = offset_in_page(reloc->offset); |
uint64_t delta = reloc->delta + target_offset; |
char *vaddr; |
286,8 → 271,8
if (ret) |
return ret; |
|
vaddr = (char *)dev_priv->gtt.mappable + 4096;
MapPage(vaddr, (addr_t)i915_gem_object_get_page(obj,
reloc->offset >> PAGE_SHIFT), PG_SW);
vaddr = kmap_atomic(i915_gem_object_get_page(obj, |
reloc->offset >> PAGE_SHIFT)); |
*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta); |
|
if (INTEL_INFO(dev)->gen >= 8) { |
294,13 → 279,16
page_offset = offset_in_page(page_offset + sizeof(uint32_t)); |
|
if (page_offset == 0) { |
MapPage(vaddr, (addr_t)i915_gem_object_get_page(obj,
(reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT), PG_SW);
kunmap_atomic(vaddr); |
vaddr = kmap_atomic(i915_gem_object_get_page(obj, |
(reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT)); |
} |
|
*(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta); |
} |
|
kunmap_atomic(vaddr); |
|
return 0; |
} |
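/*
 * Gen8+ relocations are 64 bits wide, so the upper dword may spill onto
 * the following page; the kmap_atomic mapping is swapped mid-write when
 * page_offset wraps back to zero.
 */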
|
312,7 → 300,7
struct drm_device *dev = obj->base.dev; |
struct drm_i915_private *dev_priv = dev->dev_private; |
uint64_t delta = reloc->delta + target_offset; |
uint32_t __iomem *reloc_entry; |
uint64_t offset; |
void __iomem *reloc_page; |
int ret; |
|
325,15 → 313,15
return ret; |
|
/* Map the page containing the relocation we're going to perform. */ |
reloc->offset += i915_gem_obj_ggtt_offset(obj); |
offset = i915_gem_obj_ggtt_offset(obj); |
offset += reloc->offset; |
MapPage(dev_priv->gtt.mappable, dev_priv->gtt.mappable_base +
(reloc->offset & PAGE_MASK), PG_SW); |
(offset & PAGE_MASK), PG_SW); |
reloc_page = dev_priv->gtt.mappable; |
reloc_entry = (uint32_t __iomem *) |
(reloc_page + offset_in_page(reloc->offset)); |
iowrite32(lower_32_bits(delta), reloc_entry); |
iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));

return 0; |
} |
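/*
 * The target address is now accumulated in a local 64-bit variable
 * instead of being added back into reloc->offset, leaving the
 * user-supplied relocation entry unmodified.
 */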
|
363,12 → 351,9
* through the ppgtt for non-secure batchbuffers. */
if (unlikely(IS_GEN6(dev) && |
reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION && |
!target_i915_obj->has_global_gtt_mapping)) { |
struct i915_vma *vma = |
list_first_entry(&target_i915_obj->vma_list, |
typeof(*vma), vma_link); |
vma->bind_vma(vma, target_i915_obj->cache_level, GLOBAL_BIND); |
} |
!(target_vma->bound & GLOBAL_BIND))) |
target_vma->bind_vma(target_vma, target_i915_obj->cache_level, |
GLOBAL_BIND); |
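/*
 * Note the check is now against the vma's bound flags rather than the
 * object-wide has_global_gtt_mapping, following the move to per-vma
 * binding state.
 */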
|
/* Validate that the target is in a valid r/w GPU domain */ |
if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) { |
523,14 → 508,6
} |
|
static int |
need_reloc_mappable(struct i915_vma *vma) |
{ |
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; |
return entry->relocation_count && !use_cpu_reloc(vma->obj) && |
i915_is_ggtt(vma->vm); |
} |
|
static int |
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, |
struct intel_engine_cs *ring, |
bool *need_reloc) |
537,20 → 514,12
{ |
struct drm_i915_gem_object *obj = vma->obj; |
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; |
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; |
bool need_fence; |
uint64_t flags; |
int ret; |
|
flags = 0; |
|
need_fence = |
has_fenced_gpu_access && |
entry->flags & EXEC_OBJECT_NEEDS_FENCE && |
obj->tiling_mode != I915_TILING_NONE; |
if (need_fence || need_reloc_mappable(vma)) |
flags |= PIN_MAPPABLE; |
|
if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) |
flags |= PIN_GLOBAL | PIN_MAPPABLE; |
if (entry->flags & EXEC_OBJECT_NEEDS_GTT) |
flags |= PIN_GLOBAL; |
if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) |
562,7 → 531,6
|
entry->flags |= __EXEC_OBJECT_HAS_PIN; |
|
if (has_fenced_gpu_access) { |
if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { |
ret = i915_gem_object_get_fence(obj); |
if (ret) |
570,9 → 538,6
|
if (i915_gem_object_pin_fence(obj)) |
entry->flags |= __EXEC_OBJECT_HAS_FENCE; |
|
obj->pending_fenced_gpu_access = true; |
} |
} |
|
if (entry->offset != vma->node.start) { |
589,19 → 554,33
} |
|
static bool |
eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access) |
need_reloc_mappable(struct i915_vma *vma) |
{ |
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; |
|
if (entry->relocation_count == 0) |
return false; |
|
if (!i915_is_ggtt(vma->vm)) |
return false; |
|
/* See also use_cpu_reloc() */ |
if (HAS_LLC(vma->obj->base.dev)) |
return false; |
|
if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU) |
return false; |
|
return true; |
} |
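/*
 * A relocation only needs the mappable GGTT aperture when the CPU path
 * would be costly: on LLC platforms, or when the object is already in
 * the CPU write domain, the write can go through the CPU map instead
 * (hence the cross-reference to use_cpu_reloc()).
 */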
|
static bool |
eb_vma_misplaced(struct i915_vma *vma) |
{ |
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; |
struct drm_i915_gem_object *obj = vma->obj; |
bool need_fence, need_mappable; |
|
need_fence = |
has_fenced_gpu_access && |
entry->flags & EXEC_OBJECT_NEEDS_FENCE && |
obj->tiling_mode != I915_TILING_NONE; |
need_mappable = need_fence || need_reloc_mappable(vma); |
|
WARN_ON((need_mappable || need_fence) && |
WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && |
!i915_is_ggtt(vma->vm)); |
|
if (entry->alignment && |
608,7 → 587,7
vma->node.start & (entry->alignment - 1)) |
return true; |
|
if (need_mappable && !obj->map_and_fenceable) |
if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable) |
return true; |
|
if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && |
630,9 → 609,6
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; |
int retry; |
|
if (list_empty(vmas)) |
return 0; |
|
i915_gem_retire_requests_ring(ring); |
|
vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; |
646,20 → 622,21
obj = vma->obj; |
entry = vma->exec_entry; |
|
if (!has_fenced_gpu_access) |
entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; |
need_fence = |
has_fenced_gpu_access && |
entry->flags & EXEC_OBJECT_NEEDS_FENCE && |
obj->tiling_mode != I915_TILING_NONE; |
need_mappable = need_fence || need_reloc_mappable(vma); |
|
if (need_mappable) |
if (need_mappable) { |
entry->flags |= __EXEC_OBJECT_NEEDS_MAP; |
list_move(&vma->exec_list, &ordered_vmas); |
else |
} else |
list_move_tail(&vma->exec_list, &ordered_vmas); |
|
obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND; |
obj->base.pending_write_domain = 0; |
obj->pending_fenced_gpu_access = false; |
} |
list_splice(&ordered_vmas, vmas); |
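/*
 * VMAs that need a mappable binding are moved to the front of the list
 * so they are bound while aperture space is still plentiful; everything
 * else is appended behind them.
 */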
|
684,7 → 661,7
if (!drm_mm_node_allocated(&vma->node)) |
continue; |
|
if (eb_vma_misplaced(vma, has_fenced_gpu_access)) |
if (eb_vma_misplaced(vma)) |
ret = i915_vma_unbind(vma); |
else |
ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs); |
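/*
 * A misplaced vma (bad alignment, missing mappable binding, or sitting
 * below the required bias) is unbound here and rebound on a later pass
 * of the retry loop.
 */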
732,9 → 709,6
int i, total, ret; |
unsigned count = args->buffer_count; |
|
if (WARN_ON(list_empty(&eb->vmas))) |
return 0; |
|
vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm; |
|
/* We may process another execbuffer during the unlock... */ |
878,18 → 852,24
} |
|
static int |
validate_exec_list(struct drm_i915_gem_exec_object2 *exec, |
validate_exec_list(struct drm_device *dev, |
struct drm_i915_gem_exec_object2 *exec, |
int count) |
{ |
int i; |
unsigned relocs_total = 0; |
unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry); |
unsigned invalid_flags; |
int i; |
|
invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; |
if (USES_FULL_PPGTT(dev)) |
invalid_flags |= EXEC_OBJECT_NEEDS_GTT; |
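/*
 * Rejecting EXEC_OBJECT_NEEDS_GTT up front under full PPGTT replaces
 * the equivalent -EINVAL check that used to sit in the buffer look-up
 * loop.
 */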
|
for (i = 0; i < count; i++) { |
char __user *ptr = to_user_ptr(exec[i].relocs_ptr); |
int length; /* limited by fault_in_pages_readable() */ |
|
if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS) |
if (exec[i].flags & invalid_flags) |
return -EINVAL; |
|
/* First check for malicious input causing overflow in |
932,16 → 912,26
return ERR_PTR(-EIO); |
} |
|
if (i915.enable_execlists && !ctx->engine[ring->id].state) { |
int ret = intel_lr_context_deferred_create(ctx, ring); |
if (ret) { |
DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret); |
return ERR_PTR(ret); |
} |
} |
|
return ctx; |
} |
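/*
 * With execlists, each context carries per-engine state (the logical
 * ring context); it is created lazily here on first use rather than at
 * context creation.
 */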
|
static void |
void |
i915_gem_execbuffer_move_to_active(struct list_head *vmas, |
struct intel_engine_cs *ring) |
{ |
u32 seqno = intel_ring_get_seqno(ring); |
struct i915_vma *vma; |
|
list_for_each_entry(vma, vmas, exec_list) { |
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; |
struct drm_i915_gem_object *obj = vma->obj; |
u32 old_read = obj->base.read_domains; |
u32 old_write = obj->base.write_domain; |
950,12 → 940,11
if (obj->base.write_domain == 0) |
obj->base.pending_read_domains |= obj->base.read_domains; |
obj->base.read_domains = obj->base.pending_read_domains; |
obj->fenced_gpu_access = obj->pending_fenced_gpu_access; |
|
i915_vma_move_to_active(vma, ring); |
if (obj->base.write_domain) { |
obj->dirty = 1; |
obj->last_write_seqno = intel_ring_get_seqno(ring); |
obj->last_write_seqno = seqno; |
|
intel_fb_obj_invalidate(obj, ring); |
|
962,12 → 951,20
/* update for the implicit flush after a batch */ |
obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; |
} |
if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { |
obj->last_fenced_seqno = seqno; |
if (entry->flags & __EXEC_OBJECT_HAS_FENCE) { |
struct drm_i915_private *dev_priv = to_i915(ring->dev); |
list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list, |
&dev_priv->mm.fence_list); |
} |
} |
|
trace_i915_gem_object_change_domain(obj, old_read, old_write); |
} |
} |
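/*
 * Fence bookkeeping is now driven by the exec entry flags: the fenced
 * seqno is recorded for EXEC_OBJECT_NEEDS_FENCE, and the fence register
 * is moved on the LRU only when this execbuf actually holds it
 * (__EXEC_OBJECT_HAS_FENCE); the pending_fenced_gpu_access object flag
 * goes away.
 */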
|
static void |
void |
i915_gem_execbuffer_retire_commands(struct drm_device *dev, |
struct drm_file *file, |
struct intel_engine_cs *ring, |
1008,7 → 1005,48
} |
|
static int |
legacy_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, |
i915_emit_box(struct intel_engine_cs *ring, |
struct drm_clip_rect *box, |
int DR1, int DR4) |
{ |
int ret; |
|
if (box->y2 <= box->y1 || box->x2 <= box->x1 || |
box->y2 <= 0 || box->x2 <= 0) { |
DRM_ERROR("Bad box %d,%d..%d,%d\n", |
box->x1, box->y1, box->x2, box->y2); |
return -EINVAL; |
} |
|
if (INTEL_INFO(ring->dev)->gen >= 4) { |
ret = intel_ring_begin(ring, 4); |
if (ret) |
return ret; |
|
intel_ring_emit(ring, GFX_OP_DRAWRECT_INFO_I965); |
intel_ring_emit(ring, (box->x1 & 0xffff) | box->y1 << 16); |
intel_ring_emit(ring, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16); |
intel_ring_emit(ring, DR4); |
} else { |
ret = intel_ring_begin(ring, 6); |
if (ret) |
return ret; |
|
intel_ring_emit(ring, GFX_OP_DRAWRECT_INFO); |
intel_ring_emit(ring, DR1); |
intel_ring_emit(ring, (box->x1 & 0xffff) | box->y1 << 16); |
intel_ring_emit(ring, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16); |
intel_ring_emit(ring, DR4); |
intel_ring_emit(ring, 0); |
} |
intel_ring_advance(ring); |
|
return 0; |
} |
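/*
 * i915_emit_box appears to have moved into the execbuffer path and now
 * takes the engine rather than a drm_device; the cliprect loop below is
 * updated to match.
 */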
|
|
int |
i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file, |
struct intel_engine_cs *ring, |
struct intel_context *ctx, |
struct drm_i915_gem_execbuffer2 *args, |
1135,7 → 1173,7
exec_len = args->batch_len; |
if (cliprects) { |
for (i = 0; i < args->num_cliprects; i++) { |
ret = i915_emit_box(dev, &cliprects[i], |
ret = i915_emit_box(ring, &cliprects[i], |
args->DR1, args->DR4); |
if (ret) |
goto error; |
1235,7 → 1273,7
if (!i915_gem_check_execbuffer(args)) |
return -EINVAL; |
|
ret = validate_exec_list(exec, args->buffer_count); |
ret = validate_exec_list(dev, exec, args->buffer_count); |
if (ret) |
return ret; |
|
1282,12 → 1320,6
if (ret) |
goto pre_mutex_err; |
|
if (dev_priv->ums.mm_suspended) { |
mutex_unlock(&dev->struct_mutex); |
ret = -EBUSY; |
goto pre_mutex_err; |
} |
|
ctx = i915_gem_validate_context(dev, file, ring, ctx_id); |
if (IS_ERR(ctx)) { |
mutex_unlock(&dev->struct_mutex); |
1297,8 → 1329,9
|
i915_gem_context_reference(ctx); |
|
vm = ctx->vm; |
if (!USES_FULL_PPGTT(dev)) |
if (ctx->ppgtt) |
vm = &ctx->ppgtt->base; |
else |
vm = &dev_priv->gtt.base; |
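/*
 * The address space now comes from ctx->ppgtt (full PPGTT) with an
 * explicit fallback to the global GTT, replacing the old ctx->vm
 * pointer.
 */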
|
eb = eb_create(args); |
1365,25 → 1398,36
/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure |
* batch" bit. Hence we need to pin secure batches into the global gtt. |
* hsw should have this fixed, but bdw mucks it up again. */ |
if (flags & I915_DISPATCH_SECURE && |
!batch_obj->has_global_gtt_mapping) { |
/* When we have multiple VMs, we'll need to make sure that we |
* allocate space first */ |
struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj); |
BUG_ON(!vma); |
vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND); |
} |
if (flags & I915_DISPATCH_SECURE) { |
/*
* At first glance it looks freaky that we pin the batch here
* outside of the reservation loop. But:
* - The batch is already pinned into the relevant ppgtt, so we
* already have the backing storage fully allocated.
* - No other BO uses the global gtt (well, contexts, but meh),
* so we don't really have issues with multiple objects not
* fitting due to fragmentation.
* So this is actually safe.
*/
ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0); |
if (ret) |
goto err; |
|
if (flags & I915_DISPATCH_SECURE) |
exec_start += i915_gem_obj_ggtt_offset(batch_obj); |
else |
} else |
exec_start += i915_gem_obj_offset(batch_obj, vm); |
|
ret = legacy_ringbuffer_submission(dev, file, ring, ctx, |
args, &eb->vmas, batch_obj, exec_start, flags); |
if (ret) |
goto err; |
ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args, |
&eb->vmas, batch_obj, exec_start, flags); |
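/*
 * Submission goes through the dev_priv->gt.do_execbuf vfunc,
 * presumably so the legacy ringbuffer path
 * (i915_gem_ringbuffer_submission above) and the execlists path can
 * share this ioctl.
 */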
|
/* |
* FIXME: We crucially rely upon the active tracking for the (ppgtt) |
* batch vma for correctness. To make this less ugly and less fragile, it
* needs to be adjusted to also track the ggtt batch vma properly as |
* active. |
*/ |
if (flags & I915_DISPATCH_SECURE) |
i915_gem_object_ggtt_unpin(batch_obj); |
err: |
/* the request owns the ref now */ |
i915_gem_context_unreference(ctx); |