Subversion Repositories Kolibri OS

Compare Revisions

Rev 5353 → Rev 5354

/drivers/video/drm/i915/i915_gem_execbuffer.c
35,6 → 35,7
 
#define __EXEC_OBJECT_HAS_PIN (1<<31)
#define __EXEC_OBJECT_HAS_FENCE (1<<30)
#define __EXEC_OBJECT_NEEDS_MAP (1<<29)
#define __EXEC_OBJECT_NEEDS_BIAS (1<<28)
 
#define BATCH_OFFSET_BIAS (256*1024)
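
This hunk adds __EXEC_OBJECT_NEEDS_MAP to the driver-internal flag bits that share each exec object's 32-bit flags word with the userspace EXEC_OBJECT_* bits, keeping the internal bits at the top of the word. A minimal standalone sketch of that packing; the userspace bit values are assumptions for illustration, not copied from the uapi header:

/* Standalone sketch: driver-internal flags live in the top bits of the same
 * 32-bit word as the userspace EXEC_OBJECT_* flags, so the two sets must
 * never overlap. The EXEC_OBJECT_* values below are assumptions for
 * illustration, not copied from the real uapi header. */
#include <assert.h>
#include <stdint.h>

#define EXEC_OBJECT_NEEDS_FENCE   (1u << 0)   /* assumed userspace bit */
#define EXEC_OBJECT_NEEDS_GTT     (1u << 1)   /* assumed userspace bit */

#define __EXEC_OBJECT_HAS_PIN     (1u << 31)  /* internal bits, as in the diff */
#define __EXEC_OBJECT_HAS_FENCE   (1u << 30)
#define __EXEC_OBJECT_NEEDS_MAP   (1u << 29)  /* new in this revision */
#define __EXEC_OBJECT_NEEDS_BIAS  (1u << 28)

int main(void)
{
	uint32_t internal = __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE |
			    __EXEC_OBJECT_NEEDS_MAP | __EXEC_OBJECT_NEEDS_BIAS;
	uint32_t user = EXEC_OBJECT_NEEDS_FENCE | EXEC_OBJECT_NEEDS_GTT;

	assert((internal & user) == 0);
	return 0;
}
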
101,7 → 102,6
struct i915_address_space *vm,
struct drm_file *file)
{
struct drm_i915_private *dev_priv = vm->dev->dev_private;
struct drm_i915_gem_object *obj;
struct list_head objects;
int i, ret;
136,21 → 136,7
i = 0;
while (!list_empty(&objects)) {
struct i915_vma *vma;
struct i915_address_space *bind_vm = vm;
 
if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT &&
USES_FULL_PPGTT(vm->dev)) {
ret = -EINVAL;
goto err;
}
 
/* If we have secure dispatch, or the userspace assures us that
* they know what they're doing, use the GGTT VM.
*/
if (((args->flags & I915_EXEC_SECURE) &&
(i == (args->buffer_count - 1))))
bind_vm = &dev_priv->gtt.base;
 
obj = list_first_entry(&objects,
struct drm_i915_gem_object,
obj_exec_link);
163,7 → 149,7
* from the (obj, vm) we don't run the risk of creating
* duplicated vmas for the same vm.
*/
vma = i915_gem_obj_lookup_or_create_vma(obj, bind_vm);
vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
if (IS_ERR(vma)) {
DRM_DEBUG("Failed to lookup VMA\n");
ret = PTR_ERR(vma);
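
The comment in this hunk explains why the VMA is looked up (or created) keyed on the (obj, vm) pair: asking twice for the same pair must hand back the existing VMA rather than a duplicate. A standalone sketch of that lookup-or-create pattern, with simplified stand-in types rather than the driver's own:

/* Standalone sketch of the lookup-or-create idea described in the comment:
 * VMAs are keyed on the (object, address space) pair, so requesting the
 * same pair twice returns the existing node instead of a duplicate.
 * The types are simplified stand-ins, not the driver's. */
#include <stdio.h>
#include <stdlib.h>

struct vma {
	const void *obj;	/* stand-in for drm_i915_gem_object */
	const void *vm;		/* stand-in for i915_address_space */
	struct vma *next;
};

static struct vma *lookup_or_create_vma(struct vma **list,
					const void *obj, const void *vm)
{
	struct vma *v;

	for (v = *list; v; v = v->next)
		if (v->obj == obj && v->vm == vm)
			return v;	/* already have one for this (obj, vm) */

	v = calloc(1, sizeof(*v));
	if (!v)
		return NULL;
	v->obj = obj;
	v->vm = vm;
	v->next = *list;
	*list = v;
	return v;
}

int main(void)
{
	struct vma *vmas = NULL;
	int obj, ggtt;

	/* Two lookups for the same (obj, vm) pair yield the same VMA. */
	printf("%d\n", lookup_or_create_vma(&vmas, &obj, &ggtt) ==
		       lookup_or_create_vma(&vmas, &obj, &ggtt));
	return 0;
}
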
276,7 → 262,6
uint64_t target_offset)
{
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
uint32_t page_offset = offset_in_page(reloc->offset);
uint64_t delta = reloc->delta + target_offset;
char *vaddr;
286,8 → 271,8
if (ret)
return ret;
 
vaddr = (char*)dev_priv->gtt.mappable+4096;
MapPage(vaddr,(addr_t)i915_gem_object_get_page(obj,reloc->offset >> PAGE_SHIFT), PG_SW);
vaddr = kmap_atomic(i915_gem_object_get_page(obj,
reloc->offset >> PAGE_SHIFT));
*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
 
if (INTEL_INFO(dev)->gen >= 8) {
294,13 → 279,16
page_offset = offset_in_page(page_offset + sizeof(uint32_t));
 
if (page_offset == 0) {
MapPage(vaddr,(addr_t)i915_gem_object_get_page(obj,
(reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT), PG_SW);
kunmap_atomic(vaddr);
vaddr = kmap_atomic(i915_gem_object_get_page(obj,
(reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
}
 
*(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
}
 
kunmap_atomic(vaddr);
 
return 0;
}
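
After this change the CPU relocation path uses kmap_atomic() instead of the port's MapPage() window, and on gen8+ it writes the 64-bit delta as two 32-bit halves, remapping when the second dword falls on the next page. A kernel-style sketch of that pattern; relocate_dword_pair() and its parameters are illustrative, not the driver's real helper:

/* Kernel-style sketch of the CPU relocation write after this change:
 * map the page with kmap_atomic(), store the lower 32 bits, and on gen8+
 * store the upper 32 bits too, remapping if the second dword crosses into
 * the next page. */
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/mm.h>

static void relocate_dword_pair(struct page *pages[], u64 offset,
				u64 delta, bool gen8)
{
	char *vaddr = kmap_atomic(pages[offset >> PAGE_SHIFT]);
	u32 page_offset = offset_in_page(offset);

	*(u32 *)(vaddr + page_offset) = lower_32_bits(delta);

	if (gen8) {
		page_offset = offset_in_page(page_offset + sizeof(u32));
		if (page_offset == 0) {
			/* The second dword starts on the next page: remap. */
			kunmap_atomic(vaddr);
			vaddr = kmap_atomic(pages[(offset + sizeof(u32)) >> PAGE_SHIFT]);
		}
		*(u32 *)(vaddr + page_offset) = upper_32_bits(delta);
	}

	kunmap_atomic(vaddr);
}
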
 
312,7 → 300,7
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
uint64_t delta = reloc->delta + target_offset;
uint32_t __iomem *reloc_entry;
uint64_t offset;
void __iomem *reloc_page;
int ret;
 
325,15 → 313,15
return ret;
 
/* Map the page containing the relocation we're going to perform. */
reloc->offset += i915_gem_obj_ggtt_offset(obj);
offset = i915_gem_obj_ggtt_offset(obj);
offset += reloc->offset;
MapPage(dev_priv->gtt.mappable,dev_priv->gtt.mappable_base +
(reloc->offset & PAGE_MASK), PG_SW);
(offset & PAGE_MASK), PG_SW);
reloc_page = dev_priv->gtt.mappable;
reloc_entry = (uint32_t __iomem *)
(reloc_page + offset_in_page(reloc->offset));
iowrite32(lower_32_bits(delta), reloc_entry);
iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
 
 
 
return 0;
}
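
The GTT path now accumulates the relocation target in a local 64-bit offset instead of adding the GGTT base into reloc->offset, which userspace owns and which may be walked again on a retry. A kernel-style sketch of the resulting write, using io_mapping_map_atomic_wc() as upstream Linux does, whereas this port maps a fixed window with MapPage(); gtt_write_reloc() itself is illustrative:

/* Kernel-style sketch of the GTT relocation write after this change: the
 * GGTT base and reloc->offset are summed into a local 64-bit offset, so the
 * userspace-visible reloc->offset is left untouched, and the value is
 * written through an atomic mapping of the page containing it. */
#include <linux/io.h>
#include <linux/io-mapping.h>
#include <linux/kernel.h>
#include <linux/mm.h>

static void gtt_write_reloc(struct io_mapping *mappable,
			    u64 ggtt_offset, u64 reloc_offset, u64 delta)
{
	u64 offset = ggtt_offset + reloc_offset;	/* reloc_offset is not modified */
	void __iomem *reloc_page;

	reloc_page = io_mapping_map_atomic_wc(mappable, offset & PAGE_MASK);
	iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
	io_mapping_unmap_atomic(reloc_page);
}
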
 
363,12 → 351,9
* through the ppgtt for non_secure batchbuffers. */
if (unlikely(IS_GEN6(dev) &&
reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
!target_i915_obj->has_global_gtt_mapping)) {
struct i915_vma *vma =
list_first_entry(&target_i915_obj->vma_list,
typeof(*vma), vma_link);
vma->bind_vma(vma, target_i915_obj->cache_level, GLOBAL_BIND);
}
!(target_vma->bound & GLOBAL_BIND)))
target_vma->bind_vma(target_vma, target_i915_obj->cache_level,
GLOBAL_BIND);
 
/* Validate that the target is in a valid r/w GPU domain */
if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
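
The check above relies on the identity that w & (w - 1) clears the lowest set bit, so the result is non-zero exactly when more than one write-domain bit is set. A standalone illustration:

/* Standalone illustration of the write-domain check: w & (w - 1) is zero
 * iff at most one bit of w is set, so a non-zero result means the
 * relocation asked for more than one write domain at once. The domain
 * values here are illustrative. */
#include <assert.h>
#include <stdint.h>

static int more_than_one_bit(uint32_t w)
{
	return (w & (w - 1)) != 0;
}

int main(void)
{
	assert(!more_than_one_bit(0));		/* no write domain */
	assert(!more_than_one_bit(1u << 3));	/* a single domain is fine */
	assert(more_than_one_bit((1u << 3) | (1u << 4)));	/* two domains: rejected */
	return 0;
}
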
523,14 → 508,6
}
 
static int
need_reloc_mappable(struct i915_vma *vma)
{
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
i915_is_ggtt(vma->vm);
}
 
static int
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
struct intel_engine_cs *ring,
bool *need_reloc)
537,20 → 514,12
{
struct drm_i915_gem_object *obj = vma->obj;
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
bool need_fence;
uint64_t flags;
int ret;
 
flags = 0;
 
need_fence =
has_fenced_gpu_access &&
entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
obj->tiling_mode != I915_TILING_NONE;
if (need_fence || need_reloc_mappable(vma))
flags |= PIN_MAPPABLE;
 
if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
flags |= PIN_GLOBAL | PIN_MAPPABLE;
if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
flags |= PIN_GLOBAL;
if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
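
With __EXEC_OBJECT_NEEDS_MAP precomputed during the sort pass, the reservation path now simply translates the exec-entry flags into pin flags instead of re-deriving fence requirements here. A standalone sketch of that translation; the PIN_* values are assumed stand-ins, not the driver's definitions:

/* Standalone sketch of the flag translation used when pinning. */
#include <stdint.h>
#include <stdio.h>

#define EXEC_OBJECT_NEEDS_GTT     (1u << 1)
#define __EXEC_OBJECT_NEEDS_MAP   (1u << 29)
#define __EXEC_OBJECT_NEEDS_BIAS  (1u << 28)

#define PIN_MAPPABLE     (1u << 0)   /* assumed values, for illustration */
#define PIN_GLOBAL       (1u << 1)
#define PIN_OFFSET_BIAS  (1u << 2)

#define BATCH_OFFSET_BIAS (256 * 1024)

static uint64_t exec_flags_to_pin_flags(uint32_t entry_flags)
{
	uint64_t flags = 0;

	if (entry_flags & __EXEC_OBJECT_NEEDS_MAP)
		flags |= PIN_GLOBAL | PIN_MAPPABLE;
	if (entry_flags & EXEC_OBJECT_NEEDS_GTT)
		flags |= PIN_GLOBAL;
	if (entry_flags & __EXEC_OBJECT_NEEDS_BIAS)
		flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;

	return flags;
}

int main(void)
{
	printf("0x%llx\n", (unsigned long long)
	       exec_flags_to_pin_flags(__EXEC_OBJECT_NEEDS_MAP | EXEC_OBJECT_NEEDS_GTT));
	return 0;
}
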
562,7 → 531,6
 
entry->flags |= __EXEC_OBJECT_HAS_PIN;
 
if (has_fenced_gpu_access) {
if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
ret = i915_gem_object_get_fence(obj);
if (ret)
570,10 → 538,7
 
if (i915_gem_object_pin_fence(obj))
entry->flags |= __EXEC_OBJECT_HAS_FENCE;
 
obj->pending_fenced_gpu_access = true;
}
}
 
if (entry->offset != vma->node.start) {
entry->offset = vma->node.start;
589,19 → 554,33
}
 
static bool
eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
need_reloc_mappable(struct i915_vma *vma)
{
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
 
if (entry->relocation_count == 0)
return false;
 
if (!i915_is_ggtt(vma->vm))
return false;
 
/* See also use_cpu_reloc() */
if (HAS_LLC(vma->obj->base.dev))
return false;
 
if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
return false;
 
return true;
}
 
static bool
eb_vma_misplaced(struct i915_vma *vma)
{
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
struct drm_i915_gem_object *obj = vma->obj;
bool need_fence, need_mappable;
 
need_fence =
has_fenced_gpu_access &&
entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
obj->tiling_mode != I915_TILING_NONE;
need_mappable = need_fence || need_reloc_mappable(vma);
 
WARN_ON((need_mappable || need_fence) &&
WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
!i915_is_ggtt(vma->vm));
 
if (entry->alignment &&
608,7 → 587,7
vma->node.start & (entry->alignment - 1))
return true;
 
if (need_mappable && !obj->map_and_fenceable)
if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
return true;
 
if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
630,9 → 609,6
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
int retry;
 
if (list_empty(vmas))
return 0;
 
i915_gem_retire_requests_ring(ring);
 
vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
646,20 → 622,21
obj = vma->obj;
entry = vma->exec_entry;
 
if (!has_fenced_gpu_access)
entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
need_fence =
has_fenced_gpu_access &&
entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
obj->tiling_mode != I915_TILING_NONE;
need_mappable = need_fence || need_reloc_mappable(vma);
 
if (need_mappable)
if (need_mappable) {
entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
list_move(&vma->exec_list, &ordered_vmas);
else
} else
list_move_tail(&vma->exec_list, &ordered_vmas);
 
obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
obj->base.pending_write_domain = 0;
obj->pending_fenced_gpu_access = false;
}
list_splice(&ordered_vmas, vmas);
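
The loop above now does that precomputation once per object: the fence request is dropped on hardware where GPU access does not need fences (gen4 and later in this code), and objects that must end up in the mappable aperture are tagged with __EXEC_OBJECT_NEEDS_MAP and moved to the front of the list. A standalone sketch of the flag precomputation, with simplified stand-in types:

/* Standalone sketch of the per-object precompute step. */
#include <stdbool.h>
#include <stdint.h>

#define EXEC_OBJECT_NEEDS_FENCE (1u << 0)
#define __EXEC_OBJECT_NEEDS_MAP (1u << 29)

enum tiling { TILING_NONE, TILING_X, TILING_Y };

static void precompute_exec_flags(uint32_t *entry_flags, enum tiling tiling,
				  bool has_fenced_gpu_access,
				  bool needs_gtt_reloc)
{
	bool need_fence, need_mappable;

	if (!has_fenced_gpu_access)
		*entry_flags &= ~EXEC_OBJECT_NEEDS_FENCE;

	need_fence = (*entry_flags & EXEC_OBJECT_NEEDS_FENCE) &&
		     tiling != TILING_NONE;
	need_mappable = need_fence || needs_gtt_reloc;

	if (need_mappable)
		*entry_flags |= __EXEC_OBJECT_NEEDS_MAP;
}

int main(void)
{
	uint32_t flags = EXEC_OBJECT_NEEDS_FENCE;

	/* Pre-gen4 hardware with a tiled object: must be mappable. */
	precompute_exec_flags(&flags, TILING_X, true, false);
	return (flags & __EXEC_OBJECT_NEEDS_MAP) ? 0 : 1;
}
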
 
684,7 → 661,7
if (!drm_mm_node_allocated(&vma->node))
continue;
 
if (eb_vma_misplaced(vma, has_fenced_gpu_access))
if (eb_vma_misplaced(vma))
ret = i915_vma_unbind(vma);
else
ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
732,9 → 709,6
int i, total, ret;
unsigned count = args->buffer_count;
 
if (WARN_ON(list_empty(&eb->vmas)))
return 0;
 
vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;
 
/* We may process another execbuffer during the unlock... */
878,18 → 852,24
}
 
static int
validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
validate_exec_list(struct drm_device *dev,
struct drm_i915_gem_exec_object2 *exec,
int count)
{
int i;
unsigned relocs_total = 0;
unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
unsigned invalid_flags;
int i;
 
invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
if (USES_FULL_PPGTT(dev))
invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
 
for (i = 0; i < count; i++) {
char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
int length; /* limited by fault_in_pages_readable() */
 
if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS)
if (exec[i].flags & invalid_flags)
return -EINVAL;
 
/* First check for malicious input causing overflow in
932,16 → 912,26
return ERR_PTR(-EIO);
}
 
if (i915.enable_execlists && !ctx->engine[ring->id].state) {
int ret = intel_lr_context_deferred_create(ctx, ring);
if (ret) {
DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret);
return ERR_PTR(ret);
}
}
 
return ctx;
}
 
static void
void
i915_gem_execbuffer_move_to_active(struct list_head *vmas,
struct intel_engine_cs *ring)
{
u32 seqno = intel_ring_get_seqno(ring);
struct i915_vma *vma;
 
list_for_each_entry(vma, vmas, exec_list) {
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
struct drm_i915_gem_object *obj = vma->obj;
u32 old_read = obj->base.read_domains;
u32 old_write = obj->base.write_domain;
950,12 → 940,11
if (obj->base.write_domain == 0)
obj->base.pending_read_domains |= obj->base.read_domains;
obj->base.read_domains = obj->base.pending_read_domains;
obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
 
i915_vma_move_to_active(vma, ring);
if (obj->base.write_domain) {
obj->dirty = 1;
obj->last_write_seqno = intel_ring_get_seqno(ring);
obj->last_write_seqno = seqno;
 
intel_fb_obj_invalidate(obj, ring);
 
962,12 → 951,20
/* update for the implicit flush after a batch */
obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
}
if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
obj->last_fenced_seqno = seqno;
if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
struct drm_i915_private *dev_priv = to_i915(ring->dev);
list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
&dev_priv->mm.fence_list);
}
}
 
trace_i915_gem_object_change_domain(obj, old_read, old_write);
}
}
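
The new fence bookkeeping records the request seqno on the object and bumps the used fence register to the tail of the fence LRU, so the head of that list is always the best candidate for stealing. A short kernel-style sketch of that LRU move, with a simplified stand-in structure:

/* Kernel-style sketch of the fence LRU update: whenever a request uses a
 * fence register, that register moves to the tail of the LRU list, so the
 * head is always the least recently used fence. struct fence_reg is a
 * simplified stand-in. */
#include <linux/list.h>

struct fence_reg {
	struct list_head lru_list;
};

static void mark_fence_used(struct list_head *fence_lru, struct fence_reg *reg)
{
	/* Most recently used at the tail; steal from the head. */
	list_move_tail(&reg->lru_list, fence_lru);
}
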
 
static void
void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
struct drm_file *file,
struct intel_engine_cs *ring,
1008,7 → 1005,48
}
 
static int
legacy_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
i915_emit_box(struct intel_engine_cs *ring,
struct drm_clip_rect *box,
int DR1, int DR4)
{
int ret;
 
if (box->y2 <= box->y1 || box->x2 <= box->x1 ||
box->y2 <= 0 || box->x2 <= 0) {
DRM_ERROR("Bad box %d,%d..%d,%d\n",
box->x1, box->y1, box->x2, box->y2);
return -EINVAL;
}
 
if (INTEL_INFO(ring->dev)->gen >= 4) {
ret = intel_ring_begin(ring, 4);
if (ret)
return ret;
 
intel_ring_emit(ring, GFX_OP_DRAWRECT_INFO_I965);
intel_ring_emit(ring, (box->x1 & 0xffff) | box->y1 << 16);
intel_ring_emit(ring, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16);
intel_ring_emit(ring, DR4);
} else {
ret = intel_ring_begin(ring, 6);
if (ret)
return ret;
 
intel_ring_emit(ring, GFX_OP_DRAWRECT_INFO);
intel_ring_emit(ring, DR1);
intel_ring_emit(ring, (box->x1 & 0xffff) | box->y1 << 16);
intel_ring_emit(ring, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16);
intel_ring_emit(ring, DR4);
intel_ring_emit(ring, 0);
}
intel_ring_advance(ring);
 
return 0;
}
 
 
int
i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
struct intel_engine_cs *ring,
struct intel_context *ctx,
struct drm_i915_gem_execbuffer2 *args,
1135,7 → 1173,7
exec_len = args->batch_len;
if (cliprects) {
for (i = 0; i < args->num_cliprects; i++) {
ret = i915_emit_box(dev, &cliprects[i],
ret = i915_emit_box(ring, &cliprects[i],
args->DR1, args->DR4);
if (ret)
goto error;
1235,7 → 1273,7
if (!i915_gem_check_execbuffer(args))
return -EINVAL;
 
ret = validate_exec_list(exec, args->buffer_count);
ret = validate_exec_list(dev, exec, args->buffer_count);
if (ret)
return ret;
 
1282,12 → 1320,6
if (ret)
goto pre_mutex_err;
 
if (dev_priv->ums.mm_suspended) {
mutex_unlock(&dev->struct_mutex);
ret = -EBUSY;
goto pre_mutex_err;
}
 
ctx = i915_gem_validate_context(dev, file, ring, ctx_id);
if (IS_ERR(ctx)) {
mutex_unlock(&dev->struct_mutex);
1297,8 → 1329,9
 
i915_gem_context_reference(ctx);
 
vm = ctx->vm;
if (!USES_FULL_PPGTT(dev))
if (ctx->ppgtt)
vm = &ctx->ppgtt->base;
else
vm = &dev_priv->gtt.base;
 
eb = eb_create(args);
1365,25 → 1398,36
/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
* hsw should have this fixed, but bdw mucks it up again. */
if (flags & I915_DISPATCH_SECURE &&
!batch_obj->has_global_gtt_mapping) {
/* When we have multiple VMs, we'll need to make sure that we
* allocate space first */
struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
BUG_ON(!vma);
vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
}
if (flags & I915_DISPATCH_SECURE) {
/*
* So on first glance it looks freaky that we pin the batch here
* outside of the reservation loop. But:
* - The batch is already pinned into the relevant ppgtt, so we
* already have the backing storage fully allocated.
* - No other BO uses the global gtt (well contexts, but meh),
* so we don't really have issues with multiple objects not
* fitting due to fragmentation.
* So this is actually safe.
*/
ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
if (ret)
goto err;
 
if (flags & I915_DISPATCH_SECURE)
exec_start += i915_gem_obj_ggtt_offset(batch_obj);
else
} else
exec_start += i915_gem_obj_offset(batch_obj, vm);
 
ret = legacy_ringbuffer_submission(dev, file, ring, ctx,
args, &eb->vmas, batch_obj, exec_start, flags);
if (ret)
goto err;
ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
&eb->vmas, batch_obj, exec_start, flags);
 
/*
* FIXME: We crucially rely upon the active tracking for the (ppgtt)
* batch vma for correctness. For less ugly and less fragility this
* needs to be adjusted to also track the ggtt batch vma properly as
* active.
*/
if (flags & I915_DISPATCH_SECURE)
i915_gem_object_ggtt_unpin(batch_obj);
err:
/* the request owns the ref now */
i915_gem_context_unreference(ctx);
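
Taken together, the secure-dispatch hunks above bracket submission with a global GTT pin: pin the batch, execute from its GGTT offset, and unpin once the submission call returns, relying on active tracking of the ppgtt VMA afterwards. A fragment-level sketch of that bracket; the i915_gem_obj_ggtt_pin()/offset()/unpin() helpers are the ones used in the hunk, but submit_secure_batch() and dispatch_batch() are simplified stand-ins for the surrounding code and dev_priv->gt.do_execbuf():

/* Fragment-level sketch of the secure-batch bracket: pin into the global
 * GTT, execute from the GGTT offset, unpin after submission. */
static int submit_secure_batch(struct drm_i915_gem_object *batch_obj,
			       u32 batch_start_offset,
			       int (*dispatch_batch)(u64 exec_start))
{
	u64 exec_start;
	int ret;

	ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
	if (ret)
		return ret;

	exec_start = i915_gem_obj_ggtt_offset(batch_obj) + batch_start_offset;
	ret = dispatch_batch(exec_start);

	/* The GGTT binding is only needed while the request is built; after
	 * that the active tracking on the (ppgtt) batch VMA keeps the pages
	 * around, which is exactly what the FIXME above calls fragile. */
	i915_gem_object_ggtt_unpin(batch_obj);
	return ret;
}
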