249,6 → 249,31 |
obj->cache_level != I915_CACHE_NONE); |
} |
|
/* Used to convert any address to canonical form. |
* Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS, |
* MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the |
* addresses to be in a canonical form: |
* "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct |
* canonical form [63:48] == [47]." |
*/ |
#define GEN8_HIGH_ADDRESS_BIT 47
static inline uint64_t gen8_canonical_addr(uint64_t address)
{
	/*
	 * Sign-extend bit 47 into bits [63:48] so that the address is in
	 * the canonical form the hardware expects ([63:48] == [47]).
	 * Open-coded equivalent of sign_extend64(address, 47): shift the
	 * interesting bit up to bit 63, then arithmetic-shift back down.
	 */
	const unsigned int shift = 63 - GEN8_HIGH_ADDRESS_BIT;

	return (uint64_t)((int64_t)(address << shift) >> shift);
}
|
static inline uint64_t gen8_noncanonical_addr(uint64_t address)
{
	/*
	 * Strip the canonical-form sign-extension bits [63:48], keeping
	 * only the GEN8_HIGH_ADDRESS_BIT + 1 == 48 low address bits
	 * [47:0] that drm_mm actually manages.
	 */
	const uint64_t low_bits_mask = 0x0000ffffffffffffULL;

	return address & low_bits_mask;
}
|
static inline uint64_t |
relocation_target(struct drm_i915_gem_relocation_entry *reloc, |
uint64_t target_offset) |
{ |
return gen8_canonical_addr((int)reloc->delta + target_offset); |
} |
|
static int |
relocate_entry_cpu(struct drm_i915_gem_object *obj, |
struct drm_i915_gem_relocation_entry *reloc, |
256,7 → 281,7 |
{ |
struct drm_device *dev = obj->base.dev; |
uint32_t page_offset = offset_in_page(reloc->offset); |
uint64_t delta = reloc->delta + target_offset; |
uint64_t delta = relocation_target(reloc, target_offset); |
char *vaddr; |
int ret; |
|
264,7 → 289,7 |
if (ret) |
return ret; |
|
vaddr = kmap_atomic(i915_gem_object_get_page(obj, |
vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, |
reloc->offset >> PAGE_SHIFT)); |
*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta); |
|
273,7 → 298,7 |
|
if (page_offset == 0) { |
kunmap_atomic(vaddr); |
vaddr = kmap_atomic(i915_gem_object_get_page(obj, |
vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, |
(reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT)); |
} |
|
292,7 → 317,7 |
{ |
struct drm_device *dev = obj->base.dev; |
struct drm_i915_private *dev_priv = dev->dev_private; |
uint64_t delta = reloc->delta + target_offset; |
uint64_t delta = relocation_target(reloc, target_offset); |
uint64_t offset; |
void __iomem *reloc_page; |
int ret; |
334,7 → 359,7 |
{ |
struct drm_device *dev = obj->base.dev; |
uint32_t page_offset = offset_in_page(reloc->offset); |
uint64_t delta = (int)reloc->delta + target_offset; |
uint64_t delta = relocation_target(reloc, target_offset); |
char *vaddr; |
int ret; |
|
342,7 → 367,7 |
if (ret) |
return ret; |
|
vaddr = kmap_atomic(i915_gem_object_get_page(obj, |
vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, |
reloc->offset >> PAGE_SHIFT)); |
clflush_write32(vaddr + page_offset, lower_32_bits(delta)); |
|
351,7 → 376,7 |
|
if (page_offset == 0) { |
kunmap_atomic(vaddr); |
vaddr = kmap_atomic(i915_gem_object_get_page(obj, |
vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, |
(reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT)); |
} |
|
382,7 → 407,7 |
target_i915_obj = target_vma->obj; |
target_obj = &target_vma->obj->base; |
|
target_offset = target_vma->node.start; |
target_offset = gen8_canonical_addr(target_vma->node.start); |
|
/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and |
* pipe_control writes because the gpu doesn't properly redirect them |
583,6 → 608,8 |
flags |= PIN_GLOBAL | PIN_MAPPABLE; |
if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) |
flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; |
if (entry->flags & EXEC_OBJECT_PINNED) |
flags |= entry->offset | PIN_OFFSET_FIXED; |
if ((flags & PIN_MAPPABLE) == 0) |
flags |= PIN_HIGH; |
} |
654,6 → 681,10 |
vma->node.start & (entry->alignment - 1)) |
return true; |
|
if (entry->flags & EXEC_OBJECT_PINNED && |
vma->node.start != entry->offset) |
return true; |
|
if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && |
vma->node.start < BATCH_OFFSET_BIAS) |
return true; |
679,6 → 710,7 |
struct i915_vma *vma; |
struct i915_address_space *vm; |
struct list_head ordered_vmas; |
struct list_head pinned_vmas; |
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; |
int retry; |
|
687,6 → 719,7 |
vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; |
|
INIT_LIST_HEAD(&ordered_vmas); |
INIT_LIST_HEAD(&pinned_vmas); |
while (!list_empty(vmas)) { |
struct drm_i915_gem_exec_object2 *entry; |
bool need_fence, need_mappable; |
705,7 → 738,9 |
obj->tiling_mode != I915_TILING_NONE; |
need_mappable = need_fence || need_reloc_mappable(vma); |
|
if (need_mappable) { |
if (entry->flags & EXEC_OBJECT_PINNED) |
list_move_tail(&vma->exec_list, &pinned_vmas); |
else if (need_mappable) { |
entry->flags |= __EXEC_OBJECT_NEEDS_MAP; |
list_move(&vma->exec_list, &ordered_vmas); |
} else |
715,6 → 750,7 |
obj->base.pending_write_domain = 0; |
} |
list_splice(&ordered_vmas, vmas); |
list_splice(&pinned_vmas, vmas); |
|
/* Attempt to pin all of the buffers into the GTT. |
* This is done in 3 phases: |
967,6 → 1003,21 |
if (exec[i].flags & invalid_flags) |
return -EINVAL; |
|
/* Offset can be used as input (EXEC_OBJECT_PINNED), reject |
* any non-page-aligned or non-canonical addresses. |
*/ |
if (exec[i].flags & EXEC_OBJECT_PINNED) { |
if (exec[i].offset != |
gen8_canonical_addr(exec[i].offset & PAGE_MASK)) |
return -EINVAL; |
|
/* From drm_mm perspective address space is contiguous, |
* so from this point we're always using non-canonical |
* form internally. |
*/ |
exec[i].offset = gen8_noncanonical_addr(exec[i].offset); |
} |
|
if (exec[i].alignment && !is_power_of_2(exec[i].alignment)) |
return -EINVAL; |
|
1091,7 → 1142,7 |
|
for (i = 0; i < 4; i++) { |
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); |
intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i)); |
intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i)); |
intel_ring_emit(ring, 0); |
} |
|
1218,7 → 1269,7 |
|
intel_ring_emit(ring, MI_NOOP); |
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); |
intel_ring_emit(ring, INSTPM); |
intel_ring_emit_reg(ring, INSTPM); |
intel_ring_emit(ring, instp_mask << 16 | instp_mode); |
intel_ring_advance(ring); |
|
1294,6 → 1345,7 |
* Note that actual hangs have only been observed on gen7, but for |
* paranoia do it everywhere. |
*/ |
if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) |
vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; |
|
return vma->obj; |
1654,6 → 1706,8 |
|
/* Copy the new buffer offsets back to the user's exec list. */ |
for (i = 0; i < args->buffer_count; i++) { |
exec2_list[i].offset = |
gen8_canonical_addr(exec2_list[i].offset); |
ret = __copy_to_user(&user_exec_list[i].offset, |
&exec2_list[i].offset, |
sizeof(user_exec_list[i].offset)); |
1718,6 → 1772,8 |
int i; |
|
for (i = 0; i < args->buffer_count; i++) { |
exec2_list[i].offset = |
gen8_canonical_addr(exec2_list[i].offset); |
ret = __copy_to_user(&user_exec_list[i].offset, |
&exec2_list[i].offset, |
sizeof(user_exec_list[i].offset)); |