96,40 → 96,83 |
{ |
u32 c = 0, i; |
|
rbo->placement.fpfn = 0; |
rbo->placement.lpfn = 0; |
rbo->placement.placement = rbo->placements; |
rbo->placement.busy_placement = rbo->placements; |
if (domain & RADEON_GEM_DOMAIN_VRAM) |
rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | |
if (domain & RADEON_GEM_DOMAIN_VRAM) { |
/* Try placing BOs which don't need CPU access outside of the |
* CPU accessible part of VRAM |
*/ |
if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) && |
rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) { |
rbo->placements[c].fpfn = |
rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT; |
rbo->placements[c++].flags = TTM_PL_FLAG_WC | |
TTM_PL_FLAG_UNCACHED | |
TTM_PL_FLAG_VRAM; |
} |
|
rbo->placements[c].fpfn = 0; |
rbo->placements[c++].flags = TTM_PL_FLAG_WC | |
TTM_PL_FLAG_UNCACHED | |
TTM_PL_FLAG_VRAM; |
} |
|
if (domain & RADEON_GEM_DOMAIN_GTT) { |
if (rbo->flags & RADEON_GEM_GTT_UC) { |
rbo->placements[c++] = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_TT; |
rbo->placements[c].fpfn = 0; |
rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED | |
TTM_PL_FLAG_TT; |
|
} else if ((rbo->flags & RADEON_GEM_GTT_WC) || |
(rbo->rdev->flags & RADEON_IS_AGP)) { |
rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | |
rbo->placements[c].fpfn = 0; |
rbo->placements[c++].flags = TTM_PL_FLAG_WC | |
TTM_PL_FLAG_UNCACHED | |
TTM_PL_FLAG_TT; |
} else { |
rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT; |
rbo->placements[c].fpfn = 0; |
rbo->placements[c++].flags = TTM_PL_FLAG_CACHED | |
TTM_PL_FLAG_TT; |
} |
} |
|
if (domain & RADEON_GEM_DOMAIN_CPU) { |
if (rbo->flags & RADEON_GEM_GTT_UC) { |
rbo->placements[c++] = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_SYSTEM; |
rbo->placements[c].fpfn = 0; |
rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED | |
TTM_PL_FLAG_SYSTEM; |
|
} else if ((rbo->flags & RADEON_GEM_GTT_WC) || |
rbo->rdev->flags & RADEON_IS_AGP) { |
rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED | |
rbo->placements[c].fpfn = 0; |
rbo->placements[c++].flags = TTM_PL_FLAG_WC | |
TTM_PL_FLAG_UNCACHED | |
TTM_PL_FLAG_SYSTEM; |
} else { |
rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM; |
rbo->placements[c].fpfn = 0; |
rbo->placements[c++].flags = TTM_PL_FLAG_CACHED | |
TTM_PL_FLAG_SYSTEM; |
} |
} |
if (!c) |
rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM; |
if (!c) { |
rbo->placements[c].fpfn = 0; |
rbo->placements[c++].flags = TTM_PL_MASK_CACHING | |
TTM_PL_FLAG_SYSTEM; |
} |
|
rbo->placement.num_placement = c; |
rbo->placement.num_busy_placement = c; |
|
for (i = 0; i < c; ++i) { |
if ((rbo->flags & RADEON_GEM_CPU_ACCESS) && |
(rbo->placements[i].flags & TTM_PL_FLAG_VRAM) && |
!rbo->placements[i].fpfn) |
rbo->placements[i].lpfn = |
rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT; |
else |
rbo->placements[i].lpfn = 0; |
} |
|
/* |
* Use two-ended allocation depending on the buffer size to |
* improve fragmentation quality. |
137,14 → 180,16 |
*/ |
if (rbo->tbo.mem.size > 512 * 1024) { |
for (i = 0; i < c; i++) { |
rbo->placements[i] |= TTM_PL_FLAG_TOPDOWN; |
rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN; |
} |
} |
} |
|
int radeon_bo_create(struct radeon_device *rdev, |
unsigned long size, int byte_align, bool kernel, u32 domain, |
u32 flags, struct sg_table *sg, struct radeon_bo **bo_ptr) |
unsigned long size, int byte_align, bool kernel, |
u32 domain, u32 flags, struct sg_table *sg, |
struct reservation_object *resv, |
struct radeon_bo **bo_ptr) |
{ |
struct radeon_bo *bo; |
enum ttm_bo_type type; |
187,11 → 232,12 |
if (!(rdev->flags & RADEON_IS_PCIE)) |
bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); |
|
// printf("%s rdev->flags %x bo->flags %x\n", |
// __FUNCTION__, bo->flags); |
|
if(flags & RADEON_GEM_GTT_WC) |
#ifdef CONFIG_X86_32 |
/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit |
* See https://bugs.freedesktop.org/show_bug.cgi?id=84627 |
*/ |
bo->flags&= ~RADEON_GEM_GTT_WC; |
#endif |
|
radeon_ttm_placement_from_domain(bo, domain); |
/* Kernel allocations are uninterruptible */ |
198,7 → 244,7 |
// down_read(&rdev->pm.mclk_lock); |
r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type, |
&bo->placement, page_align, !kernel, NULL, |
acc_size, sg, &radeon_ttm_bo_destroy); |
acc_size, sg, resv, &radeon_ttm_bo_destroy); |
// up_read(&rdev->pm.mclk_lock); |
if (unlikely(r != 0)) { |
return r; |
289,21 → 335,19 |
return 0; |
} |
radeon_ttm_placement_from_domain(bo, domain); |
if (domain == RADEON_GEM_DOMAIN_VRAM) { |
for (i = 0; i < bo->placement.num_placement; i++) { |
/* force to pin into visible video ram */ |
bo->placement.lpfn = bo->rdev->mc.visible_vram_size >> PAGE_SHIFT; |
if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) && |
!(bo->flags & RADEON_GEM_NO_CPU_ACCESS) && |
(!max_offset || max_offset > bo->rdev->mc.visible_vram_size)) |
bo->placements[i].lpfn = |
bo->rdev->mc.visible_vram_size >> PAGE_SHIFT; |
else |
bo->placements[i].lpfn = max_offset >> PAGE_SHIFT; |
|
bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; |
} |
if (max_offset) { |
u64 lpfn = max_offset >> PAGE_SHIFT; |
|
if (!bo->placement.lpfn) |
bo->placement.lpfn = bo->rdev->mc.gtt_size >> PAGE_SHIFT; |
|
if (lpfn < bo->placement.lpfn) |
bo->placement.lpfn = lpfn; |
} |
for (i = 0; i < bo->placement.num_placement; i++) |
bo->placements[i] |= TTM_PL_FLAG_NO_EVICT; |
r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); |
if (likely(r == 0)) { |
bo->pin_count = 1; |
335,8 → 379,10 |
bo->pin_count--; |
if (bo->pin_count) |
return 0; |
for (i = 0; i < bo->placement.num_placement; i++) |
bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT; |
for (i = 0; i < bo->placement.num_placement; i++) { |
bo->placements[i].lpfn = 0; |
bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT; |
} |
r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); |
if (likely(r == 0)) { |
if (bo->tbo.mem.mem_type == TTM_PL_VRAM) |
422,24 → 468,29 |
struct ww_acquire_ctx *ticket, |
struct list_head *head, int ring) |
{ |
struct radeon_cs_reloc *lobj; |
struct radeon_bo *bo; |
struct radeon_bo_list *lobj; |
struct list_head duplicates; |
int r; |
u64 bytes_moved = 0, initial_bytes_moved; |
u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev); |
|
r = ttm_eu_reserve_buffers(ticket, head); |
INIT_LIST_HEAD(&duplicates); |
r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates); |
if (unlikely(r != 0)) { |
return r; |
} |
|
list_for_each_entry(lobj, head, tv.head) { |
bo = lobj->robj; |
struct radeon_bo *bo = lobj->robj; |
if (!bo->pin_count) { |
u32 domain = lobj->prefered_domains; |
u32 allowed = lobj->allowed_domains; |
u32 current_domain = |
radeon_mem_type_to_domain(bo->tbo.mem.mem_type); |
|
WARN_ONCE(bo->gem_base.dumb, |
"GPU use of dumb buffer is illegal.\n"); |
|
/* Check if this buffer will be moved and don't move it |
* if we have moved too many buffers for this IB already. |
* |
448,7 → 499,7 |
* into account. We don't want to disallow buffer moves |
* completely. |
*/ |
if ((lobj->allowed_domains & current_domain) != 0 && |
if ((allowed & current_domain) != 0 && |
(domain & current_domain) == 0 && /* will be moved */ |
bytes_moved > bytes_moved_threshold) { |
/* don't move it */ |
458,7 → 509,7 |
retry: |
radeon_ttm_placement_from_domain(bo, domain); |
if (ring == R600_RING_TYPE_UVD_INDEX) |
radeon_uvd_force_into_uvd_segment(bo); |
radeon_uvd_force_into_uvd_segment(bo, allowed); |
|
initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved); |
r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); |
478,6 → 529,12 |
lobj->gpu_offset = radeon_bo_gpu_offset(bo); |
lobj->tiling_flags = bo->tiling_flags; |
} |
|
list_for_each_entry(lobj, &duplicates, tv.head) { |
lobj->gpu_offset = radeon_bo_gpu_offset(lobj->robj); |
lobj->tiling_flags = lobj->robj->tiling_flags; |
} |
|
return 0; |
} |
|
678,12 → 735,29 |
r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, NULL); |
if (unlikely(r != 0)) |
return r; |
spin_lock(&bo->tbo.bdev->fence_lock); |
if (mem_type) |
*mem_type = bo->tbo.mem.mem_type; |
if (bo->tbo.sync_obj) |
|
r = ttm_bo_wait(&bo->tbo, true, true, no_wait); |
spin_unlock(&bo->tbo.bdev->fence_lock); |
ttm_bo_unreserve(&bo->tbo); |
return r; |
} |
|
/** |
* radeon_bo_fence - add fence to buffer object |
* |
* @bo: buffer object in question; the fence is attached to its |
*      reservation object (bo->tbo.resv) |
* @fence: fence to add; the embedded &fence->base is what gets attached |
* @shared: true if fence should be added shared, false to add it as the |
*          exclusive fence |
* |
* Attaches @fence to the reservation object of @bo, using |
* reservation_object_add_shared_fence() when @shared is true and |
* reservation_object_add_excl_fence() otherwise. Returns nothing. |
* |
* NOTE(review): presumably the caller must hold the reservation of @bo, |
* and for a shared fence must have reserved a shared slot beforehand - |
* confirm against the reservation_object API. |
* |
*/ |
void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence, |
bool shared) |
{ |
struct reservation_object *resv = bo->tbo.resv; |
|
if (shared) |
reservation_object_add_shared_fence(resv, &fence->base); |
else |
reservation_object_add_excl_fence(resv, &fence->base); |
} |