125,26 → 125,25
 * Add the page directory to the list of BOs to
 * validate for command submission (cayman+).
 */
struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev,
					  struct radeon_vm *vm,
					  struct list_head *head)
{
	struct radeon_cs_reloc *list;
	struct radeon_bo_list *list;
	unsigned i, idx;

	list = kmalloc_array(vm->max_pde_used + 2,
			     sizeof(struct radeon_cs_reloc), GFP_KERNEL);
			     sizeof(struct radeon_bo_list), GFP_KERNEL);
	if (!list)
		return NULL;

	/* add the vm page table to the list */
	list[0].gobj = NULL;
	list[0].robj = vm->page_directory;
	list[0].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
	list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
	list[0].tv.bo = &vm->page_directory->tbo;
	list[0].tv.shared = true;
	list[0].tiling_flags = 0;
	list[0].handle = 0;
	list_add(&list[0].tv.head, head);

	for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
151,13 → 150,12
		if (!vm->page_tables[i].bo)
			continue;

		list[idx].gobj = NULL;
		list[idx].robj = vm->page_tables[i].bo;
		list[idx].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
		list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
		list[idx].tv.bo = &list[idx].robj->tbo;
		list[idx].tv.shared = true;
		list[idx].tiling_flags = 0;
		list[idx].handle = 0;
		list_add(&list[idx++].tv.head, head);
	}

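The array is sized for one entry per possible page table plus one for the page directory (vm->max_pde_used + 2), and page tables that were never allocated are simply skipped, so fewer entries may actually be used. A standalone sketch of that sizing and skip logic, using invented stand-in types rather than the real radeon_bo_list/TTM structures:

#include <stdio.h>
#include <stdlib.h>

/* invented stand-ins for struct radeon_bo and the per-VM page table array */
struct fake_bo { int dummy; };
struct fake_entry { struct fake_bo *bo; };

int main(void)
{
	unsigned max_pde_used = 7;              /* highest PDE index in use */
	struct fake_entry page_tables[8] = { { NULL } };
	struct fake_bo page_directory;
	unsigned i, idx;

	/* only some page tables have actually been allocated */
	page_tables[0].bo = malloc(sizeof(struct fake_bo));
	page_tables[3].bo = malloc(sizeof(struct fake_bo));

	/* one slot for the page directory + up to max_pde_used + 1 page tables */
	struct fake_bo **list = calloc(max_pde_used + 2, sizeof(*list));
	if (!list)
		return 1;

	list[0] = &page_directory;              /* slot 0: the page directory */

	for (i = 0, idx = 1; i <= max_pde_used; i++) {
		if (!page_tables[i].bo)
			continue;               /* never allocated, nothing to validate */
		list[idx++] = page_tables[i].bo;
	}

	printf("validated %u of %u possible BOs\n", idx, max_pde_used + 2);

	free(page_tables[0].bo);
	free(page_tables[3].bo);
	free(list);
	return 0;
}
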
180,15 → 178,18
			  struct radeon_vm *vm, int ring)
{
	struct radeon_fence *best[RADEON_NUM_RINGS] = {};
	struct radeon_vm_id *vm_id = &vm->ids[ring];

	unsigned choices[2] = {};
	unsigned i;

	/* check if the id is still valid */
	if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id])
	if (vm_id->id && vm_id->last_id_use &&
	    vm_id->last_id_use == rdev->vm_manager.active[vm_id->id])
		return NULL;

	/* we definitely need to flush */
	radeon_fence_unref(&vm->last_flush);
	vm_id->pd_gpu_addr = ~0ll;

	/* skip over VMID 0, since it is the system VM */
	for (i = 1; i < rdev->vm_manager.nvm; ++i) {
196,8 → 197,8

		if (fence == NULL) {
			/* found a free one */
			vm->id = i;
			trace_radeon_vm_grab_id(vm->id, ring);
			vm_id->id = i;
			trace_radeon_vm_grab_id(i, ring);
			return NULL;
		}

209,8 → 210,8

	for (i = 0; i < 2; ++i) {
		if (choices[i]) {
			vm->id = choices[i];
			trace_radeon_vm_grab_id(vm->id, ring);
			vm_id->id = choices[i];
			trace_radeon_vm_grab_id(choices[i], ring);
			return rdev->vm_manager.active[choices[i]];
		}
	}
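
With this change each ring tracks its own VMID in vm->ids[ring]; the ID is reused as long as the fence stored in last_id_use is still the one recorded in vm_manager.active[] for that ID. A minimal userspace model of that decision, with plain sequence numbers standing in for radeon fences (the real driver also scores busy IDs and returns a fence to wait on, which is omitted here):

#include <stdio.h>

#define NUM_RINGS 2
#define NUM_VMIDS 4

/* fences modelled as monotonically increasing sequence numbers; 0 = none */
static unsigned active[NUM_VMIDS];          /* last fence that used each VMID */

struct vm_id {
	unsigned id;
	unsigned last_id_use;                /* fence emitted when we last used id */
};

static struct vm_id ids[NUM_RINGS];

/* return 0 if the cached VMID is still ours, otherwise grab a free one */
static int grab_id(int ring, unsigned *out)
{
	struct vm_id *vm_id = &ids[ring];
	unsigned i;

	/* id still valid: nobody else used it since our last submission */
	if (vm_id->id && vm_id->last_id_use &&
	    vm_id->last_id_use == active[vm_id->id]) {
		*out = vm_id->id;
		return 0;
	}

	/* skip VMID 0, reserved for the system VM */
	for (i = 1; i < NUM_VMIDS; ++i) {
		if (active[i] == 0) {        /* free: no fence using it */
			vm_id->id = i;
			*out = i;
			return 0;
		}
	}
	return -1;                           /* all busy: caller must wait */
}

int main(void)
{
	unsigned id;

	if (grab_id(0, &id) == 0) {
		active[id] = ids[0].last_id_use = 1;   /* pretend we submitted fence #1 */
		printf("ring 0 got VMID %u\n", id);
	}
	if (grab_id(0, &id) == 0)                      /* still valid, reused */
		printf("ring 0 reused VMID %u\n", id);
	return 0;
}
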
226,6 → 227,7
 * @rdev: radeon_device pointer
 * @vm: vm we want to flush
 * @ring: ring to use for flush
 * @updates: last vm update that is waited for
 *
 * Flush the vm (cayman+).
 *
233,15 → 235,21
 */
void radeon_vm_flush(struct radeon_device *rdev,
		     struct radeon_vm *vm,
		     int ring)
		     int ring, struct radeon_fence *updates)
{
	uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
	struct radeon_vm_id *vm_id = &vm->ids[ring];

	/* if we can't remember our last VM flush then flush now! */
	if (!vm->last_flush || pd_addr != vm->pd_gpu_addr) {
		trace_radeon_vm_flush(pd_addr, ring, vm->id);
		vm->pd_gpu_addr = pd_addr;
		radeon_ring_vm_flush(rdev, ring, vm);
	if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates ||
	    radeon_fence_is_earlier(vm_id->flushed_updates, updates)) {

		trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id);
		radeon_fence_unref(&vm_id->flushed_updates);
		vm_id->flushed_updates = radeon_fence_ref(updates);
		vm_id->pd_gpu_addr = pd_addr;
		radeon_ring_vm_flush(rdev, &rdev->ring[ring],
				     vm_id->id, vm_id->pd_gpu_addr);

	}
}

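The flush is now skipped unless the page directory address changed or the updates fence is newer than the one recorded in flushed_updates. A small self-contained model of that test, assuming fences can be compared as per-ring sequence numbers in the way radeon_fence_is_earlier() compares them:

#include <stdbool.h>
#include <stdio.h>

/* fences modelled as sequence numbers; fence_is_earlier() roughly asks
 * "was a emitted before b on the same ring?" */
static bool fence_is_earlier(unsigned a, unsigned b)
{
	return a < b;
}

struct vm_id_state {
	unsigned long long pd_gpu_addr;      /* address flushed into the VMID regs */
	unsigned flushed_updates;            /* last PT update fence we flushed for */
};

static bool need_flush(struct vm_id_state *s, unsigned long long pd_addr,
		       unsigned updates)
{
	if (pd_addr != s->pd_gpu_addr || !s->flushed_updates ||
	    fence_is_earlier(s->flushed_updates, updates)) {
		s->flushed_updates = updates;
		s->pd_gpu_addr = pd_addr;
		return true;                 /* emit the ring VM flush here */
	}
	return false;
}

int main(void)
{
	struct vm_id_state s = { .pd_gpu_addr = ~0ULL };

	printf("%d\n", need_flush(&s, 0x1000, 1));  /* 1: first use, must flush */
	printf("%d\n", need_flush(&s, 0x1000, 1));  /* 0: nothing changed */
	printf("%d\n", need_flush(&s, 0x1000, 2));  /* 1: newer PT update pending */
	return 0;
}
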
261,18 → 269,13
		     struct radeon_vm *vm,
		     struct radeon_fence *fence)
{
	radeon_fence_unref(&vm->fence);
	vm->fence = radeon_fence_ref(fence);
	unsigned vm_id = vm->ids[fence->ring].id;

	radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
	rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);
	radeon_fence_unref(&rdev->vm_manager.active[vm_id]);
	rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence);

	radeon_fence_unref(&vm->last_id_use);
	vm->last_id_use = radeon_fence_ref(fence);

	/* we just flushed the VM, remember that */
	if (!vm->last_flush)
		vm->last_flush = radeon_fence_ref(fence);
	radeon_fence_unref(&vm->ids[fence->ring].last_id_use);
	vm->ids[fence->ring].last_id_use = radeon_fence_ref(fence);
}

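The bookkeeping above only swaps fence references: drop whatever the slot held before, then take a reference on the new fence. A toy refcounted fence illustrating that unref-then-ref pattern (the struct and helpers below are invented for the sketch, not the driver's radeon_fence API):

#include <stdio.h>
#include <stdlib.h>

/* minimal refcounted object standing in for struct radeon_fence */
struct fence {
	int refcount;
	unsigned seq;
};

static struct fence *fence_ref(struct fence *f)
{
	if (f)
		f->refcount++;
	return f;
}

static void fence_unref(struct fence **f)
{
	if (*f && --(*f)->refcount == 0)
		free(*f);
	*f = NULL;
}

int main(void)
{
	struct fence *active = NULL;         /* vm_manager.active[vm_id] */
	struct fence *last_id_use = NULL;    /* vm->ids[ring].last_id_use */

	struct fence *f = calloc(1, sizeof(*f));
	f->refcount = 1;
	f->seq = 42;

	/* same pattern as above: drop the old holder, take the new */
	fence_unref(&active);
	active = fence_ref(f);
	fence_unref(&last_id_use);
	last_id_use = fence_ref(f);

	fence_unref(&f);                     /* drop the submission's own reference */
	printf("fence %u now held by %d users\n", active->seq, active->refcount);

	fence_unref(&active);
	fence_unref(&last_id_use);
	return 0;
}
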
/**
385,27 → 388,18
static int radeon_vm_clear_bo(struct radeon_device *rdev,
			      struct radeon_bo *bo)
{
	struct ttm_validate_buffer tv;
	struct ww_acquire_ctx ticket;
	struct list_head head;
	struct radeon_ib ib;
	unsigned entries;
	uint64_t addr;
	int r;

	memset(&tv, 0, sizeof(tv));
	tv.bo = &bo->tbo;

	INIT_LIST_HEAD(&head);
	list_add(&tv.head, &head);

	r = ttm_eu_reserve_buffers(&ticket, &head);
	r = radeon_bo_reserve(bo, false);
	if (r)
		return r;

	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
	if (r)
		goto error;
		goto error_unreserve;

	addr = radeon_bo_gpu_offset(bo);
	entries = radeon_bo_size(bo) / 8;
412,7 → 406,7

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256);
	if (r)
		goto error;
		goto error_unreserve;

	ib.length_dw = 0;

422,15 → 416,16

	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r)
		goto error;
		goto error_free;

	ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence);
	ib.fence->is_vm_update = true;
	radeon_bo_fence(bo, ib.fence, false);

error_free:
	radeon_ib_free(rdev, &ib);

	return 0;

error:
	ttm_eu_backoff_reservation(&ticket, &head);
error_unreserve:
	radeon_bo_unreserve(bo);
	return r;
}

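The rewritten error handling acquires resources in order (reserve, validate, IB) and unwinds them in reverse through stacked labels, with the success path falling through error_free so the IB is always released. A compilable sketch of the same pattern with stub functions in place of the real reserve/validate/submit calls:

#include <stdio.h>

/* hypothetical stand-ins for the reserve/validate/IB steps used above */
static int reserve(void)  { return 0; }
static int validate(void) { return 0; }
static int get_ib(void)   { return 0; }
static int schedule(void) { return -1; }     /* pretend submission fails */

static void free_ib(void)   { printf("free_ib\n"); }
static void unreserve(void) { printf("unreserve\n"); }

/* acquire in order, release in reverse order via stacked labels */
static int clear_bo(void)
{
	int r;

	r = reserve();
	if (r)
		return r;

	r = validate();
	if (r)
		goto error_unreserve;

	r = get_ib();
	if (r)
		goto error_unreserve;

	r = schedule();
	if (r)
		goto error_free;

	/* success: fall through the cleanup labels with r == 0 */
error_free:
	free_ib();
error_unreserve:
	unreserve();
	return r;
}

int main(void)
{
	printf("clear_bo() = %d\n", clear_bo());
	return 0;
}
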
446,7 → 441,7
 * Validate and set the offset requested within the vm address space.
 * Returns 0 for success, error for failure.
 *
 * Object has to be reserved!
 * Object has to be reserved and gets unreserved by this function!
 */
int radeon_vm_bo_set_addr(struct radeon_device *rdev,
			  struct radeon_bo_va *bo_va,
492,7 → 487,9
		tmp->vm = vm;
		tmp->addr = bo_va->addr;
		tmp->bo = radeon_bo_ref(bo_va->bo);
		spin_lock(&vm->status_lock);
		list_add(&tmp->vm_status, &vm->freed);
		spin_unlock(&vm->status_lock);
	}

	interval_tree_remove(&bo_va->it, &vm->va);
545,7 → 542,8

		r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8,
				     RADEON_GPU_PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, 0, NULL, &pt);
				     RADEON_GEM_DOMAIN_VRAM, 0,
				     NULL, NULL, &pt);
		if (r)
			return r;

571,7 → 569,7
	}

	mutex_unlock(&vm->mutex);
	return radeon_bo_reserve(bo_va->bo, false);
	return 0;
}

/**
694,8 → 692,8

	if (ib.length_dw != 0) {
		radeon_asic_vm_pad_ib(rdev, &ib);
		radeon_semaphore_sync_to(ib.semaphore, pd->tbo.sync_obj);
		radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use);

		radeon_sync_resv(rdev, &ib.sync, pd->tbo.resv, true);
		WARN_ON(ib.length_dw > ndw);
		r = radeon_ib_schedule(rdev, &ib, NULL, false);
		if (r) {
702,9 → 700,8
			radeon_ib_free(rdev, &ib);
			return r;
		}
		radeon_fence_unref(&vm->fence);
		vm->fence = radeon_fence_ref(ib.fence);
		radeon_fence_unref(&vm->last_flush);
		ib.fence->is_vm_update = true;
		radeon_bo_fence(pd, ib.fence, false);
	}
	radeon_ib_free(rdev, &ib);

803,7 → 800,7
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_update_ptes(struct radeon_device *rdev,
static int radeon_vm_update_ptes(struct radeon_device *rdev,
				 struct radeon_vm *vm,
				 struct radeon_ib *ib,
				 uint64_t start, uint64_t end,
820,8 → 817,12
		struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
		unsigned nptes;
		uint64_t pte;
		int r;

		radeon_semaphore_sync_to(ib->semaphore, pt->tbo.sync_obj);
		radeon_sync_resv(rdev, &ib->sync, pt->tbo.resv, true);
		r = reservation_object_reserve_shared(pt->tbo.resv);
		if (r)
			return r;

		if ((addr & ~mask) == (end & ~mask))
			nptes = end - addr;
855,9 → 856,36
					last_pte + 8 * count,
					last_dst, flags);
	}

	return 0;
}

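radeon_vm_update_ptes() walks the range one page table at a time: the PT index is addr >> radeon_vm_block_size and the chunk length runs either to the end of the range or to the next page-table boundary, whichever comes first. A standalone sketch of that split, assuming 512 PTEs per page table as a stand-in for RADEON_VM_PTE_COUNT:

#include <stdio.h>
#include <stdint.h>

/* assumed layout: 9 bits of page-table index per page directory entry,
 * i.e. 512 PTEs per page table */
#define BLOCK_SIZE 9
#define PTE_COUNT  (1u << BLOCK_SIZE)

int main(void)
{
	uint64_t start = 500, end = 1300;    /* range measured in page entries */
	uint64_t mask = PTE_COUNT - 1;
	uint64_t addr;

	for (addr = start; addr < end; ) {
		uint64_t pt_idx = addr >> BLOCK_SIZE;
		unsigned nptes;

		if ((addr & ~mask) == (end & ~mask))
			nptes = end - addr;                /* last chunk: stop at end */
		else
			nptes = PTE_COUNT - (addr & mask); /* up to the PT boundary */

		printf("PT %llu: %u PTEs starting at offset %llu\n",
		       (unsigned long long)pt_idx, nptes,
		       (unsigned long long)(addr & mask));
		addr += nptes;
	}
	return 0;
}
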
/**
 * radeon_vm_fence_pts - fence page tables after an update
 *
 * @vm: requested vm
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @fence: fence to use
 *
 * Fence the page tables in the range @start - @end (cayman+).
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_fence_pts(struct radeon_vm *vm,
				uint64_t start, uint64_t end,
				struct radeon_fence *fence)
{
	unsigned i;

	start >>= radeon_vm_block_size;
	end >>= radeon_vm_block_size;

	for (i = start; i <= end; ++i)
		radeon_bo_fence(vm->page_tables[i].bo, fence, true);
}

/**
 * radeon_vm_bo_update - map a bo into the vm page table
 *
 * @rdev: radeon_device pointer
887,11 → 915,16
		return -EINVAL;
	}

	spin_lock(&vm->status_lock);
	list_del_init(&bo_va->vm_status);
	spin_unlock(&vm->status_lock);

	bo_va->flags &= ~RADEON_VM_PAGE_VALID;
	bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
	bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED;
	if (bo_va->bo && radeon_ttm_tt_is_readonly(bo_va->bo->tbo.ttm))
		bo_va->flags &= ~RADEON_VM_PAGE_WRITEABLE;

	if (mem) {
		addr = mem->start << PAGE_SHIFT;
		if (mem->mem_type != TTM_PL_SYSTEM) {
953,23 → 986,34
		return r;
	ib.length_dw = 0;

	radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
	if (!(bo_va->flags & RADEON_VM_PAGE_VALID)) {
		unsigned i;

		for (i = 0; i < RADEON_NUM_RINGS; ++i)
			radeon_sync_fence(&ib.sync, vm->ids[i].last_id_use);
	}

	r = radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
				  bo_va->it.last + 1, addr,
				  radeon_vm_page_flags(bo_va->flags));
	if (r) {
		radeon_ib_free(rdev, &ib);
		return r;
	}

	radeon_asic_vm_pad_ib(rdev, &ib);
	WARN_ON(ib.length_dw > ndw);

	radeon_semaphore_sync_to(ib.semaphore, vm->fence);
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_ib_free(rdev, &ib);
		return r;
	}
	radeon_fence_unref(&vm->fence);
	vm->fence = radeon_fence_ref(ib.fence);
	ib.fence->is_vm_update = true;
	radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence);
	radeon_fence_unref(&bo_va->last_pt_update);
	bo_va->last_pt_update = radeon_fence_ref(ib.fence);
	radeon_ib_free(rdev, &ib);
	radeon_fence_unref(&vm->last_flush);

	return 0;
}
988,16 → 1032,25
int radeon_vm_clear_freed(struct radeon_device *rdev,
			  struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va, *tmp;
	struct radeon_bo_va *bo_va;
	int r;

	list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
	spin_lock(&vm->status_lock);
	while (!list_empty(&vm->freed)) {
		bo_va = list_first_entry(&vm->freed,
					 struct radeon_bo_va, vm_status);
		spin_unlock(&vm->status_lock);

		r = radeon_vm_bo_update(rdev, bo_va, NULL);
		radeon_bo_unref(&bo_va->bo);
		radeon_fence_unref(&bo_va->last_pt_update);
		kfree(bo_va);
		if (r)
			return r;

		spin_lock(&vm->status_lock);
	}
	spin_unlock(&vm->status_lock);

	return 0;
}
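
The loop drains vm->freed while holding status_lock only for the list peeks; the lock is dropped around radeon_vm_bo_update(), which may sleep, and retaken before looking at the list again. A simplified single-threaded model of that drain pattern, with empty lock()/unlock() stubs standing in for the spinlock and the detach folded into update() the way radeon_vm_bo_update() does its list_del_init() under status_lock:

#include <stdio.h>
#include <stdlib.h>

struct entry {
	struct entry *next;
	int payload;
};

static struct entry *freed;                  /* stands in for vm->freed */

/* stand-ins for spin_lock()/spin_unlock() on vm->status_lock */
static void lock(void)   { }
static void unlock(void) { }

/* the expensive per-entry work; it detaches the entry under the lock and
 * then runs the slow part without it */
static int update(struct entry *e)
{
	lock();
	freed = e->next;
	unlock();
	printf("updating entry %d\n", e->payload);
	return 0;
}

static int clear_freed(void)
{
	struct entry *e;
	int r;

	lock();
	while (freed) {
		e = freed;                   /* peek the first entry */
		unlock();                    /* drop the lock for the slow part */

		r = update(e);               /* detaches e and rebuilds its mapping */
		free(e);
		if (r)
			return r;

		lock();                      /* retake before checking the list */
	}
	unlock();
	return 0;
}

int main(void)
{
	for (int i = 3; i >= 1; --i) {
		struct entry *e = malloc(sizeof(*e));
		e->payload = i;
		e->next = freed;
		freed = e;
	}
	return clear_freed();
}
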
1016,14 → 1069,23
int radeon_vm_clear_invalids(struct radeon_device *rdev,
			     struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va, *tmp;
	struct radeon_bo_va *bo_va;
	int r;

	list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, vm_status) {
	spin_lock(&vm->status_lock);
	while (!list_empty(&vm->invalidated)) {
		bo_va = list_first_entry(&vm->invalidated,
					 struct radeon_bo_va, vm_status);
		spin_unlock(&vm->status_lock);

		r = radeon_vm_bo_update(rdev, bo_va, NULL);
		if (r)
			return r;

		spin_lock(&vm->status_lock);
	}
	spin_unlock(&vm->status_lock);

	return 0;
}

1046,6 → 1108,7

	mutex_lock(&vm->mutex);
	interval_tree_remove(&bo_va->it, &vm->va);
	spin_lock(&vm->status_lock);
	list_del(&bo_va->vm_status);

	if (bo_va->addr) {
1052,8 → 1115,10
		bo_va->bo = radeon_bo_ref(bo_va->bo);
		list_add(&bo_va->vm_status, &vm->freed);
	} else {
		radeon_fence_unref(&bo_va->last_pt_update);
		kfree(bo_va);
	}
	spin_unlock(&vm->status_lock);

	mutex_unlock(&vm->mutex);
}
1074,10 → 1139,10

	list_for_each_entry(bo_va, &bo->va, bo_list) {
		if (bo_va->addr) {
			mutex_lock(&bo_va->vm->mutex);
			spin_lock(&bo_va->vm->status_lock);
			list_del(&bo_va->vm_status);
			list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
			mutex_unlock(&bo_va->vm->mutex);
			spin_unlock(&bo_va->vm->status_lock);
		}
	}
}
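
The invalidate path above now only needs the new status_lock for the short list move instead of taking the per-VM mutex. A self-contained sketch of that move with a minimal list_head-style list (my own reimplementation for the example, not the kernel's <linux/list.h>):

#include <stdio.h>

/* minimal circular doubly-linked list, in the spirit of the kernel's list_head */
struct list_head { struct list_head *next, *prev; };

static void list_init(struct list_head *h) { h->next = h->prev = h; }

static void list_del(struct list_head *e)
{
	e->prev->next = e->next;
	e->next->prev = e->prev;
}

static void list_add(struct list_head *e, struct list_head *h)
{
	e->next = h->next;
	e->prev = h;
	h->next->prev = e;
	h->next = e;
}

struct bo_va {
	struct list_head vm_status;
	unsigned long addr;
};

int main(void)
{
	struct list_head vm_status_list, invalidated;     /* per-VM lists */
	struct bo_va va = { .addr = 0x100000 };

	list_init(&vm_status_list);
	list_init(&invalidated);
	list_add(&va.vm_status, &vm_status_list);

	/* the move done under vm->status_lock in the driver: only this short
	 * list manipulation needs the spinlock, the heavyweight vm->mutex is
	 * no longer taken here */
	if (va.addr) {
		list_del(&va.vm_status);
		list_add(&va.vm_status, &invalidated);
	}

	printf("invalidated list %s\n",
	       invalidated.next == &va.vm_status ? "contains the mapping" : "is empty");
	return 0;
}
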
1095,15 → 1160,17
	const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE,
				   RADEON_VM_PTE_COUNT * 8);
	unsigned pd_size, pd_entries, pts_size;
	int r;
	int i, r;

	vm->id = 0;
	vm->ib_bo_va = NULL;
	vm->fence = NULL;
	vm->last_flush = NULL;
	vm->last_id_use = NULL;
	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		vm->ids[i].id = 0;
		vm->ids[i].flushed_updates = NULL;
		vm->ids[i].last_id_use = NULL;
	}
	mutex_init(&vm->mutex);
	vm->va = RB_ROOT;
	spin_lock_init(&vm->status_lock);
	INIT_LIST_HEAD(&vm->invalidated);
	INIT_LIST_HEAD(&vm->freed);

1120,7 → 1187,7

	r = radeon_bo_create(rdev, pd_size, align, true,
			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
			     &vm->page_directory);
			     NULL, &vm->page_directory);
	if (r)
		return r;

1157,11 → 1224,13
		if (!r) {
			list_del_init(&bo_va->bo_list);
			radeon_bo_unreserve(bo_va->bo);
			radeon_fence_unref(&bo_va->last_pt_update);
			kfree(bo_va);
		}
	}
	list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
		radeon_bo_unref(&bo_va->bo);
		radeon_fence_unref(&bo_va->last_pt_update);
		kfree(bo_va);
	}

1171,9 → 1240,10

	radeon_bo_unref(&vm->page_directory);

	radeon_fence_unref(&vm->fence);
	radeon_fence_unref(&vm->last_flush);
	radeon_fence_unref(&vm->last_id_use);
	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		radeon_fence_unref(&vm->ids[i].flushed_updates);
		radeon_fence_unref(&vm->ids[i].last_id_use);
	}

	mutex_destroy(&vm->mutex);
}