37,11 → 37,11 |
#include <linux/slab.h> |
#include <linux/sched.h> |
#include <linux/mm.h> |
#include <linux/file.h> |
#include <linux/module.h> |
#include <linux/atomic.h> |
#include <linux/reservation.h> |
|
#define pr_err(fmt, ...) \ |
printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) |
|
#define TTM_ASSERT_LOCKED(param) |
#define TTM_DEBUG(fmt, arg...) |
#define TTM_BO_HASH_ORDER 13 |
48,12 → 48,13 |
|
|
|
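/* |
 * Translate the flags of a placement entry into the index of the |
 * backing memory type (TTM_PL_SYSTEM, TTM_PL_TT, TTM_PL_VRAM, ...). |
 */ |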
static inline int ttm_mem_type_from_flags(uint32_t flags, uint32_t *mem_type) |
static inline int ttm_mem_type_from_place(const struct ttm_place *place, |
uint32_t *mem_type) |
{ |
int i; |
|
for (i = 0; i <= TTM_PL_PRIV5; i++) |
if (flags & (1 << i)) { |
if (place->flags & (1 << i)) { |
*mem_type = i; |
return 0; |
} |
83,7 → 84,6 |
BUG_ON(atomic_read(&bo->list_kref.refcount)); |
BUG_ON(atomic_read(&bo->kref.refcount)); |
BUG_ON(atomic_read(&bo->cpu_writers)); |
BUG_ON(bo->sync_obj != NULL); |
BUG_ON(bo->mem.mm_node != NULL); |
BUG_ON(!list_empty(&bo->lru)); |
BUG_ON(!list_empty(&bo->ddestroy)); |
343,12 → 343,30 |
ww_mutex_unlock(&bo->resv->lock); |
} |
|
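/* |
 * Make sure software signaling is enabled on the exclusive fence and on |
 * every shared fence of the BO's reservation object, so the fences will |
 * actually signal and delayed destruction of the BO can make progress. |
 */ |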
static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo) |
{ |
struct reservation_object_list *fobj; |
struct fence *fence; |
int i; |
|
fobj = reservation_object_get_list(bo->resv); |
fence = reservation_object_get_excl(bo->resv); |
if (fence && !fence->ops->signaled) |
fence_enable_sw_signaling(fence); |
|
for (i = 0; fobj && i < fobj->shared_count; ++i) { |
fence = rcu_dereference_protected(fobj->shared[i], |
reservation_object_held(bo->resv)); |
|
if (!fence->ops->signaled) |
fence_enable_sw_signaling(fence); |
} |
} |
|
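/* |
 * Either destroy the BO right away if it is idle and could be reserved, |
 * or queue it on the delayed-destroy list with signaling enabled on all |
 * of its fences. |
 */ |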
static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo) |
{ |
struct ttm_bo_device *bdev = bo->bdev; |
struct ttm_bo_global *glob = bo->glob; |
struct ttm_bo_driver *driver = bdev->driver; |
void *sync_obj = NULL; |
int put_count; |
int ret; |
|
355,10 → 373,8 |
spin_lock(&glob->lru_lock); |
ret = __ttm_bo_reserve(bo, false, true, false, NULL); |
|
spin_lock(&bdev->fence_lock); |
(void) ttm_bo_wait(bo, false, false, true); |
if (!ret && !bo->sync_obj) { |
spin_unlock(&bdev->fence_lock); |
if (!ret) { |
if (!ttm_bo_wait(bo, false, false, true)) { |
put_count = ttm_bo_del_from_lru(bo); |
|
spin_unlock(&glob->lru_lock); |
367,13 → 383,9 |
ttm_bo_list_ref_sub(bo, put_count, true); |
|
return; |
} |
if (bo->sync_obj) |
sync_obj = driver->sync_obj_ref(bo->sync_obj); |
spin_unlock(&bdev->fence_lock); |
} else |
ttm_bo_flush_all_fences(bo); |
|
if (!ret) { |
|
/* |
* Make NO_EVICT bos immediately available to |
* shrinkers, now that they are queued for |
391,10 → 403,6 |
list_add_tail(&bo->ddestroy, &bdev->ddestroy); |
spin_unlock(&glob->lru_lock); |
|
if (sync_obj) { |
driver->sync_obj_flush(sync_obj); |
driver->sync_obj_unref(&sync_obj); |
} |
schedule_delayed_work(&bdev->wq, |
((HZ / 100) < 1) ? 1 : HZ / 100); |
} |
415,44 → 423,26 |
bool interruptible, |
bool no_wait_gpu) |
{ |
struct ttm_bo_device *bdev = bo->bdev; |
struct ttm_bo_driver *driver = bdev->driver; |
struct ttm_bo_global *glob = bo->glob; |
int put_count; |
int ret; |
|
spin_lock(&bdev->fence_lock); |
ret = ttm_bo_wait(bo, false, false, true); |
|
if (ret && !no_wait_gpu) { |
void *sync_obj; |
|
/* |
* Take a reference to the fence and unreserve, |
* at this point the buffer should be dead, so |
* no new sync objects can be attached. |
*/ |
sync_obj = driver->sync_obj_ref(bo->sync_obj); |
spin_unlock(&bdev->fence_lock); |
|
__ttm_bo_unreserve(bo); |
long lret; |
ww_mutex_unlock(&bo->resv->lock); |
spin_unlock(&glob->lru_lock); |
|
ret = driver->sync_obj_wait(sync_obj, false, interruptible); |
driver->sync_obj_unref(&sync_obj); |
if (ret) |
return ret; |
lret = reservation_object_wait_timeout_rcu(bo->resv, |
true, |
interruptible, |
30 * HZ); |
|
/* |
* remove sync_obj with ttm_bo_wait, the wait should be |
* finished, and no new wait object should have been added. |
*/ |
spin_lock(&bdev->fence_lock); |
ret = ttm_bo_wait(bo, false, false, true); |
WARN_ON(ret); |
spin_unlock(&bdev->fence_lock); |
if (ret) |
return ret; |
if (lret < 0) |
return lret; |
else if (lret == 0) |
return -EBUSY; |
|
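/* |
 * The blocking wait above dropped both the reservation and the LRU |
 * lock; retake them before continuing with the teardown. |
 */ |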
spin_lock(&glob->lru_lock); |
ret = __ttm_bo_reserve(bo, false, true, false, NULL); |
469,9 → 459,15 |
spin_unlock(&glob->lru_lock); |
return 0; |
} |
} else |
spin_unlock(&bdev->fence_lock); |
|
/* |
* remove the fences with ttm_bo_wait(); the wait should be |
* finished, and no new fences should have been added. |
*/ |
ret = ttm_bo_wait(bo, false, false, true); |
WARN_ON(ret); |
} |
|
if (ret || unlikely(list_empty(&bo->ddestroy))) { |
__ttm_bo_unreserve(bo); |
spin_unlock(&glob->lru_lock); |
601,7 → 597,7 |
*/ |
static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo, |
uint32_t mem_type, |
struct ttm_placement *placement, |
const struct ttm_place *place, |
struct ttm_mem_reg *mem, |
bool interruptible, |
bool no_wait_gpu) |
611,7 → 607,7 |
int ret; |
|
do { |
ret = (*man->func->get_node)(man, bo, placement, 0, mem); |
ret = (*man->func->get_node)(man, bo, place, mem); |
if (unlikely(ret != 0)) |
return ret; |
if (mem->mm_node) |
654,18 → 650,18 |
|
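/* |
 * Check whether a memory type can back the given placement, and compute |
 * the resulting placement flags. |
 */ |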
static bool ttm_bo_mt_compatible(struct ttm_mem_type_manager *man, |
uint32_t mem_type, |
uint32_t proposed_placement, |
const struct ttm_place *place, |
uint32_t *masked_placement) |
{ |
uint32_t cur_flags = ttm_bo_type_flags(mem_type); |
|
if ((cur_flags & proposed_placement & TTM_PL_MASK_MEM) == 0) |
if ((cur_flags & place->flags & TTM_PL_MASK_MEM) == 0) |
return false; |
|
if ((proposed_placement & man->available_caching) == 0) |
if ((place->flags & man->available_caching) == 0) |
return false; |
|
cur_flags |= (proposed_placement & man->available_caching); |
cur_flags |= (place->flags & man->available_caching); |
|
*masked_placement = cur_flags; |
return true; |
696,15 → 692,14 |
|
mem->mm_node = NULL; |
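/* First try each preferred placement in order; the busy placements |
 * (which may evict) are handled in the second loop below. */ |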
for (i = 0; i < placement->num_placement; ++i) { |
ret = ttm_mem_type_from_flags(placement->placement[i], |
&mem_type); |
const struct ttm_place *place = &placement->placement[i]; |
|
ret = ttm_mem_type_from_place(place, &mem_type); |
if (ret) |
return ret; |
man = &bdev->man[mem_type]; |
|
type_ok = ttm_bo_mt_compatible(man, |
mem_type, |
placement->placement[i], |
type_ok = ttm_bo_mt_compatible(man, mem_type, place, |
&cur_flags); |
|
if (!type_ok) |
716,7 → 711,7 |
* Use the access and other non-mapping-related flag bits from |
* the memory placement flags to the current flags |
*/ |
ttm_flag_masked(&cur_flags, placement->placement[i], |
ttm_flag_masked(&cur_flags, place->flags, |
~TTM_PL_MASK_MEMTYPE); |
|
if (mem_type == TTM_PL_SYSTEM) |
724,8 → 719,7 |
|
if (man->has_type && man->use_type) { |
type_found = true; |
ret = (*man->func->get_node)(man, bo, placement, |
cur_flags, mem); |
ret = (*man->func->get_node)(man, bo, place, mem); |
if (unlikely(ret)) |
return ret; |
} |
743,17 → 737,15 |
return -EINVAL; |
|
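/* Second pass: none of the preferred placements fit; try the busy |
 * placements and force space by evicting if necessary. */ |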
for (i = 0; i < placement->num_busy_placement; ++i) { |
ret = ttm_mem_type_from_flags(placement->busy_placement[i], |
&mem_type); |
const struct ttm_place *place = &placement->busy_placement[i]; |
|
ret = ttm_mem_type_from_place(place, &mem_type); |
if (ret) |
return ret; |
man = &bdev->man[mem_type]; |
if (!man->has_type) |
continue; |
if (!ttm_bo_mt_compatible(man, |
mem_type, |
placement->busy_placement[i], |
&cur_flags)) |
if (!ttm_bo_mt_compatible(man, mem_type, place, &cur_flags)) |
continue; |
|
cur_flags = ttm_bo_select_caching(man, bo->mem.placement, |
762,7 → 754,7 |
* Use the access and other non-mapping-related flag bits from |
* the memory placement flags to the current flags |
*/ |
ttm_flag_masked(&cur_flags, placement->busy_placement[i], |
ttm_flag_masked(&cur_flags, place->flags, |
~TTM_PL_MASK_MEMTYPE); |
|
if (mem_type == TTM_PL_SYSTEM) { |
772,7 → 764,7 |
return 0; |
} |
|
ret = ttm_bo_mem_force_space(bo, mem_type, placement, mem, |
ret = ttm_bo_mem_force_space(bo, mem_type, place, mem, |
interruptible, no_wait_gpu); |
if (ret == 0 && mem->mm_node) { |
mem->placement = cur_flags; |
793,7 → 785,6 |
{ |
int ret = 0; |
struct ttm_mem_reg mem; |
struct ttm_bo_device *bdev = bo->bdev; |
|
lockdep_assert_held(&bo->resv->lock.base); |
|
802,9 → 793,7 |
* Have the driver move function wait for idle when necessary, |
* instead of doing it here. |
*/ |
spin_lock(&bdev->fence_lock); |
ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu); |
spin_unlock(&bdev->fence_lock); |
if (ret) |
return ret; |
mem.num_pages = bo->num_pages; |
833,13 → 822,14 |
{ |
int i; |
|
if (mem->mm_node && placement->lpfn != 0 && |
(mem->start < placement->fpfn || |
mem->start + mem->num_pages > placement->lpfn)) |
return false; |
for (i = 0; i < placement->num_placement; i++) { |
const struct ttm_place *heap = &placement->placement[i]; |
if (mem->mm_node && |
(mem->start < heap->fpfn || |
(heap->lpfn != 0 && (mem->start + mem->num_pages) > heap->lpfn))) |
continue; |
|
for (i = 0; i < placement->num_placement; i++) { |
*new_flags = placement->placement[i]; |
*new_flags = heap->flags; |
if ((*new_flags & mem->placement & TTM_PL_MASK_CACHING) && |
(*new_flags & mem->placement & TTM_PL_MASK_MEM)) |
return true; |
846,7 → 836,13 |
} |
|
for (i = 0; i < placement->num_busy_placement; i++) { |
*new_flags = placement->busy_placement[i]; |
const struct ttm_place *heap = &placement->busy_placement[i]; |
if (mem->mm_node && |
(mem->start < heap->fpfn || |
(heap->lpfn != 0 && (mem->start + mem->num_pages) > heap->lpfn))) |
continue; |
|
*new_flags = heap->flags; |
if ((*new_flags & mem->placement & TTM_PL_MASK_CACHING) && |
(*new_flags & mem->placement & TTM_PL_MASK_MEM)) |
return true; |
864,11 → 860,6 |
uint32_t new_flags; |
|
lockdep_assert_held(&bo->resv->lock.base); |
/* Check that range is valid */ |
if (placement->lpfn || placement->fpfn) |
if (placement->fpfn > placement->lpfn || |
(placement->lpfn - placement->fpfn) < bo->num_pages) |
return -EINVAL; |
/* |
* Check whether we need to move buffer. |
*/ |
897,15 → 888,6 |
} |
EXPORT_SYMBOL(ttm_bo_validate); |
|
int ttm_bo_check_placement(struct ttm_buffer_object *bo, |
struct ttm_placement *placement) |
{ |
BUG_ON((placement->fpfn || placement->lpfn) && |
(bo->mem.num_pages > (placement->lpfn - placement->fpfn))); |
|
return 0; |
} |
|
int ttm_bo_init(struct ttm_bo_device *bdev, |
struct ttm_buffer_object *bo, |
unsigned long size, |
916,6 → 898,7 |
struct file *persistent_swap_storage, |
size_t acc_size, |
struct sg_table *sg, |
struct reservation_object *resv, |
void (*destroy) (struct ttm_buffer_object *)) |
{ |
int ret = 0; |
957,29 → 940,37 |
bo->persistent_swap_storage = persistent_swap_storage; |
bo->acc_size = acc_size; |
bo->sg = sg; |
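/* Use the caller-provided reservation object if one was passed in, |
 * otherwise initialize and use the one embedded in the BO. */ |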
if (resv) { |
bo->resv = resv; |
lockdep_assert_held(&bo->resv->lock.base); |
} else { |
bo->resv = &bo->ttm_resv; |
reservation_object_init(bo->resv); |
reservation_object_init(&bo->ttm_resv); |
} |
atomic_inc(&bo->glob->bo_count); |
drm_vma_node_reset(&bo->vma_node); |
|
ret = ttm_bo_check_placement(bo, placement); |
|
/* |
* For ttm_bo_type_device buffers, allocate |
* address space from the device. |
*/ |
if (likely(!ret) && |
(bo->type == ttm_bo_type_device || |
bo->type == ttm_bo_type_sg)) |
if (bo->type == ttm_bo_type_device || |
bo->type == ttm_bo_type_sg) |
ret = drm_vma_offset_add(&bdev->vma_manager, &bo->vma_node, |
bo->mem.num_pages); |
|
/* passed reservation objects should already be locked, |
* since otherwise lockdep will be angered in radeon. |
*/ |
if (!resv) { |
locked = ww_mutex_trylock(&bo->resv->lock); |
WARN_ON(!locked); |
} |
|
if (likely(!ret)) |
ret = ttm_bo_validate(bo, placement, interruptible, false); |
|
if (!resv) |
ttm_bo_unreserve(bo); |
|
if (unlikely(ret)) |
1118,7 → 1109,6 |
bdev->glob = glob; |
bdev->need_dma32 = need_dma32; |
bdev->val_seq = 0; |
spin_lock_init(&bdev->fence_lock); |
mutex_lock(&glob->device_list_mutex); |
list_add_tail(&bdev->device_list, &glob->device_list); |
mutex_unlock(&glob->device_list_mutex); |
1171,59 → 1161,52 |
|
EXPORT_SYMBOL(ttm_bo_unmap_virtual); |
|
|
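/* |
 * Wait for the exclusive fence and all shared fences attached to the |
 * BO's reservation object, with a 15 second timeout, replacing the old |
 * driver sync_obj based wait. |
 */ |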
int ttm_bo_wait(struct ttm_buffer_object *bo, |
bool lazy, bool interruptible, bool no_wait) |
{ |
struct ttm_bo_driver *driver = bo->bdev->driver; |
struct ttm_bo_device *bdev = bo->bdev; |
void *sync_obj; |
int ret = 0; |
struct reservation_object_list *fobj; |
struct reservation_object *resv; |
struct fence *excl; |
long timeout = 15 * HZ; |
int i; |
|
if (likely(bo->sync_obj == NULL)) |
return 0; |
resv = bo->resv; |
fobj = reservation_object_get_list(resv); |
excl = reservation_object_get_excl(resv); |
if (excl) { |
if (!fence_is_signaled(excl)) { |
if (no_wait) |
return -EBUSY; |
|
while (bo->sync_obj) { |
|
if (driver->sync_obj_signaled(bo->sync_obj)) { |
void *tmp_obj = bo->sync_obj; |
bo->sync_obj = NULL; |
clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); |
spin_unlock(&bdev->fence_lock); |
driver->sync_obj_unref(&tmp_obj); |
spin_lock(&bdev->fence_lock); |
continue; |
timeout = fence_wait_timeout(excl, |
interruptible, timeout); |
} |
} |
|
for (i = 0; fobj && timeout > 0 && i < fobj->shared_count; ++i) { |
struct fence *fence; |
fence = rcu_dereference_protected(fobj->shared[i], |
reservation_object_held(resv)); |
|
if (!fence_is_signaled(fence)) { |
if (no_wait) |
return -EBUSY; |
|
sync_obj = driver->sync_obj_ref(bo->sync_obj); |
spin_unlock(&bdev->fence_lock); |
ret = driver->sync_obj_wait(sync_obj, |
lazy, interruptible); |
if (unlikely(ret != 0)) { |
driver->sync_obj_unref(&sync_obj); |
spin_lock(&bdev->fence_lock); |
return ret; |
timeout = fence_wait_timeout(fence, |
interruptible, timeout); |
} |
spin_lock(&bdev->fence_lock); |
if (likely(bo->sync_obj == sync_obj)) { |
void *tmp_obj = bo->sync_obj; |
bo->sync_obj = NULL; |
clear_bit(TTM_BO_PRIV_FLAG_MOVING, |
&bo->priv_flags); |
spin_unlock(&bdev->fence_lock); |
driver->sync_obj_unref(&sync_obj); |
driver->sync_obj_unref(&tmp_obj); |
spin_lock(&bdev->fence_lock); |
} else { |
spin_unlock(&bdev->fence_lock); |
driver->sync_obj_unref(&sync_obj); |
spin_lock(&bdev->fence_lock); |
} |
} |
|
if (timeout < 0) |
return timeout; |
|
if (timeout == 0) |
return -EBUSY; |
|
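/* All fences signaled in time: detach them from the reservation object |
 * and clear the moving flag. */ |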
reservation_object_add_excl_fence(resv, NULL); |
clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags); |
return 0; |
} |
EXPORT_SYMBOL(ttm_bo_wait); |
|
|