@@ -84,21 +84,18 @@
 	struct drm_device *ddev = p->rdev->ddev;
 	struct radeon_cs_chunk *chunk;
 	struct radeon_cs_buckets buckets;
-	unsigned i, j;
-	bool duplicate;
+	unsigned i;
+	bool need_mmap_lock = false;
+	int r;
 
-	if (p->chunk_relocs_idx == -1) {
+	if (p->chunk_relocs == NULL) {
 		return 0;
 	}
-	chunk = &p->chunks[p->chunk_relocs_idx];
+	chunk = p->chunk_relocs;
 	p->dma_reloc_idx = 0;
 	/* FIXME: we assume that each relocs use 4 dwords */
 	p->nrelocs = chunk->length_dw / 4;
-	p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
-	if (p->relocs_ptr == NULL) {
-		return -ENOMEM;
-	}
-	p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
+	p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_bo_list), GFP_KERNEL);
	if (p->relocs == NULL) {
		return -ENOMEM;
	}
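
(Aside: the "4 dwords per reloc" assumption in the FIXME above matches the UAPI record format; every entry in the RADEON_CHUNK_ID_RELOCS chunk is one of these. The struct is from include/uapi/drm/radeon_drm.h; the field comments here are editorial.)

	/* One relocation record as submitted by userspace; p->nrelocs is
	 * length_dw / 4 because this struct is exactly four dwords. */
	struct drm_radeon_cs_reloc {
		uint32_t	handle;		/* GEM handle of the buffer */
		uint32_t	read_domains;	/* RADEON_GEM_DOMAIN_* read from */
		uint32_t	write_domain;	/* RADEON_GEM_DOMAIN_* written to */
		uint32_t	flags;		/* relocation priority in the low bits */
	};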
@@ -107,31 +104,17 @@
 
 	for (i = 0; i < p->nrelocs; i++) {
 		struct drm_radeon_cs_reloc *r;
+		struct drm_gem_object *gobj;
 		unsigned priority;
 
-		duplicate = false;
 		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
-		for (j = 0; j < i; j++) {
-			if (r->handle == p->relocs[j].handle) {
-				p->relocs_ptr[i] = &p->relocs[j];
-				duplicate = true;
-				break;
-			}
-		}
-		if (duplicate) {
-			p->relocs[i].handle = 0;
-			continue;
-		}
-
-		p->relocs[i].gobj = drm_gem_object_lookup(ddev, p->filp,
-							  r->handle);
-		if (p->relocs[i].gobj == NULL) {
+		gobj = drm_gem_object_lookup(ddev, p->filp, r->handle);
+		if (gobj == NULL) {
 			DRM_ERROR("gem object lookup failed 0x%x\n",
 				  r->handle);
 			return -ENOENT;
 		}
-		p->relocs_ptr[i] = &p->relocs[i];
-		p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
+		p->relocs[i].robj = gem_to_radeon_bo(gobj);
 
 		/* The userspace buffer priorities are from 0 to 15. A higher
 		 * number means the buffer is more important.
@@ -143,10 +126,13 @@
 			   + !!r->write_domain;
 
 		/* the first reloc of an UVD job is the msg and that must be in
-		   VRAM, also put everything into VRAM on AGP cards to avoid
-		   image corruptions */
+		   VRAM, also put everything into VRAM on AGP cards and older
+		   IGP chips to avoid image corruptions */
 		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
-		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev))) {
+		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
+		     p->rdev->family == CHIP_RS780 ||
+		     p->rdev->family == CHIP_RS880)) {
+
 			/* TODO: is this still needed for NI+ ? */
 			p->relocs[i].prefered_domains =
 				RADEON_GEM_DOMAIN_VRAM;
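
(Aside: the first half of the priority computation sits just outside this hunk; the "+ !!r->write_domain;" context line at its top completes it. A worked sketch of the mapping, not part of the patch:)

	/* Userspace priority 0..15 lives in the low bits of r->flags
	 * (RADEON_RELOC_PRIO_MASK); doubling it and adding the write bit
	 * ranks writers one step above readers of the same priority:
	 *
	 *   priority = (r->flags & RADEON_RELOC_PRIO_MASK) * 2
	 *              + !!r->write_domain;
	 *
	 *   prio 15, write -> 31   (most important bucket)
	 *   prio 15, read  -> 30
	 *   prio  0, read  ->  0   (least important)
	 */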
@@ -171,9 +157,22 @@
 			domain |= RADEON_GEM_DOMAIN_GTT;
 			p->relocs[i].allowed_domains = domain;
 		}
 
+		if (radeon_ttm_tt_has_userptr(p->relocs[i].robj->tbo.ttm)) {
+			uint32_t domain = p->relocs[i].prefered_domains;
+			if (!(domain & RADEON_GEM_DOMAIN_GTT)) {
+				DRM_ERROR("Only RADEON_GEM_DOMAIN_GTT is "
+					  "allowed for userptr BOs\n");
+				return -EINVAL;
+			}
+			need_mmap_lock = true;
+			domain = RADEON_GEM_DOMAIN_GTT;
+			p->relocs[i].prefered_domains = domain;
+			p->relocs[i].allowed_domains = domain;
+		}
+
 		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
-		p->relocs[i].handle = r->handle;
+		p->relocs[i].tv.shared = !r->write_domain;
 
 		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
 				      priority);
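
(Aside: the userptr block added above means such BOs can only ever be validated into GTT. For context, a minimal userspace sketch of creating one; it assumes the RADEON_GEM_USERPTR ioctl of this kernel generation, radeon_userptr_bo is a hypothetical helper, and error handling is elided:)

	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <drm/radeon_drm.h>

	/* Wrap an anonymous, page-aligned allocation as a GEM BO; CS
	 * validation (the check above) keeps it in RADEON_GEM_DOMAIN_GTT. */
	static uint32_t radeon_userptr_bo(int fd, void *ptr, uint64_t size)
	{
		struct drm_radeon_gem_userptr args;

		memset(&args, 0, sizeof(args));
		args.addr = (uintptr_t)ptr;
		args.size = size;	/* must be page aligned */
		args.flags = RADEON_GEM_USERPTR_ANONONLY |
			     RADEON_GEM_USERPTR_REGISTER |
			     RADEON_GEM_USERPTR_VALIDATE;
		if (ioctl(fd, DRM_IOCTL_RADEON_GEM_USERPTR, &args))
			return 0;	/* 0 = creation failed */
		return args.handle;
	}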
@@ -184,8 +183,15 @@
 	if (p->cs_flags & RADEON_CS_USE_VM)
 		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
 					      &p->validated);
+	if (need_mmap_lock)
+		down_read(&current->mm->mmap_sem);
 
-	return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
+	r = radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
+
+	if (need_mmap_lock)
+		up_read(&current->mm->mmap_sem);
+
+	return r;
 }
 
 static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
@@ -231,17 +237,21 @@
 	return 0;
 }
 
-static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
+static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
 {
-	int i;
+	struct radeon_bo_list *reloc;
+	int r;
 
-	for (i = 0; i < p->nrelocs; i++) {
-		if (!p->relocs[i].robj)
-			continue;
+	list_for_each_entry(reloc, &p->validated, tv.head) {
+		struct reservation_object *resv;
 
-		radeon_semaphore_sync_to(p->ib.semaphore,
-					 p->relocs[i].robj->tbo.sync_obj);
+		resv = reloc->robj->tbo.resv;
+		r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
+				     reloc->tv.shared);
+		if (r)
+			return r;
 	}
+	return 0;
 }
 
 /* XXX: note that this is called from the legacy UMS CS ioctl as well */
@@ -260,13 +270,11 @@
 	INIT_LIST_HEAD(&p->validated);
 	p->idx = 0;
 	p->ib.sa_bo = NULL;
-	p->ib.semaphore = NULL;
 	p->const_ib.sa_bo = NULL;
-	p->const_ib.semaphore = NULL;
-	p->chunk_ib_idx = -1;
-	p->chunk_relocs_idx = -1;
-	p->chunk_flags_idx = -1;
-	p->chunk_const_ib_idx = -1;
+	p->chunk_ib = NULL;
+	p->chunk_relocs = NULL;
+	p->chunk_flags = NULL;
+	p->chunk_const_ib = NULL;
 	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
 	if (p->chunks_array == NULL) {
 		return -ENOMEM;
@@ -293,24 +301,23 @@
 			return -EFAULT;
 		}
 		p->chunks[i].length_dw = user_chunk.length_dw;
-		p->chunks[i].chunk_id = user_chunk.chunk_id;
-		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) {
-			p->chunk_relocs_idx = i;
+		if (user_chunk.chunk_id == RADEON_CHUNK_ID_RELOCS) {
+			p->chunk_relocs = &p->chunks[i];
 		}
-		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
-			p->chunk_ib_idx = i;
+		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
+			p->chunk_ib = &p->chunks[i];
 			/* zero length IB isn't useful */
 			if (p->chunks[i].length_dw == 0)
 				return -EINVAL;
 		}
-		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB) {
-			p->chunk_const_ib_idx = i;
+		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB) {
+			p->chunk_const_ib = &p->chunks[i];
 			/* zero length CONST IB isn't useful */
 			if (p->chunks[i].length_dw == 0)
 				return -EINVAL;
 		}
-		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
-			p->chunk_flags_idx = i;
+		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
+			p->chunk_flags = &p->chunks[i];
 			/* zero length flags aren't useful */
 			if (p->chunks[i].length_dw == 0)
 				return -EINVAL;
@@ -319,10 +326,10 @@
 		size = p->chunks[i].length_dw;
 		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
 		p->chunks[i].user_ptr = cdata;
-		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB)
+		if (user_chunk.chunk_id == RADEON_CHUNK_ID_CONST_IB)
 			continue;
 
-		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
+		if (user_chunk.chunk_id == RADEON_CHUNK_ID_IB) {
 			if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
 				continue;
 		}
@@ -335,7 +342,7 @@
 		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
 			return -EFAULT;
 		}
-		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
+		if (user_chunk.chunk_id == RADEON_CHUNK_ID_FLAGS) {
 			p->cs_flags = p->chunks[i].kdata[0];
 			if (p->chunks[i].length_dw > 1)
 				ring = p->chunks[i].kdata[1];
@@ -376,8 +383,8 @@
 static int cmp_size_smaller_first(void *priv, struct list_head *a,
 				  struct list_head *b)
 {
-	struct radeon_cs_reloc *la = list_entry(a, struct radeon_cs_reloc, tv.head);
-	struct radeon_cs_reloc *lb = list_entry(b, struct radeon_cs_reloc, tv.head);
+	struct radeon_bo_list *la = list_entry(a, struct radeon_bo_list, tv.head);
+	struct radeon_bo_list *lb = list_entry(b, struct radeon_bo_list, tv.head);
 
 	/* Sort A before B if A is smaller. */
 	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
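
(Aside: this comparator is handed to list_sort() on the validated list in radeon_cs_parser_fini(), so the smallest BOs end up first and become the preferred eviction victims; invocation sketch:)

	/* Order parser->validated from smallest to largest buffer before
	 * fencing; this shapes the LRU order for the next eviction pass. */
	list_sort(NULL, &parser->validated, cmp_size_smaller_first);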
@@ -410,7 +417,7 @@
 
 		ttm_eu_fence_buffer_objects(&parser->ticket,
 					    &parser->validated,
-					    parser->ib.fence);
+					    &parser->ib.fence->base);
 	} else if (backoff) {
 		ttm_eu_backoff_reservation(&parser->ticket,
 					   &parser->validated);
@@ -418,14 +425,16 @@
 
 	if (parser->relocs != NULL) {
 		for (i = 0; i < parser->nrelocs; i++) {
-			if (parser->relocs[i].gobj)
-				drm_gem_object_unreference_unlocked(parser->relocs[i].gobj);
+			struct radeon_bo *bo = parser->relocs[i].robj;
+			if (bo == NULL)
+				continue;
+
+			drm_gem_object_unreference_unlocked(&bo->gem_base);
 		}
 	}
 	kfree(parser->track);
 	kfree(parser->relocs);
-	kfree(parser->relocs_ptr);
-	kfree(parser->vm_bos);
+	drm_free_large(parser->vm_bos);
 	for (i = 0; i < parser->nchunks; i++)
 		drm_free_large(parser->chunks[i].kdata);
 	kfree(parser->chunks);
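
(Aside: the kfree() -> drm_free_large() change pairs the free with the allocator: radeon_vm_get_bos() obtains the array via drm_malloc_ab(), which can fall back to vmalloc for large counts. A sketch of the required pairing, assuming the DRM memory helpers of this era:)

	/* drm_malloc_ab() may return vmalloc'ed memory for big arrays, so
	 * it must be released with drm_free_large(), never plain kfree(). */
	struct radeon_bo_list *list;

	list = drm_malloc_ab(count, sizeof(struct radeon_bo_list));
	if (!list)
		return NULL;
	/* ... fill and use the list ... */
	drm_free_large(list);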
@@ -439,7 +448,7 @@
 {
 	int r;
 
-	if (parser->chunk_ib_idx == -1)
+	if (parser->chunk_ib == NULL)
 		return 0;
 
 	if (parser->cs_flags & RADEON_CS_USE_VM)
@@ -451,6 +460,13 @@
 		return r;
 	}
 
+	r = radeon_cs_sync_rings(parser);
+	if (r) {
+		if (r != -ERESTARTSYS)
+			DRM_ERROR("Failed to sync rings: %i\n", r);
+		return r;
+	}
+
 	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
 		radeon_uvd_note_usage(rdev);
 	else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
@@ -457,7 +473,6 @@
 		 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
 		radeon_vce_note_usage(rdev);
 
-	radeon_cs_sync_rings(parser);
 	r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
 	if (r) {
 		DRM_ERROR("Failed to schedule IB !\n");
@@ -493,10 +508,6 @@
 	for (i = 0; i < p->nrelocs; i++) {
 		struct radeon_bo *bo;
 
-		/* ignore duplicates */
-		if (p->relocs_ptr[i] != &p->relocs[i])
-			continue;
-
 		bo = p->relocs[i].robj;
 		bo_va = radeon_vm_bo_find(vm, bo);
 		if (bo_va == NULL) {
@@ -507,6 +518,8 @@
 		r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
 		if (r)
 			return r;
+
+		radeon_sync_fence(&p->ib.sync, bo_va->last_pt_update);
 	}
 
 	return radeon_vm_clear_invalids(rdev, vm);
@@ -519,7 +532,7 @@
 	struct radeon_vm *vm = &fpriv->vm;
 	int r;
 
-	if (parser->chunk_ib_idx == -1)
+	if (parser->chunk_ib == NULL)
 		return 0;
 	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
 		return 0;
@@ -544,11 +557,16 @@
 	if (r) {
 		goto out;
 	}
-	radeon_cs_sync_rings(parser);
-	radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence);
+
+	r = radeon_cs_sync_rings(parser);
+	if (r) {
+		if (r != -ERESTARTSYS)
+			DRM_ERROR("Failed to sync rings: %i\n", r);
+		goto out;
+	}
 
 	if ((rdev->family >= CHIP_TAHITI) &&
-	    (parser->chunk_const_ib_idx != -1)) {
+	    (parser->chunk_const_ib != NULL)) {
 		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib, true);
 	} else {
 		r = radeon_ib_schedule(rdev, &parser->ib, NULL, true);
@@ -575,7 +593,7 @@
 	struct radeon_vm *vm = NULL;
 	int r;
 
-	if (parser->chunk_ib_idx == -1)
+	if (parser->chunk_ib == NULL)
 		return 0;
 
 	if (parser->cs_flags & RADEON_CS_USE_VM) {
@@ -583,8 +601,8 @@
 		vm = &fpriv->vm;
 
 		if ((rdev->family >= CHIP_TAHITI) &&
-		    (parser->chunk_const_ib_idx != -1)) {
-			ib_chunk = &parser->chunks[parser->chunk_const_ib_idx];
+		    (parser->chunk_const_ib != NULL)) {
+			ib_chunk = parser->chunk_const_ib;
 			if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
 				DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
 				return -EINVAL;
@@ -603,13 +621,13 @@
 			return -EFAULT;
 		}
 
-		ib_chunk = &parser->chunks[parser->chunk_ib_idx];
+		ib_chunk = parser->chunk_ib;
 		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
 			DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
 			return -EINVAL;
 		}
 	}
-	ib_chunk = &parser->chunks[parser->chunk_ib_idx];
+	ib_chunk = parser->chunk_ib;
 
 	r = radeon_ib_get(rdev, parser->ring, &parser->ib,
 			  vm, ib_chunk->length_dw * 4);
@@ -694,7 +712,7 @@
 			     struct radeon_cs_packet *pkt,
 			     unsigned idx)
 {
-	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
+	struct radeon_cs_chunk *ib_chunk = p->chunk_ib;
 	struct radeon_device *rdev = p->rdev;
 	uint32_t header;
 
@@ -788,7 +806,7 @@
  * GPU offset using the provided start.
  **/
 int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
-				struct radeon_cs_reloc **cs_reloc,
+				struct radeon_bo_list **cs_reloc,
 				int nomm)
 {
 	struct radeon_cs_chunk *relocs_chunk;
@@ -796,12 +814,12 @@
 	unsigned idx;
 	int r;
 
-	if (p->chunk_relocs_idx == -1) {
+	if (p->chunk_relocs == NULL) {
 		DRM_ERROR("No relocation chunk !\n");
 		return -EINVAL;
 	}
 	*cs_reloc = NULL;
-	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+	relocs_chunk = p->chunk_relocs;
 	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
 	if (r)
 		return r;
@@ -827,6 +845,6 @@
 			  (u64)relocs_chunk->kdata[idx + 3] << 32;
 		(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
 	} else
-		*cs_reloc = p->relocs_ptr[(idx / 4)];
+		*cs_reloc = &p->relocs[(idx / 4)];
 	return 0;
 }
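
(Aside: because each relocation record is four dwords, the dword offset read from the packet maps to an array slot as idx / 4. Worked example:)

	/* A reloc packet carrying dword offset 8 into the relocation chunk
	 * names the third record:
	 *   idx = 8  ->  idx / 4 = 2  ->  *cs_reloc = &p->relocs[2]
	 */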