/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon.h"
#include "radeon_trace.h"
 
/*
 * GPUVM
 * GPUVM is similar to the legacy gart on older asics, however
 * rather than there being a single global gart table
 * for the entire GPU, there are multiple VM page tables active
 * at any given time.  The VM page tables can contain a mix of
 * vram pages and system memory pages, and the system memory pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID.  When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer.  VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
 * command buffers and a VMID is assigned.
 * Cayman/Trinity support up to 8 active VMs at any given time;
 * SI supports 16.
 */

/**
 * radeon_vm_num_pdes - return the number of page directory entries
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the number of page directory entries (cayman+).
 */
static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
{
	return rdev->vm_manager.max_pfn >> radeon_vm_block_size;
}

/**
 * radeon_vm_directory_size - returns the size of the page directory in bytes
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the size of the page directory in bytes (cayman+).
 */
static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
{
	return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
}
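
/*
 * Added worked example (illustrative figures, not tied to a specific asic):
 * with vm_manager.max_pfn = 1 << 20 (a 4 GiB address space in 4 KiB pages)
 * and radeon_vm_block_size = 9 (512 PTEs per page table), the helpers above
 * give radeon_vm_num_pdes() = (1 << 20) >> 9 = 2048 directory entries and
 * radeon_vm_directory_size() = 2048 * 8 bytes = 16 KiB (GPU page aligned).
 */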

/**
 * radeon_vm_manager_init - init the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Init the vm manager (cayman+).
 * Returns 0 for success, error for failure.
 */
int radeon_vm_manager_init(struct radeon_device *rdev)
{
	int r;

	if (!rdev->vm_manager.enabled) {
		r = radeon_asic_vm_init(rdev);
		if (r)
			return r;

		rdev->vm_manager.enabled = true;
	}
	return 0;
}

/**
 * radeon_vm_manager_fini - tear down the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the VM manager (cayman+).
 */
void radeon_vm_manager_fini(struct radeon_device *rdev)
{
	int i;

	if (!rdev->vm_manager.enabled)
		return;

	for (i = 0; i < RADEON_NUM_VM; ++i)
		radeon_fence_unref(&rdev->vm_manager.active[i]);
	radeon_asic_vm_fini(rdev);
	rdev->vm_manager.enabled = false;
}

/**
 * radeon_vm_get_bos - add the vm BOs to a validation list
 *
 * @rdev: radeon_device pointer
 * @vm: vm providing the BOs
 * @head: head of validation list
 *
 * Add the page directory to the list of BOs to
 * validate for command submission (cayman+).
 */
struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
					  struct radeon_vm *vm,
					  struct list_head *head)
{
	struct radeon_cs_reloc *list;
	unsigned i, idx;

	list = kmalloc_array(vm->max_pde_used + 2,
			     sizeof(struct radeon_cs_reloc), GFP_KERNEL);
	if (!list)
		return NULL;

	/* add the vm page table to the list */
	list[0].gobj = NULL;
	list[0].robj = vm->page_directory;
	list[0].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
	list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
	list[0].tv.bo = &vm->page_directory->tbo;
	list[0].tiling_flags = 0;
	list[0].handle = 0;
	list_add(&list[0].tv.head, head);

	for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
		if (!vm->page_tables[i].bo)
			continue;

		list[idx].gobj = NULL;
		list[idx].robj = vm->page_tables[i].bo;
		list[idx].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
		list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
		list[idx].tv.bo = &list[idx].robj->tbo;
		list[idx].tiling_flags = 0;
		list[idx].handle = 0;
		list_add(&list[idx++].tv.head, head);
	}

	return list;
}

/**
 * radeon_vm_grab_id - allocate the next free VMID
 *
 * @rdev: radeon_device pointer
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 *
 * Allocate an id for the vm (cayman+).
 * Returns the fence we need to sync to (if any).
 *
 * Global and local mutex must be locked!
 */
struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
				       struct radeon_vm *vm, int ring)
{
	struct radeon_fence *best[RADEON_NUM_RINGS] = {};
	unsigned choices[2] = {};
	unsigned i;

	/* check if the id is still valid */
	if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id])
		return NULL;

	/* we definitely need to flush */
	radeon_fence_unref(&vm->last_flush);

	/* skip over VMID 0, since it is the system VM */
	for (i = 1; i < rdev->vm_manager.nvm; ++i) {
		struct radeon_fence *fence = rdev->vm_manager.active[i];

		if (fence == NULL) {
			/* found a free one */
			vm->id = i;
			trace_radeon_vm_grab_id(vm->id, ring);
			return NULL;
		}

		if (radeon_fence_is_earlier(fence, best[fence->ring])) {
			best[fence->ring] = fence;
			choices[fence->ring == ring ? 0 : 1] = i;
		}
	}

	for (i = 0; i < 2; ++i) {
		if (choices[i]) {
			vm->id = choices[i];
			trace_radeon_vm_grab_id(vm->id, ring);
			return rdev->vm_manager.active[choices[i]];
		}
	}

	/* should never happen */
	BUG();
	return NULL;
}
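
/*
 * Added commentary (not part of the original comments) on the heuristic
 * above: when no VMID is free, choices[0] is an id whose last use was
 * fenced on the submission ring, while choices[1] is an id last used on
 * some other ring.  Preferring choices[0] keeps the fence the caller has
 * to sync to on the same ring, which is cheaper than a cross-ring wait.
 */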

/**
 * radeon_vm_flush - hardware flush the vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm we want to flush
 * @ring: ring to use for flush
 *
 * Flush the vm (cayman+).
 *
 * Global and local mutex must be locked!
 */
void radeon_vm_flush(struct radeon_device *rdev,
		     struct radeon_vm *vm,
		     int ring)
{
	uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);

	/* if we can't remember our last VM flush then flush now! */
	if (!vm->last_flush || pd_addr != vm->pd_gpu_addr) {
		trace_radeon_vm_flush(pd_addr, ring, vm->id);
		vm->pd_gpu_addr = pd_addr;
		radeon_ring_vm_flush(rdev, ring, vm);
	}
}

/**
 * radeon_vm_fence - remember fence for vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm we want to fence
 * @fence: fence to remember
 *
 * Fence the vm (cayman+).
 * Set the fence used to protect page table and id.
 *
 * Global and local mutex must be locked!
 */
void radeon_vm_fence(struct radeon_device *rdev,
		     struct radeon_vm *vm,
		     struct radeon_fence *fence)
{
	radeon_fence_unref(&vm->fence);
	vm->fence = radeon_fence_ref(fence);

	radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
	rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);

	radeon_fence_unref(&vm->last_id_use);
	vm->last_id_use = radeon_fence_ref(fence);

	/* we just flushed the VM, remember that */
	if (!vm->last_flush)
		vm->last_flush = radeon_fence_ref(fence);
}

/**
 * radeon_vm_bo_find - find the bo_va for a specific vm & bo
 *
 * @vm: requested vm
 * @bo: requested buffer object
 *
 * Find @bo inside the requested vm (cayman+).
 * Search inside the @bo's vm list for the requested vm.
 * Returns the found bo_va or NULL if none is found.
 *
 * Object has to be reserved!
 */
struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
				       struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, bo_list) {
		if (bo_va->vm == vm) {
			return bo_va;
		}
	}
	return NULL;
}

/**
 * radeon_vm_bo_add - add a bo to a specific vm
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @bo: radeon buffer object
 *
 * Add @bo into the requested vm (cayman+).
 * Add @bo to the list of bos associated with the vm.
 * Returns the newly added bo_va or NULL for failure.
 *
 * Object has to be reserved!
 */
struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
				      struct radeon_vm *vm,
				      struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
	if (bo_va == NULL) {
		return NULL;
	}
	bo_va->vm = vm;
	bo_va->bo = bo;
	bo_va->it.start = 0;
	bo_va->it.last = 0;
	bo_va->flags = 0;
	bo_va->addr = 0;
	bo_va->ref_count = 1;
	INIT_LIST_HEAD(&bo_va->bo_list);
	INIT_LIST_HEAD(&bo_va->vm_status);

	mutex_lock(&vm->mutex);
	list_add_tail(&bo_va->bo_list, &bo->va);
	mutex_unlock(&vm->mutex);

	return bo_va;
}

/**
 * radeon_vm_set_pages - helper to call the right asic function
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the right asic functions
 * to setup the page table using the DMA.
 */
static void radeon_vm_set_pages(struct radeon_device *rdev,
				struct radeon_ib *ib,
				uint64_t pe,
				uint64_t addr, unsigned count,
				uint32_t incr, uint32_t flags)
{
	trace_radeon_vm_set_page(pe, addr, count, incr, flags);

	if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
		uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8;
		radeon_asic_vm_copy_pages(rdev, ib, pe, src, count);

	} else if ((flags & R600_PTE_SYSTEM) || (count < 3)) {
		radeon_asic_vm_write_pages(rdev, ib, pe, addr,
					   count, incr, flags);

	} else {
		radeon_asic_vm_set_pages(rdev, ib, pe, addr,
					 count, incr, flags);
	}
}
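
/*
 * Added commentary on the dispatch above: GART-backed mappings (all
 * R600_PTE_GART_MASK bits set) are copied straight out of the GART table
 * via the asic copy-pages hook, system pages and very small updates
 * (fewer than 3 entries) go through the write-pages hook, and everything
 * else uses the set-pages hook, which is the path that can also encode
 * fragment information.
 */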

/**
 * radeon_vm_clear_bo - initially clear the page dir/table
 *
 * @rdev: radeon_device pointer
 * @bo: bo to clear
 */
static int radeon_vm_clear_bo(struct radeon_device *rdev,
			      struct radeon_bo *bo)
{
	struct ttm_validate_buffer tv;
	struct ww_acquire_ctx ticket;
	struct list_head head;
	struct radeon_ib ib;
	unsigned entries;
	uint64_t addr;
	int r;

	memset(&tv, 0, sizeof(tv));
	tv.bo = &bo->tbo;

	INIT_LIST_HEAD(&head);
	list_add(&tv.head, &head);

	r = ttm_eu_reserve_buffers(&ticket, &head);
	if (r)
		return r;

	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
	if (r)
		goto error;

	addr = radeon_bo_gpu_offset(bo);
	entries = radeon_bo_size(bo) / 8;

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256);
	if (r)
		goto error;

	ib.length_dw = 0;

	radeon_vm_set_pages(rdev, &ib, addr, 0, entries, 0, 0);
	radeon_asic_vm_pad_ib(rdev, &ib);
	WARN_ON(ib.length_dw > 64);

	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r)
		goto error;

	ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence);
	radeon_ib_free(rdev, &ib);

	return 0;

error:
	ttm_eu_backoff_reservation(&ticket, &head);
	return r;
}

/**
 * radeon_vm_bo_set_addr - set the bo's virtual address inside a vm
 *
 * @rdev: radeon_device pointer
 * @bo_va: bo_va to store the address
 * @soffset: requested offset of the buffer in the VM address space
 * @flags: attributes of pages (read/write/valid/etc.)
 *
 * Set the offset of @bo_va (cayman+).
 * Validate and set the offset requested within the vm address space.
 * Returns 0 for success, error for failure.
 *
 * Object has to be reserved!
 */
int radeon_vm_bo_set_addr(struct radeon_device *rdev,
			  struct radeon_bo_va *bo_va,
			  uint64_t soffset,
			  uint32_t flags)
{
	uint64_t size = radeon_bo_size(bo_va->bo);
	struct radeon_vm *vm = bo_va->vm;
	unsigned last_pfn, pt_idx;
	uint64_t eoffset;
	int r;

	if (soffset) {
		/* make sure the object fits at this offset */
		eoffset = soffset + size;
		if (soffset >= eoffset) {
			return -EINVAL;
		}

		last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
		if (last_pfn > rdev->vm_manager.max_pfn) {
			dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
				last_pfn, rdev->vm_manager.max_pfn);
			return -EINVAL;
		}

	} else {
		eoffset = last_pfn = 0;
	}

	mutex_lock(&vm->mutex);
	if (bo_va->it.start || bo_va->it.last) {
		if (bo_va->addr) {
			/* add a clone of the bo_va to clear the old address */
			struct radeon_bo_va *tmp;
			tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
			if (!tmp) {
				mutex_unlock(&vm->mutex);
				return -ENOMEM;
			}
			tmp->it.start = bo_va->it.start;
			tmp->it.last = bo_va->it.last;
			tmp->vm = vm;
			tmp->addr = bo_va->addr;
			tmp->bo = radeon_bo_ref(bo_va->bo);
			list_add(&tmp->vm_status, &vm->freed);
		}

		interval_tree_remove(&bo_va->it, &vm->va);
		bo_va->it.start = 0;
		bo_va->it.last = 0;
	}

	soffset /= RADEON_GPU_PAGE_SIZE;
	eoffset /= RADEON_GPU_PAGE_SIZE;
	if (soffset || eoffset) {
		struct interval_tree_node *it;
		it = interval_tree_iter_first(&vm->va, soffset, eoffset - 1);
		if (it) {
			struct radeon_bo_va *tmp;
			tmp = container_of(it, struct radeon_bo_va, it);
			/* bo and tmp overlap, invalid offset */
			dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with "
				"(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo,
				soffset, tmp->bo, tmp->it.start, tmp->it.last);
			mutex_unlock(&vm->mutex);
			return -EINVAL;
		}
		bo_va->it.start = soffset;
		bo_va->it.last = eoffset - 1;
		interval_tree_insert(&bo_va->it, &vm->va);
	}

	bo_va->flags = flags;
	bo_va->addr = 0;

	soffset >>= radeon_vm_block_size;
	eoffset >>= radeon_vm_block_size;

	BUG_ON(eoffset >= radeon_vm_num_pdes(rdev));

	if (eoffset > vm->max_pde_used)
		vm->max_pde_used = eoffset;

	radeon_bo_unreserve(bo_va->bo);

	/* walk over the address space and allocate the page tables */
	for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) {
		struct radeon_bo *pt;

		if (vm->page_tables[pt_idx].bo)
			continue;

		/* drop mutex to allocate and clear page table */
		mutex_unlock(&vm->mutex);

		r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8,
				     RADEON_GPU_PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, 0, NULL, &pt);
		if (r)
			return r;

		r = radeon_vm_clear_bo(rdev, pt);
		if (r) {
			radeon_bo_unref(&pt);
			radeon_bo_reserve(bo_va->bo, false);
			return r;
		}

		/* acquire mutex again */
		mutex_lock(&vm->mutex);
		if (vm->page_tables[pt_idx].bo) {
			/* someone else allocated the pt in the meantime */
			mutex_unlock(&vm->mutex);
			radeon_bo_unref(&pt);
			mutex_lock(&vm->mutex);
			continue;
		}

		vm->page_tables[pt_idx].addr = 0;
		vm->page_tables[pt_idx].bo = pt;
	}

	mutex_unlock(&vm->mutex);
	return radeon_bo_reserve(bo_va->bo, false);
}

/**
 * radeon_vm_map_gart - get the physical address of a gart page
 *
 * @rdev: radeon_device pointer
 * @addr: the unmapped addr
 *
 * Look up the physical address of the page that the pte resolves
 * to (cayman+).
 * Returns the physical address of the page.
 */
uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
{
	uint64_t result;

	/* page table offset */
	result = rdev->gart.pages_addr[addr >> PAGE_SHIFT];

	/* in case cpu page size != gpu page size */
	result |= addr & (~PAGE_MASK);

	return result;
}
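
/*
 * Added example: assuming 4 KiB CPU pages, radeon_vm_map_gart(rdev,
 * 0x12345678) looks up pages_addr[0x12345] and ORs in the in-page offset
 * 0x678, so the returned address points at the same byte inside the
 * backing system page.
 */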

/**
 * radeon_vm_page_flags - translate page flags to what the hw uses
 *
 * @flags: flags coming from userspace
 *
 * Translate the flags the userspace ABI uses to hw flags.
 */
static uint32_t radeon_vm_page_flags(uint32_t flags)
{
	uint32_t hw_flags = 0;
	hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
	hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
	hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
	if (flags & RADEON_VM_PAGE_SYSTEM) {
		hw_flags |= R600_PTE_SYSTEM;
		hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
	}
	return hw_flags;
}
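
/*
 * Added example: a snooped GTT mapping requested as RADEON_VM_PAGE_VALID |
 * RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_SYSTEM | RADEON_VM_PAGE_SNOOPED
 * translates to R600_PTE_VALID | R600_PTE_READABLE | R600_PTE_SYSTEM |
 * R600_PTE_SNOOPED, while RADEON_VM_PAGE_SNOOPED on its own has no effect
 * because it is only honored together with RADEON_VM_PAGE_SYSTEM.
 */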

/**
 * radeon_vm_update_page_directory - make sure that the page directory is valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Allocates new page tables if necessary
 * and updates the page directory (cayman+).
 * Returns 0 for success, error for failure.
 *
 * Global and local mutex must be locked!
 */
int radeon_vm_update_page_directory(struct radeon_device *rdev,
				    struct radeon_vm *vm)
{
	struct radeon_bo *pd = vm->page_directory;
	uint64_t pd_addr = radeon_bo_gpu_offset(pd);
	uint32_t incr = RADEON_VM_PTE_COUNT * 8;
	uint64_t last_pde = ~0, last_pt = ~0;
	unsigned count = 0, pt_idx, ndw;
	struct radeon_ib ib;
	int r;

	/* padding, etc. */
	ndw = 64;

	/* assume the worst case */
	ndw += vm->max_pde_used * 6;

	/* update too big for an IB */
	if (ndw > 0xfffff)
		return -ENOMEM;

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
	if (r)
		return r;
	ib.length_dw = 0;

	/* walk over the address space and update the page directory */
	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
		struct radeon_bo *bo = vm->page_tables[pt_idx].bo;
		uint64_t pde, pt;

		if (bo == NULL)
			continue;

		pt = radeon_bo_gpu_offset(bo);
		if (vm->page_tables[pt_idx].addr == pt)
			continue;
		vm->page_tables[pt_idx].addr = pt;

		pde = pd_addr + pt_idx * 8;
		if (((last_pde + 8 * count) != pde) ||
		    ((last_pt + incr * count) != pt)) {

			if (count) {
				radeon_vm_set_pages(rdev, &ib, last_pde,
							last_pt, count, incr,
							R600_PTE_VALID);
			}

			count = 1;
			last_pde = pde;
			last_pt = pt;
		} else {
			++count;
		}
	}

	if (count)
		radeon_vm_set_pages(rdev, &ib, last_pde, last_pt, count,
					incr, R600_PTE_VALID);

	if (ib.length_dw != 0) {
		radeon_asic_vm_pad_ib(rdev, &ib);
		radeon_semaphore_sync_to(ib.semaphore, pd->tbo.sync_obj);
		radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use);
		WARN_ON(ib.length_dw > ndw);
		r = radeon_ib_schedule(rdev, &ib, NULL, false);
		if (r) {
			radeon_ib_free(rdev, &ib);
			return r;
		}
		radeon_fence_unref(&vm->fence);
		vm->fence = radeon_fence_ref(ib.fence);
		radeon_fence_unref(&vm->last_flush);
	}
	radeon_ib_free(rdev, &ib);

	return 0;
}

/**
 * radeon_vm_frag_ptes - add fragment information to PTEs
 *
 * @rdev: radeon_device pointer
 * @ib: IB for the update
 * @pe_start: first PTE to handle
 * @pe_end: last PTE to handle
 * @addr: addr those PTEs should point to
 * @flags: hw mapping flags
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_frag_ptes(struct radeon_device *rdev,
				struct radeon_ib *ib,
				uint64_t pe_start, uint64_t pe_end,
				uint64_t addr, uint32_t flags)
{
	/**
	 * The MC L1 TLB supports variable sized pages, based on a fragment
	 * field in the PTE. When this field is set to a non-zero value, page
	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
	 * flags are considered valid for all PTEs within the fragment range
	 * and corresponding mappings are assumed to be physically contiguous.
	 *
	 * The L1 TLB can store a single PTE for the whole fragment,
	 * significantly increasing the space available for translation
	 * caching. This leads to large improvements in throughput when the
	 * TLB is under pressure.
	 *
	 * The L2 TLB distributes small and large fragments into two
	 * asymmetric partitions. The large fragment cache is significantly
	 * larger. Thus, we try to use large fragments wherever possible.
	 * Userspace can support this by aligning the virtual base address
	 * and the allocation size to the fragment size.
	 */

	/* NI is optimized for 256KB fragments, SI and newer for 64KB */
	uint64_t frag_flags = rdev->family == CHIP_CAYMAN ?
			R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB;
	uint64_t frag_align = rdev->family == CHIP_CAYMAN ? 0x200 : 0x80;

	uint64_t frag_start = ALIGN(pe_start, frag_align);
	uint64_t frag_end = pe_end & ~(frag_align - 1);

	unsigned count;

	/* system pages are non-contiguous */
	if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) ||
	    (frag_start >= frag_end)) {

		count = (pe_end - pe_start) / 8;
		radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
					RADEON_GPU_PAGE_SIZE, flags);
		return;
	}

	/* handle the 4K area at the beginning */
	if (pe_start != frag_start) {
		count = (frag_start - pe_start) / 8;
		radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
					RADEON_GPU_PAGE_SIZE, flags);
		addr += RADEON_GPU_PAGE_SIZE * count;
	}

	/* handle the area in the middle */
	count = (frag_end - frag_start) / 8;
	radeon_vm_set_pages(rdev, ib, frag_start, addr, count,
				RADEON_GPU_PAGE_SIZE, flags | frag_flags);

	/* handle the 4K area at the end */
	if (frag_end != pe_end) {
		addr += RADEON_GPU_PAGE_SIZE * count;
		count = (pe_end - frag_end) / 8;
		radeon_vm_set_pages(rdev, ib, frag_end, addr, count,
					RADEON_GPU_PAGE_SIZE, flags);
	}
}
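
/*
 * Added worked example: on SI and newer, frag_align is 0x80 bytes of PTE
 * space, i.e. 0x80 / 8 = 16 PTEs = 64 KiB of address space (0x200 / 8 =
 * 64 PTEs = 256 KiB on Cayman).  For a VRAM mapping whose PTE range is
 * not fragment aligned, the unaligned head and tail are written as plain
 * 4 KiB PTEs and only the aligned middle gets flags | R600_PTE_FRAG_*,
 * so the TLB can cache it as one large fragment.
 */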

/**
 * radeon_vm_update_ptes - make sure that page tables are valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end (cayman+).
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_update_ptes(struct radeon_device *rdev,
				  struct radeon_vm *vm,
				  struct radeon_ib *ib,
				  uint64_t start, uint64_t end,
				  uint64_t dst, uint32_t flags)
{
	uint64_t mask = RADEON_VM_PTE_COUNT - 1;
	uint64_t last_pte = ~0, last_dst = ~0;
	unsigned count = 0;
	uint64_t addr;

	/* walk over the address space and update the page tables */
	for (addr = start; addr < end; ) {
		uint64_t pt_idx = addr >> radeon_vm_block_size;
		struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
		unsigned nptes;
		uint64_t pte;

		radeon_semaphore_sync_to(ib->semaphore, pt->tbo.sync_obj);

		if ((addr & ~mask) == (end & ~mask))
			nptes = end - addr;
		else
			nptes = RADEON_VM_PTE_COUNT - (addr & mask);

		pte = radeon_bo_gpu_offset(pt);
		pte += (addr & mask) * 8;

		if ((last_pte + 8 * count) != pte) {

			if (count) {
				radeon_vm_frag_ptes(rdev, ib, last_pte,
						    last_pte + 8 * count,
						    last_dst, flags);
			}

			count = nptes;
			last_pte = pte;
			last_dst = dst;
		} else {
			count += nptes;
		}

		addr += nptes;
		dst += nptes * RADEON_GPU_PAGE_SIZE;
	}

	if (count) {
		radeon_vm_frag_ptes(rdev, ib, last_pte,
				    last_pte + 8 * count,
				    last_dst, flags);
	}
}

/**
 * radeon_vm_bo_update - map a bo into the vm page table
 *
 * @rdev: radeon_device pointer
 * @bo_va: requested bo_va
 * @mem: ttm mem
 *
 * Fill in the page table entries for @bo_va (cayman+).
 * Returns 0 for success, -EINVAL for failure.
 *
 * Object has to be reserved and mutex must be locked!
 */
int radeon_vm_bo_update(struct radeon_device *rdev,
			struct radeon_bo_va *bo_va,
			struct ttm_mem_reg *mem)
{
	struct radeon_vm *vm = bo_va->vm;
	struct radeon_ib ib;
	unsigned nptes, ncmds, ndw;
	uint64_t addr;
	uint32_t flags;
	int r;

	if (!bo_va->it.start) {
		dev_err(rdev->dev, "bo %p doesn't have a mapping in vm %p\n",
			bo_va->bo, vm);
		return -EINVAL;
	}

	list_del_init(&bo_va->vm_status);

	bo_va->flags &= ~RADEON_VM_PAGE_VALID;
	bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
	bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED;
	if (mem) {
		addr = mem->start << PAGE_SHIFT;
		if (mem->mem_type != TTM_PL_SYSTEM) {
			bo_va->flags |= RADEON_VM_PAGE_VALID;
		}
		if (mem->mem_type == TTM_PL_TT) {
			bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
			if (!(bo_va->bo->flags & (RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC)))
				bo_va->flags |= RADEON_VM_PAGE_SNOOPED;

		} else {
			addr += rdev->vm_manager.vram_base_offset;
		}
	} else {
		addr = 0;
	}

	if (addr == bo_va->addr)
		return 0;
	bo_va->addr = addr;

	trace_radeon_vm_bo_update(bo_va);

	nptes = bo_va->it.last - bo_va->it.start + 1;

	/* reserve space for one command every (1 << BLOCK_SIZE) entries
	   or 2k dwords (whatever is smaller) */
	ncmds = (nptes >> min(radeon_vm_block_size, 11)) + 1;

	/* padding, etc. */
	ndw = 64;

	flags = radeon_vm_page_flags(bo_va->flags);
	if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
		/* only copy commands needed */
		ndw += ncmds * 7;

	} else if (flags & R600_PTE_SYSTEM) {
		/* header for write data commands */
		ndw += ncmds * 4;

		/* body of write data command */
		ndw += nptes * 2;

	} else {
		/* set page commands needed */
		ndw += ncmds * 10;

		/* two extra commands for begin/end of fragment */
		ndw += 2 * 10;
	}

	/* update too big for an IB */
	if (ndw > 0xfffff)
		return -ENOMEM;

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
	if (r)
		return r;
	ib.length_dw = 0;

	radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
			      bo_va->it.last + 1, addr,
			      radeon_vm_page_flags(bo_va->flags));

	radeon_asic_vm_pad_ib(rdev, &ib);
	WARN_ON(ib.length_dw > ndw);

	radeon_semaphore_sync_to(ib.semaphore, vm->fence);
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_ib_free(rdev, &ib);
		return r;
	}
	radeon_fence_unref(&vm->fence);
	vm->fence = radeon_fence_ref(ib.fence);
	radeon_ib_free(rdev, &ib);
	radeon_fence_unref(&vm->last_flush);

	return 0;
}
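
/*
 * Added worked example for the IB sizing above (assuming
 * radeon_vm_block_size = 9): mapping 1 MiB of VRAM means nptes = 256, so
 * ncmds = (256 >> 9) + 1 = 1 and ndw = 64 + 1 * 10 + 2 * 10 = 94 dwords,
 * i.e. the IB is allocated with 94 * 4 = 376 bytes of command space.
 */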

/**
 * radeon_vm_clear_freed - clear freed BOs in the PT
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Make sure all freed BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and mutex must be locked!
 */
int radeon_vm_clear_freed(struct radeon_device *rdev,
			  struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va, *tmp;
	int r;

	list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
		r = radeon_vm_bo_update(rdev, bo_va, NULL);
		radeon_bo_unref(&bo_va->bo);
		kfree(bo_va);
		if (r)
			return r;
	}
	return 0;
}

/**
 * radeon_vm_clear_invalids - clear invalidated BOs in the PT
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Make sure all invalidated BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and mutex must be locked!
 */
int radeon_vm_clear_invalids(struct radeon_device *rdev,
			     struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va, *tmp;
	int r;

	list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, vm_status) {
		r = radeon_vm_bo_update(rdev, bo_va, NULL);
		if (r)
			return r;
	}
	return 0;
}

/**
 * radeon_vm_bo_rmv - remove a bo from a specific vm
 *
 * @rdev: radeon_device pointer
 * @bo_va: requested bo_va
 *
 * Remove @bo_va->bo from the requested vm (cayman+).
 *
 * Object has to be reserved!
 */
void radeon_vm_bo_rmv(struct radeon_device *rdev,
		      struct radeon_bo_va *bo_va)
{
	struct radeon_vm *vm = bo_va->vm;

	list_del(&bo_va->bo_list);

	mutex_lock(&vm->mutex);
	interval_tree_remove(&bo_va->it, &vm->va);
	list_del(&bo_va->vm_status);

	if (bo_va->addr) {
		bo_va->bo = radeon_bo_ref(bo_va->bo);
		list_add(&bo_va->vm_status, &vm->freed);
	} else {
		kfree(bo_va);
	}

	mutex_unlock(&vm->mutex);
}

/**
 * radeon_vm_bo_invalidate - mark the bo as invalid
 *
 * @rdev: radeon_device pointer
 * @bo: radeon buffer object
 *
 * Mark @bo as invalid (cayman+).
 */
void radeon_vm_bo_invalidate(struct radeon_device *rdev,
			     struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, bo_list) {
		if (bo_va->addr) {
			mutex_lock(&bo_va->vm->mutex);
			list_del(&bo_va->vm_status);
			list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
			mutex_unlock(&bo_va->vm->mutex);
		}
	}
}

/**
 * radeon_vm_init - initialize a vm instance
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Init @vm fields (cayman+).
 */
int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
{
	const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE,
		RADEON_VM_PTE_COUNT * 8);
	unsigned pd_size, pd_entries, pts_size;
	int r;

	vm->id = 0;
	vm->ib_bo_va = NULL;
	vm->fence = NULL;
	vm->last_flush = NULL;
	vm->last_id_use = NULL;
	mutex_init(&vm->mutex);
	vm->va = RB_ROOT;
	INIT_LIST_HEAD(&vm->invalidated);
	INIT_LIST_HEAD(&vm->freed);

	pd_size = radeon_vm_directory_size(rdev);
	pd_entries = radeon_vm_num_pdes(rdev);

	/* allocate page table array */
	pts_size = pd_entries * sizeof(struct radeon_vm_pt);
	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
	if (vm->page_tables == NULL) {
		DRM_ERROR("Cannot allocate memory for page table array\n");
		return -ENOMEM;
	}

	r = radeon_bo_create(rdev, pd_size, align, true,
			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
			     &vm->page_directory);
	if (r)
		return r;

	r = radeon_vm_clear_bo(rdev, vm->page_directory);
	if (r) {
		radeon_bo_unref(&vm->page_directory);
		vm->page_directory = NULL;
		return r;
	}

	return 0;
}

/**
 * radeon_vm_fini - tear down a vm instance
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Tear down @vm (cayman+).
 * Unbind the VM and remove all bos from the vm bo list.
 */
void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va, *tmp;
	int i, r;

	if (!RB_EMPTY_ROOT(&vm->va)) {
		dev_err(rdev->dev, "still active bo inside vm\n");
	}
	rbtree_postorder_for_each_entry_safe(bo_va, tmp, &vm->va, it.rb) {
		interval_tree_remove(&bo_va->it, &vm->va);
		r = radeon_bo_reserve(bo_va->bo, false);
		if (!r) {
			list_del_init(&bo_va->bo_list);
			radeon_bo_unreserve(bo_va->bo);
			kfree(bo_va);
		}
	}
	list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
		radeon_bo_unref(&bo_va->bo);
		kfree(bo_va);
	}

	for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
		radeon_bo_unref(&vm->page_tables[i].bo);
	kfree(vm->page_tables);

	radeon_bo_unref(&vm->page_directory);

	radeon_fence_unref(&vm->fence);
	radeon_fence_unref(&vm->last_flush);
	radeon_fence_unref(&vm->last_id_use);

	mutex_destroy(&vm->mutex);
}