/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon.h"
#include "radeon_trace.h"
 
/*
 * GPUVM
 * GPUVM is similar to the legacy gart on older asics, however
 * rather than there being a single global gart table
 * for the entire GPU, there are multiple VM page tables active
 * at any given time.  The VM page tables can contain a mix of
 * vram pages and system memory pages, and the system memory pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID.  When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer.  VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
 * command buffers and a VMID is assigned.
 * Cayman/Trinity support up to 8 active VMs at any given time;
 * SI supports 16.
 */

/**
 * radeon_vm_num_pdes - return the number of page directory entries
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the number of page directory entries (cayman+).
 */
static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
{
	return rdev->vm_manager.max_pfn >> radeon_vm_block_size;
}

/**
 * radeon_vm_directory_size - returns the size of the page directory in bytes
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the size of the page directory in bytes (cayman+).
 */
static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
{
	return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
}
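
/*
 * Added worked example (illustrative figures, not tied to a specific asic):
 * with vm_manager.max_pfn = 1 << 20 (a 4 GiB address space in 4 KiB pages)
 * and radeon_vm_block_size = 9 (512 PTEs per page table), the helpers above
 * give radeon_vm_num_pdes() = (1 << 20) >> 9 = 2048 directory entries and
 * radeon_vm_directory_size() = 2048 * 8 bytes = 16 KiB (GPU page aligned).
 */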

/**
 * radeon_vm_manager_init - init the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Init the vm manager (cayman+).
 * Returns 0 for success, error for failure.
 */
int radeon_vm_manager_init(struct radeon_device *rdev)
{
	int r;

	if (!rdev->vm_manager.enabled) {
		r = radeon_asic_vm_init(rdev);
		if (r)
			return r;

		rdev->vm_manager.enabled = true;
	}
	return 0;
}

/**
 * radeon_vm_manager_fini - tear down the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the VM manager (cayman+).
 */
void radeon_vm_manager_fini(struct radeon_device *rdev)
{
	int i;

	if (!rdev->vm_manager.enabled)
		return;

	for (i = 0; i < RADEON_NUM_VM; ++i)
		radeon_fence_unref(&rdev->vm_manager.active[i]);
	radeon_asic_vm_fini(rdev);
	rdev->vm_manager.enabled = false;
}

/**
 * radeon_vm_get_bos - add the vm BOs to a validation list
 *
 * @rdev: radeon_device pointer
 * @vm: vm providing the BOs
 * @head: head of validation list
 *
 * Add the page directory to the list of BOs to
 * validate for command submission (cayman+).
 */
struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
					  struct radeon_vm *vm,
					  struct list_head *head)
{
	struct radeon_cs_reloc *list;
	unsigned i, idx;

	list = kmalloc_array(vm->max_pde_used + 2,
			     sizeof(struct radeon_cs_reloc), GFP_KERNEL);
	if (!list)
		return NULL;

	/* add the vm page table to the list */
	list[0].gobj = NULL;
	list[0].robj = vm->page_directory;
	list[0].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
	list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
	list[0].tv.bo = &vm->page_directory->tbo;
	list[0].tiling_flags = 0;
	list[0].handle = 0;
	list_add(&list[0].tv.head, head);

	for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
		if (!vm->page_tables[i].bo)
			continue;

		list[idx].gobj = NULL;
		list[idx].robj = vm->page_tables[i].bo;
		list[idx].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
		list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
		list[idx].tv.bo = &list[idx].robj->tbo;
		list[idx].tiling_flags = 0;
		list[idx].handle = 0;
		list_add(&list[idx++].tv.head, head);
	}

	return list;
}

/**
 * radeon_vm_grab_id - allocate the next free VMID
 *
 * @rdev: radeon_device pointer
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 *
 * Allocate an id for the vm (cayman+).
 * Returns the fence we need to sync to (if any).
 *
 * Global and local mutex must be locked!
 */
struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
				       struct radeon_vm *vm, int ring)
{
	struct radeon_fence *best[RADEON_NUM_RINGS] = {};
	unsigned choices[2] = {};
	unsigned i;

	/* check if the id is still valid */
	if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id])
		return NULL;

	/* we definitely need to flush */
	radeon_fence_unref(&vm->last_flush);

	/* skip over VMID 0, since it is the system VM */
	for (i = 1; i < rdev->vm_manager.nvm; ++i) {
		struct radeon_fence *fence = rdev->vm_manager.active[i];

		if (fence == NULL) {
			/* found a free one */
			vm->id = i;
			trace_radeon_vm_grab_id(vm->id, ring);
			return NULL;
		}

		if (radeon_fence_is_earlier(fence, best[fence->ring])) {
			best[fence->ring] = fence;
			choices[fence->ring == ring ? 0 : 1] = i;
		}
	}

	for (i = 0; i < 2; ++i) {
		if (choices[i]) {
			vm->id = choices[i];
			trace_radeon_vm_grab_id(vm->id, ring);
			return rdev->vm_manager.active[choices[i]];
		}
	}

	/* should never happen */
	BUG();
	return NULL;
}
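
/*
 * Added commentary (not part of the original comments) on the heuristic
 * above: when no VMID is free, choices[0] is an id whose last use was
 * fenced on the submission ring, while choices[1] is an id last used on
 * some other ring.  Preferring choices[0] keeps the fence the caller has
 * to sync to on the same ring, which is cheaper than a cross-ring wait.
 */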

/**
 * radeon_vm_flush - hardware flush the vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm we want to flush
 * @ring: ring to use for flush
 *
 * Flush the vm (cayman+).
 *
 * Global and local mutex must be locked!
 */
void radeon_vm_flush(struct radeon_device *rdev,
		     struct radeon_vm *vm,
		     int ring)
{
	uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);

	/* if we can't remember our last VM flush then flush now! */
	if (!vm->last_flush || pd_addr != vm->pd_gpu_addr) {
		trace_radeon_vm_flush(pd_addr, ring, vm->id);
		vm->pd_gpu_addr = pd_addr;
		radeon_ring_vm_flush(rdev, ring, vm);
	}
}

/**
 * radeon_vm_fence - remember fence for vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm we want to fence
 * @fence: fence to remember
 *
 * Fence the vm (cayman+).
 * Set the fence used to protect page table and id.
 *
 * Global and local mutex must be locked!
 */
void radeon_vm_fence(struct radeon_device *rdev,
		     struct radeon_vm *vm,
		     struct radeon_fence *fence)
{
	radeon_fence_unref(&vm->fence);
	vm->fence = radeon_fence_ref(fence);

	radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
	rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);

	radeon_fence_unref(&vm->last_id_use);
	vm->last_id_use = radeon_fence_ref(fence);

	/* we just flushed the VM, remember that */
	if (!vm->last_flush)
		vm->last_flush = radeon_fence_ref(fence);
}

/**
 * radeon_vm_bo_find - find the bo_va for a specific vm & bo
 *
 * @vm: requested vm
 * @bo: requested buffer object
 *
 * Find @bo inside the requested vm (cayman+).
 * Search inside the @bo's vm list for the requested vm.
 * Returns the found bo_va or NULL if none is found.
 *
 * Object has to be reserved!
 */
struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
				       struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, bo_list) {
		if (bo_va->vm == vm) {
			return bo_va;
		}
	}
	return NULL;
}

/**
 * radeon_vm_bo_add - add a bo to a specific vm
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @bo: radeon buffer object
 *
 * Add @bo into the requested vm (cayman+).
 * Add @bo to the list of bos associated with the vm.
 * Returns the newly added bo_va or NULL for failure.
 *
 * Object has to be reserved!
 */
struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
				      struct radeon_vm *vm,
				      struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
	if (bo_va == NULL) {
		return NULL;
	}
	bo_va->vm = vm;
	bo_va->bo = bo;
	bo_va->it.start = 0;
	bo_va->it.last = 0;
	bo_va->flags = 0;
	bo_va->addr = 0;
	bo_va->ref_count = 1;
	INIT_LIST_HEAD(&bo_va->bo_list);
	INIT_LIST_HEAD(&bo_va->vm_status);

	mutex_lock(&vm->mutex);
	list_add_tail(&bo_va->bo_list, &bo->va);
	mutex_unlock(&vm->mutex);

	return bo_va;
}

/**
 * radeon_vm_set_pages - helper to call the right asic function
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the right asic functions
 * to setup the page table using the DMA.
 */
static void radeon_vm_set_pages(struct radeon_device *rdev,
				struct radeon_ib *ib,
				uint64_t pe,
				uint64_t addr, unsigned count,
				uint32_t incr, uint32_t flags)
{
	trace_radeon_vm_set_page(pe, addr, count, incr, flags);

	if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
		uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8;
		radeon_asic_vm_copy_pages(rdev, ib, pe, src, count);

	} else if ((flags & R600_PTE_SYSTEM) || (count < 3)) {
		radeon_asic_vm_write_pages(rdev, ib, pe, addr,
					   count, incr, flags);

	} else {
		radeon_asic_vm_set_pages(rdev, ib, pe, addr,
					 count, incr, flags);
	}
}
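
/*
 * Added commentary on the dispatch above: GART-backed mappings (all
 * R600_PTE_GART_MASK bits set) are copied straight out of the GART table
 * via the asic copy-pages hook, system pages and very small updates
 * (fewer than 3 entries) go through the write-pages hook, and everything
 * else uses the set-pages hook, which is the path that can also encode
 * fragment information.
 */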

/**
 * radeon_vm_clear_bo - initially clear the page dir/table
 *
 * @rdev: radeon_device pointer
 * @bo: bo to clear
 */
static int radeon_vm_clear_bo(struct radeon_device *rdev,
			      struct radeon_bo *bo)
{
	struct ttm_validate_buffer tv;
	struct ww_acquire_ctx ticket;
	struct list_head head;
	struct radeon_ib ib;
	unsigned entries;
	uint64_t addr;
	int r;

	memset(&tv, 0, sizeof(tv));
	tv.bo = &bo->tbo;

	INIT_LIST_HEAD(&head);
	list_add(&tv.head, &head);

	r = ttm_eu_reserve_buffers(&ticket, &head);
	if (r)
		return r;

	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
	if (r)
		goto error;

	addr = radeon_bo_gpu_offset(bo);
	entries = radeon_bo_size(bo) / 8;

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256);
	if (r)
		goto error;

	ib.length_dw = 0;

	radeon_vm_set_pages(rdev, &ib, addr, 0, entries, 0, 0);
	radeon_asic_vm_pad_ib(rdev, &ib);
	WARN_ON(ib.length_dw > 64);

	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r)
		goto error;

	ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence);
	radeon_ib_free(rdev, &ib);

	return 0;

error:
	ttm_eu_backoff_reservation(&ticket, &head);
	return r;
}

/**
 * radeon_vm_bo_set_addr - set the bo's virtual address inside a vm
 *
 * @rdev: radeon_device pointer
 * @bo_va: bo_va to store the address
 * @soffset: requested offset of the buffer in the VM address space
 * @flags: attributes of pages (read/write/valid/etc.)
 *
 * Set the offset of @bo_va (cayman+).
 * Validate and set the offset requested within the vm address space.
 * Returns 0 for success, error for failure.
 *
 * Object has to be reserved!
 */
int radeon_vm_bo_set_addr(struct radeon_device *rdev,
			  struct radeon_bo_va *bo_va,
			  uint64_t soffset,
			  uint32_t flags)
{
	uint64_t size = radeon_bo_size(bo_va->bo);
	struct radeon_vm *vm = bo_va->vm;
	unsigned last_pfn, pt_idx;
	uint64_t eoffset;
	int r;

	if (soffset) {
		/* make sure the object fits at this offset */
		eoffset = soffset + size;
		if (soffset >= eoffset) {
			return -EINVAL;
		}

		last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
		if (last_pfn > rdev->vm_manager.max_pfn) {
			dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
				last_pfn, rdev->vm_manager.max_pfn);
			return -EINVAL;
		}

	} else {
		eoffset = last_pfn = 0;
	}

	mutex_lock(&vm->mutex);
	if (bo_va->it.start || bo_va->it.last) {
		if (bo_va->addr) {
			/* add a clone of the bo_va to clear the old address */
			struct radeon_bo_va *tmp;
			tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
			if (!tmp) {
				mutex_unlock(&vm->mutex);
				return -ENOMEM;
			}
			tmp->it.start = bo_va->it.start;
			tmp->it.last = bo_va->it.last;
			tmp->vm = vm;
			tmp->addr = bo_va->addr;
			tmp->bo = radeon_bo_ref(bo_va->bo);
			list_add(&tmp->vm_status, &vm->freed);
		}

		interval_tree_remove(&bo_va->it, &vm->va);
		bo_va->it.start = 0;
		bo_va->it.last = 0;
	}

	soffset /= RADEON_GPU_PAGE_SIZE;
	eoffset /= RADEON_GPU_PAGE_SIZE;
	if (soffset || eoffset) {
		struct interval_tree_node *it;
		it = interval_tree_iter_first(&vm->va, soffset, eoffset - 1);
		if (it) {
			struct radeon_bo_va *tmp;
			tmp = container_of(it, struct radeon_bo_va, it);
			/* bo and tmp overlap, invalid offset */
			dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with "
				"(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo,
				soffset, tmp->bo, tmp->it.start, tmp->it.last);
			mutex_unlock(&vm->mutex);
			return -EINVAL;
		}
		bo_va->it.start = soffset;
		bo_va->it.last = eoffset - 1;
		interval_tree_insert(&bo_va->it, &vm->va);
	}

	bo_va->flags = flags;
	bo_va->addr = 0;

	soffset >>= radeon_vm_block_size;
	eoffset >>= radeon_vm_block_size;

	BUG_ON(eoffset >= radeon_vm_num_pdes(rdev));

	if (eoffset > vm->max_pde_used)
		vm->max_pde_used = eoffset;

	radeon_bo_unreserve(bo_va->bo);

	/* walk over the address space and allocate the page tables */
	for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) {
		struct radeon_bo *pt;

		if (vm->page_tables[pt_idx].bo)
			continue;

		/* drop mutex to allocate and clear page table */
		mutex_unlock(&vm->mutex);

		r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8,
				     RADEON_GPU_PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, 0, NULL, &pt);
		if (r)
			return r;

		r = radeon_vm_clear_bo(rdev, pt);
		if (r) {
			radeon_bo_unref(&pt);
			radeon_bo_reserve(bo_va->bo, false);
			return r;
		}

		/* acquire mutex again */
		mutex_lock(&vm->mutex);
		if (vm->page_tables[pt_idx].bo) {
			/* someone else allocated the pt in the meantime */
			mutex_unlock(&vm->mutex);
			radeon_bo_unref(&pt);
			mutex_lock(&vm->mutex);
			continue;
		}

		vm->page_tables[pt_idx].addr = 0;
		vm->page_tables[pt_idx].bo = pt;
	}

	mutex_unlock(&vm->mutex);
	return radeon_bo_reserve(bo_va->bo, false);
}

/**
 * radeon_vm_map_gart - get the physical address of a gart page
 *
 * @rdev: radeon_device pointer
 * @addr: the unmapped addr
 *
 * Look up the physical address of the page that the pte resolves
 * to (cayman+).
 * Returns the physical address of the page.
 */
uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
{
	uint64_t result;

	/* page table offset */
	result = rdev->gart.pages_addr[addr >> PAGE_SHIFT];

	/* in case cpu page size != gpu page size */
	result |= addr & (~PAGE_MASK);

	return result;
}
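
/*
 * Added example: assuming 4 KiB CPU pages, radeon_vm_map_gart(rdev,
 * 0x12345678) looks up pages_addr[0x12345] and ORs in the in-page offset
 * 0x678, so the returned address points at the same byte inside the
 * backing system page.
 */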

/**
 * radeon_vm_page_flags - translate page flags to what the hw uses
 *
 * @flags: flags coming from userspace
 *
 * Translate the flags the userspace ABI uses to hw flags.
 */
static uint32_t radeon_vm_page_flags(uint32_t flags)
{
	uint32_t hw_flags = 0;
	hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
	hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
	hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
	if (flags & RADEON_VM_PAGE_SYSTEM) {
		hw_flags |= R600_PTE_SYSTEM;
		hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
	}
	return hw_flags;
}
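
/*
 * Added example: a snooped GTT mapping requested as RADEON_VM_PAGE_VALID |
 * RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_SYSTEM | RADEON_VM_PAGE_SNOOPED
 * translates to R600_PTE_VALID | R600_PTE_READABLE | R600_PTE_SYSTEM |
 * R600_PTE_SNOOPED, while RADEON_VM_PAGE_SNOOPED on its own has no effect
 * because it is only honored together with RADEON_VM_PAGE_SYSTEM.
 */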

/**
 * radeon_vm_update_page_directory - make sure that the page directory is valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Allocates new page tables if necessary
 * and updates the page directory (cayman+).
 * Returns 0 for success, error for failure.
 *
 * Global and local mutex must be locked!
 */
int radeon_vm_update_page_directory(struct radeon_device *rdev,
				    struct radeon_vm *vm)
{
	struct radeon_bo *pd = vm->page_directory;
	uint64_t pd_addr = radeon_bo_gpu_offset(pd);
	uint32_t incr = RADEON_VM_PTE_COUNT * 8;
	uint64_t last_pde = ~0, last_pt = ~0;
	unsigned count = 0, pt_idx, ndw;
	struct radeon_ib ib;
	int r;

	/* padding, etc. */
	ndw = 64;

	/* assume the worst case */
	ndw += vm->max_pde_used * 6;

	/* update too big for an IB */
	if (ndw > 0xfffff)
		return -ENOMEM;

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
	if (r)
		return r;
	ib.length_dw = 0;

	/* walk over the address space and update the page directory */
	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
		struct radeon_bo *bo = vm->page_tables[pt_idx].bo;
		uint64_t pde, pt;

		if (bo == NULL)
			continue;

		pt = radeon_bo_gpu_offset(bo);
		if (vm->page_tables[pt_idx].addr == pt)
			continue;
		vm->page_tables[pt_idx].addr = pt;

		pde = pd_addr + pt_idx * 8;
		if (((last_pde + 8 * count) != pde) ||
		    ((last_pt + incr * count) != pt)) {

			if (count) {
				radeon_vm_set_pages(rdev, &ib, last_pde,
							last_pt, count, incr,
							R600_PTE_VALID);
			}

			count = 1;
			last_pde = pde;
			last_pt = pt;
		} else {
			++count;
		}
	}

	if (count)
		radeon_vm_set_pages(rdev, &ib, last_pde, last_pt, count,
					incr, R600_PTE_VALID);

	if (ib.length_dw != 0) {
		radeon_asic_vm_pad_ib(rdev, &ib);
		radeon_semaphore_sync_to(ib.semaphore, pd->tbo.sync_obj);
		radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use);
		WARN_ON(ib.length_dw > ndw);
		r = radeon_ib_schedule(rdev, &ib, NULL, false);
		if (r) {
			radeon_ib_free(rdev, &ib);
			return r;
		}
		radeon_fence_unref(&vm->fence);
		vm->fence = radeon_fence_ref(ib.fence);
		radeon_fence_unref(&vm->last_flush);
	}
	radeon_ib_free(rdev, &ib);

	return 0;
}

/**
 * radeon_vm_frag_ptes - add fragment information to PTEs
 *
 * @rdev: radeon_device pointer
 * @ib: IB for the update
 * @pe_start: first PTE to handle
 * @pe_end: last PTE to handle
 * @addr: addr those PTEs should point to
 * @flags: hw mapping flags
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_frag_ptes(struct radeon_device *rdev,
				struct radeon_ib *ib,
				uint64_t pe_start, uint64_t pe_end,
				uint64_t addr, uint32_t flags)
{
	/**
	 * The MC L1 TLB supports variable sized pages, based on a fragment
	 * field in the PTE. When this field is set to a non-zero value, page
	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
	 * flags are considered valid for all PTEs within the fragment range
	 * and corresponding mappings are assumed to be physically contiguous.
	 *
	 * The L1 TLB can store a single PTE for the whole fragment,
	 * significantly increasing the space available for translation
	 * caching. This leads to large improvements in throughput when the
	 * TLB is under pressure.
	 *
	 * The L2 TLB distributes small and large fragments into two
	 * asymmetric partitions. The large fragment cache is significantly
	 * larger. Thus, we try to use large fragments wherever possible.
	 * Userspace can support this by aligning the virtual base address
	 * and the allocation size to the fragment size.
	 */

	/* NI is optimized for 256KB fragments, SI and newer for 64KB */
	uint64_t frag_flags = rdev->family == CHIP_CAYMAN ?
			R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB;
	uint64_t frag_align = rdev->family == CHIP_CAYMAN ? 0x200 : 0x80;

	uint64_t frag_start = ALIGN(pe_start, frag_align);
	uint64_t frag_end = pe_end & ~(frag_align - 1);

	unsigned count;

	/* system pages are non-contiguous */
	if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) ||
	    (frag_start >= frag_end)) {

		count = (pe_end - pe_start) / 8;
		radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
					RADEON_GPU_PAGE_SIZE, flags);
		return;
	}

	/* handle the 4K area at the beginning */
	if (pe_start != frag_start) {
		count = (frag_start - pe_start) / 8;
		radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
					RADEON_GPU_PAGE_SIZE, flags);
		addr += RADEON_GPU_PAGE_SIZE * count;
	}

	/* handle the area in the middle */
	count = (frag_end - frag_start) / 8;
	radeon_vm_set_pages(rdev, ib, frag_start, addr, count,
				RADEON_GPU_PAGE_SIZE, flags | frag_flags);

	/* handle the 4K area at the end */
	if (frag_end != pe_end) {
		addr += RADEON_GPU_PAGE_SIZE * count;
		count = (pe_end - frag_end) / 8;
		radeon_vm_set_pages(rdev, ib, frag_end, addr, count,
					RADEON_GPU_PAGE_SIZE, flags);
	}
}
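
/*
 * Added worked example: on SI and newer, frag_align is 0x80 bytes of PTE
 * space, i.e. 0x80 / 8 = 16 PTEs = 64 KiB of address space (0x200 / 8 =
 * 64 PTEs = 256 KiB on Cayman).  For a VRAM mapping whose PTE range is
 * not fragment aligned, the unaligned head and tail are written as plain
 * 4 KiB PTEs and only the aligned middle gets flags | R600_PTE_FRAG_*,
 * so the TLB can cache it as one large fragment.
 */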

/**
 * radeon_vm_update_ptes - make sure that page tables are valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end (cayman+).
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_update_ptes(struct radeon_device *rdev,
				  struct radeon_vm *vm,
				  struct radeon_ib *ib,
				  uint64_t start, uint64_t end,
				  uint64_t dst, uint32_t flags)
{
	uint64_t mask = RADEON_VM_PTE_COUNT - 1;
	uint64_t last_pte = ~0, last_dst = ~0;
	unsigned count = 0;
	uint64_t addr;

	/* walk over the address space and update the page tables */
	for (addr = start; addr < end; ) {
		uint64_t pt_idx = addr >> radeon_vm_block_size;
		struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
		unsigned nptes;
		uint64_t pte;

		radeon_semaphore_sync_to(ib->semaphore, pt->tbo.sync_obj);

		if ((addr & ~mask) == (end & ~mask))
			nptes = end - addr;
		else
			nptes = RADEON_VM_PTE_COUNT - (addr & mask);

		pte = radeon_bo_gpu_offset(pt);
		pte += (addr & mask) * 8;

		if ((last_pte + 8 * count) != pte) {

			if (count) {
				radeon_vm_frag_ptes(rdev, ib, last_pte,
						    last_pte + 8 * count,
						    last_dst, flags);
			}

			count = nptes;
			last_pte = pte;
			last_dst = dst;
		} else {
			count += nptes;
		}

		addr += nptes;
		dst += nptes * RADEON_GPU_PAGE_SIZE;
	}

	if (count) {
		radeon_vm_frag_ptes(rdev, ib, last_pte,
				    last_pte + 8 * count,
				    last_dst, flags);
	}
}

/**
 * radeon_vm_bo_update - map a bo into the vm page table
 *
 * @rdev: radeon_device pointer
 * @bo_va: requested bo_va
 * @mem: ttm mem
 *
 * Fill in the page table entries for @bo_va (cayman+).
 * Returns 0 for success, -EINVAL for failure.
 *
 * Object has to be reserved and mutex must be locked!
 */
int radeon_vm_bo_update(struct radeon_device *rdev,
			struct radeon_bo_va *bo_va,
			struct ttm_mem_reg *mem)
{
	struct radeon_vm *vm = bo_va->vm;
	struct radeon_ib ib;
	unsigned nptes, ncmds, ndw;
	uint64_t addr;
	uint32_t flags;
	int r;

	if (!bo_va->it.start) {
		dev_err(rdev->dev, "bo %p doesn't have a mapping in vm %p\n",
			bo_va->bo, vm);
		return -EINVAL;
	}

	list_del_init(&bo_va->vm_status);

	bo_va->flags &= ~RADEON_VM_PAGE_VALID;
	bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
	bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED;
	if (mem) {
		addr = mem->start << PAGE_SHIFT;
		if (mem->mem_type != TTM_PL_SYSTEM) {
			bo_va->flags |= RADEON_VM_PAGE_VALID;
		}
		if (mem->mem_type == TTM_PL_TT) {
			bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
			if (!(bo_va->bo->flags & (RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC)))
				bo_va->flags |= RADEON_VM_PAGE_SNOOPED;

		} else {
			addr += rdev->vm_manager.vram_base_offset;
		}
	} else {
		addr = 0;
	}

	if (addr == bo_va->addr)
		return 0;
	bo_va->addr = addr;

	trace_radeon_vm_bo_update(bo_va);

	nptes = bo_va->it.last - bo_va->it.start + 1;

	/* reserve space for one command every (1 << BLOCK_SIZE) entries
	   or 2k dwords (whatever is smaller) */
	ncmds = (nptes >> min(radeon_vm_block_size, 11)) + 1;

	/* padding, etc. */
	ndw = 64;

	flags = radeon_vm_page_flags(bo_va->flags);
	if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
		/* only copy commands needed */
		ndw += ncmds * 7;

	} else if (flags & R600_PTE_SYSTEM) {
		/* header for write data commands */
		ndw += ncmds * 4;

		/* body of write data command */
		ndw += nptes * 2;

	} else {
		/* set page commands needed */
		ndw += ncmds * 10;

		/* two extra commands for begin/end of fragment */
		ndw += 2 * 10;
	}

	/* update too big for an IB */
	if (ndw > 0xfffff)
		return -ENOMEM;

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
	if (r)
		return r;
	ib.length_dw = 0;

	radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
			      bo_va->it.last + 1, addr,
			      radeon_vm_page_flags(bo_va->flags));

	radeon_asic_vm_pad_ib(rdev, &ib);
	WARN_ON(ib.length_dw > ndw);

	radeon_semaphore_sync_to(ib.semaphore, vm->fence);
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_ib_free(rdev, &ib);
		return r;
	}
	radeon_fence_unref(&vm->fence);
	vm->fence = radeon_fence_ref(ib.fence);
	radeon_ib_free(rdev, &ib);
	radeon_fence_unref(&vm->last_flush);

	return 0;
}
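
/*
 * Added worked example for the IB sizing above (assuming
 * radeon_vm_block_size = 9): mapping 1 MiB of VRAM means nptes = 256, so
 * ncmds = (256 >> 9) + 1 = 1 and ndw = 64 + 1 * 10 + 2 * 10 = 94 dwords,
 * i.e. the IB is allocated with 94 * 4 = 376 bytes of command space.
 */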

/**
 * radeon_vm_clear_freed - clear freed BOs in the PT
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Make sure all freed BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and mutex must be locked!
 */
int radeon_vm_clear_freed(struct radeon_device *rdev,
			  struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va, *tmp;
	int r;

	list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
		r = radeon_vm_bo_update(rdev, bo_va, NULL);
		radeon_bo_unref(&bo_va->bo);
		kfree(bo_va);
		if (r)
			return r;
	}
	return 0;
}

/**
 * radeon_vm_clear_invalids - clear invalidated BOs in the PT
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Make sure all invalidated BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and mutex must be locked!
 */
int radeon_vm_clear_invalids(struct radeon_device *rdev,
			     struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va, *tmp;
	int r;

	list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, vm_status) {
		r = radeon_vm_bo_update(rdev, bo_va, NULL);
		if (r)
			return r;
	}
	return 0;
}

/**
 * radeon_vm_bo_rmv - remove a bo from a specific vm
 *
 * @rdev: radeon_device pointer
 * @bo_va: requested bo_va
 *
 * Remove @bo_va->bo from the requested vm (cayman+).
 *
 * Object has to be reserved!
 */
void radeon_vm_bo_rmv(struct radeon_device *rdev,
		      struct radeon_bo_va *bo_va)
{
	struct radeon_vm *vm = bo_va->vm;

	list_del(&bo_va->bo_list);

	mutex_lock(&vm->mutex);
	interval_tree_remove(&bo_va->it, &vm->va);
	list_del(&bo_va->vm_status);

	if (bo_va->addr) {
		bo_va->bo = radeon_bo_ref(bo_va->bo);
		list_add(&bo_va->vm_status, &vm->freed);
	} else {
		kfree(bo_va);
	}

	mutex_unlock(&vm->mutex);
}

/**
 * radeon_vm_bo_invalidate - mark the bo as invalid
 *
 * @rdev: radeon_device pointer
 * @bo: radeon buffer object
 *
 * Mark @bo as invalid (cayman+).
 */
void radeon_vm_bo_invalidate(struct radeon_device *rdev,
			     struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, bo_list) {
		if (bo_va->addr) {
			mutex_lock(&bo_va->vm->mutex);
			list_del(&bo_va->vm_status);
			list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
			mutex_unlock(&bo_va->vm->mutex);
		}
	}
}

/**
 * radeon_vm_init - initialize a vm instance
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Init @vm fields (cayman+).
 */
int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
{
	const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE,
		RADEON_VM_PTE_COUNT * 8);
	unsigned pd_size, pd_entries, pts_size;
	int r;

	vm->id = 0;
	vm->ib_bo_va = NULL;
	vm->fence = NULL;
	vm->last_flush = NULL;
	vm->last_id_use = NULL;
	mutex_init(&vm->mutex);
	vm->va = RB_ROOT;
	INIT_LIST_HEAD(&vm->invalidated);
	INIT_LIST_HEAD(&vm->freed);

	pd_size = radeon_vm_directory_size(rdev);
	pd_entries = radeon_vm_num_pdes(rdev);

	/* allocate page table array */
	pts_size = pd_entries * sizeof(struct radeon_vm_pt);
	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
	if (vm->page_tables == NULL) {
		DRM_ERROR("Cannot allocate memory for page table array\n");
		return -ENOMEM;
	}

	r = radeon_bo_create(rdev, pd_size, align, true,
			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
			     &vm->page_directory);
	if (r)
		return r;

	r = radeon_vm_clear_bo(rdev, vm->page_directory);
	if (r) {
		radeon_bo_unref(&vm->page_directory);
		vm->page_directory = NULL;
		return r;
	}

	return 0;
}

/**
 * radeon_vm_fini - tear down a vm instance
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Tear down @vm (cayman+).
 * Unbind the VM and remove all bos from the vm bo list.
 */
void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va, *tmp;
	int i, r;

	if (!RB_EMPTY_ROOT(&vm->va)) {
		dev_err(rdev->dev, "still active bo inside vm\n");
	}
	rbtree_postorder_for_each_entry_safe(bo_va, tmp, &vm->va, it.rb) {
		interval_tree_remove(&bo_va->it, &vm->va);
		r = radeon_bo_reserve(bo_va->bo, false);
		if (!r) {
			list_del_init(&bo_va->bo_list);
			radeon_bo_unreserve(bo_va->bo);
			kfree(bo_va);
		}
	}
	list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
		radeon_bo_unref(&bo_va->bo);
		kfree(bo_va);
	}

	for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
		radeon_bo_unref(&vm->page_tables[i].bo);
	kfree(vm->page_tables);

	radeon_bo_unref(&vm->page_directory);

	radeon_fence_unref(&vm->fence);
	radeon_fence_unref(&vm->last_flush);
	radeon_fence_unref(&vm->last_id_use);

	mutex_destroy(&vm->mutex);
}