/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon.h"
#include "radeon_trace.h"

/*
 * GPUVM
 * GPUVM is similar to the legacy gart on older asics, however
 * rather than there being a single global gart table
 * for the entire GPU, there are multiple VM page tables active
 * at any given time. The VM page tables can contain a mix of
 * vram pages and system memory pages, and system memory pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID. When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer. VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
 * command buffers and a VMID is assigned.
 * Cayman/Trinity support up to 8 active VMs at any given time;
 * SI supports 16.
 */
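
/*
 * Illustrative sketch added to this cleaned-up listing; it is NOT part of
 * the original driver source and nothing calls it. It shows how a GPU page
 * frame number splits into a page directory index and a page table index
 * under the radeon_vm_block_size scheme used throughout this file (see
 * radeon_vm_num_pdes() and radeon_vm_update_ptes() below). The helper name
 * is hypothetical.
 */
static inline void radeon_vm_split_pfn_example(uint64_t pfn,
					       uint64_t *pde_idx,
					       uint64_t *pte_idx)
{
	/* upper bits select which page table via the page directory */
	*pde_idx = pfn >> radeon_vm_block_size;
	/* lower bits select the entry inside that page table */
	*pte_idx = pfn & (RADEON_VM_PTE_COUNT - 1);
}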

/**
 * radeon_vm_num_pdes - return the number of page directory entries
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the number of page directory entries (cayman+).
 */
static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
{
	return rdev->vm_manager.max_pfn >> radeon_vm_block_size;
}

/**
 * radeon_vm_directory_size - returns the size of the page directory in bytes
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the size of the page directory in bytes (cayman+).
 */
static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
{
	return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
}

/**
 * radeon_vm_manager_init - init the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Init the vm manager (cayman+).
 * Returns 0 for success, error for failure.
 */
int radeon_vm_manager_init(struct radeon_device *rdev)
{
	int r;

	if (!rdev->vm_manager.enabled) {
		r = radeon_asic_vm_init(rdev);
		if (r)
			return r;

		rdev->vm_manager.enabled = true;
	}
	return 0;
}

/**
 * radeon_vm_manager_fini - tear down the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the VM manager (cayman+).
 */
void radeon_vm_manager_fini(struct radeon_device *rdev)
{
	int i;

	if (!rdev->vm_manager.enabled)
		return;

	for (i = 0; i < RADEON_NUM_VM; ++i)
		radeon_fence_unref(&rdev->vm_manager.active[i]);
	radeon_asic_vm_fini(rdev);
	rdev->vm_manager.enabled = false;
}

/**
 * radeon_vm_get_bos - add the vm BOs to a validation list
 *
 * @rdev: radeon_device pointer
 * @vm: vm providing the BOs
 * @head: head of validation list
 *
 * Add the page directory to the list of BOs to
 * validate for command submission (cayman+).
 */
struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
					  struct radeon_vm *vm,
					  struct list_head *head)
{
	struct radeon_cs_reloc *list;
	unsigned i, idx;

	list = kmalloc_array(vm->max_pde_used + 2,
			     sizeof(struct radeon_cs_reloc), GFP_KERNEL);
	if (!list)
		return NULL;

	/* add the vm page table to the list */
	list[0].gobj = NULL;
	list[0].robj = vm->page_directory;
	list[0].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
	list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
	list[0].tv.bo = &vm->page_directory->tbo;
	list[0].tiling_flags = 0;
	list[0].handle = 0;
	list_add(&list[0].tv.head, head);

	for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
		if (!vm->page_tables[i].bo)
			continue;

		list[idx].gobj = NULL;
		list[idx].robj = vm->page_tables[i].bo;
		list[idx].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
		list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
		list[idx].tv.bo = &list[idx].robj->tbo;
		list[idx].tiling_flags = 0;
		list[idx].handle = 0;
		list_add(&list[idx++].tv.head, head);
	}

	return list;
}

/**
 * radeon_vm_grab_id - allocate the next free VMID
 *
 * @rdev: radeon_device pointer
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 *
 * Allocate an id for the vm (cayman+).
 * Returns the fence we need to sync to (if any).
 *
 * Global and local mutex must be locked!
 */
struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
				       struct radeon_vm *vm, int ring)
{
	struct radeon_fence *best[RADEON_NUM_RINGS] = {};
	unsigned choices[2] = {};
	unsigned i;

	/* check if the id is still valid */
	if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id])
		return NULL;

	/* we definitely need to flush */
	radeon_fence_unref(&vm->last_flush);

	/* skip over VMID 0, since it is the system VM */
	for (i = 1; i < rdev->vm_manager.nvm; ++i) {
		struct radeon_fence *fence = rdev->vm_manager.active[i];

		if (fence == NULL) {
			/* found a free one */
			vm->id = i;
			trace_radeon_vm_grab_id(vm->id, ring);
			return NULL;
		}

		if (radeon_fence_is_earlier(fence, best[fence->ring])) {
			best[fence->ring] = fence;
			choices[fence->ring == ring ? 0 : 1] = i;
		}
	}

	for (i = 0; i < 2; ++i) {
		if (choices[i]) {
			vm->id = choices[i];
			trace_radeon_vm_grab_id(vm->id, ring);
			return rdev->vm_manager.active[choices[i]];
		}
	}

	/* should never happen */
	BUG();
	return NULL;
}

/**
 * radeon_vm_flush - hardware flush the vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm we want to flush
 * @ring: ring to use for flush
 *
 * Flush the vm (cayman+).
 *
 * Global and local mutex must be locked!
 */
void radeon_vm_flush(struct radeon_device *rdev,
		     struct radeon_vm *vm,
		     int ring)
{
	uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);

	/* if we can't remember our last VM flush then flush now! */
	if (!vm->last_flush || pd_addr != vm->pd_gpu_addr) {
		trace_radeon_vm_flush(pd_addr, ring, vm->id);
		vm->pd_gpu_addr = pd_addr;
		radeon_ring_vm_flush(rdev, ring, vm);
	}
}

/**
 * radeon_vm_fence - remember fence for vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm we want to fence
 * @fence: fence to remember
 *
 * Fence the vm (cayman+).
 * Set the fence used to protect page table and id.
 *
 * Global and local mutex must be locked!
 */
void radeon_vm_fence(struct radeon_device *rdev,
		     struct radeon_vm *vm,
		     struct radeon_fence *fence)
{
	radeon_fence_unref(&vm->fence);
	vm->fence = radeon_fence_ref(fence);

	radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
	rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);

	radeon_fence_unref(&vm->last_id_use);
	vm->last_id_use = radeon_fence_ref(fence);

	/* we just flushed the VM, remember that */
	if (!vm->last_flush)
		vm->last_flush = radeon_fence_ref(fence);
}

/**
 * radeon_vm_bo_find - find the bo_va for a specific vm & bo
 *
 * @vm: requested vm
 * @bo: requested buffer object
 *
 * Find @bo inside the requested vm (cayman+).
 * Search inside the @bo's vm list for the requested vm.
 * Returns the found bo_va or NULL if none is found.
 *
 * Object has to be reserved!
 */
struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
				       struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, bo_list) {
		if (bo_va->vm == vm) {
			return bo_va;
		}
	}
	return NULL;
}

/**
 * radeon_vm_bo_add - add a bo to a specific vm
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @bo: radeon buffer object
 *
 * Add @bo into the requested vm (cayman+).
 * Add @bo to the list of bos associated with the vm.
 * Returns newly added bo_va or NULL for failure.
 *
 * Object has to be reserved!
 */
struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
				      struct radeon_vm *vm,
				      struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
	if (bo_va == NULL) {
		return NULL;
	}
	bo_va->vm = vm;
	bo_va->bo = bo;
	bo_va->it.start = 0;
	bo_va->it.last = 0;
	bo_va->flags = 0;
	bo_va->addr = 0;
	bo_va->ref_count = 1;
	INIT_LIST_HEAD(&bo_va->bo_list);
	INIT_LIST_HEAD(&bo_va->vm_status);

	mutex_lock(&vm->mutex);
	list_add_tail(&bo_va->bo_list, &bo->va);
	mutex_unlock(&vm->mutex);

	return bo_va;
}

/**
 * radeon_vm_set_pages - helper to call the right asic function
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the right asic function
 * to set up the page table using the DMA.
 */
static void radeon_vm_set_pages(struct radeon_device *rdev,
				struct radeon_ib *ib,
				uint64_t pe,
				uint64_t addr, unsigned count,
				uint32_t incr, uint32_t flags)
{
	trace_radeon_vm_set_page(pe, addr, count, incr, flags);

	if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
		uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8;
		radeon_asic_vm_copy_pages(rdev, ib, pe, src, count);

	} else if ((flags & R600_PTE_SYSTEM) || (count < 3)) {
		radeon_asic_vm_write_pages(rdev, ib, pe, addr,
					   count, incr, flags);

	} else {
		radeon_asic_vm_set_pages(rdev, ib, pe, addr,
					 count, incr, flags);
	}
}

/**
 * radeon_vm_clear_bo - initially clear the page dir/table
 *
 * @rdev: radeon_device pointer
 * @bo: bo to clear
 *
 * Fill the page directory or page table BO with zero entries (cayman+).
 * Returns 0 for success, error for failure.
 */
static int radeon_vm_clear_bo(struct radeon_device *rdev,
			      struct radeon_bo *bo)
{
	struct ttm_validate_buffer tv;
	struct ww_acquire_ctx ticket;
	struct list_head head;
	struct radeon_ib ib;
	unsigned entries;
	uint64_t addr;
	int r;

	memset(&tv, 0, sizeof(tv));
	tv.bo = &bo->tbo;

	INIT_LIST_HEAD(&head);
	list_add(&tv.head, &head);

	r = ttm_eu_reserve_buffers(&ticket, &head);
	if (r)
		return r;

	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
	if (r)
		goto error;

	addr = radeon_bo_gpu_offset(bo);
	entries = radeon_bo_size(bo) / 8;

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256);
	if (r)
		goto error;

	ib.length_dw = 0;

	radeon_vm_set_pages(rdev, &ib, addr, 0, entries, 0, 0);
	radeon_asic_vm_pad_ib(rdev, &ib);
	WARN_ON(ib.length_dw > 64);

	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r)
		goto error;

	ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence);
	radeon_ib_free(rdev, &ib);

	return 0;

error:
	ttm_eu_backoff_reservation(&ticket, &head);
	return r;
}

/**
 * radeon_vm_bo_set_addr - set the bo's virtual address inside a vm
 *
 * @rdev: radeon_device pointer
 * @bo_va: bo_va to store the address
 * @soffset: requested offset of the buffer in the VM address space
 * @flags: attributes of pages (read/write/valid/etc.)
 *
 * Set offset of @bo_va (cayman+).
 * Validate and set the offset requested within the vm address space.
 * Returns 0 for success, error for failure.
 *
 * Object has to be reserved!
 */
int radeon_vm_bo_set_addr(struct radeon_device *rdev,
			  struct radeon_bo_va *bo_va,
			  uint64_t soffset,
			  uint32_t flags)
{
	uint64_t size = radeon_bo_size(bo_va->bo);
	struct radeon_vm *vm = bo_va->vm;
	unsigned last_pfn, pt_idx;
	uint64_t eoffset;
	int r;

	if (soffset) {
		/* make sure the object fits at this offset */
		eoffset = soffset + size;
		if (soffset >= eoffset) {
			return -EINVAL;
		}

		last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
		if (last_pfn > rdev->vm_manager.max_pfn) {
			dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
				last_pfn, rdev->vm_manager.max_pfn);
			return -EINVAL;
		}

	} else {
		eoffset = last_pfn = 0;
	}

	mutex_lock(&vm->mutex);
	if (bo_va->it.start || bo_va->it.last) {
		if (bo_va->addr) {
			/* add a clone of the bo_va to clear the old address */
			struct radeon_bo_va *tmp;
			tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
			if (!tmp) {
				mutex_unlock(&vm->mutex);
				return -ENOMEM;
			}
			tmp->it.start = bo_va->it.start;
			tmp->it.last = bo_va->it.last;
			tmp->vm = vm;
			tmp->addr = bo_va->addr;
			tmp->bo = radeon_bo_ref(bo_va->bo);
			list_add(&tmp->vm_status, &vm->freed);
		}

		interval_tree_remove(&bo_va->it, &vm->va);
		bo_va->it.start = 0;
		bo_va->it.last = 0;
	}

	soffset /= RADEON_GPU_PAGE_SIZE;
	eoffset /= RADEON_GPU_PAGE_SIZE;
	if (soffset || eoffset) {
		struct interval_tree_node *it;
		it = interval_tree_iter_first(&vm->va, soffset, eoffset - 1);
		if (it) {
			struct radeon_bo_va *tmp;
			tmp = container_of(it, struct radeon_bo_va, it);
			/* bo and tmp overlap, invalid offset */
			dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with "
				"(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo,
				soffset, tmp->bo, tmp->it.start, tmp->it.last);
			mutex_unlock(&vm->mutex);
			return -EINVAL;
		}
		bo_va->it.start = soffset;
		bo_va->it.last = eoffset - 1;
		interval_tree_insert(&bo_va->it, &vm->va);
	}

	bo_va->flags = flags;
	bo_va->addr = 0;

	soffset >>= radeon_vm_block_size;
	eoffset >>= radeon_vm_block_size;

	BUG_ON(eoffset >= radeon_vm_num_pdes(rdev));

	if (eoffset > vm->max_pde_used)
		vm->max_pde_used = eoffset;

	radeon_bo_unreserve(bo_va->bo);

	/* walk over the address space and allocate the page tables */
	for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) {
		struct radeon_bo *pt;

		if (vm->page_tables[pt_idx].bo)
			continue;

		/* drop mutex to allocate and clear page table */
		mutex_unlock(&vm->mutex);

		r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8,
				     RADEON_GPU_PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, 0, NULL, &pt);
		if (r)
			return r;

		r = radeon_vm_clear_bo(rdev, pt);
		if (r) {
			radeon_bo_unref(&pt);
			radeon_bo_reserve(bo_va->bo, false);
			return r;
		}

		/* acquire mutex again */
		mutex_lock(&vm->mutex);
		if (vm->page_tables[pt_idx].bo) {
			/* someone else allocated the pt in the meantime */
			mutex_unlock(&vm->mutex);
			radeon_bo_unref(&pt);
			mutex_lock(&vm->mutex);
			continue;
		}

		vm->page_tables[pt_idx].addr = 0;
		vm->page_tables[pt_idx].bo = pt;
	}

	mutex_unlock(&vm->mutex);
	return radeon_bo_reserve(bo_va->bo, false);
}

/**
 * radeon_vm_map_gart - get the physical address of a gart page
 *
 * @rdev: radeon_device pointer
 * @addr: the unmapped addr
 *
 * Look up the physical address of the page that the pte resolves
 * to (cayman+).
 * Returns the physical address of the page.
 */
uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
{
	uint64_t result;

	/* page table offset */
	result = rdev->gart.pages_addr[addr >> PAGE_SHIFT];

	/* in case cpu page size != gpu page size */
	result |= addr & (~PAGE_MASK);

	return result;
}

/**
 * radeon_vm_page_flags - translate page flags to what the hw uses
 *
 * @flags: flags coming from userspace
 *
 * Translate the flags the userspace ABI uses to hw flags.
 */
static uint32_t radeon_vm_page_flags(uint32_t flags)
{
	uint32_t hw_flags = 0;
	hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
	hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
	hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
	if (flags & RADEON_VM_PAGE_SYSTEM) {
		hw_flags |= R600_PTE_SYSTEM;
		hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
	}
	return hw_flags;
}

/**
 * radeon_vm_update_page_directory - make sure that the page directory is valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Allocates new page tables if necessary
 * and updates the page directory (cayman+).
 * Returns 0 for success, error for failure.
 *
 * Global and local mutex must be locked!
 */
int radeon_vm_update_page_directory(struct radeon_device *rdev,
				    struct radeon_vm *vm)
{
	struct radeon_bo *pd = vm->page_directory;
	uint64_t pd_addr = radeon_bo_gpu_offset(pd);
	uint32_t incr = RADEON_VM_PTE_COUNT * 8;
	uint64_t last_pde = ~0, last_pt = ~0;
	unsigned count = 0, pt_idx, ndw;
	struct radeon_ib ib;
	int r;

	/* padding, etc. */
	ndw = 64;

	/* assume the worst case */
	ndw += vm->max_pde_used * 6;

	/* update too big for an IB */
	if (ndw > 0xfffff)
		return -ENOMEM;

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
	if (r)
		return r;
	ib.length_dw = 0;

	/* walk over the address space and update the page directory */
	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
		struct radeon_bo *bo = vm->page_tables[pt_idx].bo;
		uint64_t pde, pt;

		if (bo == NULL)
			continue;

		pt = radeon_bo_gpu_offset(bo);
		if (vm->page_tables[pt_idx].addr == pt)
			continue;
		vm->page_tables[pt_idx].addr = pt;

		pde = pd_addr + pt_idx * 8;
		if (((last_pde + 8 * count) != pde) ||
		    ((last_pt + incr * count) != pt)) {

			if (count) {
				radeon_vm_set_pages(rdev, &ib, last_pde,
						    last_pt, count, incr,
						    R600_PTE_VALID);
			}

			count = 1;
			last_pde = pde;
			last_pt = pt;
		} else {
			++count;
		}
	}

	if (count)
		radeon_vm_set_pages(rdev, &ib, last_pde, last_pt, count,
				    incr, R600_PTE_VALID);

	if (ib.length_dw != 0) {
		radeon_asic_vm_pad_ib(rdev, &ib);
		radeon_semaphore_sync_to(ib.semaphore, pd->tbo.sync_obj);
		radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use);
		WARN_ON(ib.length_dw > ndw);
		r = radeon_ib_schedule(rdev, &ib, NULL, false);
		if (r) {
			radeon_ib_free(rdev, &ib);
			return r;
		}
		radeon_fence_unref(&vm->fence);
		vm->fence = radeon_fence_ref(ib.fence);
		radeon_fence_unref(&vm->last_flush);
	}
	radeon_ib_free(rdev, &ib);

	return 0;
}

/**
 * radeon_vm_frag_ptes - add fragment information to PTEs
 *
 * @rdev: radeon_device pointer
 * @ib: IB for the update
 * @pe_start: first PTE to handle
 * @pe_end: last PTE to handle
 * @addr: addr those PTEs should point to
 * @flags: hw mapping flags
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_frag_ptes(struct radeon_device *rdev,
				struct radeon_ib *ib,
				uint64_t pe_start, uint64_t pe_end,
				uint64_t addr, uint32_t flags)
{
	/**
	 * The MC L1 TLB supports variable sized pages, based on a fragment
	 * field in the PTE. When this field is set to a non-zero value, page
	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
	 * flags are considered valid for all PTEs within the fragment range
	 * and corresponding mappings are assumed to be physically contiguous.
	 *
	 * The L1 TLB can store a single PTE for the whole fragment,
	 * significantly increasing the space available for translation
	 * caching. This leads to large improvements in throughput when the
	 * TLB is under pressure.
	 *
	 * The L2 TLB distributes small and large fragments into two
	 * asymmetric partitions. The large fragment cache is significantly
	 * larger. Thus, we try to use large fragments wherever possible.
	 * Userspace can support this by aligning virtual base address and
	 * allocation size to the fragment size.
	 */
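
	/*
	 * Worked example (comment added for clarity, not in the original
	 * source): with 4KB GPU pages, a 64KB fragment spans 16 PTEs,
	 * i.e. 16 * 8 = 0x80 bytes of PTE space, which is why frag_align
	 * below is 0x80 on SI and newer; a 256KB fragment spans 64 PTEs,
	 * i.e. 0x200 bytes, the NI/Cayman value.
	 */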

	/* NI is optimized for 256KB fragments, SI and newer for 64KB */
	uint64_t frag_flags = rdev->family == CHIP_CAYMAN ?
			R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB;
	uint64_t frag_align = rdev->family == CHIP_CAYMAN ? 0x200 : 0x80;

	uint64_t frag_start = ALIGN(pe_start, frag_align);
	uint64_t frag_end = pe_end & ~(frag_align - 1);

	unsigned count;

	/* system pages are non contiguous */
	if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) ||
	    (frag_start >= frag_end)) {

		count = (pe_end - pe_start) / 8;
		radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
				    RADEON_GPU_PAGE_SIZE, flags);
		return;
	}

	/* handle the 4K area at the beginning */
	if (pe_start != frag_start) {
		count = (frag_start - pe_start) / 8;
		radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
				    RADEON_GPU_PAGE_SIZE, flags);
		addr += RADEON_GPU_PAGE_SIZE * count;
	}

	/* handle the area in the middle */
	count = (frag_end - frag_start) / 8;
	radeon_vm_set_pages(rdev, ib, frag_start, addr, count,
			    RADEON_GPU_PAGE_SIZE, flags | frag_flags);

	/* handle the 4K area at the end */
	if (frag_end != pe_end) {
		addr += RADEON_GPU_PAGE_SIZE * count;
		count = (pe_end - frag_end) / 8;
		radeon_vm_set_pages(rdev, ib, frag_end, addr, count,
				    RADEON_GPU_PAGE_SIZE, flags);
	}
}

/**
 * radeon_vm_update_ptes - make sure that page tables are valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @ib: indirect buffer to use for the update
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end (cayman+).
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_update_ptes(struct radeon_device *rdev,
				  struct radeon_vm *vm,
				  struct radeon_ib *ib,
				  uint64_t start, uint64_t end,
				  uint64_t dst, uint32_t flags)
{
	uint64_t mask = RADEON_VM_PTE_COUNT - 1;
	uint64_t last_pte = ~0, last_dst = ~0;
	unsigned count = 0;
	uint64_t addr;

	/* walk over the address space and update the page tables */
	for (addr = start; addr < end; ) {
		uint64_t pt_idx = addr >> radeon_vm_block_size;
		struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
		unsigned nptes;
		uint64_t pte;

		radeon_semaphore_sync_to(ib->semaphore, pt->tbo.sync_obj);

		if ((addr & ~mask) == (end & ~mask))
			nptes = end - addr;
		else
			nptes = RADEON_VM_PTE_COUNT - (addr & mask);

		pte = radeon_bo_gpu_offset(pt);
		pte += (addr & mask) * 8;

		if ((last_pte + 8 * count) != pte) {

			if (count) {
				radeon_vm_frag_ptes(rdev, ib, last_pte,
						    last_pte + 8 * count,
						    last_dst, flags);
			}

			count = nptes;
			last_pte = pte;
			last_dst = dst;
		} else {
			count += nptes;
		}

		addr += nptes;
		dst += nptes * RADEON_GPU_PAGE_SIZE;
	}

	if (count) {
		radeon_vm_frag_ptes(rdev, ib, last_pte,
				    last_pte + 8 * count,
				    last_dst, flags);
	}
}

/**
 * radeon_vm_bo_update - map a bo into the vm page table
 *
 * @rdev: radeon_device pointer
 * @bo_va: requested bo_va
 * @mem: ttm mem
 *
 * Fill in the page table entries for @bo_va (cayman+).
 * Returns 0 for success, -EINVAL for failure.
 *
 * Object has to be reserved and mutex must be locked!
 */
int radeon_vm_bo_update(struct radeon_device *rdev,
			struct radeon_bo_va *bo_va,
			struct ttm_mem_reg *mem)
{
	struct radeon_vm *vm = bo_va->vm;
	struct radeon_ib ib;
	unsigned nptes, ncmds, ndw;
	uint64_t addr;
	uint32_t flags;
	int r;

	if (!bo_va->it.start) {
		dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n",
			bo_va->bo, vm);
		return -EINVAL;
	}

	list_del_init(&bo_va->vm_status);

	bo_va->flags &= ~RADEON_VM_PAGE_VALID;
	bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
	bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED;
	if (mem) {
		addr = mem->start << PAGE_SHIFT;
		if (mem->mem_type != TTM_PL_SYSTEM) {
			bo_va->flags |= RADEON_VM_PAGE_VALID;
		}
		if (mem->mem_type == TTM_PL_TT) {
			bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
			if (!(bo_va->bo->flags & (RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC)))
				bo_va->flags |= RADEON_VM_PAGE_SNOOPED;

		} else {
			addr += rdev->vm_manager.vram_base_offset;
		}
	} else {
		addr = 0;
	}

	if (addr == bo_va->addr)
		return 0;
	bo_va->addr = addr;

	trace_radeon_vm_bo_update(bo_va);

	nptes = bo_va->it.last - bo_va->it.start + 1;

	/* reserve space for one command every (1 << BLOCK_SIZE) entries
	   or 2k dwords (whatever is smaller) */
	ncmds = (nptes >> min(radeon_vm_block_size, 11)) + 1;

	/* padding, etc. */
	ndw = 64;

	flags = radeon_vm_page_flags(bo_va->flags);
	if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
		/* only copy commands needed */
		ndw += ncmds * 7;

	} else if (flags & R600_PTE_SYSTEM) {
		/* header for write data commands */
		ndw += ncmds * 4;

		/* body of write data command */
		ndw += nptes * 2;

	} else {
		/* set page commands needed */
		ndw += ncmds * 10;

		/* two extra commands for begin/end of fragment */
		ndw += 2 * 10;
	}

	/* update too big for an IB */
	if (ndw > 0xfffff)
		return -ENOMEM;

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
	if (r)
		return r;
	ib.length_dw = 0;

	radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
			      bo_va->it.last + 1, addr,
			      radeon_vm_page_flags(bo_va->flags));

	radeon_asic_vm_pad_ib(rdev, &ib);
	WARN_ON(ib.length_dw > ndw);

	radeon_semaphore_sync_to(ib.semaphore, vm->fence);
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_ib_free(rdev, &ib);
		return r;
	}
	radeon_fence_unref(&vm->fence);
	vm->fence = radeon_fence_ref(ib.fence);
	radeon_ib_free(rdev, &ib);
	radeon_fence_unref(&vm->last_flush);

	return 0;
}

/**
 * radeon_vm_clear_freed - clear freed BOs in the PT
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Make sure all freed BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and mutex must be locked!
 */
int radeon_vm_clear_freed(struct radeon_device *rdev,
			  struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va, *tmp;
	int r;

	list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
		r = radeon_vm_bo_update(rdev, bo_va, NULL);
		radeon_bo_unref(&bo_va->bo);
		kfree(bo_va);
		if (r)
			return r;
	}
	return 0;
}

/**
 * radeon_vm_clear_invalids - clear invalidated BOs in the PT
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Make sure all invalidated BOs are cleared in the PT.
 * Returns 0 for success.
 *
 * PTs have to be reserved and mutex must be locked!
 */
int radeon_vm_clear_invalids(struct radeon_device *rdev,
			     struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va, *tmp;
	int r;

	list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, vm_status) {
		r = radeon_vm_bo_update(rdev, bo_va, NULL);
		if (r)
			return r;
	}
	return 0;
}

/**
 * radeon_vm_bo_rmv - remove a bo from a specific vm
 *
 * @rdev: radeon_device pointer
 * @bo_va: requested bo_va
 *
 * Remove @bo_va->bo from the requested vm (cayman+).
 *
 * Object has to be reserved!
 */
void radeon_vm_bo_rmv(struct radeon_device *rdev,
		      struct radeon_bo_va *bo_va)
{
	struct radeon_vm *vm = bo_va->vm;

	list_del(&bo_va->bo_list);

	mutex_lock(&vm->mutex);
	interval_tree_remove(&bo_va->it, &vm->va);
	list_del(&bo_va->vm_status);

	if (bo_va->addr) {
		bo_va->bo = radeon_bo_ref(bo_va->bo);
		list_add(&bo_va->vm_status, &vm->freed);
	} else {
		kfree(bo_va);
	}

	mutex_unlock(&vm->mutex);
}

/**
 * radeon_vm_bo_invalidate - mark the bo as invalid
 *
 * @rdev: radeon_device pointer
 * @bo: radeon buffer object
 *
 * Mark @bo as invalid (cayman+).
 */
void radeon_vm_bo_invalidate(struct radeon_device *rdev,
			     struct radeon_bo *bo)
{
	struct radeon_bo_va *bo_va;

	list_for_each_entry(bo_va, &bo->va, bo_list) {
		if (bo_va->addr) {
			mutex_lock(&bo_va->vm->mutex);
			list_del(&bo_va->vm_status);
			list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
			mutex_unlock(&bo_va->vm->mutex);
		}
	}
}

/**
 * radeon_vm_init - initialize a vm instance
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Init @vm fields (cayman+).
 * Returns 0 for success, error for failure.
 */
int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
{
	const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE,
				   RADEON_VM_PTE_COUNT * 8);
	unsigned pd_size, pd_entries, pts_size;
	int r;

	vm->id = 0;
	vm->ib_bo_va = NULL;
	vm->fence = NULL;
	vm->last_flush = NULL;
	vm->last_id_use = NULL;
	mutex_init(&vm->mutex);
	vm->va = RB_ROOT;
	INIT_LIST_HEAD(&vm->invalidated);
	INIT_LIST_HEAD(&vm->freed);

	pd_size = radeon_vm_directory_size(rdev);
	pd_entries = radeon_vm_num_pdes(rdev);

	/* allocate page table array */
	pts_size = pd_entries * sizeof(struct radeon_vm_pt);
	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
	if (vm->page_tables == NULL) {
		DRM_ERROR("Cannot allocate memory for page table array\n");
		return -ENOMEM;
	}

	r = radeon_bo_create(rdev, pd_size, align, true,
			     RADEON_GEM_DOMAIN_VRAM, 0, NULL,
			     &vm->page_directory);
	if (r)
		return r;

	r = radeon_vm_clear_bo(rdev, vm->page_directory);
	if (r) {
		radeon_bo_unref(&vm->page_directory);
		vm->page_directory = NULL;
		return r;
	}

	return 0;
}

/**
 * radeon_vm_fini - tear down a vm instance
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Tear down @vm (cayman+).
 * Unbind the VM and remove all bos from the vm bo list.
 */
void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
{
	struct radeon_bo_va *bo_va, *tmp;
	int i, r;

	if (!RB_EMPTY_ROOT(&vm->va)) {
		dev_err(rdev->dev, "still active bo inside vm\n");
	}
	rbtree_postorder_for_each_entry_safe(bo_va, tmp, &vm->va, it.rb) {
		interval_tree_remove(&bo_va->it, &vm->va);
		r = radeon_bo_reserve(bo_va->bo, false);
		if (!r) {
			list_del_init(&bo_va->bo_list);
			radeon_bo_unreserve(bo_va->bo);
			kfree(bo_va);
		}
	}
	list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) {
		radeon_bo_unref(&bo_va->bo);
		kfree(bo_va);
	}

	for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
		radeon_bo_unref(&vm->page_tables[i].bo);
	kfree(vm->page_tables);

	radeon_bo_unref(&vm->page_directory);

	radeon_fence_unref(&vm->fence);
	radeon_fence_unref(&vm->last_flush);
	radeon_fence_unref(&vm->last_id_use);

	mutex_destroy(&vm->mutex);
}