Rev 5078 | Rev 6104 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5078 | serge | 1 | /* |
2 | * Copyright 2008 Advanced Micro Devices, Inc. |
||
3 | * Copyright 2008 Red Hat Inc. |
||
4 | * Copyright 2009 Jerome Glisse. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the "Software"), |
||
8 | * to deal in the Software without restriction, including without limitation |
||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
10 | * and/or sell copies of the Software, and to permit persons to whom the |
||
11 | * Software is furnished to do so, subject to the following conditions: |
||
12 | * |
||
13 | * The above copyright notice and this permission notice shall be included in |
||
14 | * all copies or substantial portions of the Software. |
||
15 | * |
||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
19 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
||
20 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
||
21 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
||
22 | * OTHER DEALINGS IN THE SOFTWARE. |
||
23 | * |
||
24 | * Authors: Dave Airlie |
||
25 | * Alex Deucher |
||
26 | * Jerome Glisse |
||
27 | */ |
||
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon.h"
#include "radeon_trace.h"
||
32 | |||
/*
 * GPUVM
 * GPUVM is similar to the legacy gart on older asics, however
 * rather than there being a single global gart table
 * for the entire GPU, there are multiple VM page tables active
 * at any given time.  The VM page tables can contain a mix of
 * vram pages and system memory pages, and system memory pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID.  When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer.  VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
 * command buffers and a VMID is assigned.
 * Cayman/Trinity support up to 8 active VMs at any given time;
 * SI supports 16.
 */
||
52 | |||
53 | /** |
||
54 | * radeon_vm_num_pde - return the number of page directory entries |
||
55 | * |
||
56 | * @rdev: radeon_device pointer |
||
57 | * |
||
58 | * Calculate the number of page directory entries (cayman+). |
||
59 | */ |
||
60 | static unsigned radeon_vm_num_pdes(struct radeon_device *rdev) |
||
61 | { |
||
62 | return rdev->vm_manager.max_pfn >> radeon_vm_block_size; |
||
63 | } |
||
64 | |||
/**
 * radeon_vm_directory_size - returns the size of the page directory in bytes
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the size of the page directory in bytes (cayman+),
 * padded up to GPU page granularity.
 */
static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
{
	/* one 8 byte entry per page directory index */
	unsigned bytes = radeon_vm_num_pdes(rdev) * 8;

	return RADEON_GPU_PAGE_ALIGN(bytes);
}
||
76 | |||
77 | /** |
||
78 | * radeon_vm_manager_init - init the vm manager |
||
79 | * |
||
80 | * @rdev: radeon_device pointer |
||
81 | * |
||
82 | * Init the vm manager (cayman+). |
||
83 | * Returns 0 for success, error for failure. |
||
84 | */ |
||
85 | int radeon_vm_manager_init(struct radeon_device *rdev) |
||
86 | { |
||
87 | int r; |
||
88 | |||
89 | if (!rdev->vm_manager.enabled) { |
||
90 | r = radeon_asic_vm_init(rdev); |
||
91 | if (r) |
||
92 | return r; |
||
93 | |||
94 | rdev->vm_manager.enabled = true; |
||
95 | } |
||
96 | return 0; |
||
97 | } |
||
98 | |||
99 | /** |
||
100 | * radeon_vm_manager_fini - tear down the vm manager |
||
101 | * |
||
102 | * @rdev: radeon_device pointer |
||
103 | * |
||
104 | * Tear down the VM manager (cayman+). |
||
105 | */ |
||
106 | void radeon_vm_manager_fini(struct radeon_device *rdev) |
||
107 | { |
||
108 | int i; |
||
109 | |||
110 | if (!rdev->vm_manager.enabled) |
||
111 | return; |
||
112 | |||
113 | for (i = 0; i < RADEON_NUM_VM; ++i) |
||
114 | radeon_fence_unref(&rdev->vm_manager.active[i]); |
||
115 | radeon_asic_vm_fini(rdev); |
||
116 | rdev->vm_manager.enabled = false; |
||
117 | } |
||
118 | |||
/**
 * radeon_vm_get_bos - add the vm BOs to a validation list
 *
 * @rdev: radeon_device pointer
 * @vm: vm providing the BOs
 * @head: head of validation list
 *
 * Add the page directory and all allocated page tables to the list
 * of BOs to validate for command submission (cayman+).
 * Returns a kmalloc'd array (caller frees) or NULL on allocation failure.
 */
struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev,
					 struct radeon_vm *vm,
					 struct list_head *head)
{
	struct radeon_bo_list *list;
	unsigned i, idx;

	/* worst case: one entry per PDE plus the page directory itself
	 * (max_pde_used is an inclusive index, hence +2) */
	list = kmalloc_array(vm->max_pde_used + 2,
			     sizeof(struct radeon_bo_list), GFP_KERNEL);
	if (!list)
		return NULL;

	/* add the vm page table to the list */
	list[0].robj = vm->page_directory;
	list[0].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
	list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
	list[0].tv.bo = &vm->page_directory->tbo;
	list[0].tv.shared = true;
	list[0].tiling_flags = 0;
	list_add(&list[0].tv.head, head);

	/* add every allocated page table; unallocated slots are skipped */
	for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
		if (!vm->page_tables[i].bo)
			continue;

		list[idx].robj = vm->page_tables[i].bo;
		list[idx].prefered_domains = RADEON_GEM_DOMAIN_VRAM;
		list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
		list[idx].tv.bo = &list[idx].robj->tbo;
		list[idx].tv.shared = true;
		list[idx].tiling_flags = 0;
		list_add(&list[idx++].tv.head, head);
	}

	return list;
}
||
164 | |||
/**
 * radeon_vm_grab_id - allocate the next free VMID
 *
 * @rdev: radeon_device pointer
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 *
 * Allocate an id for the vm (cayman+).
 * Returns the fence we need to sync to (if any), or NULL when the
 * current id is still valid or a free id was found.
 *
 * Global and local mutex must be locked!
 */
struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
				       struct radeon_vm *vm, int ring)
{
	struct radeon_fence *best[RADEON_NUM_RINGS] = {};
	struct radeon_vm_id *vm_id = &vm->ids[ring];

	unsigned choices[2] = {};
	unsigned i;

	/* check if the id is still valid: nobody else fenced this VMID
	 * since we last used it */
	if (vm_id->id && vm_id->last_id_use &&
	    vm_id->last_id_use == rdev->vm_manager.active[vm_id->id])
		return NULL;

	/* we definitely need to flush */
	vm_id->pd_gpu_addr = ~0ll;

	/* skip over VMID 0, since it is the system VM */
	for (i = 1; i < rdev->vm_manager.nvm; ++i) {
		struct radeon_fence *fence = rdev->vm_manager.active[i];

		if (fence == NULL) {
			/* found a free one */
			vm_id->id = i;
			trace_radeon_vm_grab_id(i, ring);
			return NULL;
		}

		/* remember the VMID with the oldest fence per ring;
		 * choices[0] is on our ring (no sync), choices[1] is not */
		if (radeon_fence_is_earlier(fence, best[fence->ring])) {
			best[fence->ring] = fence;
			choices[fence->ring == ring ? 0 : 1] = i;
		}
	}

	/* prefer a VMID last used on our own ring to avoid a cross-ring sync */
	for (i = 0; i < 2; ++i) {
		if (choices[i]) {
			vm_id->id = choices[i];
			trace_radeon_vm_grab_id(choices[i], ring);
			return rdev->vm_manager.active[choices[i]];
		}
	}

	/* should never happen */
	BUG();
	return NULL;
}
||
223 | |||
/**
 * radeon_vm_flush - hardware flush the vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm we want to flush
 * @ring: ring to use for flush
 * @updates: last vm update that is waited for
 *
 * Flush the vm on the ring (cayman+), but only if the page directory
 * moved or the last flushed update is older than @updates.
 *
 * Global and local mutex must be locked!
 */
void radeon_vm_flush(struct radeon_device *rdev,
		     struct radeon_vm *vm,
		     int ring, struct radeon_fence *updates)
{
	uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
	struct radeon_vm_id *vm_id = &vm->ids[ring];

	/* flush when the PD address changed, or when there are updates
	 * newer than the ones we flushed last time */
	if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates ||
	    radeon_fence_is_earlier(vm_id->flushed_updates, updates)) {

		trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id);
		radeon_fence_unref(&vm_id->flushed_updates);
		vm_id->flushed_updates = radeon_fence_ref(updates);
		vm_id->pd_gpu_addr = pd_addr;
		radeon_ring_vm_flush(rdev, &rdev->ring[ring],
				     vm_id->id, vm_id->pd_gpu_addr);

	}
}
||
255 | |||
256 | /** |
||
257 | * radeon_vm_fence - remember fence for vm |
||
258 | * |
||
259 | * @rdev: radeon_device pointer |
||
260 | * @vm: vm we want to fence |
||
261 | * @fence: fence to remember |
||
262 | * |
||
263 | * Fence the vm (cayman+). |
||
264 | * Set the fence used to protect page table and id. |
||
265 | * |
||
266 | * Global and local mutex must be locked! |
||
267 | */ |
||
268 | void radeon_vm_fence(struct radeon_device *rdev, |
||
269 | struct radeon_vm *vm, |
||
270 | struct radeon_fence *fence) |
||
271 | { |
||
5271 | serge | 272 | unsigned vm_id = vm->ids[fence->ring].id; |
5078 | serge | 273 | |
5271 | serge | 274 | radeon_fence_unref(&rdev->vm_manager.active[vm_id]); |
275 | rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence); |
||
5078 | serge | 276 | |
5271 | serge | 277 | radeon_fence_unref(&vm->ids[fence->ring].last_id_use); |
278 | vm->ids[fence->ring].last_id_use = radeon_fence_ref(fence); |
||
5078 | serge | 279 | } |
280 | |||
281 | /** |
||
282 | * radeon_vm_bo_find - find the bo_va for a specific vm & bo |
||
283 | * |
||
284 | * @vm: requested vm |
||
285 | * @bo: requested buffer object |
||
286 | * |
||
287 | * Find @bo inside the requested vm (cayman+). |
||
288 | * Search inside the @bos vm list for the requested vm |
||
289 | * Returns the found bo_va or NULL if none is found |
||
290 | * |
||
291 | * Object has to be reserved! |
||
292 | */ |
||
293 | struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm, |
||
294 | struct radeon_bo *bo) |
||
295 | { |
||
296 | struct radeon_bo_va *bo_va; |
||
297 | |||
298 | list_for_each_entry(bo_va, &bo->va, bo_list) { |
||
299 | if (bo_va->vm == vm) { |
||
300 | return bo_va; |
||
301 | } |
||
302 | } |
||
303 | return NULL; |
||
304 | } |
||
305 | |||
306 | /** |
||
307 | * radeon_vm_bo_add - add a bo to a specific vm |
||
308 | * |
||
309 | * @rdev: radeon_device pointer |
||
310 | * @vm: requested vm |
||
311 | * @bo: radeon buffer object |
||
312 | * |
||
313 | * Add @bo into the requested vm (cayman+). |
||
314 | * Add @bo to the list of bos associated with the vm |
||
315 | * Returns newly added bo_va or NULL for failure |
||
316 | * |
||
317 | * Object has to be reserved! |
||
318 | */ |
||
319 | struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, |
||
320 | struct radeon_vm *vm, |
||
321 | struct radeon_bo *bo) |
||
322 | { |
||
323 | struct radeon_bo_va *bo_va; |
||
324 | |||
325 | bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); |
||
326 | if (bo_va == NULL) { |
||
327 | return NULL; |
||
328 | } |
||
329 | bo_va->vm = vm; |
||
330 | bo_va->bo = bo; |
||
331 | bo_va->it.start = 0; |
||
332 | bo_va->it.last = 0; |
||
333 | bo_va->flags = 0; |
||
334 | bo_va->addr = 0; |
||
335 | bo_va->ref_count = 1; |
||
336 | INIT_LIST_HEAD(&bo_va->bo_list); |
||
337 | INIT_LIST_HEAD(&bo_va->vm_status); |
||
338 | |||
339 | mutex_lock(&vm->mutex); |
||
340 | list_add_tail(&bo_va->bo_list, &bo->va); |
||
341 | mutex_unlock(&vm->mutex); |
||
342 | |||
343 | return bo_va; |
||
344 | } |
||
345 | |||
/**
 * radeon_vm_set_pages - helper to call the right asic function
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: hw access flags
 *
 * Traces the parameters and calls the right asic function
 * to setup the page table using the DMA.
 */
static void radeon_vm_set_pages(struct radeon_device *rdev,
				struct radeon_ib *ib,
				uint64_t pe,
				uint64_t addr, unsigned count,
				uint32_t incr, uint32_t flags)
{
	trace_radeon_vm_set_page(pe, addr, count, incr, flags);

	if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
		/* entries come straight from the system GART table:
		 * copy them instead of recomputing each PTE */
		uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8;
		radeon_asic_vm_copy_pages(rdev, ib, pe, src, count);

	} else if ((flags & R600_PTE_SYSTEM) || (count < 3)) {
		/* scattered system pages, or too few entries for the
		 * set_pages path to pay off: write PTEs directly */
		radeon_asic_vm_write_pages(rdev, ib, pe, addr,
					   count, incr, flags);

	} else {
		/* contiguous vram run: let the asic generate the PTEs */
		radeon_asic_vm_set_pages(rdev, ib, pe, addr,
					 count, incr, flags);
	}
}
||
381 | |||
/**
 * radeon_vm_clear_bo - initially clear the page dir/table
 *
 * @rdev: radeon_device pointer
 * @bo: bo to clear
 *
 * Fills the whole BO with zero PTEs via a DMA IB so the GPU never
 * sees stale translations.  Returns 0 on success, error otherwise.
 */
static int radeon_vm_clear_bo(struct radeon_device *rdev,
			      struct radeon_bo *bo)
{
	struct radeon_ib ib;
	unsigned entries;
	uint64_t addr;
	int r;

	r = radeon_bo_reserve(bo, false);
	if (r)
		return r;

	/* make sure the BO is actually placed before taking its offset */
	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
	if (r)
		goto error_unreserve;

	addr = radeon_bo_gpu_offset(bo);
	entries = radeon_bo_size(bo) / 8;	/* 8 bytes per PTE/PDE */

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256);
	if (r)
		goto error_unreserve;

	ib.length_dw = 0;

	/* write 'entries' zero PTEs starting at addr */
	radeon_vm_set_pages(rdev, &ib, addr, 0, entries, 0, 0);
	radeon_asic_vm_pad_ib(rdev, &ib);
	WARN_ON(ib.length_dw > 64);

	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r)
		goto error_free;

	/* mark the fence so waiters know it only protects a VM update */
	ib.fence->is_vm_update = true;
	radeon_bo_fence(bo, ib.fence, false);

error_free:
	radeon_ib_free(rdev, &ib);

error_unreserve:
	radeon_bo_unreserve(bo);
	return r;
}
||
431 | |||
/**
 * radeon_vm_bo_set_addr - set bos virtual address inside a vm
 *
 * @rdev: radeon_device pointer
 * @bo_va: bo_va to store the address
 * @soffset: requested offset of the buffer in the VM address space
 * @flags: attributes of pages (read/write/valid/etc.)
 *
 * Set offset of @bo_va (cayman+).
 * Validate and set the offset requested within the vm address space.
 * Returns 0 for success, error for failure.
 *
 * Object has to be reserved and gets unreserved by this function!
 */
int radeon_vm_bo_set_addr(struct radeon_device *rdev,
			  struct radeon_bo_va *bo_va,
			  uint64_t soffset,
			  uint32_t flags)
{
	uint64_t size = radeon_bo_size(bo_va->bo);
	struct radeon_vm *vm = bo_va->vm;
	unsigned last_pfn, pt_idx;
	uint64_t eoffset;
	int r;

	if (soffset) {
		/* make sure object fit at this offset */
		eoffset = soffset + size;
		if (soffset >= eoffset) {
			/* wrapped around the 64-bit address space */
			return -EINVAL;
		}

		last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
		if (last_pfn > rdev->vm_manager.max_pfn) {
			dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
				last_pfn, rdev->vm_manager.max_pfn);
			return -EINVAL;
		}

	} else {
		/* soffset == 0 means "unmap" */
		eoffset = last_pfn = 0;
	}

	mutex_lock(&vm->mutex);
	if (bo_va->it.start || bo_va->it.last) {
		if (bo_va->addr) {
			/* add a clone of the bo_va to clear the old address */
			struct radeon_bo_va *tmp;
			tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
			if (!tmp) {
				mutex_unlock(&vm->mutex);
				return -ENOMEM;
			}
			tmp->it.start = bo_va->it.start;
			tmp->it.last = bo_va->it.last;
			tmp->vm = vm;
			tmp->addr = bo_va->addr;
			/* tmp holds its own BO reference until the clear
			 * of the old mapping is processed */
			tmp->bo = radeon_bo_ref(bo_va->bo);
			spin_lock(&vm->status_lock);
			list_add(&tmp->vm_status, &vm->freed);
			spin_unlock(&vm->status_lock);
		}

		interval_tree_remove(&bo_va->it, &vm->va);
		bo_va->it.start = 0;
		bo_va->it.last = 0;
	}

	/* from here on offsets are in GPU pages, not bytes */
	soffset /= RADEON_GPU_PAGE_SIZE;
	eoffset /= RADEON_GPU_PAGE_SIZE;
	if (soffset || eoffset) {
		struct interval_tree_node *it;
		it = interval_tree_iter_first(&vm->va, soffset, eoffset - 1);
		if (it) {
			struct radeon_bo_va *tmp;
			tmp = container_of(it, struct radeon_bo_va, it);
			/* bo and tmp overlap, invalid offset */
			dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with "
				"(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo,
				soffset, tmp->bo, tmp->it.start, tmp->it.last);
			mutex_unlock(&vm->mutex);
			return -EINVAL;
		}
		bo_va->it.start = soffset;
		bo_va->it.last = eoffset - 1;
		interval_tree_insert(&bo_va->it, &vm->va);
	}

	bo_va->flags = flags;
	bo_va->addr = 0;

	/* convert page offsets to page-directory indices */
	soffset >>= radeon_vm_block_size;
	eoffset >>= radeon_vm_block_size;

	BUG_ON(eoffset >= radeon_vm_num_pdes(rdev));

	if (eoffset > vm->max_pde_used)
		vm->max_pde_used = eoffset;

	radeon_bo_unreserve(bo_va->bo);

	/* walk over the address space and allocate the page tables */
	for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) {
		struct radeon_bo *pt;

		if (vm->page_tables[pt_idx].bo)
			continue;

		/* drop mutex to allocate and clear page table */
		mutex_unlock(&vm->mutex);

		r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8,
				     RADEON_GPU_PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_VRAM, 0,
				     NULL, NULL, &pt);
		if (r)
			return r;

		r = radeon_vm_clear_bo(rdev, pt);
		if (r) {
			radeon_bo_unref(&pt);
			/* re-reserve to honour the "gets unreserved"
			 * contract symmetry expected by callers */
			radeon_bo_reserve(bo_va->bo, false);
			return r;
		}

		/* acquire mutex again */
		mutex_lock(&vm->mutex);
		if (vm->page_tables[pt_idx].bo) {
			/* someone else allocated the pt in the meantime */
			mutex_unlock(&vm->mutex);
			radeon_bo_unref(&pt);
			mutex_lock(&vm->mutex);
			continue;
		}

		vm->page_tables[pt_idx].addr = 0;
		vm->page_tables[pt_idx].bo = pt;
	}

	mutex_unlock(&vm->mutex);
	return 0;
}
574 | |||
575 | /** |
||
576 | * radeon_vm_map_gart - get the physical address of a gart page |
||
577 | * |
||
578 | * @rdev: radeon_device pointer |
||
579 | * @addr: the unmapped addr |
||
580 | * |
||
581 | * Look up the physical address of the page that the pte resolves |
||
582 | * to (cayman+). |
||
583 | * Returns the physical address of the page. |
||
584 | */ |
||
585 | uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) |
||
586 | { |
||
587 | uint64_t result; |
||
588 | |||
589 | /* page table offset */ |
||
590 | result = rdev->gart.pages_addr[addr >> PAGE_SHIFT]; |
||
591 | |||
592 | /* in case cpu page size != gpu page size*/ |
||
593 | result |= addr & (~PAGE_MASK); |
||
594 | |||
595 | return result; |
||
596 | } |
||
597 | |||
598 | /** |
||
599 | * radeon_vm_page_flags - translate page flags to what the hw uses |
||
600 | * |
||
601 | * @flags: flags comming from userspace |
||
602 | * |
||
603 | * Translate the flags the userspace ABI uses to hw flags. |
||
604 | */ |
||
605 | static uint32_t radeon_vm_page_flags(uint32_t flags) |
||
606 | { |
||
607 | uint32_t hw_flags = 0; |
||
608 | hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0; |
||
609 | hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0; |
||
610 | hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0; |
||
611 | if (flags & RADEON_VM_PAGE_SYSTEM) { |
||
612 | hw_flags |= R600_PTE_SYSTEM; |
||
613 | hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0; |
||
614 | } |
||
615 | return hw_flags; |
||
616 | } |
||
617 | |||
/**
 * radeon_vm_update_page_directory - make sure that page directory is valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Writes any PDEs whose page-table address changed, coalescing
 * consecutive entries into single set_pages calls (cayman+).
 * Returns 0 for success, error for failure.
 *
 * Global and local mutex must be locked!
 */
int radeon_vm_update_page_directory(struct radeon_device *rdev,
				    struct radeon_vm *vm)
{
	struct radeon_bo *pd = vm->page_directory;
	uint64_t pd_addr = radeon_bo_gpu_offset(pd);
	uint32_t incr = RADEON_VM_PTE_COUNT * 8;
	uint64_t last_pde = ~0, last_pt = ~0;
	unsigned count = 0, pt_idx, ndw;
	struct radeon_ib ib;
	int r;

	/* padding, etc. */
	ndw = 64;

	/* assume the worst case */
	ndw += vm->max_pde_used * 6;

	/* update too big for an IB */
	if (ndw > 0xfffff)
		return -ENOMEM;

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
	if (r)
		return r;
	ib.length_dw = 0;

	/* walk over the address space and update the page directory */
	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
		struct radeon_bo *bo = vm->page_tables[pt_idx].bo;
		uint64_t pde, pt;

		if (bo == NULL)
			continue;

		pt = radeon_bo_gpu_offset(bo);
		if (vm->page_tables[pt_idx].addr == pt)
			continue;	/* PDE already up to date */
		vm->page_tables[pt_idx].addr = pt;

		pde = pd_addr + pt_idx * 8;
		/* flush the accumulated run when either the PDE slot or
		 * the page-table address breaks the contiguous pattern */
		if (((last_pde + 8 * count) != pde) ||
		    ((last_pt + incr * count) != pt)) {

			if (count) {
				radeon_vm_set_pages(rdev, &ib, last_pde,
						    last_pt, count, incr,
						    R600_PTE_VALID);
			}

			count = 1;
			last_pde = pde;
			last_pt = pt;
		} else {
			++count;
		}
	}

	/* flush the final run */
	if (count)
		radeon_vm_set_pages(rdev, &ib, last_pde, last_pt, count,
				    incr, R600_PTE_VALID);

	if (ib.length_dw != 0) {
		radeon_asic_vm_pad_ib(rdev, &ib);

		/* wait for everybody currently using the page directory */
		radeon_sync_resv(rdev, &ib.sync, pd->tbo.resv, true);
		WARN_ON(ib.length_dw > ndw);
		r = radeon_ib_schedule(rdev, &ib, NULL, false);
		if (r) {
			radeon_ib_free(rdev, &ib);
			return r;
		}
		ib.fence->is_vm_update = true;
		radeon_bo_fence(pd, ib.fence, false);
	}
	radeon_ib_free(rdev, &ib);

	return 0;
}
||
710 | |||
/**
 * radeon_vm_frag_ptes - add fragment information to PTEs
 *
 * @rdev: radeon_device pointer
 * @ib: IB for the update
 * @pe_start: first PTE to handle
 * @pe_end: last PTE to handle
 * @addr: addr those PTEs should point to
 * @flags: hw mapping flags
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_frag_ptes(struct radeon_device *rdev,
				struct radeon_ib *ib,
				uint64_t pe_start, uint64_t pe_end,
				uint64_t addr, uint32_t flags)
{
	/**
	 * The MC L1 TLB supports variable sized pages, based on a fragment
	 * field in the PTE. When this field is set to a non-zero value, page
	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
	 * flags are considered valid for all PTEs within the fragment range
	 * and corresponding mappings are assumed to be physically contiguous.
	 *
	 * The L1 TLB can store a single PTE for the whole fragment,
	 * significantly increasing the space available for translation
	 * caching. This leads to large improvements in throughput when the
	 * TLB is under pressure.
	 *
	 * The L2 TLB distributes small and large fragments into two
	 * asymmetric partitions. The large fragment cache is significantly
	 * larger. Thus, we try to use large fragments wherever possible.
	 * Userspace can support this by aligning virtual base address and
	 * allocation size to the fragment size.
	 */

	/* NI is optimized for 256KB fragments, SI and newer for 64KB */
	uint64_t frag_flags = rdev->family == CHIP_CAYMAN ?
			R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB;
	/* fragment alignment in PTE-array bytes (0x200/0x80 = 64/16 PTEs) */
	uint64_t frag_align = rdev->family == CHIP_CAYMAN ? 0x200 : 0x80;

	uint64_t frag_start = ALIGN(pe_start, frag_align);
	uint64_t frag_end = pe_end & ~(frag_align - 1);

	unsigned count;

	/* system pages are not physically contiguous, and invalid or
	 * too-small ranges cannot use fragments at all */
	if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) ||
	    (frag_start >= frag_end)) {

		count = (pe_end - pe_start) / 8;
		radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
				    RADEON_GPU_PAGE_SIZE, flags);
		return;
	}

	/* handle the 4K area at the beginning */
	if (pe_start != frag_start) {
		count = (frag_start - pe_start) / 8;
		radeon_vm_set_pages(rdev, ib, pe_start, addr, count,
				    RADEON_GPU_PAGE_SIZE, flags);
		addr += RADEON_GPU_PAGE_SIZE * count;
	}

	/* handle the area in the middle with large-fragment PTEs */
	count = (frag_end - frag_start) / 8;
	radeon_vm_set_pages(rdev, ib, frag_start, addr, count,
			    RADEON_GPU_PAGE_SIZE, flags | frag_flags);

	/* handle the 4K area at the end */
	if (frag_end != pe_end) {
		addr += RADEON_GPU_PAGE_SIZE * count;
		count = (pe_end - frag_end) / 8;
		radeon_vm_set_pages(rdev, ib, frag_end, addr, count,
				    RADEON_GPU_PAGE_SIZE, flags);
	}
}
||
788 | |||
/**
 * radeon_vm_update_ptes - make sure that page tables are valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @ib: IB to fill with update commands
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end (cayman+),
 * coalescing contiguous PTE runs into single frag_ptes calls.
 * Returns 0 for success, error for failure.
 *
 * Global and local mutex must be locked!
 */
static int radeon_vm_update_ptes(struct radeon_device *rdev,
				 struct radeon_vm *vm,
				 struct radeon_ib *ib,
				 uint64_t start, uint64_t end,
				 uint64_t dst, uint32_t flags)
{
	uint64_t mask = RADEON_VM_PTE_COUNT - 1;
	uint64_t last_pte = ~0, last_dst = ~0;
	unsigned count = 0;
	uint64_t addr;

	/* walk over the address space and update the page tables */
	for (addr = start; addr < end; ) {
		uint64_t pt_idx = addr >> radeon_vm_block_size;
		struct radeon_bo *pt = vm->page_tables[pt_idx].bo;
		unsigned nptes;
		uint64_t pte;
		int r;

		/* sync to previous users of this page table and make
		 * sure we can add our shared fence afterwards */
		radeon_sync_resv(rdev, &ib->sync, pt->tbo.resv, true);
		r = reservation_object_reserve_shared(pt->tbo.resv);
		if (r)
			return r;

		/* number of PTEs in this table, clipped at the range end */
		if ((addr & ~mask) == (end & ~mask))
			nptes = end - addr;
		else
			nptes = RADEON_VM_PTE_COUNT - (addr & mask);

		pte = radeon_bo_gpu_offset(pt);
		pte += (addr & mask) * 8;

		/* flush the accumulated run when it stops being contiguous */
		if ((last_pte + 8 * count) != pte) {

			if (count) {
				radeon_vm_frag_ptes(rdev, ib, last_pte,
						    last_pte + 8 * count,
						    last_dst, flags);
			}

			count = nptes;
			last_pte = pte;
			last_dst = dst;
		} else {
			count += nptes;
		}

		addr += nptes;
		dst += nptes * RADEON_GPU_PAGE_SIZE;
	}

	/* flush the final run */
	if (count) {
		radeon_vm_frag_ptes(rdev, ib, last_pte,
				    last_pte + 8 * count,
				    last_dst, flags);
	}

	return 0;
}
862 | |||
863 | /** |
||
5271 | serge | 864 | * radeon_vm_fence_pts - fence page tables after an update |
865 | * |
||
866 | * @vm: requested vm |
||
867 | * @start: start of GPU address range |
||
868 | * @end: end of GPU address range |
||
869 | * @fence: fence to use |
||
870 | * |
||
871 | * Fence the page tables in the range @start - @end (cayman+). |
||
872 | * |
||
873 | * Global and local mutex must be locked! |
||
874 | */ |
||
875 | static void radeon_vm_fence_pts(struct radeon_vm *vm, |
||
876 | uint64_t start, uint64_t end, |
||
877 | struct radeon_fence *fence) |
||
878 | { |
||
879 | unsigned i; |
||
880 | |||
881 | start >>= radeon_vm_block_size; |
||
882 | end >>= radeon_vm_block_size; |
||
883 | |||
884 | for (i = start; i <= end; ++i) |
||
885 | radeon_bo_fence(vm->page_tables[i].bo, fence, true); |
||
886 | } |
||
887 | |||
/**
 * radeon_vm_bo_update - map a bo into the vm page table
 *
 * @rdev: radeon_device pointer
 * @bo_va: requested mapping of a BO into a VM
 * @mem: ttm placement the BO currently occupies, or NULL to unmap
 *
 * Fill in the page table entries for the mapping described by @bo_va
 * (cayman+).  Passing a NULL @mem clears the PTEs for the range.
 * Returns 0 for success, -EINVAL for failure.
 *
 * Object have to be reserved and mutex must be locked!
 */
int radeon_vm_bo_update(struct radeon_device *rdev,
			struct radeon_bo_va *bo_va,
			struct ttm_mem_reg *mem)
{
	struct radeon_vm *vm = bo_va->vm;
	struct radeon_ib ib;
	unsigned nptes, ncmds, ndw;
	uint64_t addr;
	uint32_t flags;
	int r;

	/* a bo_va without an interval-tree start was never actually mapped */
	if (!bo_va->it.start) {
		dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n",
			bo_va->bo, vm);
		return -EINVAL;
	}

	/* the mapping is being (re)programmed now: take it off whichever
	 * status list (invalidated/freed) it is currently on */
	spin_lock(&vm->status_lock);
	list_del_init(&bo_va->vm_status);
	spin_unlock(&vm->status_lock);

	/* recompute the page flags from scratch below */
	bo_va->flags &= ~RADEON_VM_PAGE_VALID;
	bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
	bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED;
	/* read-only TTM check disabled in this port — kept for reference */
//	if (bo_va->bo && radeon_ttm_tt_is_readonly(bo_va->bo->tbo.ttm))
//		bo_va->flags &= ~RADEON_VM_PAGE_WRITEABLE;

	if (mem) {
		addr = mem->start << PAGE_SHIFT;
		if (mem->mem_type != TTM_PL_SYSTEM) {
			bo_va->flags |= RADEON_VM_PAGE_VALID;
		}
		if (mem->mem_type == TTM_PL_TT) {
			bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
			/* snoop unless the BO asked for WC/UC GTT caching */
			if (!(bo_va->bo->flags & (RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC)))
				bo_va->flags |= RADEON_VM_PAGE_SNOOPED;

		} else {
			/* VRAM placement: make the address GPU-physical */
			addr += rdev->vm_manager.vram_base_offset;
		}
	} else {
		/* unmap request: program zero PTEs */
		addr = 0;
	}

	/* nothing changed, no need to build an IB */
	if (addr == bo_va->addr)
		return 0;
	bo_va->addr = addr;

	trace_radeon_vm_bo_update(bo_va);

	nptes = bo_va->it.last - bo_va->it.start + 1;

	/* reserve space for one command every (1 << BLOCK_SIZE) entries
	   or 2k dwords (whatever is smaller) */
	ncmds = (nptes >> min(radeon_vm_block_size, 11)) + 1;

	/* padding, etc. */
	ndw = 64;

	flags = radeon_vm_page_flags(bo_va->flags);
	if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) {
		/* only copy commands needed */
		ndw += ncmds * 7;

	} else if (flags & R600_PTE_SYSTEM) {
		/* header for write data commands */
		ndw += ncmds * 4;

		/* body of write data command */
		ndw += nptes * 2;

	} else {
		/* set page commands needed */
		ndw += ncmds * 10;

		/* two extra commands for begin/end of fragment */
		ndw += 2 * 10;
	}

	/* update too big for an IB */
	if (ndw > 0xfffff)
		return -ENOMEM;

	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
	if (r)
		return r;
	ib.length_dw = 0;

	if (!(bo_va->flags & RADEON_VM_PAGE_VALID)) {
		unsigned i;

		/* clearing PTEs: wait for the last use of every VM id so
		 * no in-flight work still references the old mapping */
		for (i = 0; i < RADEON_NUM_RINGS; ++i)
			radeon_sync_fence(&ib.sync, vm->ids[i].last_id_use);
	}

	r = radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
				  bo_va->it.last + 1, addr,
				  radeon_vm_page_flags(bo_va->flags));
	if (r) {
		radeon_ib_free(rdev, &ib);
		return r;
	}

	radeon_asic_vm_pad_ib(rdev, &ib);
	WARN_ON(ib.length_dw > ndw);

	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_ib_free(rdev, &ib);
		return r;
	}
	ib.fence->is_vm_update = true;
	/* fence the touched page tables and remember the update fence */
	radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence);
	radeon_fence_unref(&bo_va->last_pt_update);
	bo_va->last_pt_update = radeon_fence_ref(ib.fence);
	radeon_ib_free(rdev, &ib);

	return 0;
}
||
1020 | |||
1021 | /** |
||
1022 | * radeon_vm_clear_freed - clear freed BOs in the PT |
||
1023 | * |
||
1024 | * @rdev: radeon_device pointer |
||
1025 | * @vm: requested vm |
||
1026 | * |
||
1027 | * Make sure all freed BOs are cleared in the PT. |
||
1028 | * Returns 0 for success. |
||
1029 | * |
||
1030 | * PTs have to be reserved and mutex must be locked! |
||
1031 | */ |
||
1032 | int radeon_vm_clear_freed(struct radeon_device *rdev, |
||
1033 | struct radeon_vm *vm) |
||
1034 | { |
||
5271 | serge | 1035 | struct radeon_bo_va *bo_va; |
5078 | serge | 1036 | int r; |
1037 | |||
5271 | serge | 1038 | spin_lock(&vm->status_lock); |
1039 | while (!list_empty(&vm->freed)) { |
||
1040 | bo_va = list_first_entry(&vm->freed, |
||
1041 | struct radeon_bo_va, vm_status); |
||
1042 | spin_unlock(&vm->status_lock); |
||
1043 | |||
5078 | serge | 1044 | r = radeon_vm_bo_update(rdev, bo_va, NULL); |
1045 | radeon_bo_unref(&bo_va->bo); |
||
5271 | serge | 1046 | radeon_fence_unref(&bo_va->last_pt_update); |
5078 | serge | 1047 | kfree(bo_va); |
1048 | if (r) |
||
1049 | return r; |
||
5271 | serge | 1050 | |
1051 | spin_lock(&vm->status_lock); |
||
5078 | serge | 1052 | } |
5271 | serge | 1053 | spin_unlock(&vm->status_lock); |
5078 | serge | 1054 | return 0; |
1055 | |||
1056 | } |
||
1057 | |||
1058 | /** |
||
1059 | * radeon_vm_clear_invalids - clear invalidated BOs in the PT |
||
1060 | * |
||
1061 | * @rdev: radeon_device pointer |
||
1062 | * @vm: requested vm |
||
1063 | * |
||
1064 | * Make sure all invalidated BOs are cleared in the PT. |
||
1065 | * Returns 0 for success. |
||
1066 | * |
||
1067 | * PTs have to be reserved and mutex must be locked! |
||
1068 | */ |
||
1069 | int radeon_vm_clear_invalids(struct radeon_device *rdev, |
||
1070 | struct radeon_vm *vm) |
||
1071 | { |
||
5271 | serge | 1072 | struct radeon_bo_va *bo_va; |
5078 | serge | 1073 | int r; |
1074 | |||
5271 | serge | 1075 | spin_lock(&vm->status_lock); |
1076 | while (!list_empty(&vm->invalidated)) { |
||
1077 | bo_va = list_first_entry(&vm->invalidated, |
||
1078 | struct radeon_bo_va, vm_status); |
||
1079 | spin_unlock(&vm->status_lock); |
||
1080 | |||
5078 | serge | 1081 | r = radeon_vm_bo_update(rdev, bo_va, NULL); |
1082 | if (r) |
||
1083 | return r; |
||
5271 | serge | 1084 | |
1085 | spin_lock(&vm->status_lock); |
||
5078 | serge | 1086 | } |
5271 | serge | 1087 | spin_unlock(&vm->status_lock); |
1088 | |||
5078 | serge | 1089 | return 0; |
1090 | } |
||
1091 | |||
1092 | /** |
||
1093 | * radeon_vm_bo_rmv - remove a bo to a specific vm |
||
1094 | * |
||
1095 | * @rdev: radeon_device pointer |
||
1096 | * @bo_va: requested bo_va |
||
1097 | * |
||
1098 | * Remove @bo_va->bo from the requested vm (cayman+). |
||
1099 | * |
||
1100 | * Object have to be reserved! |
||
1101 | */ |
||
1102 | void radeon_vm_bo_rmv(struct radeon_device *rdev, |
||
1103 | struct radeon_bo_va *bo_va) |
||
1104 | { |
||
1105 | struct radeon_vm *vm = bo_va->vm; |
||
1106 | |||
1107 | list_del(&bo_va->bo_list); |
||
1108 | |||
1109 | mutex_lock(&vm->mutex); |
||
1110 | interval_tree_remove(&bo_va->it, &vm->va); |
||
5271 | serge | 1111 | spin_lock(&vm->status_lock); |
5078 | serge | 1112 | list_del(&bo_va->vm_status); |
1113 | |||
1114 | if (bo_va->addr) { |
||
1115 | bo_va->bo = radeon_bo_ref(bo_va->bo); |
||
1116 | list_add(&bo_va->vm_status, &vm->freed); |
||
1117 | } else { |
||
5271 | serge | 1118 | radeon_fence_unref(&bo_va->last_pt_update); |
5078 | serge | 1119 | kfree(bo_va); |
1120 | } |
||
5271 | serge | 1121 | spin_unlock(&vm->status_lock); |
5078 | serge | 1122 | |
1123 | mutex_unlock(&vm->mutex); |
||
1124 | } |
||
1125 | |||
1126 | /** |
||
1127 | * radeon_vm_bo_invalidate - mark the bo as invalid |
||
1128 | * |
||
1129 | * @rdev: radeon_device pointer |
||
1130 | * @vm: requested vm |
||
1131 | * @bo: radeon buffer object |
||
1132 | * |
||
1133 | * Mark @bo as invalid (cayman+). |
||
1134 | */ |
||
1135 | void radeon_vm_bo_invalidate(struct radeon_device *rdev, |
||
1136 | struct radeon_bo *bo) |
||
1137 | { |
||
1138 | struct radeon_bo_va *bo_va; |
||
1139 | |||
1140 | list_for_each_entry(bo_va, &bo->va, bo_list) { |
||
1141 | if (bo_va->addr) { |
||
5271 | serge | 1142 | spin_lock(&bo_va->vm->status_lock); |
5078 | serge | 1143 | list_del(&bo_va->vm_status); |
1144 | list_add(&bo_va->vm_status, &bo_va->vm->invalidated); |
||
5271 | serge | 1145 | spin_unlock(&bo_va->vm->status_lock); |
5078 | serge | 1146 | } |
1147 | } |
||
1148 | } |
||
1149 | |||
1150 | /** |
||
1151 | * radeon_vm_init - initialize a vm instance |
||
1152 | * |
||
1153 | * @rdev: radeon_device pointer |
||
1154 | * @vm: requested vm |
||
1155 | * |
||
1156 | * Init @vm fields (cayman+). |
||
1157 | */ |
||
1158 | int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) |
||
1159 | { |
||
1160 | const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE, |
||
1161 | RADEON_VM_PTE_COUNT * 8); |
||
1162 | unsigned pd_size, pd_entries, pts_size; |
||
5271 | serge | 1163 | int i, r; |
5078 | serge | 1164 | |
1165 | vm->ib_bo_va = NULL; |
||
5271 | serge | 1166 | for (i = 0; i < RADEON_NUM_RINGS; ++i) { |
1167 | vm->ids[i].id = 0; |
||
1168 | vm->ids[i].flushed_updates = NULL; |
||
1169 | vm->ids[i].last_id_use = NULL; |
||
1170 | } |
||
5078 | serge | 1171 | mutex_init(&vm->mutex); |
1172 | vm->va = RB_ROOT; |
||
5271 | serge | 1173 | spin_lock_init(&vm->status_lock); |
5078 | serge | 1174 | INIT_LIST_HEAD(&vm->invalidated); |
1175 | INIT_LIST_HEAD(&vm->freed); |
||
1176 | |||
1177 | pd_size = radeon_vm_directory_size(rdev); |
||
1178 | pd_entries = radeon_vm_num_pdes(rdev); |
||
1179 | |||
1180 | /* allocate page table array */ |
||
1181 | pts_size = pd_entries * sizeof(struct radeon_vm_pt); |
||
1182 | vm->page_tables = kzalloc(pts_size, GFP_KERNEL); |
||
1183 | if (vm->page_tables == NULL) { |
||
1184 | DRM_ERROR("Cannot allocate memory for page table array\n"); |
||
1185 | return -ENOMEM; |
||
1186 | } |
||
1187 | |||
1188 | r = radeon_bo_create(rdev, pd_size, align, true, |
||
1189 | RADEON_GEM_DOMAIN_VRAM, 0, NULL, |
||
5271 | serge | 1190 | NULL, &vm->page_directory); |
5078 | serge | 1191 | if (r) |
1192 | return r; |
||
1193 | |||
1194 | r = radeon_vm_clear_bo(rdev, vm->page_directory); |
||
1195 | if (r) { |
||
1196 | radeon_bo_unref(&vm->page_directory); |
||
1197 | vm->page_directory = NULL; |
||
1198 | return r; |
||
1199 | } |
||
1200 | |||
1201 | return 0; |
||
1202 | } |
||
1203 | |||
1204 | /** |
||
1205 | * radeon_vm_fini - tear down a vm instance |
||
1206 | * |
||
1207 | * @rdev: radeon_device pointer |
||
1208 | * @vm: requested vm |
||
1209 | * |
||
1210 | * Tear down @vm (cayman+). |
||
1211 | * Unbind the VM and remove all bos from the vm bo list |
||
1212 | */ |
||
1213 | void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) |
||
1214 | { |
||
1215 | struct radeon_bo_va *bo_va, *tmp; |
||
1216 | int i, r; |
||
1217 | |||
1218 | if (!RB_EMPTY_ROOT(&vm->va)) { |
||
1219 | dev_err(rdev->dev, "still active bo inside vm\n"); |
||
1220 | } |
||
1221 | rbtree_postorder_for_each_entry_safe(bo_va, tmp, &vm->va, it.rb) { |
||
1222 | interval_tree_remove(&bo_va->it, &vm->va); |
||
1223 | r = radeon_bo_reserve(bo_va->bo, false); |
||
1224 | if (!r) { |
||
1225 | list_del_init(&bo_va->bo_list); |
||
1226 | radeon_bo_unreserve(bo_va->bo); |
||
5271 | serge | 1227 | radeon_fence_unref(&bo_va->last_pt_update); |
5078 | serge | 1228 | kfree(bo_va); |
1229 | } |
||
1230 | } |
||
1231 | list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) { |
||
1232 | radeon_bo_unref(&bo_va->bo); |
||
5271 | serge | 1233 | radeon_fence_unref(&bo_va->last_pt_update); |
5078 | serge | 1234 | kfree(bo_va); |
1235 | } |
||
1236 | |||
1237 | for (i = 0; i < radeon_vm_num_pdes(rdev); i++) |
||
1238 | radeon_bo_unref(&vm->page_tables[i].bo); |
||
1239 | kfree(vm->page_tables); |
||
1240 | |||
1241 | radeon_bo_unref(&vm->page_directory); |
||
1242 | |||
5271 | serge | 1243 | for (i = 0; i < RADEON_NUM_RINGS; ++i) { |
1244 | radeon_fence_unref(&vm->ids[i].flushed_updates); |
||
1245 | radeon_fence_unref(&vm->ids[i].last_id_use); |
||
1246 | } |
||
5078 | serge | 1247 | |
1248 | mutex_destroy(&vm->mutex); |
||
1249 | }>>>>><>><>=>>><>=>=>>>>=>> |