Rev | Author | Line No. | Line |
---|---|---|---|
5078 | serge | 1 | /* |
2 | * Copyright 2008 Advanced Micro Devices, Inc. |
||
3 | * Copyright 2008 Red Hat Inc. |
||
4 | * Copyright 2009 Jerome Glisse. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the "Software"), |
||
8 | * to deal in the Software without restriction, including without limitation |
||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
10 | * and/or sell copies of the Software, and to permit persons to whom the |
||
11 | * Software is furnished to do so, subject to the following conditions: |
||
12 | * |
||
13 | * The above copyright notice and this permission notice shall be included in |
||
14 | * all copies or substantial portions of the Software. |
||
15 | * |
||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
19 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
||
20 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
||
21 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
||
22 | * OTHER DEALINGS IN THE SOFTWARE. |
||
23 | * |
||
24 | * Authors: Dave Airlie |
||
25 | * Alex Deucher |
||
26 | * Jerome Glisse |
||
27 | */ |
||
28 | #include <drm/drmP.h> |
||
29 | #include <drm/radeon_drm.h> |
||
30 | #include "radeon.h" |
||
31 | #include "radeon_trace.h" |
||
32 | |||
33 | /* |
||
34 | * GPUVM |
||
35 | * GPUVM is similar to the legacy gart on older asics, however |
||
36 | * rather than there being a single global gart table |
||
37 | * for the entire GPU, there are multiple VM page tables active |
||
38 | * at any given time. The VM page tables can contain a mix of |
||
39 | * VRAM pages and system memory pages, and system memory pages |
||
40 | * can be mapped as snooped (cached system pages) or unsnooped |
||
41 | * (uncached system pages). |
||
42 | * Each VM has an ID associated with it and there is a page table |
||
43 | * associated with each VMID. When executing a command buffer, |
||
44 | * the kernel tells the ring what VMID to use for that command |
||
45 | * buffer. VMIDs are allocated dynamically as commands are submitted. |
||
46 | * The userspace drivers maintain their own address space and the kernel |
||
47 | * sets up their page tables accordingly when they submit their |
||
48 | * command buffers and a VMID is assigned. |
||
49 | * Cayman/Trinity support up to 8 active VMs at any given time; |
||
50 | * SI supports 16. |
||
51 | */ |
||
52 | |||
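The comment above describes a two-level translation: a GPU virtual address selects a page directory entry (PDE), which points to a page table whose entries (PTEs) map individual 4 KB GPU pages. A minimal sketch of that address split, assuming the 4 KB GPU page size used throughout this file; the helper below is purely illustrative and not part of the driver:

```c
/* Illustrative only: how a GPU virtual address splits under the
 * two-level scheme described above. block_size stands in for the
 * radeon_vm_block_size module parameter (page-table bits per PDE).
 */
static void example_split_va(uint64_t va, unsigned block_size)
{
	uint64_t pfn     = va >> 12;                          /* 4 KB GPU page frame number */
	uint64_t pde_idx = pfn >> block_size;                 /* index into the page directory */
	uint64_t pte_idx = pfn & ((1ULL << block_size) - 1);  /* index inside that page table */

	printk(KERN_DEBUG "va 0x%llx -> pde %llu, pte %llu\n",
	       (unsigned long long)va, (unsigned long long)pde_idx,
	       (unsigned long long)pte_idx);
}
```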
53 | /** |
||
54 | * radeon_vm_num_pdes - return the number of page directory entries |
||
55 | * |
||
56 | * @rdev: radeon_device pointer |
||
57 | * |
||
58 | * Calculate the number of page directory entries (cayman+). |
||
59 | */ |
||
60 | static unsigned radeon_vm_num_pdes(struct radeon_device *rdev) |
||
61 | { |
||
62 | return rdev->vm_manager.max_pfn >> radeon_vm_block_size; |
||
63 | } |
||
64 | |||
65 | /** |
||
66 | * radeon_vm_directory_size - returns the size of the page directory in bytes |
||
67 | * |
||
68 | * @rdev: radeon_device pointer |
||
69 | * |
||
70 | * Calculate the size of the page directory in bytes (cayman+). |
||
71 | */ |
||
72 | static unsigned radeon_vm_directory_size(struct radeon_device *rdev) |
||
73 | { |
||
74 | return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8); |
||
75 | } |
||
76 | |||
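As a worked example (the numbers are illustrative assumptions, not driver defaults): with a 4 GB VM address space, max_pfn = 4 GB / 4 KB = 0x100000 pages; with radeon_vm_block_size = 9, radeon_vm_num_pdes() returns 0x100000 >> 9 = 2048, and radeon_vm_directory_size() returns RADEON_GPU_PAGE_ALIGN(2048 * 8) = 16 KB for the page directory.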
77 | /** |
||
78 | * radeon_vm_manager_init - init the vm manager |
||
79 | * |
||
80 | * @rdev: radeon_device pointer |
||
81 | * |
||
82 | * Init the vm manager (cayman+). |
||
83 | * Returns 0 for success, error for failure. |
||
84 | */ |
||
85 | int radeon_vm_manager_init(struct radeon_device *rdev) |
||
86 | { |
||
87 | int r; |
||
88 | |||
89 | if (!rdev->vm_manager.enabled) { |
||
90 | r = radeon_asic_vm_init(rdev); |
||
91 | if (r) |
||
92 | return r; |
||
93 | |||
94 | rdev->vm_manager.enabled = true; |
||
95 | } |
||
96 | return 0; |
||
97 | } |
||
98 | |||
99 | /** |
||
100 | * radeon_vm_manager_fini - tear down the vm manager |
||
101 | * |
||
102 | * @rdev: radeon_device pointer |
||
103 | * |
||
104 | * Tear down the VM manager (cayman+). |
||
105 | */ |
||
106 | void radeon_vm_manager_fini(struct radeon_device *rdev) |
||
107 | { |
||
108 | int i; |
||
109 | |||
110 | if (!rdev->vm_manager.enabled) |
||
111 | return; |
||
112 | |||
113 | for (i = 0; i < RADEON_NUM_VM; ++i) |
||
114 | radeon_fence_unref(&rdev->vm_manager.active[i]); |
||
115 | radeon_asic_vm_fini(rdev); |
||
116 | rdev->vm_manager.enabled = false; |
||
117 | } |
||
118 | |||
119 | /** |
||
120 | * radeon_vm_get_bos - add the vm BOs to a validation list |
||
121 | * |
||
122 | * @vm: vm providing the BOs |
||
123 | * @head: head of validation list |
||
124 | * |
||
125 | * Add the page directory to the list of BOs to |
||
126 | * validate for command submission (cayman+). |
||
127 | */ |
||
5271 | serge | 128 | struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev, |
5078 | serge | 129 | struct radeon_vm *vm, |
130 | struct list_head *head) |
||
131 | { |
||
5271 | serge | 132 | struct radeon_bo_list *list; |
5078 | serge | 133 | unsigned i, idx; |
134 | |||
135 | list = kmalloc_array(vm->max_pde_used + 2, |
||
5271 | serge | 136 | sizeof(struct radeon_bo_list), GFP_KERNEL); |
5078 | serge | 137 | if (!list) |
138 | return NULL; |
||
139 | |||
140 | /* add the vm page table to the list */ |
||
141 | list[0].robj = vm->page_directory; |
||
142 | list[0].prefered_domains = RADEON_GEM_DOMAIN_VRAM; |
||
143 | list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM; |
||
144 | list[0].tv.bo = &vm->page_directory->tbo; |
||
5271 | serge | 145 | list[0].tv.shared = true; |
5078 | serge | 146 | list[0].tiling_flags = 0; |
147 | list_add(&list[0].tv.head, head); |
||
148 | |||
149 | for (i = 0, idx = 1; i <= vm->max_pde_used; i++) { |
||
150 | if (!vm->page_tables[i].bo) |
||
151 | continue; |
||
152 | |||
153 | list[idx].robj = vm->page_tables[i].bo; |
||
154 | list[idx].prefered_domains = RADEON_GEM_DOMAIN_VRAM; |
||
155 | list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM; |
||
156 | list[idx].tv.bo = &list[idx].robj->tbo; |
||
5271 | serge | 157 | list[idx].tv.shared = true; |
5078 | serge | 158 | list[idx].tiling_flags = 0; |
159 | list_add(&list[idx++].tv.head, head); |
||
160 | } |
||
161 | |||
162 | return list; |
||
163 | } |
||
164 | |||
165 | /** |
||
166 | * radeon_vm_grab_id - allocate the next free VMID |
||
167 | * |
||
168 | * @rdev: radeon_device pointer |
||
169 | * @vm: vm to allocate id for |
||
170 | * @ring: ring we want to submit job to |
||
171 | * |
||
172 | * Allocate an id for the vm (cayman+). |
||
173 | * Returns the fence we need to sync to (if any). |
||
174 | * |
||
175 | * Global and local mutex must be locked! |
||
176 | */ |
||
177 | struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev, |
||
178 | struct radeon_vm *vm, int ring) |
||
179 | { |
||
180 | struct radeon_fence *best[RADEON_NUM_RINGS] = {}; |
||
5271 | serge | 181 | struct radeon_vm_id *vm_id = &vm->ids[ring]; |
182 | |||
5078 | serge | 183 | unsigned choices[2] = {}; |
184 | unsigned i; |
||
185 | |||
186 | /* check if the id is still valid */ |
||
5271 | serge | 187 | if (vm_id->id && vm_id->last_id_use && |
188 | vm_id->last_id_use == rdev->vm_manager.active[vm_id->id]) |
||
5078 | serge | 189 | return NULL; |
190 | |||
191 | /* we definitely need to flush */ |
||
5271 | serge | 192 | vm_id->pd_gpu_addr = ~0ll; |
5078 | serge | 193 | |
194 | /* skip over VMID 0, since it is the system VM */ |
||
195 | for (i = 1; i < rdev->vm_manager.nvm; ++i) { |
||
196 | struct radeon_fence *fence = rdev->vm_manager.active[i]; |
||
197 | |||
198 | if (fence == NULL) { |
||
199 | /* found a free one */ |
||
5271 | serge | 200 | vm_id->id = i; |
201 | trace_radeon_vm_grab_id(i, ring); |
||
5078 | serge | 202 | return NULL; |
203 | } |
||
204 | |||
205 | if (radeon_fence_is_earlier(fence, best[fence->ring])) { |
||
206 | best[fence->ring] = fence; |
||
207 | choices[fence->ring == ring ? 0 : 1] = i; |
||
208 | } |
||
209 | } |
||
210 | |||
211 | for (i = 0; i < 2; ++i) { |
||
212 | if (choices[i]) { |
||
5271 | serge | 213 | vm_id->id = choices[i]; |
214 | trace_radeon_vm_grab_id(choices[i], ring); |
||
5078 | serge | 215 | return rdev->vm_manager.active[choices[i]]; |
216 | } |
||
217 | } |
||
218 | |||
219 | /* should never happen */ |
||
220 | BUG(); |
||
221 | return NULL; |
||
222 | } |
||
223 | |||
224 | /** |
||
225 | * radeon_vm_flush - hardware flush the vm |
||
226 | * |
||
227 | * @rdev: radeon_device pointer |
||
228 | * @vm: vm we want to flush |
||
229 | * @ring: ring to use for flush |
||
5271 | serge | 230 | * @updates: last vm update that is waited for |
5078 | serge | 231 | * |
232 | * Flush the vm (cayman+). |
||
233 | * |
||
234 | * Global and local mutex must be locked! |
||
235 | */ |
||
236 | void radeon_vm_flush(struct radeon_device *rdev, |
||
237 | struct radeon_vm *vm, |
||
5271 | serge | 238 | int ring, struct radeon_fence *updates) |
5078 | serge | 239 | { |
240 | uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory); |
||
5271 | serge | 241 | struct radeon_vm_id *vm_id = &vm->ids[ring]; |
5078 | serge | 242 | |
5271 | serge | 243 | if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates || |
244 | radeon_fence_is_earlier(vm_id->flushed_updates, updates)) { |
||
245 | |||
246 | trace_radeon_vm_flush(pd_addr, ring, vm->ids[ring].id); |
||
247 | radeon_fence_unref(&vm_id->flushed_updates); |
||
248 | vm_id->flushed_updates = radeon_fence_ref(updates); |
||
249 | vm_id->pd_gpu_addr = pd_addr; |
||
250 | radeon_ring_vm_flush(rdev, &rdev->ring[ring], |
||
251 | vm_id->id, vm_id->pd_gpu_addr); |
||
252 | |||
5078 | serge | 253 | } |
254 | } |
||
255 | |||
256 | /** |
||
257 | * radeon_vm_fence - remember fence for vm |
||
258 | * |
||
259 | * @rdev: radeon_device pointer |
||
260 | * @vm: vm we want to fence |
||
261 | * @fence: fence to remember |
||
262 | * |
||
263 | * Fence the vm (cayman+). |
||
264 | * Set the fence used to protect page table and id. |
||
265 | * |
||
266 | * Global and local mutex must be locked! |
||
267 | */ |
||
268 | void radeon_vm_fence(struct radeon_device *rdev, |
||
269 | struct radeon_vm *vm, |
||
270 | struct radeon_fence *fence) |
||
271 | { |
||
5271 | serge | 272 | unsigned vm_id = vm->ids[fence->ring].id; |
5078 | serge | 273 | |
5271 | serge | 274 | radeon_fence_unref(&rdev->vm_manager.active[vm_id]); |
275 | rdev->vm_manager.active[vm_id] = radeon_fence_ref(fence); |
||
5078 | serge | 276 | |
5271 | serge | 277 | radeon_fence_unref(&vm->ids[fence->ring].last_id_use); |
278 | vm->ids[fence->ring].last_id_use = radeon_fence_ref(fence); |
||
5078 | serge | 279 | } |
280 | |||
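Taken together, radeon_vm_grab_id(), radeon_vm_flush() and radeon_vm_fence() bracket a command submission. A rough sketch of that flow, assuming the VM mutex is held as required above; the ib.sync plumbing and error handling are simplified from the real radeon_ib_schedule() path:

```c
/* Simplified submission-time flow (illustrative, not literal driver code) */
struct radeon_fence *vm_id_fence;

vm_id_fence = radeon_vm_grab_id(rdev, vm, ring);   /* may return a fence to wait on */
radeon_sync_fence(&ib.sync, vm_id_fence);          /* sync to the VMID's previous owner */

radeon_vm_flush(rdev, vm, ring, ib.sync.last_vm_update); /* emit PD address + VM flush */

/* ... the IB itself is emitted and scheduled here ... */

radeon_vm_fence(rdev, vm, ib.fence);               /* this fence now protects VMID and PD */
```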
281 | /** |
||
282 | * radeon_vm_bo_find - find the bo_va for a specific vm & bo |
||
283 | * |
||
284 | * @vm: requested vm |
||
285 | * @bo: requested buffer object |
||
286 | * |
||
287 | * Find @bo inside the requested vm (cayman+). |
||
288 | * Search the @bo's vm list for the requested vm |
||
289 | * Returns the found bo_va or NULL if none is found |
||
290 | * |
||
291 | * Object has to be reserved! |
||
292 | */ |
||
293 | struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm, |
||
294 | struct radeon_bo *bo) |
||
295 | { |
||
296 | struct radeon_bo_va *bo_va; |
||
297 | |||
298 | list_for_each_entry(bo_va, &bo->va, bo_list) { |
||
299 | if (bo_va->vm == vm) { |
||
300 | return bo_va; |
||
301 | } |
||
302 | } |
||
303 | return NULL; |
||
304 | } |
||
305 | |||
306 | /** |
||
307 | * radeon_vm_bo_add - add a bo to a specific vm |
||
308 | * |
||
309 | * @rdev: radeon_device pointer |
||
310 | * @vm: requested vm |
||
311 | * @bo: radeon buffer object |
||
312 | * |
||
313 | * Add @bo into the requested vm (cayman+). |
||
314 | * Add @bo to the list of bos associated with the vm |
||
315 | * Returns newly added bo_va or NULL for failure |
||
316 | * |
||
317 | * Object has to be reserved! |
||
318 | */ |
||
319 | struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev, |
||
320 | struct radeon_vm *vm, |
||
321 | struct radeon_bo *bo) |
||
322 | { |
||
323 | struct radeon_bo_va *bo_va; |
||
324 | |||
325 | bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); |
||
326 | if (bo_va == NULL) { |
||
327 | return NULL; |
||
328 | } |
||
329 | bo_va->vm = vm; |
||
330 | bo_va->bo = bo; |
||
331 | bo_va->it.start = 0; |
||
332 | bo_va->it.last = 0; |
||
333 | bo_va->flags = 0; |
||
334 | bo_va->ref_count = 1; |
||
335 | INIT_LIST_HEAD(&bo_va->bo_list); |
||
336 | INIT_LIST_HEAD(&bo_va->vm_status); |
||
337 | |||
338 | mutex_lock(&vm->mutex); |
||
339 | list_add_tail(&bo_va->bo_list, &bo->va); |
||
340 | mutex_unlock(&vm->mutex); |
||
341 | |||
342 | return bo_va; |
||
343 | } |
||
344 | |||
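A typical caller pairs this with radeon_vm_bo_find() above and radeon_vm_bo_set_addr() below, roughly as the GEM VA ioctl does. A hedged sketch (error handling trimmed; va_offset and the flag combination are placeholder values):

```c
/* Illustrative mapping sequence (not literal ioctl code) */
struct radeon_bo_va *bo_va;
int r;

r = radeon_bo_reserve(bo, false);           /* the bo must be reserved */
if (r)
	return r;

bo_va = radeon_vm_bo_find(vm, bo);
if (!bo_va)
	bo_va = radeon_vm_bo_add(rdev, vm, bo); /* first mapping of this bo in vm */

/* validates the range, allocates page tables and unreserves the bo again */
r = radeon_vm_bo_set_addr(rdev, bo_va, va_offset,
			  RADEON_VM_PAGE_READABLE |
			  RADEON_VM_PAGE_WRITEABLE |
			  RADEON_VM_PAGE_SNOOPED);
```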
345 | /** |
||
346 | * radeon_vm_set_pages - helper to call the right asic function |
||
347 | * |
||
348 | * @rdev: radeon_device pointer |
||
349 | * @ib: indirect buffer to fill with commands |
||
350 | * @pe: addr of the page entry |
||
351 | * @addr: dst addr to write into pe |
||
352 | * @count: number of page entries to update |
||
353 | * @incr: increase next addr by incr bytes |
||
354 | * @flags: hw access flags |
||
355 | * |
||
356 | * Traces the parameters and calls the right asic functions |
||
357 | * to setup the page table using the DMA. |
||
358 | */ |
||
359 | static void radeon_vm_set_pages(struct radeon_device *rdev, |
||
360 | struct radeon_ib *ib, |
||
361 | uint64_t pe, |
||
362 | uint64_t addr, unsigned count, |
||
363 | uint32_t incr, uint32_t flags) |
||
364 | { |
||
365 | trace_radeon_vm_set_page(pe, addr, count, incr, flags); |
||
366 | |||
367 | if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) { |
||
368 | uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8; |
||
369 | radeon_asic_vm_copy_pages(rdev, ib, pe, src, count); |
||
370 | |||
371 | } else if ((flags & R600_PTE_SYSTEM) || (count < 3)) { |
||
372 | radeon_asic_vm_write_pages(rdev, ib, pe, addr, |
||
373 | count, incr, flags); |
||
374 | |||
375 | } else { |
||
376 | radeon_asic_vm_set_pages(rdev, ib, pe, addr, |
||
377 | count, incr, flags); |
||
378 | } |
||
379 | } |
||
380 | |||
381 | /** |
||
382 | * radeon_vm_clear_bo - initially clear the page dir/table |
||
383 | * |
||
384 | * @rdev: radeon_device pointer |
||
385 | * @bo: bo to clear |
||
386 | */ |
||
387 | static int radeon_vm_clear_bo(struct radeon_device *rdev, |
||
388 | struct radeon_bo *bo) |
||
389 | { |
||
390 | struct radeon_ib ib; |
||
391 | unsigned entries; |
||
392 | uint64_t addr; |
||
393 | int r; |
||
394 | |||
5271 | serge | 395 | r = radeon_bo_reserve(bo, false); |
6104 | serge | 396 | if (r) |
5078 | serge | 397 | return r; |
398 | |||
6104 | serge | 399 | r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); |
400 | if (r) |
||
5271 | serge | 401 | goto error_unreserve; |
5078 | serge | 402 | |
403 | addr = radeon_bo_gpu_offset(bo); |
||
404 | entries = radeon_bo_size(bo) / 8; |
||
405 | |||
406 | r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, 256); |
||
407 | if (r) |
||
5271 | serge | 408 | goto error_unreserve; |
5078 | serge | 409 | |
410 | ib.length_dw = 0; |
||
411 | |||
412 | radeon_vm_set_pages(rdev, &ib, addr, 0, entries, 0, 0); |
||
413 | radeon_asic_vm_pad_ib(rdev, &ib); |
||
414 | WARN_ON(ib.length_dw > 64); |
||
415 | |||
416 | r = radeon_ib_schedule(rdev, &ib, NULL, false); |
||
417 | if (r) |
||
5271 | serge | 418 | goto error_free; |
5078 | serge | 419 | |
5271 | serge | 420 | ib.fence->is_vm_update = true; |
421 | radeon_bo_fence(bo, ib.fence, false); |
||
422 | |||
423 | error_free: |
||
5078 | serge | 424 | radeon_ib_free(rdev, &ib); |
425 | |||
5271 | serge | 426 | error_unreserve: |
427 | radeon_bo_unreserve(bo); |
||
5078 | serge | 428 | return r; |
429 | } |
||
430 | |||
431 | /** |
||
432 | * radeon_vm_bo_set_addr - set bos virtual address inside a vm |
||
433 | * |
||
434 | * @rdev: radeon_device pointer |
||
435 | * @bo_va: bo_va to store the address |
||
436 | * @soffset: requested offset of the buffer in the VM address space |
||
437 | * @flags: attributes of pages (read/write/valid/etc.) |
||
438 | * |
||
439 | * Set offset of @bo_va (cayman+). |
||
440 | * Validate and set the offset requested within the vm address space. |
||
441 | * Returns 0 for success, error for failure. |
||
442 | * |
||
5271 | serge | 443 | * Object has to be reserved and gets unreserved by this function! |
5078 | serge | 444 | */ |
445 | int radeon_vm_bo_set_addr(struct radeon_device *rdev, |
||
446 | struct radeon_bo_va *bo_va, |
||
447 | uint64_t soffset, |
||
448 | uint32_t flags) |
||
449 | { |
||
450 | uint64_t size = radeon_bo_size(bo_va->bo); |
||
451 | struct radeon_vm *vm = bo_va->vm; |
||
452 | unsigned last_pfn, pt_idx; |
||
453 | uint64_t eoffset; |
||
454 | int r; |
||
455 | |||
456 | if (soffset) { |
||
457 | /* make sure the object fits at this offset */ |
||
458 | eoffset = soffset + size; |
||
459 | if (soffset >= eoffset) { |
||
6104 | serge | 460 | r = -EINVAL; |
461 | goto error_unreserve; |
||
5078 | serge | 462 | } |
463 | |||
464 | last_pfn = eoffset / RADEON_GPU_PAGE_SIZE; |
||
465 | if (last_pfn > rdev->vm_manager.max_pfn) { |
||
466 | dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n", |
||
467 | last_pfn, rdev->vm_manager.max_pfn); |
||
6104 | serge | 468 | r = -EINVAL; |
469 | goto error_unreserve; |
||
5078 | serge | 470 | } |
471 | |||
472 | } else { |
||
473 | eoffset = last_pfn = 0; |
||
474 | } |
||
475 | |||
476 | mutex_lock(&vm->mutex); |
||
477 | soffset /= RADEON_GPU_PAGE_SIZE; |
||
478 | eoffset /= RADEON_GPU_PAGE_SIZE; |
||
479 | if (soffset || eoffset) { |
||
480 | struct interval_tree_node *it; |
||
481 | it = interval_tree_iter_first(&vm->va, soffset, eoffset - 1); |
||
6104 | serge | 482 | if (it && it != &bo_va->it) { |
5078 | serge | 483 | struct radeon_bo_va *tmp; |
484 | tmp = container_of(it, struct radeon_bo_va, it); |
||
485 | /* bo and tmp overlap, invalid offset */ |
||
486 | dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with " |
||
487 | "(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo, |
||
488 | soffset, tmp->bo, tmp->it.start, tmp->it.last); |
||
489 | mutex_unlock(&vm->mutex); |
||
6104 | serge | 490 | r = -EINVAL; |
491 | goto error_unreserve; |
||
5078 | serge | 492 | } |
6104 | serge | 493 | } |
494 | |||
495 | if (bo_va->it.start || bo_va->it.last) { |
||
496 | /* add a clone of the bo_va to clear the old address */ |
||
497 | struct radeon_bo_va *tmp; |
||
498 | tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); |
||
499 | if (!tmp) { |
||
500 | mutex_unlock(&vm->mutex); |
||
501 | r = -ENOMEM; |
||
502 | goto error_unreserve; |
||
503 | } |
||
504 | tmp->it.start = bo_va->it.start; |
||
505 | tmp->it.last = bo_va->it.last; |
||
506 | tmp->vm = vm; |
||
507 | tmp->bo = radeon_bo_ref(bo_va->bo); |
||
508 | |||
509 | interval_tree_remove(&bo_va->it, &vm->va); |
||
510 | spin_lock(&vm->status_lock); |
||
511 | bo_va->it.start = 0; |
||
512 | bo_va->it.last = 0; |
||
513 | list_del_init(&bo_va->vm_status); |
||
514 | list_add(&tmp->vm_status, &vm->freed); |
||
515 | spin_unlock(&vm->status_lock); |
||
516 | } |
||
517 | |||
518 | if (soffset || eoffset) { |
||
519 | spin_lock(&vm->status_lock); |
||
5078 | serge | 520 | bo_va->it.start = soffset; |
521 | bo_va->it.last = eoffset - 1; |
||
6104 | serge | 522 | list_add(&bo_va->vm_status, &vm->cleared); |
523 | spin_unlock(&vm->status_lock); |
||
5078 | serge | 524 | interval_tree_insert(&bo_va->it, &vm->va); |
525 | } |
||
526 | |||
527 | bo_va->flags = flags; |
||
528 | |||
529 | soffset >>= radeon_vm_block_size; |
||
530 | eoffset >>= radeon_vm_block_size; |
||
531 | |||
532 | BUG_ON(eoffset >= radeon_vm_num_pdes(rdev)); |
||
533 | |||
534 | if (eoffset > vm->max_pde_used) |
||
535 | vm->max_pde_used = eoffset; |
||
536 | |||
537 | radeon_bo_unreserve(bo_va->bo); |
||
538 | |||
539 | /* walk over the address space and allocate the page tables */ |
||
540 | for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) { |
||
541 | struct radeon_bo *pt; |
||
542 | |||
543 | if (vm->page_tables[pt_idx].bo) |
||
544 | continue; |
||
545 | |||
546 | /* drop mutex to allocate and clear page table */ |
||
547 | mutex_unlock(&vm->mutex); |
||
548 | |||
549 | r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8, |
||
550 | RADEON_GPU_PAGE_SIZE, true, |
||
5271 | serge | 551 | RADEON_GEM_DOMAIN_VRAM, 0, |
552 | NULL, NULL, &pt); |
||
5078 | serge | 553 | if (r) |
554 | return r; |
||
555 | |||
556 | r = radeon_vm_clear_bo(rdev, pt); |
||
557 | if (r) { |
||
558 | radeon_bo_unref(&pt); |
||
559 | return r; |
||
560 | } |
||
561 | |||
562 | /* acquire mutex again */ |
||
563 | mutex_lock(&vm->mutex); |
||
564 | if (vm->page_tables[pt_idx].bo) { |
||
565 | /* someone else allocated the pt in the meantime */ |
||
566 | mutex_unlock(&vm->mutex); |
||
567 | radeon_bo_unref(&pt); |
||
568 | mutex_lock(&vm->mutex); |
||
569 | continue; |
||
570 | } |
||
571 | |||
572 | vm->page_tables[pt_idx].addr = 0; |
||
573 | vm->page_tables[pt_idx].bo = pt; |
||
574 | } |
||
575 | |||
576 | mutex_unlock(&vm->mutex); |
||
5271 | serge | 577 | return 0; |
6104 | serge | 578 | |
579 | error_unreserve: |
||
580 | radeon_bo_unreserve(bo_va->bo); |
||
581 | return r; |
||
5078 | serge | 582 | } |
583 | |||
584 | /** |
||
585 | * radeon_vm_map_gart - get the physical address of a gart page |
||
586 | * |
||
587 | * @rdev: radeon_device pointer |
||
588 | * @addr: the unmapped addr |
||
589 | * |
||
590 | * Look up the physical address of the page that the pte resolves |
||
591 | * to (cayman+). |
||
592 | * Returns the physical address of the page. |
||
593 | */ |
||
594 | uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr) |
||
595 | { |
||
596 | uint64_t result; |
||
597 | |||
598 | /* page table offset */ |
||
6104 | serge | 599 | result = rdev->gart.pages_entry[addr >> RADEON_GPU_PAGE_SHIFT]; |
600 | result &= ~RADEON_GPU_PAGE_MASK; |
||
5078 | serge | 601 | |
602 | return result; |
||
603 | } |
||
604 | |||
605 | /** |
||
606 | * radeon_vm_page_flags - translate page flags to what the hw uses |
||
607 | * |
||
608 | * @flags: flags coming from userspace |
||
609 | * |
||
610 | * Translate the flags the userspace ABI uses to hw flags. |
||
611 | */ |
||
612 | static uint32_t radeon_vm_page_flags(uint32_t flags) |
||
613 | { |
||
614 | uint32_t hw_flags = 0; |
||
615 | hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0; |
||
616 | hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0; |
||
617 | hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0; |
||
618 | if (flags & RADEON_VM_PAGE_SYSTEM) { |
||
619 | hw_flags |= R600_PTE_SYSTEM; |
||
620 | hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0; |
||
621 | } |
||
622 | return hw_flags; |
||
623 | } |
||
624 | |||
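For example, a mapping requested with RADEON_VM_PAGE_VALID | RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_SYSTEM | RADEON_VM_PAGE_SNOOPED translates to R600_PTE_VALID | R600_PTE_READABLE | R600_PTE_SYSTEM | R600_PTE_SNOOPED, while SNOOPED without SYSTEM is ignored, exactly as the function above shows.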
625 | /** |
||
626 | * radeon_vm_update_page_directory - make sure that the page directory is valid |
||
627 | * |
||
628 | * @rdev: radeon_device pointer |
||
629 | * @vm: requested vm |
||
630 | * @start: start of GPU address range |
||
631 | * @end: end of GPU address range |
||
632 | * |
||
633 | * Allocates new page tables if necessary |
||
634 | * and updates the page directory (cayman+). |
||
635 | * Returns 0 for success, error for failure. |
||
636 | * |
||
637 | * Global and local mutex must be locked! |
||
638 | */ |
||
639 | int radeon_vm_update_page_directory(struct radeon_device *rdev, |
||
640 | struct radeon_vm *vm) |
||
641 | { |
||
642 | struct radeon_bo *pd = vm->page_directory; |
||
643 | uint64_t pd_addr = radeon_bo_gpu_offset(pd); |
||
644 | uint32_t incr = RADEON_VM_PTE_COUNT * 8; |
||
645 | uint64_t last_pde = ~0, last_pt = ~0; |
||
646 | unsigned count = 0, pt_idx, ndw; |
||
647 | struct radeon_ib ib; |
||
648 | int r; |
||
649 | |||
650 | /* padding, etc. */ |
||
651 | ndw = 64; |
||
652 | |||
653 | /* assume the worst case */ |
||
654 | ndw += vm->max_pde_used * 6; |
||
655 | |||
656 | /* update too big for an IB */ |
||
657 | if (ndw > 0xfffff) |
||
658 | return -ENOMEM; |
||
659 | |||
660 | r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); |
||
661 | if (r) |
||
662 | return r; |
||
663 | ib.length_dw = 0; |
||
664 | |||
665 | /* walk over the address space and update the page directory */ |
||
666 | for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) { |
||
667 | struct radeon_bo *bo = vm->page_tables[pt_idx].bo; |
||
668 | uint64_t pde, pt; |
||
669 | |||
670 | if (bo == NULL) |
||
671 | continue; |
||
672 | |||
673 | pt = radeon_bo_gpu_offset(bo); |
||
674 | if (vm->page_tables[pt_idx].addr == pt) |
||
675 | continue; |
||
676 | vm->page_tables[pt_idx].addr = pt; |
||
677 | |||
678 | pde = pd_addr + pt_idx * 8; |
||
679 | if (((last_pde + 8 * count) != pde) || |
||
680 | ((last_pt + incr * count) != pt)) { |
||
681 | |||
682 | if (count) { |
||
683 | radeon_vm_set_pages(rdev, &ib, last_pde, |
||
6104 | serge | 684 | last_pt, count, incr, |
685 | R600_PTE_VALID); |
||
5078 | serge | 686 | } |
687 | |||
688 | count = 1; |
||
689 | last_pde = pde; |
||
690 | last_pt = pt; |
||
691 | } else { |
||
692 | ++count; |
||
693 | } |
||
694 | } |
||
695 | |||
696 | if (count) |
||
697 | radeon_vm_set_pages(rdev, &ib, last_pde, last_pt, count, |
||
6104 | serge | 698 | incr, R600_PTE_VALID); |
5078 | serge | 699 | |
700 | if (ib.length_dw != 0) { |
||
701 | radeon_asic_vm_pad_ib(rdev, &ib); |
||
5271 | serge | 702 | |
703 | radeon_sync_resv(rdev, &ib.sync, pd->tbo.resv, true); |
||
5078 | serge | 704 | WARN_ON(ib.length_dw > ndw); |
705 | r = radeon_ib_schedule(rdev, &ib, NULL, false); |
||
706 | if (r) { |
||
707 | radeon_ib_free(rdev, &ib); |
||
708 | return r; |
||
709 | } |
||
5271 | serge | 710 | ib.fence->is_vm_update = true; |
711 | radeon_bo_fence(pd, ib.fence, false); |
||
5078 | serge | 712 | } |
713 | radeon_ib_free(rdev, &ib); |
||
714 | |||
715 | return 0; |
||
716 | } |
||
717 | |||
718 | /** |
||
719 | * radeon_vm_frag_ptes - add fragment information to PTEs |
||
720 | * |
||
721 | * @rdev: radeon_device pointer |
||
722 | * @ib: IB for the update |
||
723 | * @pe_start: first PTE to handle |
||
724 | * @pe_end: last PTE to handle |
||
725 | * @addr: addr those PTEs should point to |
||
726 | * @flags: hw mapping flags |
||
727 | * |
||
728 | * Global and local mutex must be locked! |
||
729 | */ |
||
730 | static void radeon_vm_frag_ptes(struct radeon_device *rdev, |
||
731 | struct radeon_ib *ib, |
||
732 | uint64_t pe_start, uint64_t pe_end, |
||
733 | uint64_t addr, uint32_t flags) |
||
734 | { |
||
735 | /** |
||
736 | * The MC L1 TLB supports variable sized pages, based on a fragment |
||
737 | * field in the PTE. When this field is set to a non-zero value, page |
||
738 | * granularity is increased from 4KB to (1 << (12 + frag)). The PTE |
||
739 | * flags are considered valid for all PTEs within the fragment range |
||
740 | * and corresponding mappings are assumed to be physically contiguous. |
||
741 | * |
||
742 | * The L1 TLB can store a single PTE for the whole fragment, |
||
743 | * significantly increasing the space available for translation |
||
744 | * caching. This leads to large improvements in throughput when the |
||
745 | * TLB is under pressure. |
||
746 | * |
||
747 | * The L2 TLB distributes small and large fragments into two |
||
748 | * asymmetric partitions. The large fragment cache is significantly |
||
749 | * larger. Thus, we try to use large fragments wherever possible. |
||
750 | * Userspace can support this by aligning virtual base address and |
||
751 | * allocation size to the fragment size. |
||
752 | */ |
||
753 | |||
754 | /* NI is optimized for 256KB fragments, SI and newer for 64KB */ |
||
6104 | serge | 755 | uint64_t frag_flags = ((rdev->family == CHIP_CAYMAN) || |
756 | (rdev->family == CHIP_ARUBA)) ? |
||
5078 | serge | 757 | R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB; |
6104 | serge | 758 | uint64_t frag_align = ((rdev->family == CHIP_CAYMAN) || |
759 | (rdev->family == CHIP_ARUBA)) ? 0x200 : 0x80; |
||
5078 | serge | 760 | |
761 | uint64_t frag_start = ALIGN(pe_start, frag_align); |
||
762 | uint64_t frag_end = pe_end & ~(frag_align - 1); |
||
763 | |||
764 | unsigned count; |
||
765 | |||
766 | /* system pages are not physically contiguous */ |
||
767 | if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) || |
||
768 | (frag_start >= frag_end)) { |
||
769 | |||
770 | count = (pe_end - pe_start) / 8; |
||
771 | radeon_vm_set_pages(rdev, ib, pe_start, addr, count, |
||
6104 | serge | 772 | RADEON_GPU_PAGE_SIZE, flags); |
5078 | serge | 773 | return; |
774 | } |
||
775 | |||
776 | /* handle the 4K area at the beginning */ |
||
777 | if (pe_start != frag_start) { |
||
778 | count = (frag_start - pe_start) / 8; |
||
779 | radeon_vm_set_pages(rdev, ib, pe_start, addr, count, |
||
6104 | serge | 780 | RADEON_GPU_PAGE_SIZE, flags); |
5078 | serge | 781 | addr += RADEON_GPU_PAGE_SIZE * count; |
782 | } |
||
783 | |||
784 | /* handle the area in the middle */ |
||
785 | count = (frag_end - frag_start) / 8; |
||
786 | radeon_vm_set_pages(rdev, ib, frag_start, addr, count, |
||
6104 | serge | 787 | RADEON_GPU_PAGE_SIZE, flags | frag_flags); |
5078 | serge | 788 | |
789 | /* handle the 4K area at the end */ |
||
790 | if (frag_end != pe_end) { |
||
791 | addr += RADEON_GPU_PAGE_SIZE * count; |
||
792 | count = (pe_end - frag_end) / 8; |
||
793 | radeon_vm_set_pages(rdev, ib, frag_end, addr, count, |
||
6104 | serge | 794 | RADEON_GPU_PAGE_SIZE, flags); |
5078 | serge | 795 | } |
796 | } |
||
797 | |||
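A worked example using the SI+ values from above (frag_align = 0x80, i.e. 16 PTEs covering one 64 KB fragment): for pe_start = 0x1040 and pe_end = 0x2100, frag_start = ALIGN(0x1040, 0x80) = 0x1080 and frag_end = 0x2100 & ~0x7f = 0x2100. The head of (0x1080 - 0x1040) / 8 = 8 PTEs is written as plain 4 KB entries, the middle (0x2100 - 0x1080) / 8 = 528 PTEs get flags | R600_PTE_FRAG_64KB, and because frag_end equals pe_end no 4 KB tail is needed.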
798 | /** |
||
799 | * radeon_vm_update_ptes - make sure that page tables are valid |
||
800 | * |
||
801 | * @rdev: radeon_device pointer |
||
802 | * @vm: requested vm |
||
803 | * @start: start of GPU address range |
||
804 | * @end: end of GPU address range |
||
805 | * @dst: destination address to map to |
||
806 | * @flags: mapping flags |
||
807 | * |
||
808 | * Update the page tables in the range @start - @end (cayman+). |
||
809 | * |
||
810 | * Global and local mutex must be locked! |
||
811 | */ |
||
5271 | serge | 812 | static int radeon_vm_update_ptes(struct radeon_device *rdev, |
6104 | serge | 813 | struct radeon_vm *vm, |
814 | struct radeon_ib *ib, |
||
815 | uint64_t start, uint64_t end, |
||
816 | uint64_t dst, uint32_t flags) |
||
5078 | serge | 817 | { |
818 | uint64_t mask = RADEON_VM_PTE_COUNT - 1; |
||
819 | uint64_t last_pte = ~0, last_dst = ~0; |
||
820 | unsigned count = 0; |
||
821 | uint64_t addr; |
||
822 | |||
823 | /* walk over the address space and update the page tables */ |
||
824 | for (addr = start; addr < end; ) { |
||
825 | uint64_t pt_idx = addr >> radeon_vm_block_size; |
||
826 | struct radeon_bo *pt = vm->page_tables[pt_idx].bo; |
||
827 | unsigned nptes; |
||
828 | uint64_t pte; |
||
5271 | serge | 829 | int r; |
5078 | serge | 830 | |
5271 | serge | 831 | radeon_sync_resv(rdev, &ib->sync, pt->tbo.resv, true); |
832 | r = reservation_object_reserve_shared(pt->tbo.resv); |
||
833 | if (r) |
||
834 | return r; |
||
5078 | serge | 835 | |
836 | if ((addr & ~mask) == (end & ~mask)) |
||
837 | nptes = end - addr; |
||
838 | else |
||
839 | nptes = RADEON_VM_PTE_COUNT - (addr & mask); |
||
840 | |||
841 | pte = radeon_bo_gpu_offset(pt); |
||
842 | pte += (addr & mask) * 8; |
||
843 | |||
844 | if ((last_pte + 8 * count) != pte) { |
||
845 | |||
846 | if (count) { |
||
847 | radeon_vm_frag_ptes(rdev, ib, last_pte, |
||
848 | last_pte + 8 * count, |
||
849 | last_dst, flags); |
||
850 | } |
||
851 | |||
852 | count = nptes; |
||
853 | last_pte = pte; |
||
854 | last_dst = dst; |
||
855 | } else { |
||
856 | count += nptes; |
||
857 | } |
||
858 | |||
859 | addr += nptes; |
||
860 | dst += nptes * RADEON_GPU_PAGE_SIZE; |
||
861 | } |
||
862 | |||
863 | if (count) { |
||
864 | radeon_vm_frag_ptes(rdev, ib, last_pte, |
||
865 | last_pte + 8 * count, |
||
866 | last_dst, flags); |
||
867 | } |
||
5271 | serge | 868 | |
869 | return 0; |
||
5078 | serge | 870 | } |
871 | |||
872 | /** |
||
5271 | serge | 873 | * radeon_vm_fence_pts - fence page tables after an update |
874 | * |
||
875 | * @vm: requested vm |
||
876 | * @start: start of GPU address range |
||
877 | * @end: end of GPU address range |
||
878 | * @fence: fence to use |
||
879 | * |
||
880 | * Fence the page tables in the range @start - @end (cayman+). |
||
881 | * |
||
882 | * Global and local mutex must be locked! |
||
883 | */ |
||
884 | static void radeon_vm_fence_pts(struct radeon_vm *vm, |
||
885 | uint64_t start, uint64_t end, |
||
886 | struct radeon_fence *fence) |
||
887 | { |
||
888 | unsigned i; |
||
889 | |||
890 | start >>= radeon_vm_block_size; |
||
891 | end >>= radeon_vm_block_size; |
||
892 | |||
893 | for (i = start; i <= end; ++i) |
||
894 | radeon_bo_fence(vm->page_tables[i].bo, fence, true); |
||
895 | } |
||
896 | |||
897 | /** |
||
5078 | serge | 898 | * radeon_vm_bo_update - map a bo into the vm page table |
899 | * |
||
900 | * @rdev: radeon_device pointer |
||
901 | * @vm: requested vm |
||
902 | * @bo: radeon buffer object |
||
903 | * @mem: ttm mem |
||
904 | * |
||
905 | * Fill in the page table entries for @bo (cayman+). |
||
906 | * Returns 0 for success, -EINVAL for failure. |
||
907 | * |
||
908 | * Object has to be reserved and mutex must be locked! |
||
909 | */ |
||
910 | int radeon_vm_bo_update(struct radeon_device *rdev, |
||
911 | struct radeon_bo_va *bo_va, |
||
912 | struct ttm_mem_reg *mem) |
||
913 | { |
||
914 | struct radeon_vm *vm = bo_va->vm; |
||
915 | struct radeon_ib ib; |
||
916 | unsigned nptes, ncmds, ndw; |
||
917 | uint64_t addr; |
||
918 | uint32_t flags; |
||
919 | int r; |
||
920 | |||
921 | if (!bo_va->it.start) { |
||
922 | dev_err(rdev->dev, "bo %p doesn't have a mapping in vm %p\n", |
||
923 | bo_va->bo, vm); |
||
924 | return -EINVAL; |
||
925 | } |
||
926 | |||
5271 | serge | 927 | spin_lock(&vm->status_lock); |
6104 | serge | 928 | if (mem) { |
929 | if (list_empty(&bo_va->vm_status)) { |
||
930 | spin_unlock(&vm->status_lock); |
||
931 | return 0; |
||
932 | } |
||
933 | list_del_init(&bo_va->vm_status); |
||
934 | } else { |
||
935 | list_del(&bo_va->vm_status); |
||
936 | list_add(&bo_va->vm_status, &vm->cleared); |
||
937 | } |
||
5271 | serge | 938 | spin_unlock(&vm->status_lock); |
5078 | serge | 939 | |
940 | bo_va->flags &= ~RADEON_VM_PAGE_VALID; |
||
941 | bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM; |
||
942 | bo_va->flags &= ~RADEON_VM_PAGE_SNOOPED; |
||
5271 | serge | 943 | // if (bo_va->bo && radeon_ttm_tt_is_readonly(bo_va->bo->tbo.ttm)) |
944 | // bo_va->flags &= ~RADEON_VM_PAGE_WRITEABLE; |
||
945 | |||
5078 | serge | 946 | if (mem) { |
947 | addr = mem->start << PAGE_SHIFT; |
||
948 | if (mem->mem_type != TTM_PL_SYSTEM) { |
||
949 | bo_va->flags |= RADEON_VM_PAGE_VALID; |
||
950 | } |
||
951 | if (mem->mem_type == TTM_PL_TT) { |
||
952 | bo_va->flags |= RADEON_VM_PAGE_SYSTEM; |
||
953 | if (!(bo_va->bo->flags & (RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC))) |
||
954 | bo_va->flags |= RADEON_VM_PAGE_SNOOPED; |
||
955 | |||
956 | } else { |
||
957 | addr += rdev->vm_manager.vram_base_offset; |
||
958 | } |
||
959 | } else { |
||
960 | addr = 0; |
||
961 | } |
||
962 | |||
963 | trace_radeon_vm_bo_update(bo_va); |
||
964 | |||
965 | nptes = bo_va->it.last - bo_va->it.start + 1; |
||
966 | |||
967 | /* reserve space for one command every (1 << BLOCK_SIZE) entries |
||
968 | or 2k dwords (whatever is smaller) */ |
||
969 | ncmds = (nptes >> min(radeon_vm_block_size, 11)) + 1; |
||
970 | |||
971 | /* padding, etc. */ |
||
972 | ndw = 64; |
||
973 | |||
974 | flags = radeon_vm_page_flags(bo_va->flags); |
||
975 | if ((flags & R600_PTE_GART_MASK) == R600_PTE_GART_MASK) { |
||
976 | /* only copy commands needed */ |
||
977 | ndw += ncmds * 7; |
||
978 | |||
979 | } else if (flags & R600_PTE_SYSTEM) { |
||
980 | /* header for write data commands */ |
||
981 | ndw += ncmds * 4; |
||
982 | |||
983 | /* body of write data command */ |
||
6104 | serge | 984 | ndw += nptes * 2; |
5078 | serge | 985 | |
986 | } else { |
||
987 | /* set page commands needed */ |
||
988 | ndw += ncmds * 10; |
||
989 | |||
990 | /* two extra commands for begin/end of fragment */ |
||
991 | ndw += 2 * 10; |
||
992 | } |
||
993 | |||
994 | /* update too big for an IB */ |
||
995 | if (ndw > 0xfffff) |
||
996 | return -ENOMEM; |
||
997 | |||
998 | r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4); |
||
999 | if (r) |
||
1000 | return r; |
||
1001 | ib.length_dw = 0; |
||
1002 | |||
5271 | serge | 1003 | if (!(bo_va->flags & RADEON_VM_PAGE_VALID)) { |
1004 | unsigned i; |
||
1005 | |||
1006 | for (i = 0; i < RADEON_NUM_RINGS; ++i) |
||
1007 | radeon_sync_fence(&ib.sync, vm->ids[i].last_id_use); |
||
1008 | } |
||
1009 | |||
1010 | r = radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start, |
||
6104 | serge | 1011 | bo_va->it.last + 1, addr, |
1012 | radeon_vm_page_flags(bo_va->flags)); |
||
5271 | serge | 1013 | if (r) { |
1014 | radeon_ib_free(rdev, &ib); |
||
1015 | return r; |
||
1016 | } |
||
5078 | serge | 1017 | |
1018 | radeon_asic_vm_pad_ib(rdev, &ib); |
||
1019 | WARN_ON(ib.length_dw > ndw); |
||
1020 | |||
1021 | r = radeon_ib_schedule(rdev, &ib, NULL, false); |
||
1022 | if (r) { |
||
1023 | radeon_ib_free(rdev, &ib); |
||
1024 | return r; |
||
1025 | } |
||
5271 | serge | 1026 | ib.fence->is_vm_update = true; |
1027 | radeon_vm_fence_pts(vm, bo_va->it.start, bo_va->it.last + 1, ib.fence); |
||
1028 | radeon_fence_unref(&bo_va->last_pt_update); |
||
1029 | bo_va->last_pt_update = radeon_fence_ref(ib.fence); |
||
5078 | serge | 1030 | radeon_ib_free(rdev, &ib); |
1031 | |||
1032 | return 0; |
||
1033 | } |
||
1034 | |||
1035 | /** |
||
1036 | * radeon_vm_clear_freed - clear freed BOs in the PT |
||
1037 | * |
||
1038 | * @rdev: radeon_device pointer |
||
1039 | * @vm: requested vm |
||
1040 | * |
||
1041 | * Make sure all freed BOs are cleared in the PT. |
||
1042 | * Returns 0 for success. |
||
1043 | * |
||
1044 | * PTs have to be reserved and mutex must be locked! |
||
1045 | */ |
||
1046 | int radeon_vm_clear_freed(struct radeon_device *rdev, |
||
1047 | struct radeon_vm *vm) |
||
1048 | { |
||
5271 | serge | 1049 | struct radeon_bo_va *bo_va; |
6104 | serge | 1050 | int r = 0; |
5078 | serge | 1051 | |
5271 | serge | 1052 | spin_lock(&vm->status_lock); |
1053 | while (!list_empty(&vm->freed)) { |
||
1054 | bo_va = list_first_entry(&vm->freed, |
||
1055 | struct radeon_bo_va, vm_status); |
||
1056 | spin_unlock(&vm->status_lock); |
||
1057 | |||
5078 | serge | 1058 | r = radeon_vm_bo_update(rdev, bo_va, NULL); |
1059 | radeon_bo_unref(&bo_va->bo); |
||
5271 | serge | 1060 | radeon_fence_unref(&bo_va->last_pt_update); |
6104 | serge | 1061 | spin_lock(&vm->status_lock); |
1062 | list_del(&bo_va->vm_status); |
||
5078 | serge | 1063 | kfree(bo_va); |
1064 | if (r) |
||
6104 | serge | 1065 | break; |
5271 | serge | 1066 | |
5078 | serge | 1067 | } |
5271 | serge | 1068 | spin_unlock(&vm->status_lock); |
6104 | serge | 1069 | return r; |
5078 | serge | 1070 | |
1071 | } |
||
1072 | |||
1073 | /** |
||
1074 | * radeon_vm_clear_invalids - clear invalidated BOs in the PT |
||
1075 | * |
||
1076 | * @rdev: radeon_device pointer |
||
1077 | * @vm: requested vm |
||
1078 | * |
||
1079 | * Make sure all invalidated BOs are cleared in the PT. |
||
1080 | * Returns 0 for success. |
||
1081 | * |
||
1082 | * PTs have to be reserved and mutex must be locked! |
||
1083 | */ |
||
1084 | int radeon_vm_clear_invalids(struct radeon_device *rdev, |
||
1085 | struct radeon_vm *vm) |
||
1086 | { |
||
5271 | serge | 1087 | struct radeon_bo_va *bo_va; |
5078 | serge | 1088 | int r; |
1089 | |||
5271 | serge | 1090 | spin_lock(&vm->status_lock); |
1091 | while (!list_empty(&vm->invalidated)) { |
||
1092 | bo_va = list_first_entry(&vm->invalidated, |
||
1093 | struct radeon_bo_va, vm_status); |
||
1094 | spin_unlock(&vm->status_lock); |
||
1095 | |||
5078 | serge | 1096 | r = radeon_vm_bo_update(rdev, bo_va, NULL); |
1097 | if (r) |
||
1098 | return r; |
||
5271 | serge | 1099 | |
1100 | spin_lock(&vm->status_lock); |
||
5078 | serge | 1101 | } |
5271 | serge | 1102 | spin_unlock(&vm->status_lock); |
1103 | |||
5078 | serge | 1104 | return 0; |
1105 | } |
||
1106 | |||
1107 | /** |
||
1108 | * radeon_vm_bo_rmv - remove a bo from a specific vm |
||
1109 | * |
||
1110 | * @rdev: radeon_device pointer |
||
1111 | * @bo_va: requested bo_va |
||
1112 | * |
||
1113 | * Remove @bo_va->bo from the requested vm (cayman+). |
||
1114 | * |
||
1115 | * Object has to be reserved! |
||
1116 | */ |
||
1117 | void radeon_vm_bo_rmv(struct radeon_device *rdev, |
||
6104 | serge | 1118 | struct radeon_bo_va *bo_va) |
5078 | serge | 1119 | { |
1120 | struct radeon_vm *vm = bo_va->vm; |
||
1121 | |||
1122 | list_del(&bo_va->bo_list); |
||
1123 | |||
1124 | mutex_lock(&vm->mutex); |
||
6104 | serge | 1125 | if (bo_va->it.start || bo_va->it.last) |
1126 | interval_tree_remove(&bo_va->it, &vm->va); |
||
1127 | |||
5271 | serge | 1128 | spin_lock(&vm->status_lock); |
5078 | serge | 1129 | list_del(&bo_va->vm_status); |
6104 | serge | 1130 | if (bo_va->it.start || bo_va->it.last) { |
5078 | serge | 1131 | bo_va->bo = radeon_bo_ref(bo_va->bo); |
1132 | list_add(&bo_va->vm_status, &vm->freed); |
||
1133 | } else { |
||
5271 | serge | 1134 | radeon_fence_unref(&bo_va->last_pt_update); |
6104 | serge | 1135 | kfree(bo_va); |
5078 | serge | 1136 | } |
5271 | serge | 1137 | spin_unlock(&vm->status_lock); |
5078 | serge | 1138 | |
1139 | mutex_unlock(&vm->mutex); |
||
1140 | } |
||
1141 | |||
1142 | /** |
||
1143 | * radeon_vm_bo_invalidate - mark the bo as invalid |
||
1144 | * |
||
1145 | * @rdev: radeon_device pointer |
||
1146 | * @vm: requested vm |
||
1147 | * @bo: radeon buffer object |
||
1148 | * |
||
1149 | * Mark @bo as invalid (cayman+). |
||
1150 | */ |
||
1151 | void radeon_vm_bo_invalidate(struct radeon_device *rdev, |
||
1152 | struct radeon_bo *bo) |
||
1153 | { |
||
1154 | struct radeon_bo_va *bo_va; |
||
1155 | |||
1156 | list_for_each_entry(bo_va, &bo->va, bo_list) { |
||
6104 | serge | 1157 | spin_lock(&bo_va->vm->status_lock); |
1158 | if (list_empty(&bo_va->vm_status) && |
||
1159 | (bo_va->it.start || bo_va->it.last)) |
||
5078 | serge | 1160 | list_add(&bo_va->vm_status, &bo_va->vm->invalidated); |
6104 | serge | 1161 | spin_unlock(&bo_va->vm->status_lock); |
5078 | serge | 1162 | } |
1163 | } |
||
1164 | |||
1165 | /** |
||
1166 | * radeon_vm_init - initialize a vm instance |
||
1167 | * |
||
1168 | * @rdev: radeon_device pointer |
||
1169 | * @vm: requested vm |
||
1170 | * |
||
1171 | * Init @vm fields (cayman+). |
||
1172 | */ |
||
1173 | int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm) |
||
1174 | { |
||
1175 | const unsigned align = min(RADEON_VM_PTB_ALIGN_SIZE, |
||
1176 | RADEON_VM_PTE_COUNT * 8); |
||
1177 | unsigned pd_size, pd_entries, pts_size; |
||
5271 | serge | 1178 | int i, r; |
5078 | serge | 1179 | |
1180 | vm->ib_bo_va = NULL; |
||
5271 | serge | 1181 | for (i = 0; i < RADEON_NUM_RINGS; ++i) { |
1182 | vm->ids[i].id = 0; |
||
1183 | vm->ids[i].flushed_updates = NULL; |
||
1184 | vm->ids[i].last_id_use = NULL; |
||
1185 | } |
||
5078 | serge | 1186 | mutex_init(&vm->mutex); |
1187 | vm->va = RB_ROOT; |
||
5271 | serge | 1188 | spin_lock_init(&vm->status_lock); |
5078 | serge | 1189 | INIT_LIST_HEAD(&vm->invalidated); |
1190 | INIT_LIST_HEAD(&vm->freed); |
||
6104 | serge | 1191 | INIT_LIST_HEAD(&vm->cleared); |
5078 | serge | 1192 | |
1193 | pd_size = radeon_vm_directory_size(rdev); |
||
1194 | pd_entries = radeon_vm_num_pdes(rdev); |
||
1195 | |||
1196 | /* allocate page table array */ |
||
1197 | pts_size = pd_entries * sizeof(struct radeon_vm_pt); |
||
1198 | vm->page_tables = kzalloc(pts_size, GFP_KERNEL); |
||
1199 | if (vm->page_tables == NULL) { |
||
1200 | DRM_ERROR("Cannot allocate memory for page table array\n"); |
||
1201 | return -ENOMEM; |
||
1202 | } |
||
1203 | |||
1204 | r = radeon_bo_create(rdev, pd_size, align, true, |
||
1205 | RADEON_GEM_DOMAIN_VRAM, 0, NULL, |
||
5271 | serge | 1206 | NULL, &vm->page_directory); |
5078 | serge | 1207 | if (r) |
1208 | return r; |
||
1209 | |||
1210 | r = radeon_vm_clear_bo(rdev, vm->page_directory); |
||
1211 | if (r) { |
||
1212 | radeon_bo_unref(&vm->page_directory); |
||
1213 | vm->page_directory = NULL; |
||
1214 | return r; |
||
1215 | } |
||
1216 | |||
1217 | return 0; |
||
1218 | } |
||
1219 | |||
1220 | /** |
||
1221 | * radeon_vm_fini - tear down a vm instance |
||
1222 | * |
||
1223 | * @rdev: radeon_device pointer |
||
1224 | * @vm: requested vm |
||
1225 | * |
||
1226 | * Tear down @vm (cayman+). |
||
1227 | * Unbind the VM and remove all bos from the vm bo list |
||
1228 | */ |
||
1229 | void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm) |
||
1230 | { |
||
1231 | struct radeon_bo_va *bo_va, *tmp; |
||
1232 | int i, r; |
||
1233 | |||
1234 | if (!RB_EMPTY_ROOT(&vm->va)) { |
||
1235 | dev_err(rdev->dev, "still active bo inside vm\n"); |
||
1236 | } |
||
1237 | rbtree_postorder_for_each_entry_safe(bo_va, tmp, &vm->va, it.rb) { |
||
1238 | interval_tree_remove(&bo_va->it, &vm->va); |
||
1239 | r = radeon_bo_reserve(bo_va->bo, false); |
||
1240 | if (!r) { |
||
1241 | list_del_init(&bo_va->bo_list); |
||
1242 | radeon_bo_unreserve(bo_va->bo); |
||
5271 | serge | 1243 | radeon_fence_unref(&bo_va->last_pt_update); |
5078 | serge | 1244 | kfree(bo_va); |
1245 | } |
||
1246 | } |
||
1247 | list_for_each_entry_safe(bo_va, tmp, &vm->freed, vm_status) { |
||
1248 | radeon_bo_unref(&bo_va->bo); |
||
5271 | serge | 1249 | radeon_fence_unref(&bo_va->last_pt_update); |
5078 | serge | 1250 | kfree(bo_va); |
1251 | } |
||
1252 | |||
1253 | for (i = 0; i < radeon_vm_num_pdes(rdev); i++) |
||
1254 | radeon_bo_unref(&vm->page_tables[i].bo); |
||
1255 | kfree(vm->page_tables); |
||
1256 | |||
1257 | radeon_bo_unref(&vm->page_directory); |
||
1258 | |||
5271 | serge | 1259 | for (i = 0; i < RADEON_NUM_RINGS; ++i) { |
1260 | radeon_fence_unref(&vm->ids[i].flushed_updates); |
||
1261 | radeon_fence_unref(&vm->ids[i].last_id_use); |
||
1262 | } |
||
5078 | serge | 1263 | |
1264 | mutex_destroy(&vm->mutex); |
||
1265 | } |