Diff of the radeon buffer object code (radeon_object.c) between Rev 5078 and Rev 5271, rendered as a unified diff: lines prefixed '-' exist only in Rev 5078, lines prefixed '+' only in Rev 5271.
==== radeon_ttm_placement_from_domain(): Rev 5078 line 94 / Rev 5271 line 94 ====
 
 void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
 {
 	u32 c = 0, i;
 
-	rbo->placement.fpfn = 0;
-	rbo->placement.lpfn = 0;
 	rbo->placement.placement = rbo->placements;
 	rbo->placement.busy_placement = rbo->placements;
-	if (domain & RADEON_GEM_DOMAIN_VRAM)
-		rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
-					TTM_PL_FLAG_VRAM;
+	if (domain & RADEON_GEM_DOMAIN_VRAM) {
+		/* Try placing BOs which don't need CPU access outside of the
+		 * CPU accessible part of VRAM
+		 */
+		if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
+		    rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) {
+			rbo->placements[c].fpfn =
+				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
+			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
+						     TTM_PL_FLAG_UNCACHED |
+						     TTM_PL_FLAG_VRAM;
+		}
+
+		rbo->placements[c].fpfn = 0;
+		rbo->placements[c++].flags = TTM_PL_FLAG_WC |
+					     TTM_PL_FLAG_UNCACHED |
+					     TTM_PL_FLAG_VRAM;
+	}
+
 	if (domain & RADEON_GEM_DOMAIN_GTT) {
 		if (rbo->flags & RADEON_GEM_GTT_UC) {
-			rbo->placements[c++] = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_TT;
+			rbo->placements[c].fpfn = 0;
+			rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
+				TTM_PL_FLAG_TT;
+
 		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
 			   (rbo->rdev->flags & RADEON_IS_AGP)) {
-			rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
+			rbo->placements[c].fpfn = 0;
+			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
+				TTM_PL_FLAG_UNCACHED |
 				TTM_PL_FLAG_TT;
 		} else {
-			rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_TT;
+			rbo->placements[c].fpfn = 0;
+			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
+				TTM_PL_FLAG_TT;
 		}
 	}
+
 	if (domain & RADEON_GEM_DOMAIN_CPU) {
 		if (rbo->flags & RADEON_GEM_GTT_UC) {
-			rbo->placements[c++] = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_SYSTEM;
+			rbo->placements[c].fpfn = 0;
+			rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
+				TTM_PL_FLAG_SYSTEM;
+
 		} else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
 		    rbo->rdev->flags & RADEON_IS_AGP) {
-			rbo->placements[c++] = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
+			rbo->placements[c].fpfn = 0;
+			rbo->placements[c++].flags = TTM_PL_FLAG_WC |
+				TTM_PL_FLAG_UNCACHED |
 				TTM_PL_FLAG_SYSTEM;
 		} else {
-			rbo->placements[c++] = TTM_PL_FLAG_CACHED | TTM_PL_FLAG_SYSTEM;
+			rbo->placements[c].fpfn = 0;
+			rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
+				TTM_PL_FLAG_SYSTEM;
 		}
 	}
-	if (!c)
-		rbo->placements[c++] = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM;
+	if (!c) {
+		rbo->placements[c].fpfn = 0;
+		rbo->placements[c++].flags = TTM_PL_MASK_CACHING |
+			TTM_PL_FLAG_SYSTEM;
+	}
+
 	rbo->placement.num_placement = c;
 	rbo->placement.num_busy_placement = c;
 
+	for (i = 0; i < c; ++i) {
+		if ((rbo->flags & RADEON_GEM_CPU_ACCESS) &&
+		    (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
+		    !rbo->placements[i].fpfn)
+			rbo->placements[i].lpfn =
+				rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
+		else
+			rbo->placements[i].lpfn = 0;
+	}
+
 	/*
 	 * Use two-ended allocation depending on the buffer size to
 	 * improve fragmentation quality.
 	 * 512kb was measured as the most optimal number.
 	 */
 	if (rbo->tbo.mem.size > 512 * 1024) {
 		for (i = 0; i < c; i++) {
-			rbo->placements[i] |= TTM_PL_FLAG_TOPDOWN;
+			rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
 		}
 	}
 }
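The hunk above tracks TTM's move from bare u32 placement flags to struct ttm_place, which pairs the flags with a page-frame window. A minimal sketch of the type as it looked in this era of TTM (cf. include/drm/ttm/ttm_placement.h; shown for orientation, not quoted verbatim):

	/* Sketch of the placement entry this diff migrates to.  fpfn/lpfn
	 * bound the allocation in units of pages; lpfn == 0 means "no upper
	 * bound".  Example: with 4 KiB pages (PAGE_SHIFT == 12) and 256 MiB
	 * of CPU-visible VRAM, fpfn = (256 << 20) >> 12 = 65536 puts a
	 * RADEON_GEM_NO_CPU_ACCESS buffer entirely above the visible window. */
	struct ttm_place {
		unsigned	fpfn;	/* first acceptable page frame number */
		unsigned	lpfn;	/* last acceptable pfn, 0 = unlimited */
		uint32_t	flags;	/* TTM_PL_FLAG_* memory type and caching */
	};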
==== radeon_bo_create(): Rev 5078 line 144 / Rev 5271 line 187 ====
 
 int radeon_bo_create(struct radeon_device *rdev,
-		     unsigned long size, int byte_align, bool kernel, u32 domain,
-		     u32 flags, struct sg_table *sg, struct radeon_bo **bo_ptr)
+		     unsigned long size, int byte_align, bool kernel,
+		     u32 domain, u32 flags, struct sg_table *sg,
+		     struct reservation_object *resv,
+		     struct radeon_bo **bo_ptr)
 {
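Callers gain one argument; passing NULL for resv keeps the old behaviour, where ttm_bo_init allocates a private reservation object. A hypothetical call site under the new signature (names and error handling are illustrative, not from this revision):

	struct radeon_bo *bo;	/* hypothetical scratch buffer */
	int r;

	r = radeon_bo_create(rdev, 4096, PAGE_SIZE, true,
			     RADEON_GEM_DOMAIN_VRAM, 0,
			     NULL /* sg */, NULL /* resv */, &bo);
	if (r)
		return r;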
==== radeon_bo_create(), body: Rev 5078 line 185 / Rev 5271 line 230 ====
 	bo->flags = flags;
 	/* PCI GART is always snooped */
 	if (!(rdev->flags & RADEON_IS_PCIE))
 		bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
 
-//    printf("%s rdev->flags %x bo->flags %x\n",
-//           __FUNCTION__, bo->flags);
-
-	if(flags & RADEON_GEM_GTT_WC)
-		bo->flags&= ~RADEON_GEM_GTT_WC;
+#ifdef CONFIG_X86_32
+	/* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
+	 * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
+	 */
+	bo->flags &= ~RADEON_GEM_GTT_WC;
+#endif
 
 	radeon_ttm_placement_from_domain(bo, domain);
 	/* Kernel allocation are uninterruptible */
 //	down_read(&rdev->pm.mclk_lock);
 	r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
 			&bo->placement, page_align, !kernel, NULL,
-			acc_size, sg, &radeon_ttm_bo_destroy);
+			acc_size, sg, resv, &radeon_ttm_bo_destroy);
 //	up_read(&rdev->pm.mclk_lock);
 	if (unlikely(r != 0)) {
 		return r;
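The reason resv is threaded through to ttm_bo_init is buffer sharing: an imported dma-buf can hand its exporter's reservation object to the new BO, so fences signalled on either side cover both. A sketch of that pattern, loosely modelled on the driver's prime import path (the attach/sg variables are assumptions for illustration):

	/* Hypothetical import: reuse the exporter's reservation object so
	 * fences on the dma-buf also gate this BO, and vice versa. */
	struct reservation_object *resv = attach->dmabuf->resv;
	struct radeon_bo *bo;
	int r;

	r = radeon_bo_create(rdev, attach->dmabuf->size, PAGE_SIZE, false,
			     RADEON_GEM_DOMAIN_GTT, 0, sg, resv, &bo);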
==== radeon_bo_pin_restricted(): Rev 5078 line 287 / Rev 5271 line 333 ====
 		}
 
 		return 0;
 	}
 	radeon_ttm_placement_from_domain(bo, domain);
-	if (domain == RADEON_GEM_DOMAIN_VRAM) {
+	for (i = 0; i < bo->placement.num_placement; i++) {
 		/* force to pin into visible video ram */
-		bo->placement.lpfn = bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
-	}
-	if (max_offset) {
-		u64 lpfn = max_offset >> PAGE_SHIFT;
-
-		if (!bo->placement.lpfn)
-			bo->placement.lpfn = bo->rdev->mc.gtt_size >> PAGE_SHIFT;
-
-		if (lpfn < bo->placement.lpfn)
-			bo->placement.lpfn = lpfn;
-	}
-	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i] |= TTM_PL_FLAG_NO_EVICT;
+		if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
+		    !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
+		    (!max_offset || max_offset > bo->rdev->mc.visible_vram_size))
+			bo->placements[i].lpfn =
+				bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
+		else
+			bo->placements[i].lpfn = max_offset >> PAGE_SHIFT;
+
+		bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
+	}
+
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 	if (likely(r == 0)) {
 		bo->pin_count = 1;
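Since lpfn is in pages, the pinning restriction is a plain shift; a worked example with assumed sizes:

	/* Illustrative only: assume PAGE_SHIFT == 12 (4 KiB pages), a
	 * 256 MiB CPU-visible VRAM window, and max_offset = 512 MiB. */
	u64 max_offset = 512ULL << 20;
	u64 visible_vram_size = 256ULL << 20;

	/* CPU-accessible VRAM placement: clamped to the visible window. */
	unsigned lpfn_vram = visible_vram_size >> 12;	/* 65536 pages */
	/* Other placements keep the caller's cap. */
	unsigned lpfn_capped = max_offset >> 12;	/* 131072 pages */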
==== radeon_bo_unpin(): Rev 5078 line 333 / Rev 5271 line 377 ====
 		return 0;
 	}
 	bo->pin_count--;
 	if (bo->pin_count)
 		return 0;
-	for (i = 0; i < bo->placement.num_placement; i++)
-		bo->placements[i] &= ~TTM_PL_FLAG_NO_EVICT;
+	for (i = 0; i < bo->placement.num_placement; i++) {
+		bo->placements[i].lpfn = 0;
+		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
+	}
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 	if (likely(r == 0)) {
 		if (bo->tbo.mem.mem_type == TTM_PL_VRAM)
 			bo->rdev->vram_pin_size -= radeon_bo_size(bo);
 		else
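Resetting lpfn on unpin matters because pinning may have clamped the BO below the visible VRAM boundary; clearing it lets later validation use the whole aperture again. A hedged sketch of the usual pin/unpin bracket (the helpers are the driver's real ones, the flow is illustrative):

	u64 gpu_addr;
	int r;

	r = radeon_bo_reserve(bo, false);
	if (unlikely(r != 0))
		return r;
	r = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_VRAM, &gpu_addr);
	radeon_bo_unreserve(bo);
	if (r)
		return r;

	/* ... use the buffer at gpu_addr ... */

	r = radeon_bo_reserve(bo, false);
	if (likely(r == 0)) {
		radeon_bo_unpin(bo);	/* drops NO_EVICT and the lpfn clamp */
		radeon_bo_unreserve(bo);
	}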
==== radeon_bo_list_validate(): Rev 5078 line 420 / Rev 5271 line 466 ====
 
 int radeon_bo_list_validate(struct radeon_device *rdev,
 			    struct ww_acquire_ctx *ticket,
 			    struct list_head *head, int ring)
 {
-	struct radeon_cs_reloc *lobj;
-	struct radeon_bo *bo;
+	struct radeon_bo_list *lobj;
+	struct list_head duplicates;
 	int r;
 	u64 bytes_moved = 0, initial_bytes_moved;
 	u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);
 
-	r = ttm_eu_reserve_buffers(ticket, head);
+	INIT_LIST_HEAD(&duplicates);
+	r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates);
 	if (unlikely(r != 0)) {
 		return r;
 	}
 
 	list_for_each_entry(lobj, head, tv.head) {
-		bo = lobj->robj;
+		struct radeon_bo *bo = lobj->robj;
 		if (!bo->pin_count) {
 			u32 domain = lobj->prefered_domains;
+			u32 allowed = lobj->allowed_domains;
 			u32 current_domain =
 				radeon_mem_type_to_domain(bo->tbo.mem.mem_type);
+
+			WARN_ONCE(bo->gem_base.dumb,
+				  "GPU use of dumb buffer is illegal.\n");
 
 			/* Check if this buffer will be moved and don't move it
 			 * if we have moved too many buffers for this IB already.
 			 *
 			 * Note that this allows moving at least one buffer of
 			 * any size, because it doesn't take the current "bo"
 			 * into account. We don't want to disallow buffer moves
 			 * completely.
 			 */
-			if ((lobj->allowed_domains & current_domain) != 0 &&
+			if ((allowed & current_domain) != 0 &&
 			    (domain & current_domain) == 0 && /* will be moved */
 			    bytes_moved > bytes_moved_threshold) {
 				/* don't move it */
 				domain = current_domain;
 			}
 
 		retry:
 			radeon_ttm_placement_from_domain(bo, domain);
 			if (ring == R600_RING_TYPE_UVD_INDEX)
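The region the viewer elides between this hunk and the next throttles migrations by sampling rdev->num_bytes_moved around each validate. In this era of the driver it read roughly as follows (reconstructed from memory as context for the hunks above and below, not part of this diff):

			initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
			r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
			bytes_moved += atomic64_read(&rdev->num_bytes_moved) -
				       initial_bytes_moved;

			if (unlikely(r)) {
				if (r != -ERESTARTSYS &&
				    domain != lobj->allowed_domains) {
					domain = lobj->allowed_domains;
					goto retry;
				}
				ttm_eu_backoff_reservation(ticket, head);
				return r;
			}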
==== radeon_bo_list_validate(), end: Rev 5078 line 476 / Rev 5271 line 527 ====
 			}
 		}
 		lobj->gpu_offset = radeon_bo_gpu_offset(bo);
 		lobj->tiling_flags = bo->tiling_flags;
 	}
+
+	list_for_each_entry(lobj, &duplicates, tv.head) {
+		lobj->gpu_offset = radeon_bo_gpu_offset(lobj->robj);
+		lobj->tiling_flags = lobj->robj->tiling_flags;
+	}
+
 	return 0;
 }
 
 int radeon_bo_get_surface_reg(struct radeon_bo *bo)
==== radeon_bo_wait(): Rev 5078 line 676 / Rev 5271 line 733 ====
 	int r;
 
 	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, NULL);
 	if (unlikely(r != 0))
 		return r;
-	spin_lock(&bo->tbo.bdev->fence_lock);
 	if (mem_type)
 		*mem_type = bo->tbo.mem.mem_type;
-	if (bo->tbo.sync_obj)
-		r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
-	spin_unlock(&bo->tbo.bdev->fence_lock);
+
+	r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
 	ttm_bo_unreserve(&bo->tbo);
 	return r;
 }
+
+/**
+ * radeon_bo_fence - add fence to buffer object
+ *
+ * @bo: buffer object in question
+ * @fence: fence to add
+ * @shared: true if fence should be added shared
+ *
+ */
+void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
+		     bool shared)
+{
+	struct reservation_object *resv = bo->tbo.resv;
+
+	if (shared)
+		reservation_object_add_shared_fence(resv, &fence->base);
+	else
+		reservation_object_add_excl_fence(resv, &fence->base);
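With fences now stored in the reservation object rather than in TTM's sync_obj, radeon_bo_fence becomes the single attach point. A brief usage sketch (the fence would come from a prior command submission; shared vs. exclusive chooses reader vs. writer semantics):

	/* A read-only GPU job: attach shared, other readers won't stall. */
	radeon_bo_fence(bo, fence, true);

	/* A GPU write: attach exclusive, all later users must wait. */
	radeon_bo_fence(bo, fence, false);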