Diff between Rev 5346 and Rev 6104. Lines only in Rev 5346 are prefixed with "-"; lines only in Rev 6104 with "+".
/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse
 *    Thomas Hellstrom
 *    Dave Airlie
 */
#include <linux/list.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon.h"
#include "radeon_trace.h"


int radeon_ttm_init(struct radeon_device *rdev);
void radeon_ttm_fini(struct radeon_device *rdev);
static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);

/*
 * To exclude mutual BO access we rely on bo_reserve exclusion, as all
 * functions call it.
 */
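/*
 * Illustrative sketch (not part of this revision): the bo_reserve
 * exclusion mentioned above means a caller brackets any BO state access
 * with a reserve/unreserve pair, e.g.:
 *
 *    int r = radeon_bo_reserve(bo, false);
 *    if (unlikely(r != 0))
 *        return r;
 *    radeon_bo_get_tiling_flags(bo, &tiling_flags, &pitch);
 *    radeon_bo_unreserve(bo);
 *
 * radeon_bo_reserve()/radeon_bo_unreserve() are the wrappers around
 * ttm_bo_reserve()/ttm_bo_unreserve() used elsewhere in this file.
 */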

static void radeon_update_memory_usage(struct radeon_bo *bo,
                                       unsigned mem_type, int sign)
{
    struct radeon_device *rdev = bo->rdev;
    u64 size = (u64)bo->tbo.num_pages << PAGE_SHIFT;

    switch (mem_type) {
    case TTM_PL_TT:
        if (sign > 0)
            __atomic_add_fetch(&rdev->gtt_usage.counter, size, __ATOMIC_RELAXED);
        else
            __atomic_sub_fetch(&rdev->gtt_usage.counter, size, __ATOMIC_RELAXED);
        break;
    case TTM_PL_VRAM:
        if (sign > 0)
            __atomic_add_fetch(&rdev->vram_usage.counter, size, __ATOMIC_RELAXED);
        else
            __atomic_sub_fetch(&rdev->vram_usage.counter, size, __ATOMIC_RELAXED);
        break;
    }
}

static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
    struct radeon_bo *bo;

    bo = container_of(tbo, struct radeon_bo, tbo);

    radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1);

    mutex_lock(&bo->rdev->gem.mutex);
    list_del_init(&bo->list);
    mutex_unlock(&bo->rdev->gem.mutex);
    radeon_bo_clear_surface_reg(bo);
    WARN_ON(!list_empty(&bo->va));
    drm_gem_object_release(&bo->gem_base);
    kfree(bo);
}

bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
{
    if (bo->destroy == &radeon_ttm_bo_destroy)
        return true;
    return false;
}

void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
{
    u32 c = 0, i;

    rbo->placement.placement = rbo->placements;
    rbo->placement.busy_placement = rbo->placements;
    if (domain & RADEON_GEM_DOMAIN_VRAM) {
        /* Try placing BOs which don't need CPU access outside of the
         * CPU accessible part of VRAM
         */
        if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
            rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) {
            rbo->placements[c].fpfn =
                rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
            rbo->placements[c++].flags = TTM_PL_FLAG_WC |
                                         TTM_PL_FLAG_UNCACHED |
                                         TTM_PL_FLAG_VRAM;
        }

        rbo->placements[c].fpfn = 0;
        rbo->placements[c++].flags = TTM_PL_FLAG_WC |
                                     TTM_PL_FLAG_UNCACHED |
                                     TTM_PL_FLAG_VRAM;
    }

    if (domain & RADEON_GEM_DOMAIN_GTT) {
        if (rbo->flags & RADEON_GEM_GTT_UC) {
            rbo->placements[c].fpfn = 0;
            rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
                                         TTM_PL_FLAG_TT;

        } else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
                   (rbo->rdev->flags & RADEON_IS_AGP)) {
            rbo->placements[c].fpfn = 0;
            rbo->placements[c++].flags = TTM_PL_FLAG_WC |
                                         TTM_PL_FLAG_UNCACHED |
                                         TTM_PL_FLAG_TT;
        } else {
            rbo->placements[c].fpfn = 0;
            rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
                                         TTM_PL_FLAG_TT;
        }
    }

    if (domain & RADEON_GEM_DOMAIN_CPU) {
        if (rbo->flags & RADEON_GEM_GTT_UC) {
            rbo->placements[c].fpfn = 0;
            rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
                                         TTM_PL_FLAG_SYSTEM;

        } else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
                   rbo->rdev->flags & RADEON_IS_AGP) {
            rbo->placements[c].fpfn = 0;
            rbo->placements[c++].flags = TTM_PL_FLAG_WC |
                                         TTM_PL_FLAG_UNCACHED |
                                         TTM_PL_FLAG_SYSTEM;
        } else {
            rbo->placements[c].fpfn = 0;
            rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
                                         TTM_PL_FLAG_SYSTEM;
        }
    }
    if (!c) {
        rbo->placements[c].fpfn = 0;
        rbo->placements[c++].flags = TTM_PL_MASK_CACHING |
                                     TTM_PL_FLAG_SYSTEM;
    }

    rbo->placement.num_placement = c;
    rbo->placement.num_busy_placement = c;

    for (i = 0; i < c; ++i) {
        if ((rbo->flags & RADEON_GEM_CPU_ACCESS) &&
            (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
            !rbo->placements[i].fpfn)
            rbo->placements[i].lpfn =
                rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
        else
            rbo->placements[i].lpfn = 0;
    }

-
-    /*
-     * Use two-ended allocation depending on the buffer size to
-     * improve fragmentation quality.
-     * 512kb was measured as the most optimal number.
-     */
-    if (rbo->tbo.mem.size > 512 * 1024) {
-        for (i = 0; i < c; i++) {
-            rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
-        }
-    }
}
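/*
 * Illustrative sketch (not part of this revision): for a BO created with
 * domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT and no special
 * flags, the function above fills the placement list in order:
 *
 *    placements[0] = WC | UNCACHED | VRAM   (fpfn = 0)
 *    placements[1] = CACHED | TT            (or WC | UNCACHED on AGP)
 *
 * TTM walks this list front to back, so VRAM is preferred and GTT is the
 * fallback/eviction target.
 */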

int radeon_bo_create(struct radeon_device *rdev,
                     unsigned long size, int byte_align, bool kernel,
                     u32 domain, u32 flags, struct sg_table *sg,
                     struct reservation_object *resv,
                     struct radeon_bo **bo_ptr)
{
    struct radeon_bo *bo;
    enum ttm_bo_type type;
    unsigned long page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
    size_t acc_size;
    int r;

    size = ALIGN(size, PAGE_SIZE);

    if (kernel) {
        type = ttm_bo_type_kernel;
    } else if (sg) {
        type = ttm_bo_type_sg;
    } else {
        type = ttm_bo_type_device;
    }
    *bo_ptr = NULL;

    acc_size = ttm_bo_dma_acc_size(&rdev->mman.bdev, size,
                                   sizeof(struct radeon_bo));

    bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL);
    if (bo == NULL)
        return -ENOMEM;
    r = drm_gem_object_init(rdev->ddev, &bo->gem_base, size);
    if (unlikely(r)) {
        kfree(bo);
        return r;
    }
    bo->rdev = rdev;
    bo->surface_reg = -1;
    INIT_LIST_HEAD(&bo->list);
    INIT_LIST_HEAD(&bo->va);
    bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM |
                                   RADEON_GEM_DOMAIN_GTT |
                                   RADEON_GEM_DOMAIN_CPU);

    bo->flags = flags;
    /* PCI GART is always snooped */
    if (!(rdev->flags & RADEON_IS_PCIE))
        bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
+
+    /* Write-combined CPU mappings of GTT cause GPU hangs with RV6xx
+     * See https://bugs.freedesktop.org/show_bug.cgi?id=91268
+     */
+    if (rdev->family >= CHIP_RV610 && rdev->family <= CHIP_RV635)
+        bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);

#ifdef CONFIG_X86_32
    /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
     * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
     */
+    bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
+#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
+    /* Don't try to enable write-combining when it can't work, or things
+     * may be slow
+     * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
+     */
+
+#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
+    thanks to write-combining
+
-    bo->flags &= ~RADEON_GEM_GTT_WC;
+    if (bo->flags & RADEON_GEM_GTT_WC)
+        DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
+                      "better performance thanks to write-combining\n");
+    bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
#endif

    radeon_ttm_placement_from_domain(bo, domain);
    /* Kernel allocations are uninterruptible */
    down_read(&rdev->pm.mclk_lock);
    r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
                    &bo->placement, page_align, !kernel, NULL,
                    acc_size, sg, resv, &radeon_ttm_bo_destroy);
    up_read(&rdev->pm.mclk_lock);
    if (unlikely(r != 0)) {
        return r;
    }
    *bo_ptr = bo;

    trace_radeon_bo_create(bo);

    return 0;
}
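/*
 * Illustrative sketch (not part of this revision): a typical kernel
 * allocation through radeon_bo_create(), here for a small GART buffer.
 * Error handling is abbreviated.
 *
 *    struct radeon_bo *bo;
 *    u64 gpu_addr;
 *    int r;
 *
 *    r = radeon_bo_create(rdev, 4096, PAGE_SIZE, true,
 *                         RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL, &bo);
 *    if (r)
 *        return r;
 *    r = radeon_bo_reserve(bo, false);
 *    if (r == 0) {
 *        r = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_GTT, &gpu_addr);
 *        radeon_bo_unreserve(bo);
 *    }
 *    if (r)
 *        radeon_bo_unref(&bo);
 */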

int radeon_bo_kmap(struct radeon_bo *bo, void **ptr)
{
    bool is_iomem;
    int r;

    if (bo->kptr) {
        if (ptr) {
            *ptr = bo->kptr;
        }
        return 0;
    }
    r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
    if (r) {
        return r;
    }
    bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
    if (ptr) {
        *ptr = bo->kptr;
    }
    radeon_bo_check_tiling(bo, 0, 0);
    return 0;
}

void radeon_bo_kunmap(struct radeon_bo *bo)
{
    if (bo->kptr == NULL)
        return;
    bo->kptr = NULL;
    radeon_bo_check_tiling(bo, 0, 0);
    ttm_bo_kunmap(&bo->kmap);
}
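/*
 * Illustrative sketch (not part of this revision): CPU access to a
 * reserved BO via the kmap pair above:
 *
 *    void *ptr;
 *
 *    r = radeon_bo_kmap(bo, &ptr);
 *    if (r == 0) {
 *        memset(ptr, 0, radeon_bo_size(bo));
 *        radeon_bo_kunmap(bo);
 *    }
 *
 * radeon_bo_kmap() caches the mapping in bo->kptr, so repeated calls are
 * cheap until radeon_bo_kunmap() clears it.
 */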

struct radeon_bo *radeon_bo_ref(struct radeon_bo *bo)
{
    if (bo == NULL)
        return NULL;

    ttm_bo_reference(&bo->tbo);
    return bo;
}

void radeon_bo_unref(struct radeon_bo **bo)
{
    struct ttm_buffer_object *tbo;
    struct radeon_device *rdev;

    if ((*bo) == NULL)
        return;
    rdev = (*bo)->rdev;
    tbo = &((*bo)->tbo);
    ttm_bo_unref(&tbo);
    if (tbo == NULL)
        *bo = NULL;
}

int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
                             u64 *gpu_addr)
{
    int r, i;

    if (bo->pin_count) {
        bo->pin_count++;
        if (gpu_addr)
            *gpu_addr = radeon_bo_gpu_offset(bo);

        if (max_offset != 0) {
            u64 domain_start;

            if (domain == RADEON_GEM_DOMAIN_VRAM)
                domain_start = bo->rdev->mc.vram_start;
            else
                domain_start = bo->rdev->mc.gtt_start;
            WARN_ON_ONCE(max_offset <
                         (radeon_bo_gpu_offset(bo) - domain_start));
        }

        return 0;
    }
    radeon_ttm_placement_from_domain(bo, domain);
    for (i = 0; i < bo->placement.num_placement; i++) {
        /* force to pin into visible video ram */
        if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
            !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
            (!max_offset || max_offset > bo->rdev->mc.visible_vram_size))
            bo->placements[i].lpfn =
                bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
        else
            bo->placements[i].lpfn = max_offset >> PAGE_SHIFT;

        bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
    }

    r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
    if (likely(r == 0)) {
        bo->pin_count = 1;
        if (gpu_addr != NULL)
            *gpu_addr = radeon_bo_gpu_offset(bo);
        if (domain == RADEON_GEM_DOMAIN_VRAM)
            bo->rdev->vram_pin_size += radeon_bo_size(bo);
        else
            bo->rdev->gart_pin_size += radeon_bo_size(bo);
    } else {
        dev_err(bo->rdev->dev, "%p pin failed\n", bo);
    }
    return r;
}

int radeon_bo_pin(struct radeon_bo *bo, u32 domain, u64 *gpu_addr)
{
    return radeon_bo_pin_restricted(bo, domain, 0, gpu_addr);
}

int radeon_bo_unpin(struct radeon_bo *bo)
{
    int r, i;

    if (!bo->pin_count) {
        dev_warn(bo->rdev->dev, "%p unpin not necessary\n", bo);
        return 0;
    }
    bo->pin_count--;
    if (bo->pin_count)
        return 0;
    for (i = 0; i < bo->placement.num_placement; i++) {
        bo->placements[i].lpfn = 0;
        bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
    }
    r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
    if (likely(r == 0)) {
        if (bo->tbo.mem.mem_type == TTM_PL_VRAM)
            bo->rdev->vram_pin_size -= radeon_bo_size(bo);
        else
            bo->rdev->gart_pin_size -= radeon_bo_size(bo);
    } else {
        dev_err(bo->rdev->dev, "%p validate failed for unpin\n", bo);
    }
    return r;
}
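/*
 * Illustrative sketch (not part of this revision): pin_count makes the
 * pin/unpin pair above reference counted, so nested users must balance
 * their calls:
 *
 *    radeon_bo_pin(bo, RADEON_GEM_DOMAIN_VRAM, &gpu_addr); // count 1
 *    radeon_bo_pin(bo, RADEON_GEM_DOMAIN_VRAM, NULL);      // count 2
 *    radeon_bo_unpin(bo);                                  // count 1
 *    radeon_bo_unpin(bo); // count 0, TTM_PL_FLAG_NO_EVICT dropped
 */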

int radeon_bo_init(struct radeon_device *rdev)
{
    /* Add an MTRR for the VRAM */
    DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
        rdev->mc.mc_vram_size >> 20,
        (unsigned long long)rdev->mc.aper_size >> 20);
    DRM_INFO("RAM width %dbits %cDR\n",
            rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
    return radeon_ttm_init(rdev);
}

void radeon_bo_fini(struct radeon_device *rdev)
{
//    radeon_ttm_fini(rdev);
//    arch_phys_wc_del(rdev->mc.vram_mtrr);
}

/* Returns how many bytes TTM can move per IB.
 */
static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev)
{
    u64 real_vram_size = rdev->mc.real_vram_size;
    u64 vram_usage = atomic64_read(&rdev->vram_usage);

    /* This function is based on the current VRAM usage.
     *
     * - If all of VRAM is free, allow relocating the number of bytes that
     *   is equal to 1/4 of the size of VRAM for this IB.
     *
     * - If more than one half of VRAM is occupied, only allow relocating
     *   1 MB of data for this IB.
     *
     * - From 0 to one half of used VRAM, the threshold decreases
     *   linearly.
     *        __________________
     * 1/4 of -|\               |
     * VRAM    | \              |
     *         |  \             |
     *         |   \            |
     *         |    \           |
     *         |     \          |
     *         |      \         |
     *         |       \________|1 MB
     *         |----------------|
     *    VRAM 0 %             100 %
     *         used            used
     *
     * Note: It's a threshold, not a limit. The threshold must be crossed
     * for buffer relocations to stop, so any buffer of an arbitrary size
     * can be moved as long as the threshold isn't crossed before
     * the relocation takes place. We don't want to disable buffer
     * relocations completely.
     *
     * The idea is that buffers should be placed in VRAM at creation time
     * and TTM should only do a minimum number of relocations during
     * command submission. In practice, you need to submit at least
     * a dozen IBs to move all buffers to VRAM if they are in GTT.
     *
     * Also, things can get pretty crazy under memory pressure and actual
     * VRAM usage can change a lot, so playing safe even at 50% does
     * consistently increase performance.
     */

    u64 half_vram = real_vram_size >> 1;
    u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
    u64 bytes_moved_threshold = half_free_vram >> 1;
    return max(bytes_moved_threshold, 1024*1024ull);
}
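/*
 * Worked example (illustrative numbers): with real_vram_size = 1 GiB and
 * vram_usage = 256 MiB, half_vram = 512 MiB and half_free_vram =
 * 512 - 256 = 256 MiB, so the threshold is 256 / 2 = 128 MiB. At 0%
 * usage the threshold is 256 MiB, i.e. 1/4 of VRAM; at 50% usage or
 * more, half_free_vram is 0 and max() clamps the result to the 1 MiB
 * floor.
 */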

int radeon_bo_list_validate(struct radeon_device *rdev,
                            struct ww_acquire_ctx *ticket,
                            struct list_head *head, int ring)
{
    struct radeon_bo_list *lobj;
    struct list_head duplicates;
    int r;
    u64 bytes_moved = 0, initial_bytes_moved;
    u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);

    INIT_LIST_HEAD(&duplicates);
    r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates);
    if (unlikely(r != 0)) {
        return r;
    }

    list_for_each_entry(lobj, head, tv.head) {
        struct radeon_bo *bo = lobj->robj;
        if (!bo->pin_count) {
            u32 domain = lobj->prefered_domains;
            u32 allowed = lobj->allowed_domains;
            u32 current_domain =
                radeon_mem_type_to_domain(bo->tbo.mem.mem_type);

            /* Check if this buffer will be moved and don't move it
             * if we have moved too many buffers for this IB already.
             *
             * Note that this allows moving at least one buffer of
             * any size, because it doesn't take the current "bo"
             * into account. We don't want to disallow buffer moves
             * completely.
             */
            if ((allowed & current_domain) != 0 &&
                (domain & current_domain) == 0 && /* will be moved */
                bytes_moved > bytes_moved_threshold) {
                /* don't move it */
                domain = current_domain;
            }

        retry:
            radeon_ttm_placement_from_domain(bo, domain);
            if (ring == R600_RING_TYPE_UVD_INDEX)
                radeon_uvd_force_into_uvd_segment(bo, allowed);

            initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
            r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
            bytes_moved += atomic64_read(&rdev->num_bytes_moved) -
                           initial_bytes_moved;

            if (unlikely(r)) {
                if (r != -ERESTARTSYS &&
                    domain != lobj->allowed_domains) {
                    domain = lobj->allowed_domains;
                    goto retry;
                }
                ttm_eu_backoff_reservation(ticket, head);
                return r;
            }
        }
        lobj->gpu_offset = radeon_bo_gpu_offset(bo);
        lobj->tiling_flags = bo->tiling_flags;
    }

    list_for_each_entry(lobj, &duplicates, tv.head) {
        lobj->gpu_offset = radeon_bo_gpu_offset(lobj->robj);
        lobj->tiling_flags = lobj->robj->tiling_flags;
    }

    return 0;
}

int radeon_bo_get_surface_reg(struct radeon_bo *bo)
{
    struct radeon_device *rdev = bo->rdev;
    struct radeon_surface_reg *reg;
    struct radeon_bo *old_object;
    int steal;
    int i;

    lockdep_assert_held(&bo->tbo.resv->lock.base);

    if (!bo->tiling_flags)
        return 0;

    if (bo->surface_reg >= 0) {
        reg = &rdev->surface_regs[bo->surface_reg];
        i = bo->surface_reg;
        goto out;
    }

    steal = -1;
    for (i = 0; i < RADEON_GEM_MAX_SURFACES; i++) {

        reg = &rdev->surface_regs[i];
        if (!reg->bo)
            break;

        old_object = reg->bo;
        if (old_object->pin_count == 0)
            steal = i;
    }

    /* if we are all out */
    if (i == RADEON_GEM_MAX_SURFACES) {
        if (steal == -1)
            return -ENOMEM;
        /* find someone with a surface reg and nuke their BO */
        reg = &rdev->surface_regs[steal];
        old_object = reg->bo;
        /* blow away the mapping */
        DRM_DEBUG("stealing surface reg %d from %p\n", steal, old_object);
        ttm_bo_unmap_virtual(&old_object->tbo);
        old_object->surface_reg = -1;
        i = steal;
    }

    bo->surface_reg = i;
    reg->bo = bo;

out:
    radeon_set_surface_reg(rdev, i, bo->tiling_flags, bo->pitch,
                           bo->tbo.mem.start << PAGE_SHIFT,
                           bo->tbo.num_pages << PAGE_SHIFT);
    return 0;
}

static void radeon_bo_clear_surface_reg(struct radeon_bo *bo)
{
    struct radeon_device *rdev = bo->rdev;
    struct radeon_surface_reg *reg;

    if (bo->surface_reg == -1)
        return;

    reg = &rdev->surface_regs[bo->surface_reg];
    radeon_clear_surface_reg(rdev, bo->surface_reg);

    reg->bo = NULL;
    bo->surface_reg = -1;
}

int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
                               uint32_t tiling_flags, uint32_t pitch)
{
    struct radeon_device *rdev = bo->rdev;
    int r;

    if (rdev->family >= CHIP_CEDAR) {
        unsigned bankw, bankh, mtaspect, tilesplit, stilesplit;

        bankw = (tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
        bankh = (tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
        mtaspect = (tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
        tilesplit = (tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
        stilesplit = (tiling_flags >> RADEON_TILING_EG_STENCIL_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_STENCIL_TILE_SPLIT_MASK;
        switch (bankw) {
        case 0:
        case 1:
        case 2:
        case 4:
        case 8:
            break;
        default:
            return -EINVAL;
        }
        switch (bankh) {
        case 0:
        case 1:
        case 2:
        case 4:
        case 8:
            break;
        default:
            return -EINVAL;
        }
        switch (mtaspect) {
        case 0:
        case 1:
        case 2:
        case 4:
        case 8:
            break;
        default:
            return -EINVAL;
        }
        if (tilesplit > 6) {
            return -EINVAL;
        }
        if (stilesplit > 6) {
            return -EINVAL;
        }
    }
    r = radeon_bo_reserve(bo, false);
    if (unlikely(r != 0))
        return r;
    bo->tiling_flags = tiling_flags;
    bo->pitch = pitch;
    radeon_bo_unreserve(bo);
    return 0;
}
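/*
 * Illustrative sketch (not part of this revision): the evergreen
 * tiling_flags word validated above is built from shift/mask pairs, e.g.
 * requesting macro tiling with bank width 1 and bank height 2:
 *
 *    uint32_t tiling_flags = RADEON_TILING_MACRO |
 *        (1 << RADEON_TILING_EG_BANKW_SHIFT) |
 *        (2 << RADEON_TILING_EG_BANKH_SHIFT);
 *
 *    r = radeon_bo_set_tiling_flags(bo, tiling_flags, pitch);
 *
 * The switch statements above reject any bankw/bankh/mtaspect value that
 * is not 0, 1, 2, 4 or 8.
 */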

void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
                                uint32_t *tiling_flags,
                                uint32_t *pitch)
{
    lockdep_assert_held(&bo->tbo.resv->lock.base);

    if (tiling_flags)
        *tiling_flags = bo->tiling_flags;
    if (pitch)
        *pitch = bo->pitch;
}

int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
                           bool force_drop)
{
    if (!force_drop)
        lockdep_assert_held(&bo->tbo.resv->lock.base);

    if (!(bo->tiling_flags & RADEON_TILING_SURFACE))
        return 0;

    if (force_drop) {
        radeon_bo_clear_surface_reg(bo);
        return 0;
    }

    if (bo->tbo.mem.mem_type != TTM_PL_VRAM) {
        if (!has_moved)
            return 0;

        if (bo->surface_reg >= 0)
            radeon_bo_clear_surface_reg(bo);
        return 0;
    }

    if ((bo->surface_reg >= 0) && !has_moved)
        return 0;

    return radeon_bo_get_surface_reg(bo);
}

void radeon_bo_move_notify(struct ttm_buffer_object *bo,
                           struct ttm_mem_reg *new_mem)
{
    struct radeon_bo *rbo;

    if (!radeon_ttm_bo_is_radeon_bo(bo))
        return;

    rbo = container_of(bo, struct radeon_bo, tbo);
    radeon_bo_check_tiling(rbo, 0, 1);
    radeon_vm_bo_invalidate(rbo->rdev, rbo);

    /* update statistics */
    if (!new_mem)
        return;

    radeon_update_memory_usage(rbo, bo->mem.mem_type, -1);
    radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
}

int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
{
    int r;

    r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, NULL);
    if (unlikely(r != 0))
        return r;
    if (mem_type)
        *mem_type = bo->tbo.mem.mem_type;

    r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
    ttm_bo_unreserve(&bo->tbo);
    return r;
}

/**
 * radeon_bo_fence - add fence to buffer object
 *
 * @bo: buffer object in question
 * @fence: fence to add
 * @shared: true if fence should be added shared
 *
 */
void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
                     bool shared)
{
    struct reservation_object *resv = bo->tbo.resv;

    if (shared)
        reservation_object_add_shared_fence(resv, &fence->base);
    else
        reservation_object_add_excl_fence(resv, &fence->base);
}
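/*
 * Illustrative sketch (not part of this revision): after submitting GPU
 * work against a reserved BO, the caller attaches the fence with the
 * access mode it needs; a shared fence lets other readers proceed
 * concurrently, an exclusive fence serializes everyone behind a write:
 *
 *    radeon_bo_fence(bo, fence, true);   // read access, shared
 *    radeon_bo_fence(bo, fence, false);  // write access, exclusive
 */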