Rev 5346 | Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 5346 | Rev 6104 | ||
---|---|---|---|
Line 92... | Line 92... | ||
92 | return false; |
92 | return false; |
93 | } |
93 | } |
Line 94... | Line 94... | ||
94 | 94 | ||
95 | void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) |
95 | void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain) |
96 | { |
96 | { |
Line 97... | Line 97... | ||
97 | u32 c = 0, i; |
97 | u32 c = 0, i; |
98 | 98 | ||
99 | rbo->placement.placement = rbo->placements; |
99 | rbo->placement.placement = rbo->placements; |
100 | rbo->placement.busy_placement = rbo->placements; |
100 | rbo->placement.busy_placement = rbo->placements; |
101 | if (domain & RADEON_GEM_DOMAIN_VRAM) { |
101 | if (domain & RADEON_GEM_DOMAIN_VRAM) { |
102 | /* Try placing BOs which don't need CPU access outside of the |
102 | /* Try placing BOs which don't need CPU access outside of the |
103 | * CPU accessible part of VRAM |
103 | * CPU accessible part of VRAM |
104 | */ |
104 | */ |
105 | if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) && |
105 | if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) && |
106 | rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) { |
106 | rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) { |
107 | rbo->placements[c].fpfn = |
107 | rbo->placements[c].fpfn = |
108 | rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT; |
108 | rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT; |
109 | rbo->placements[c++].flags = TTM_PL_FLAG_WC | |
109 | rbo->placements[c++].flags = TTM_PL_FLAG_WC | |
110 | TTM_PL_FLAG_UNCACHED | |
110 | TTM_PL_FLAG_UNCACHED | |
Line 111... | Line 111... | ||
111 | TTM_PL_FLAG_VRAM; |
111 | TTM_PL_FLAG_VRAM; |
112 | } |
112 | } |
113 | 113 | ||
114 | rbo->placements[c].fpfn = 0; |
114 | rbo->placements[c].fpfn = 0; |
115 | rbo->placements[c++].flags = TTM_PL_FLAG_WC | |
115 | rbo->placements[c++].flags = TTM_PL_FLAG_WC | |
Line 116... | Line 116... | ||
116 | TTM_PL_FLAG_UNCACHED | |
116 | TTM_PL_FLAG_UNCACHED | |
117 | TTM_PL_FLAG_VRAM; |
117 | TTM_PL_FLAG_VRAM; |
118 | } |
118 | } |
119 | 119 | ||
120 | if (domain & RADEON_GEM_DOMAIN_GTT) { |
120 | if (domain & RADEON_GEM_DOMAIN_GTT) { |
Line 127... | Line 127... | ||
127 | (rbo->rdev->flags & RADEON_IS_AGP)) { |
127 | (rbo->rdev->flags & RADEON_IS_AGP)) { |
128 | rbo->placements[c].fpfn = 0; |
128 | rbo->placements[c].fpfn = 0; |
129 | rbo->placements[c++].flags = TTM_PL_FLAG_WC | |
129 | rbo->placements[c++].flags = TTM_PL_FLAG_WC | |
130 | TTM_PL_FLAG_UNCACHED | |
130 | TTM_PL_FLAG_UNCACHED | |
131 | TTM_PL_FLAG_TT; |
131 | TTM_PL_FLAG_TT; |
132 | } else { |
132 | } else { |
133 | rbo->placements[c].fpfn = 0; |
133 | rbo->placements[c].fpfn = 0; |
134 | rbo->placements[c++].flags = TTM_PL_FLAG_CACHED | |
134 | rbo->placements[c++].flags = TTM_PL_FLAG_CACHED | |
135 | TTM_PL_FLAG_TT; |
135 | TTM_PL_FLAG_TT; |
136 | } |
136 | } |
137 | } |
137 | } |
Line 138... | Line 138... | ||
138 | 138 | ||
139 | if (domain & RADEON_GEM_DOMAIN_CPU) { |
139 | if (domain & RADEON_GEM_DOMAIN_CPU) { |
140 | if (rbo->flags & RADEON_GEM_GTT_UC) { |
140 | if (rbo->flags & RADEON_GEM_GTT_UC) { |
141 | rbo->placements[c].fpfn = 0; |
141 | rbo->placements[c].fpfn = 0; |
142 | rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED | |
142 | rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED | |
Line 146... | Line 146... | ||
146 | rbo->rdev->flags & RADEON_IS_AGP) { |
146 | rbo->rdev->flags & RADEON_IS_AGP) { |
147 | rbo->placements[c].fpfn = 0; |
147 | rbo->placements[c].fpfn = 0; |
148 | rbo->placements[c++].flags = TTM_PL_FLAG_WC | |
148 | rbo->placements[c++].flags = TTM_PL_FLAG_WC | |
149 | TTM_PL_FLAG_UNCACHED | |
149 | TTM_PL_FLAG_UNCACHED | |
150 | TTM_PL_FLAG_SYSTEM; |
150 | TTM_PL_FLAG_SYSTEM; |
151 | } else { |
151 | } else { |
152 | rbo->placements[c].fpfn = 0; |
152 | rbo->placements[c].fpfn = 0; |
153 | rbo->placements[c++].flags = TTM_PL_FLAG_CACHED | |
153 | rbo->placements[c++].flags = TTM_PL_FLAG_CACHED | |
154 | TTM_PL_FLAG_SYSTEM; |
154 | TTM_PL_FLAG_SYSTEM; |
155 | } |
155 | } |
156 | } |
156 | } |
157 | if (!c) { |
157 | if (!c) { |
158 | rbo->placements[c].fpfn = 0; |
158 | rbo->placements[c].fpfn = 0; |
159 | rbo->placements[c++].flags = TTM_PL_MASK_CACHING | |
159 | rbo->placements[c++].flags = TTM_PL_MASK_CACHING | |
160 | TTM_PL_FLAG_SYSTEM; |
160 | TTM_PL_FLAG_SYSTEM; |
161 | } |
161 | } |
Line 162... | Line 162... | ||
162 | 162 | ||
163 | rbo->placement.num_placement = c; |
163 | rbo->placement.num_placement = c; |
Line 164... | Line 164... | ||
164 | rbo->placement.num_busy_placement = c; |
164 | rbo->placement.num_busy_placement = c; |
165 | 165 | ||
166 | for (i = 0; i < c; ++i) { |
166 | for (i = 0; i < c; ++i) { |
167 | if ((rbo->flags & RADEON_GEM_CPU_ACCESS) && |
167 | if ((rbo->flags & RADEON_GEM_CPU_ACCESS) && |
168 | (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) && |
168 | (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) && |
169 | !rbo->placements[i].fpfn) |
169 | !rbo->placements[i].fpfn) |
170 | rbo->placements[i].lpfn = |
170 | rbo->placements[i].lpfn = |
171 | rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT; |
171 | rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT; |
172 | else |
172 | else |
173 | rbo->placements[i].lpfn = 0; |
- | |
174 | } |
- | |
175 | - | ||
176 | /* |
- | |
177 | * Use two-ended allocation depending on the buffer size to |
- | |
178 | * improve fragmentation quality. |
- | |
179 | * 512kb was measured as the most optimal number. |
- | |
180 | */ |
- | |
181 | if (rbo->tbo.mem.size > 512 * 1024) { |
- | |
182 | for (i = 0; i < c; i++) { |
- | |
183 | rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN; |
- | |
184 | } |
173 | rbo->placements[i].lpfn = 0; |
Line 185... | Line 174... | ||
185 | } |
174 | } |
186 | } |
175 | } |
187 | 176 | ||
188 | int radeon_bo_create(struct radeon_device *rdev, |
177 | int radeon_bo_create(struct radeon_device *rdev, |
189 | unsigned long size, int byte_align, bool kernel, |
178 | unsigned long size, int byte_align, bool kernel, |
190 | u32 domain, u32 flags, struct sg_table *sg, |
179 | u32 domain, u32 flags, struct sg_table *sg, |
191 | struct reservation_object *resv, |
180 | struct reservation_object *resv, |
192 | struct radeon_bo **bo_ptr) |
181 | struct radeon_bo **bo_ptr) |
193 | { |
182 | { |
194 | struct radeon_bo *bo; |
183 | struct radeon_bo *bo; |
195 | enum ttm_bo_type type; |
184 | enum ttm_bo_type type; |
Line 196... | Line 185... | ||
196 | unsigned long page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT; |
185 | unsigned long page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT; |
Line 197... | Line 186... | ||
197 | size_t acc_size; |
186 | size_t acc_size; |
198 | int r; |
187 | int r; |
Line 209... | Line 198... | ||
209 | *bo_ptr = NULL; |
198 | *bo_ptr = NULL; |
Line 210... | Line 199... | ||
210 | 199 | ||
211 | acc_size = ttm_bo_dma_acc_size(&rdev->mman.bdev, size, |
200 | acc_size = ttm_bo_dma_acc_size(&rdev->mman.bdev, size, |
Line 212... | Line 201... | ||
212 | sizeof(struct radeon_bo)); |
201 | sizeof(struct radeon_bo)); |
213 | 202 | ||
214 | bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL); |
203 | bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL); |
215 | if (bo == NULL) |
204 | if (bo == NULL) |
216 | return -ENOMEM; |
205 | return -ENOMEM; |
217 | r = drm_gem_object_init(rdev->ddev, &bo->gem_base, size); |
206 | r = drm_gem_object_init(rdev->ddev, &bo->gem_base, size); |
218 | if (unlikely(r)) { |
207 | if (unlikely(r)) { |
219 | kfree(bo); |
208 | kfree(bo); |
220 | return r; |
209 | return r; |
221 | } |
210 | } |
222 | bo->rdev = rdev; |
211 | bo->rdev = rdev; |
223 | bo->surface_reg = -1; |
212 | bo->surface_reg = -1; |
224 | INIT_LIST_HEAD(&bo->list); |
213 | INIT_LIST_HEAD(&bo->list); |
225 | INIT_LIST_HEAD(&bo->va); |
214 | INIT_LIST_HEAD(&bo->va); |
226 | bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM | |
215 | bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM | |
Line 227... | Line 216... | ||
227 | RADEON_GEM_DOMAIN_GTT | |
216 | RADEON_GEM_DOMAIN_GTT | |
228 | RADEON_GEM_DOMAIN_CPU); |
217 | RADEON_GEM_DOMAIN_CPU); |
229 | 218 | ||
230 | bo->flags = flags; |
219 | bo->flags = flags; |
Line -... | Line 220... | ||
- | 220 | /* PCI GART is always snooped */ |
|
- | 221 | if (!(rdev->flags & RADEON_IS_PCIE)) |
|
- | 222 | bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); |
|
- | 223 | ||
- | 224 | /* Write-combined CPU mappings of GTT cause GPU hangs with RV6xx |
|
- | 225 | * See https://bugs.freedesktop.org/show_bug.cgi?id=91268 |
|
231 | /* PCI GART is always snooped */ |
226 | */ |
232 | if (!(rdev->flags & RADEON_IS_PCIE)) |
227 | if (rdev->family >= CHIP_RV610 && rdev->family <= CHIP_RV635) |
233 | bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); |
228 | bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); |
234 | 229 | ||
- | 230 | #ifdef CONFIG_X86_32 |
|
- | 231 | /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit |
|
- | 232 | * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 |
|
- | 233 | */ |
|
- | 234 | bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); |
|
- | 235 | #elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT) |
|
- | 236 | /* Don't try to enable write-combining when it can't work, or things |
|
- | 237 | * may be slow |
|
- | 238 | * See https://bugs.freedesktop.org/show_bug.cgi?id=88758 |
|
- | 239 | */ |
|
235 | #ifdef CONFIG_X86_32 |
240 | |
- | 241 | #warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \ |
|
- | 242 | thanks to write-combining |
|
- | 243 | ||
236 | /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit |
244 | if (bo->flags & RADEON_GEM_GTT_WC) |
Line 237... | Line 245... | ||
237 | * See https://bugs.freedesktop.org/show_bug.cgi?id=84627 |
245 | DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for " |
238 | */ |
246 | "better performance thanks to write-combining\n"); |
239 | bo->flags &= ~RADEON_GEM_GTT_WC; |
247 | bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC); |
240 | #endif |
248 | #endif |
241 | 249 | ||
242 | radeon_ttm_placement_from_domain(bo, domain); |
250 | radeon_ttm_placement_from_domain(bo, domain); |
Line 310... | Line 318... | ||
310 | if (tbo == NULL) |
318 | if (tbo == NULL) |
311 | *bo = NULL; |
319 | *bo = NULL; |
312 | } |
320 | } |
Line 313... | Line 321... | ||
313 | 321 | ||
314 | int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset, |
322 | int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset, |
315 | u64 *gpu_addr) |
323 | u64 *gpu_addr) |
316 | { |
324 | { |
- | 325 | int r, i; |
|
- | 326 | ||
- | 327 | if (bo->pin_count) { |
|
- | 328 | bo->pin_count++; |
|
- | 329 | if (gpu_addr) |
|
- | 330 | *gpu_addr = radeon_bo_gpu_offset(bo); |
|
- | 331 | ||
- | 332 | if (max_offset != 0) { |
|
- | 333 | u64 domain_start; |
|
- | 334 | ||
- | 335 | if (domain == RADEON_GEM_DOMAIN_VRAM) |
|
- | 336 | domain_start = bo->rdev->mc.vram_start; |
|
- | 337 | else |
|
- | 338 | domain_start = bo->rdev->mc.gtt_start; |
|
- | 339 | WARN_ON_ONCE(max_offset < |
|
- | 340 | (radeon_bo_gpu_offset(bo) - domain_start)); |
|
Line 317... | Line -... | ||
317 | int r, i; |
- | |
318 | - | ||
319 | if (bo->pin_count) { |
- | |
320 | bo->pin_count++; |
- | |
321 | if (gpu_addr) |
- | |
322 | *gpu_addr = radeon_bo_gpu_offset(bo); |
- | |
323 | - | ||
324 | if (max_offset != 0) { |
- | |
325 | u64 domain_start; |
- | |
326 | - | ||
327 | if (domain == RADEON_GEM_DOMAIN_VRAM) |
- | |
328 | domain_start = bo->rdev->mc.vram_start; |
- | |
329 | else |
- | |
330 | domain_start = bo->rdev->mc.gtt_start; |
- | |
331 | WARN_ON_ONCE(max_offset < |
- | |
332 | (radeon_bo_gpu_offset(bo) - domain_start)); |
- | |
333 | } |
341 | } |
334 | 342 | ||
335 | return 0; |
343 | return 0; |
336 | } |
344 | } |
337 | radeon_ttm_placement_from_domain(bo, domain); |
345 | radeon_ttm_placement_from_domain(bo, domain); |
338 | for (i = 0; i < bo->placement.num_placement; i++) { |
346 | for (i = 0; i < bo->placement.num_placement; i++) { |
339 | /* force to pin into visible video ram */ |
347 | /* force to pin into visible video ram */ |
340 | if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) && |
348 | if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) && |
341 | !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) && |
349 | !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) && |
342 | (!max_offset || max_offset > bo->rdev->mc.visible_vram_size)) |
350 | (!max_offset || max_offset > bo->rdev->mc.visible_vram_size)) |
343 | bo->placements[i].lpfn = |
351 | bo->placements[i].lpfn = |
344 | bo->rdev->mc.visible_vram_size >> PAGE_SHIFT; |
352 | bo->rdev->mc.visible_vram_size >> PAGE_SHIFT; |
Line 345... | Line 353... | ||
345 | else |
353 | else |
346 | bo->placements[i].lpfn = max_offset >> PAGE_SHIFT; |
354 | bo->placements[i].lpfn = max_offset >> PAGE_SHIFT; |
Line 347... | Line 355... | ||
347 | 355 | ||
348 | bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; |
356 | bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT; |
349 | } |
357 | } |
350 | 358 | ||
351 | r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); |
359 | r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false); |
352 | if (likely(r == 0)) { |
360 | if (likely(r == 0)) { |
353 | bo->pin_count = 1; |
361 | bo->pin_count = 1; |
354 | if (gpu_addr != NULL) |
362 | if (gpu_addr != NULL) |
355 | *gpu_addr = radeon_bo_gpu_offset(bo); |
363 | *gpu_addr = radeon_bo_gpu_offset(bo); |
356 | if (domain == RADEON_GEM_DOMAIN_VRAM) |
364 | if (domain == RADEON_GEM_DOMAIN_VRAM) |
357 | bo->rdev->vram_pin_size += radeon_bo_size(bo); |
365 | bo->rdev->vram_pin_size += radeon_bo_size(bo); |
358 | else |
366 | else |
359 | bo->rdev->gart_pin_size += radeon_bo_size(bo); |
367 | bo->rdev->gart_pin_size += radeon_bo_size(bo); |
360 | } else { |
368 | } else { |
Line 361... | Line 369... | ||
361 | dev_err(bo->rdev->dev, "%p pin failed\n", bo); |
369 | dev_err(bo->rdev->dev, "%p pin failed\n", bo); |
362 | } |
370 | } |
363 | return r; |
371 | return r; |
364 | } |
372 | } |
Line 365... | Line 373... | ||
365 | 373 | ||
366 | int radeon_bo_pin(struct radeon_bo *bo, u32 domain, u64 *gpu_addr) |
374 | int radeon_bo_pin(struct radeon_bo *bo, u32 domain, u64 *gpu_addr) |
367 | { |
375 | { |
Line 368... | Line 376... | ||
368 | return radeon_bo_pin_restricted(bo, domain, 0, gpu_addr); |
376 | return radeon_bo_pin_restricted(bo, domain, 0, gpu_addr); |
369 | } |
377 | } |
370 | 378 | ||
371 | int radeon_bo_unpin(struct radeon_bo *bo) |
379 | int radeon_bo_unpin(struct radeon_bo *bo) |
372 | { |
380 | { |
373 | int r, i; |
381 | int r, i; |
374 | 382 | ||
375 | if (!bo->pin_count) { |
383 | if (!bo->pin_count) { |
376 | dev_warn(bo->rdev->dev, "%p unpin not necessary\n", bo); |
384 | dev_warn(bo->rdev->dev, "%p unpin not necessary\n", bo); |
377 | return 0; |
385 | return 0; |
378 | } |
386 | } |
379 | bo->pin_count--; |
387 | bo->pin_count--; |
Line 733... | Line 741... | ||
733 | if (unlikely(r != 0)) |
741 | if (unlikely(r != 0)) |
734 | return r; |
742 | return r; |
735 | if (mem_type) |
743 | if (mem_type) |
736 | *mem_type = bo->tbo.mem.mem_type; |
744 | *mem_type = bo->tbo.mem.mem_type; |
Line 737... | Line 745... | ||
737 | 745 | ||
738 | r = ttm_bo_wait(&bo->tbo, true, true, no_wait); |
746 | r = ttm_bo_wait(&bo->tbo, true, true, no_wait); |
739 | ttm_bo_unreserve(&bo->tbo); |
747 | ttm_bo_unreserve(&bo->tbo); |
740 | return r; |
748 | return r; |
Line 741... | Line 749... | ||
741 | } |
749 | } |