/*
 * Copyright © 2010 Daniel Vetter
 * Copyright © 2011-2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/seq_file.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"

/**
 * DOC: Global GTT views
 *
 * Background and previous state
 *
 * Historically objects could exist (be bound) in global GTT space only as
 * singular instances with a view representing all of the object's backing pages
 * in a linear fashion. This view will be called a normal view.
 *
 * To support multiple views of the same object, where the number of mapped
 * pages is not equal to the backing store, or where the layout of the pages
 * is not linear, the concept of a GGTT view was added.
 *
 * One example of an alternative view is a stereo display driven by a single
 * image. In this case we would have a framebuffer looking like this
 * (2x2 pages):
 *
 *    12
 *    34
 *
 * Above would represent a normal GGTT view as normally mapped for GPU or CPU
 * rendering. In contrast, fed to the display engine would be an alternative
 * view which could look something like this:
 *
 *   1212
 *   3434
 *
 * In this example both the size and layout of pages in the alternative view
 * are different from the normal view.
 *
 * Implementation and usage
 *
 * GGTT views are implemented using VMAs and are distinguished via enum
 * i915_ggtt_view_type and struct i915_ggtt_view.
 *
 * A new flavour of core GEM functions which work with GGTT bound objects was
 * added with the _ggtt_ infix, and sometimes with _view postfix to avoid
 * renaming in large amounts of code. They take the struct i915_ggtt_view
 * parameter encapsulating all metadata required to implement a view.
 *
 * As a helper for callers which are only interested in the normal view,
 * a globally const i915_ggtt_view_normal singleton instance exists. All old
 * core GEM API functions, the ones not taking the view parameter, operate on,
 * or with, the normal GGTT view.
 *
 * Code wanting to add or use a new GGTT view needs to:
 *
 * 1. Add a new enum with a suitable name.
 * 2. Extend the metadata in the i915_ggtt_view structure if required.
 * 3. Add support to i915_get_vma_pages().
 *
 * New views are required to build a scatter-gather table from within the
 * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
 * exists for the lifetime of a VMA.
 *
 * Core API is designed to have copy semantics which means that passed in
 * struct i915_ggtt_view does not need to be persistent (left around after
 * calling the core API functions).
 *
 */

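/*
 * For illustration only: a caller wanting the rotated view of an object
 * would pass the rotated-view singleton defined below to one of the
 * _view-taking helpers, e.g.
 *
 *	vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj,
 *						     &i915_ggtt_view_rotated);
 *
 * (The helper named above is assumed, not guaranteed by this file; any
 * _view-taking lookup/create function works the same way.) Because the API
 * has copy semantics, a non-singleton view descriptor may live on the
 * caller's stack.
 */
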
static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);

const struct i915_ggtt_view i915_ggtt_view_normal;
const struct i915_ggtt_view i915_ggtt_view_rotated = {
	.type = I915_GGTT_VIEW_ROTATED
};

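/* Resolve the enable_ppgtt module parameter against what the hardware can
 * actually do. The return value is the effective mode: 0 = no PPGTT,
 * 1 = aliasing PPGTT, 2 = full 32b PPGTT, 3 = full 48b PPGTT.
 */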
static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
{
	bool has_aliasing_ppgtt;
	bool has_full_ppgtt;
	bool has_full_48bit_ppgtt;

	has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
	has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;
	has_full_48bit_ppgtt = IS_BROADWELL(dev) || INTEL_INFO(dev)->gen >= 9;

	if (intel_vgpu_active(dev))
		has_full_ppgtt = false; /* emulation is too hard */

	/*
	 * We don't allow disabling PPGTT for gen9+ as it's a requirement for
	 * execlists, the sole mechanism available to submit work.
	 */
	if (INTEL_INFO(dev)->gen < 9 &&
	    (enable_ppgtt == 0 || !has_aliasing_ppgtt))
		return 0;

	if (enable_ppgtt == 1)
		return 1;

	if (enable_ppgtt == 2 && has_full_ppgtt)
		return 2;

	if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
		return 3;

#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
		DRM_INFO("Disabling PPGTT because VT-d is on\n");
		return 0;
	}
#endif

	/* Early VLV doesn't have this */
	if (IS_VALLEYVIEW(dev) && dev->pdev->revision < 0xb) {
		DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
		return 0;
	}

	if (INTEL_INFO(dev)->gen >= 8 && i915.enable_execlists)
		return has_full_48bit_ppgtt ? 3 : 2;
	else
		return has_aliasing_ppgtt ? 1 : 0;
}

static int ppgtt_bind_vma(struct i915_vma *vma,
			  enum i915_cache_level cache_level,
			  u32 unused)
{
	u32 pte_flags = 0;

	/* Currently applicable only to VLV */
	if (vma->obj->gt_ro)
		pte_flags |= PTE_READ_ONLY;

	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
				cache_level, pte_flags);

	return 0;
}

static void ppgtt_unbind_vma(struct i915_vma *vma)
{
	vma->vm->clear_range(vma->vm,
			     vma->node.start,
			     vma->obj->base.size,
			     true);
}

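/* Gen8 PTEs do not carry cacheability bits directly; they select an entry
 * in the PPAT (page attribute table), hence the *_INDEX values below.
 */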
static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
				  enum i915_cache_level level,
				  bool valid)
{
	gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
	pte |= addr;

	switch (level) {
	case I915_CACHE_NONE:
		pte |= PPAT_UNCACHED_INDEX;
		break;
	case I915_CACHE_WT:
		pte |= PPAT_DISPLAY_ELLC_INDEX;
		break;
	default:
		pte |= PPAT_CACHED_INDEX;
		break;
	}

	return pte;
}

static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
				  const enum i915_cache_level level)
{
	gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
	pde |= addr;
	if (level != I915_CACHE_NONE)
		pde |= PPAT_CACHED_PDE_INDEX;
	else
		pde |= PPAT_UNCACHED_INDEX;
	return pde;
}

#define gen8_pdpe_encode gen8_pde_encode
#define gen8_pml4e_encode gen8_pde_encode

static gen6_pte_t snb_pte_encode(dma_addr_t addr,
				 enum i915_cache_level level,
				 bool valid, u32 unused)
{
	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
				 enum i915_cache_level level,
				 bool valid, u32 unused)
{
	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static gen6_pte_t byt_pte_encode(dma_addr_t addr,
				 enum i915_cache_level level,
				 bool valid, u32 flags)
{
	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	if (!(flags & PTE_READ_ONLY))
		pte |= BYT_PTE_WRITEABLE;

	if (level != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
				 enum i915_cache_level level,
				 bool valid, u32 unused)
{
	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	if (level != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

static gen6_pte_t iris_pte_encode(dma_addr_t addr,
				  enum i915_cache_level level,
				  bool valid, u32 unused)
{
	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}

static int __setup_page_dma(struct drm_device *dev,
			    struct i915_page_dma *p, gfp_t flags)
{
	struct device *device = &dev->pdev->dev;

	p->page = alloc_page(flags);
	if (!p->page)
		return -ENOMEM;

	p->daddr = page_to_phys(p->page);

	return 0;
}

static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
{
	return __setup_page_dma(dev, p, GFP_KERNEL);
}

static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
{
	if (WARN_ON(!p->page))
		return;

	__free_page(p->page);
	memset(p, 0, sizeof(*p));
}

static void *kmap_page_dma(struct i915_page_dma *p)
{
	return kmap_atomic(p->page);
}

/* We use the flushing unmap only with ppgtt structures:
 * page directories, page tables and scratch pages.
 */
static void kunmap_page_dma(struct drm_device *dev, void *vaddr)
{
	/* There are only few exceptions for gen >=6. chv and bxt.
	 * And we are not sure about the latter so play safe for now.
	 */
	if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
		drm_clflush_virt_range(vaddr, PAGE_SIZE);

	kunmap_atomic(vaddr);
}

#define kmap_px(px) kmap_page_dma(px_base(px))
#define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr))

#define setup_px(dev, px) setup_page_dma((dev), px_base(px))
#define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
#define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v))
#define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v))

static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
			  const uint64_t val)
{
	int i;
	uint64_t * const vaddr = kmap_page_dma(p);

	for (i = 0; i < 512; i++)
		vaddr[i] = val;

	kunmap_page_dma(dev, vaddr);
}

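/* Replicate a 32-bit value into both halves of a 64-bit word so the 64-bit
 * fill helper above can also be used for 32-bit (gen6) entries.
 */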
static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
			     const uint32_t val32)
{
	uint64_t v = val32;

	v = v << 32 | val32;

	fill_page_dma(dev, p, v);
}

static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev)
{
	struct i915_page_scratch *sp;
	int ret;

	sp = kzalloc(sizeof(*sp), GFP_KERNEL);
	if (sp == NULL)
		return ERR_PTR(-ENOMEM);

	ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO);
	if (ret) {
		kfree(sp);
		return ERR_PTR(ret);
	}

//	set_pages_uc(px_page(sp), 1);

	return sp;
}

static void free_scratch_page(struct drm_device *dev,
			      struct i915_page_scratch *sp)
{
//	set_pages_wb(px_page(sp), 1);

	cleanup_px(dev, sp);
	kfree(sp);
}

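/* A page table holds 512 8-byte PTEs on gen8 (mapping 2MB) or 1024 4-byte
 * PTEs on gen6 (mapping 4MB); used_ptes tracks which entries are live.
 */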
static struct i915_page_table *alloc_pt(struct drm_device *dev)
{
	struct i915_page_table *pt;
	const size_t count = INTEL_INFO(dev)->gen >= 8 ?
		GEN8_PTES : GEN6_PTES;
	int ret = -ENOMEM;

	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
	if (!pt)
		return ERR_PTR(-ENOMEM);

	pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
				GFP_KERNEL);

	if (!pt->used_ptes)
		goto fail_bitmap;

	ret = setup_px(dev, pt);
	if (ret)
		goto fail_page_m;

	return pt;

fail_page_m:
	kfree(pt->used_ptes);
fail_bitmap:
	kfree(pt);

	return ERR_PTR(ret);
}

static void free_pt(struct drm_device *dev, struct i915_page_table *pt)
{
	cleanup_px(dev, pt);
	kfree(pt->used_ptes);
	kfree(pt);
}

static void gen8_initialize_pt(struct i915_address_space *vm,
			       struct i915_page_table *pt)
{
	gen8_pte_t scratch_pte;

	scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
				      I915_CACHE_LLC, true);

	fill_px(vm->dev, pt, scratch_pte);
}

static void gen6_initialize_pt(struct i915_address_space *vm,
			       struct i915_page_table *pt)
{
	gen6_pte_t scratch_pte;

	WARN_ON(px_dma(vm->scratch_page) == 0);

	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
				     I915_CACHE_LLC, true, 0);

	fill32_px(vm->dev, pt, scratch_pte);
}

static struct i915_page_directory *alloc_pd(struct drm_device *dev)
{
	struct i915_page_directory *pd;
	int ret = -ENOMEM;

	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
	if (!pd)
		return ERR_PTR(-ENOMEM);

	pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
				sizeof(*pd->used_pdes), GFP_KERNEL);
	if (!pd->used_pdes)
		goto fail_bitmap;

	ret = setup_px(dev, pd);
	if (ret)
		goto fail_page_m;

	return pd;

fail_page_m:
	kfree(pd->used_pdes);
fail_bitmap:
	kfree(pd);

	return ERR_PTR(ret);
}

static void free_pd(struct drm_device *dev, struct i915_page_directory *pd)
{
	if (px_page(pd)) {
		cleanup_px(dev, pd);
		kfree(pd->used_pdes);
		kfree(pd);
	}
}

static void gen8_initialize_pd(struct i915_address_space *vm,
			       struct i915_page_directory *pd)
{
	gen8_pde_t scratch_pde;

	scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);

	fill_px(vm->dev, pd, scratch_pde);
}

static int __pdp_init(struct drm_device *dev,
		      struct i915_page_directory_pointer *pdp)
{
	size_t pdpes = I915_PDPES_PER_PDP(dev);

	pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
				  sizeof(unsigned long),
				  GFP_KERNEL);
	if (!pdp->used_pdpes)
		return -ENOMEM;

	pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
				      GFP_KERNEL);
	if (!pdp->page_directory) {
		kfree(pdp->used_pdpes);
		/* the PDP might be the statically allocated top level. Keep it
		 * as clean as possible */
		pdp->used_pdpes = NULL;
		return -ENOMEM;
	}

	return 0;
}

static void __pdp_fini(struct i915_page_directory_pointer *pdp)
{
	kfree(pdp->used_pdpes);
	kfree(pdp->page_directory);
	pdp->page_directory = NULL;
}

static struct
i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
{
	struct i915_page_directory_pointer *pdp;
	int ret = -ENOMEM;

	WARN_ON(!USES_FULL_48BIT_PPGTT(dev));

	pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
	if (!pdp)
		return ERR_PTR(-ENOMEM);

	ret = __pdp_init(dev, pdp);
	if (ret)
		goto fail_bitmap;

	ret = setup_px(dev, pdp);
	if (ret)
		goto fail_page_m;

	return pdp;

fail_page_m:
	__pdp_fini(pdp);
fail_bitmap:
	kfree(pdp);

	return ERR_PTR(ret);
}

static void free_pdp(struct drm_device *dev,
		     struct i915_page_directory_pointer *pdp)
{
	__pdp_fini(pdp);
	if (USES_FULL_48BIT_PPGTT(dev)) {
		cleanup_px(dev, pdp);
		kfree(pdp);
	}
}

static void gen8_initialize_pdp(struct i915_address_space *vm,
				struct i915_page_directory_pointer *pdp)
{
	gen8_ppgtt_pdpe_t scratch_pdpe;

	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);

	fill_px(vm->dev, pdp, scratch_pdpe);
}

static void gen8_initialize_pml4(struct i915_address_space *vm,
				 struct i915_pml4 *pml4)
{
	gen8_ppgtt_pml4e_t scratch_pml4e;

	scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
					  I915_CACHE_LLC);

	fill_px(vm->dev, pml4, scratch_pml4e);
}

static void
gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
			  struct i915_page_directory_pointer *pdp,
			  struct i915_page_directory *pd,
			  int index)
{
	gen8_ppgtt_pdpe_t *page_directorypo;

	if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
		return;

	page_directorypo = kmap_px(pdp);
	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
	kunmap_px(ppgtt, page_directorypo);
}

static void
gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
				  struct i915_pml4 *pml4,
				  struct i915_page_directory_pointer *pdp,
				  int index)
{
	gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);

	WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
	kunmap_px(ppgtt, pagemap);
}

/* Broadwell Page Directory Pointer Descriptors */
static int gen8_write_pdp(struct drm_i915_gem_request *req,
			  unsigned entry,
			  dma_addr_t addr)
{
	struct intel_engine_cs *ring = req->ring;
	int ret;

	BUG_ON(entry >= 4);

	ret = intel_ring_begin(req, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(ring, entry));
	intel_ring_emit(ring, upper_32_bits(addr));
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(ring, entry));
	intel_ring_emit(ring, lower_32_bits(addr));
	intel_ring_advance(ring);

	return 0;
}

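/* Legacy (32b) ppgtt: load all four PDP registers from the ring, each
 * pointing at one page directory that covers 1GB of the 4GB address space.
 */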
static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
				 struct drm_i915_gem_request *req)
{
	int i, ret;

	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);

		ret = gen8_write_pdp(req, i, pd_daddr);
		if (ret)
			return ret;
	}

	return 0;
}

static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
			      struct drm_i915_gem_request *req)
{
	return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
}

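/* Walk the pdpe/pde/pte indices covering [start, start + length) and write
 * scratch_pte into every live PTE slot, bailing out if a level is
 * unexpectedly unpopulated.
 */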
static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
				       struct i915_page_directory_pointer *pdp,
				       uint64_t start,
				       uint64_t length,
				       gen8_pte_t scratch_pte)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_pte_t *pt_vaddr;
	unsigned pdpe = gen8_pdpe_index(start);
	unsigned pde = gen8_pde_index(start);
	unsigned pte = gen8_pte_index(start);
	unsigned num_entries = length >> PAGE_SHIFT;
	unsigned last_pte, i;

	if (WARN_ON(!pdp))
		return;

	while (num_entries) {
		struct i915_page_directory *pd;
		struct i915_page_table *pt;

		if (WARN_ON(!pdp->page_directory[pdpe]))
			break;

		pd = pdp->page_directory[pdpe];

		if (WARN_ON(!pd->page_table[pde]))
			break;

		pt = pd->page_table[pde];

		if (WARN_ON(!px_page(pt)))
			break;

		last_pte = pte + num_entries;
		if (last_pte > GEN8_PTES)
			last_pte = GEN8_PTES;

		pt_vaddr = kmap_px(pt);

		for (i = pte; i < last_pte; i++) {
			pt_vaddr[i] = scratch_pte;
			num_entries--;
		}

		kunmap_px(ppgtt, pt);

		pte = 0;
		if (++pde == I915_PDES) {
			if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
				break;
			pde = 0;
		}
	}
}

static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
				   uint64_t start,
				   uint64_t length,
				   bool use_scratch)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
						 I915_CACHE_LLC, use_scratch);

	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
		gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
					   scratch_pte);
	} else {
		uint64_t pml4e;
		struct i915_page_directory_pointer *pdp;

		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
			gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
						   scratch_pte);
		}
	}
}

static void
gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
			      struct i915_page_directory_pointer *pdp,
			      struct sg_page_iter *sg_iter,
			      uint64_t start,
			      enum i915_cache_level cache_level)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_pte_t *pt_vaddr;
	unsigned pdpe = gen8_pdpe_index(start);
	unsigned pde = gen8_pde_index(start);
	unsigned pte = gen8_pte_index(start);

	pt_vaddr = NULL;

	while (__sg_page_iter_next(sg_iter)) {
		if (pt_vaddr == NULL) {
			struct i915_page_directory *pd = pdp->page_directory[pdpe];
			struct i915_page_table *pt = pd->page_table[pde];
			pt_vaddr = kmap_px(pt);
		}

		pt_vaddr[pte] =
			gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
					cache_level, true);
		if (++pte == GEN8_PTES) {
			kunmap_px(ppgtt, pt_vaddr);
			pt_vaddr = NULL;
			if (++pde == I915_PDES) {
				if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
					break;
				pde = 0;
			}
			pte = 0;
		}
	}

	if (pt_vaddr)
		kunmap_px(ppgtt, pt_vaddr);
}

static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct sg_table *pages,
				      uint64_t start,
				      enum i915_cache_level cache_level,
				      u32 unused)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	struct sg_page_iter sg_iter;

	__sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);

	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
		gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
					      cache_level);
	} else {
		struct i915_page_directory_pointer *pdp;
		uint64_t pml4e;
		uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;

		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
			gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
						      start, cache_level);
		}
	}
}

static void gen8_free_page_tables(struct drm_device *dev,
				  struct i915_page_directory *pd)
{
	int i;

	if (!px_page(pd))
		return;

	for_each_set_bit(i, pd->used_pdes, I915_PDES) {
		if (WARN_ON(!pd->page_table[i]))
			continue;

		free_pt(dev, pd->page_table[i]);
		pd->page_table[i] = NULL;
	}
}

static int gen8_init_scratch(struct i915_address_space *vm)
{
	struct drm_device *dev = vm->dev;

	vm->scratch_page = alloc_scratch_page(dev);
	if (IS_ERR(vm->scratch_page))
		return PTR_ERR(vm->scratch_page);

	vm->scratch_pt = alloc_pt(dev);
	if (IS_ERR(vm->scratch_pt)) {
		free_scratch_page(dev, vm->scratch_page);
		return PTR_ERR(vm->scratch_pt);
	}

	vm->scratch_pd = alloc_pd(dev);
	if (IS_ERR(vm->scratch_pd)) {
		free_pt(dev, vm->scratch_pt);
		free_scratch_page(dev, vm->scratch_page);
		return PTR_ERR(vm->scratch_pd);
	}

	if (USES_FULL_48BIT_PPGTT(dev)) {
		vm->scratch_pdp = alloc_pdp(dev);
		if (IS_ERR(vm->scratch_pdp)) {
			free_pd(dev, vm->scratch_pd);
			free_pt(dev, vm->scratch_pt);
			free_scratch_page(dev, vm->scratch_page);
			return PTR_ERR(vm->scratch_pdp);
		}
	}

	gen8_initialize_pt(vm, vm->scratch_pt);
	gen8_initialize_pd(vm, vm->scratch_pd);
	if (USES_FULL_48BIT_PPGTT(dev))
		gen8_initialize_pdp(vm, vm->scratch_pdp);

	return 0;
}

static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
{
	enum vgt_g2v_type msg;
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int i;

	if (USES_FULL_48BIT_PPGTT(dev)) {
		u64 daddr = px_dma(&ppgtt->pml4);

		I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
		I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));

		msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
				VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
	} else {
		for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
			u64 daddr = i915_page_dir_dma_addr(ppgtt, i);

			I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
			I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
		}

		msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
				VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
	}

	I915_WRITE(vgtif_reg(g2v_notify), msg);

	return 0;
}

static void gen8_free_scratch(struct i915_address_space *vm)
{
	struct drm_device *dev = vm->dev;

	if (USES_FULL_48BIT_PPGTT(dev))
		free_pdp(dev, vm->scratch_pdp);
	free_pd(dev, vm->scratch_pd);
	free_pt(dev, vm->scratch_pt);
	free_scratch_page(dev, vm->scratch_page);
}

static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
				    struct i915_page_directory_pointer *pdp)
{
	int i;

	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
		if (WARN_ON(!pdp->page_directory[i]))
			continue;

		gen8_free_page_tables(dev, pdp->page_directory[i]);
		free_pd(dev, pdp->page_directory[i]);
	}

	free_pdp(dev, pdp);
}

static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
{
	int i;

	for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
		if (WARN_ON(!ppgtt->pml4.pdps[i]))
			continue;

		gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]);
	}

	cleanup_px(ppgtt->base.dev, &ppgtt->pml4);
}

static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);

	if (intel_vgpu_active(vm->dev))
		gen8_ppgtt_notify_vgt(ppgtt, false);

	if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
		gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp);
	else
		gen8_ppgtt_cleanup_4lvl(ppgtt);

	gen8_free_scratch(vm);
}

/**
 * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
 * @vm: Master vm structure.
 * @pd: Page directory for this address range.
 * @start: Starting virtual address to begin allocations.
 * @length: Size of the allocations.
 * @new_pts: Bitmap set by function with new allocations. Likely used by the
 *	caller to free on error.
 *
 * Allocate the required number of page tables. Extremely similar to
 * gen8_ppgtt_alloc_page_directories(). The main difference is here we are
 * limited by the page directory boundary (instead of the page directory
 * pointer). That boundary is 1GB virtual. Therefore, unlike
 * gen8_ppgtt_alloc_page_directories(), it is possible, and likely, that the
 * caller will need to use multiple calls of this function to achieve the
 * appropriate allocation.
 *
 * Return: 0 if success; negative error code otherwise.
 */
static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
				     struct i915_page_directory *pd,
				     uint64_t start,
				     uint64_t length,
				     unsigned long *new_pts)
{
	struct drm_device *dev = vm->dev;
	struct i915_page_table *pt;
	uint32_t pde;

	gen8_for_each_pde(pt, pd, start, length, pde) {
		/* Don't reallocate page tables */
		if (test_bit(pde, pd->used_pdes)) {
			/* Scratch is never allocated this way */
			WARN_ON(pt == vm->scratch_pt);
			continue;
		}

		pt = alloc_pt(dev);
		if (IS_ERR(pt))
			goto unwind_out;

		gen8_initialize_pt(vm, pt);
		pd->page_table[pde] = pt;
		__set_bit(pde, new_pts);
		trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
	}

	return 0;

unwind_out:
	for_each_set_bit(pde, new_pts, I915_PDES)
		free_pt(dev, pd->page_table[pde]);

	return -ENOMEM;
}

/**
 * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
 * @vm: Master vm structure.
 * @pdp: Page directory pointer for this address range.
 * @start: Starting virtual address to begin allocations.
 * @length: Size of the allocations.
 * @new_pds: Bitmap set by function with new allocations. Likely used by the
 *	caller to free on error.
 *
 * Allocate the required number of page directories starting at the pdpe index
 * of @start, and ending at the pdpe index @start + @length. This function will
 * skip over already allocated page directories within the range, and only
 * allocate new ones, setting the appropriate pointer within the pdp as well as
 * the correct position in the bitmap @new_pds.
 *
 * The function will only allocate the pages within the range for a given page
 * directory pointer. In other words, if @start + @length straddles a virtually
 * addressed PDP boundary (512GB for 4k pages), there will be more allocations
 * required by the caller. This is not currently possible, and the BUG in the
 * code will prevent it.
 *
 * Return: 0 if success; negative error code otherwise.
 */
static int
gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
				  struct i915_page_directory_pointer *pdp,
				  uint64_t start,
				  uint64_t length,
				  unsigned long *new_pds)
{
	struct drm_device *dev = vm->dev;
	struct i915_page_directory *pd;
	uint32_t pdpe;
	uint32_t pdpes = I915_PDPES_PER_PDP(dev);

	WARN_ON(!bitmap_empty(new_pds, pdpes));

	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		if (test_bit(pdpe, pdp->used_pdpes))
			continue;

		pd = alloc_pd(dev);
		if (IS_ERR(pd))
			goto unwind_out;

		gen8_initialize_pd(vm, pd);
		pdp->page_directory[pdpe] = pd;
		__set_bit(pdpe, new_pds);
		trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
	}

	return 0;

unwind_out:
	for_each_set_bit(pdpe, new_pds, pdpes)
		free_pd(dev, pdp->page_directory[pdpe]);

	return -ENOMEM;
}

/**
 * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
 * @vm: Master vm structure.
 * @pml4: Page map level 4 for this address range.
 * @start: Starting virtual address to begin allocations.
 * @length: Size of the allocations.
 * @new_pdps: Bitmap set by function with new allocations. Likely used by the
 *	caller to free on error.
 *
 * Allocate the required number of page directory pointers. Extremely similar
 * to gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
 * The main difference is here we are limited by the pml4 boundary (instead of
 * the page directory pointer).
 *
 * Return: 0 if success; negative error code otherwise.
 */
static int
gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
				  struct i915_pml4 *pml4,
				  uint64_t start,
				  uint64_t length,
				  unsigned long *new_pdps)
{
	struct drm_device *dev = vm->dev;
	struct i915_page_directory_pointer *pdp;
	uint32_t pml4e;

	WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));

	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
		if (!test_bit(pml4e, pml4->used_pml4es)) {
			pdp = alloc_pdp(dev);
			if (IS_ERR(pdp))
				goto unwind_out;

			gen8_initialize_pdp(vm, pdp);
			pml4->pdps[pml4e] = pdp;
			__set_bit(pml4e, new_pdps);
			trace_i915_page_directory_pointer_entry_alloc(vm,
								      pml4e,
								      start,
								      GEN8_PML4E_SHIFT);
		}
	}

	return 0;

unwind_out:
	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
		free_pdp(dev, pml4->pdps[pml4e]);

	return -ENOMEM;
}

static void
free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
{
	kfree(new_pts);
	kfree(new_pds);
}

/* Fills in the page directory bitmap, and the array of page tables bitmap.
 * Both of these are based on the number of PDPEs in the system.
 */
static
int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
					 unsigned long **new_pts,
					 uint32_t pdpes)
{
	unsigned long *pds;
	unsigned long *pts;

	pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
	if (!pds)
		return -ENOMEM;

	pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
		      GFP_TEMPORARY);
	if (!pts)
		goto err_out;

	*new_pds = pds;
	*new_pts = pts;

	return 0;

err_out:
	free_gen8_temp_bitmaps(pds, pts);
	return -ENOMEM;
}

/* PDE TLBs are a pain to invalidate on GEN8+. When we modify
 * the page table structures, we mark them dirty so that
 * context switching/execlist queuing code takes extra steps
 * to ensure that tlbs are flushed.
 */
static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
{
	ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
}

static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
				    struct i915_page_directory_pointer *pdp,
				    uint64_t start,
				    uint64_t length)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	unsigned long *new_page_dirs, *new_page_tables;
	struct drm_device *dev = vm->dev;
	struct i915_page_directory *pd;
	const uint64_t orig_start = start;
	const uint64_t orig_length = length;
	uint32_t pdpe;
	uint32_t pdpes = I915_PDPES_PER_PDP(dev);
	int ret;

	/* Wrap is never okay since we can only represent 48b, and we don't
	 * actually use the other side of the canonical address space.
	 */
	if (WARN_ON(start + length < start))
		return -ENODEV;

	if (WARN_ON(start + length > vm->total))
		return -ENODEV;

	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
	if (ret)
		return ret;

	/* Do the allocations first so we can easily bail out */
	ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
						new_page_dirs);
	if (ret) {
		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
		return ret;
	}

	/* For every page directory referenced, allocate page tables */
	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
						new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
		if (ret)
			goto err_out;
	}

	start = orig_start;
	length = orig_length;

	/* Allocations have completed successfully, so set the bitmaps, and do
	 * the mappings. */
	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		gen8_pde_t *const page_directory = kmap_px(pd);
		struct i915_page_table *pt;
		uint64_t pd_len = length;
		uint64_t pd_start = start;
		uint32_t pde;

		/* Every pd should be allocated, we just did that above. */
		WARN_ON(!pd);

		gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
			/* Same reasoning as pd */
			WARN_ON(!pt);
			WARN_ON(!pd_len);
			WARN_ON(!gen8_pte_count(pd_start, pd_len));

			/* Set our used ptes within the page table */
			bitmap_set(pt->used_ptes,
				   gen8_pte_index(pd_start),
				   gen8_pte_count(pd_start, pd_len));

			/* Our pde is now pointing to the pagetable, pt */
			__set_bit(pde, pd->used_pdes);

			/* Map the PDE to the page table */
			page_directory[pde] = gen8_pde_encode(px_dma(pt),
							      I915_CACHE_LLC);
			trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
							gen8_pte_index(start),
							gen8_pte_count(start, length),
							GEN8_PTES);

			/* NB: We haven't yet mapped ptes to pages. At this
			 * point we're still relying on insert_entries() */
		}

		kunmap_px(ppgtt, page_directory);
		__set_bit(pdpe, pdp->used_pdpes);
		gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
	}

	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
	mark_tlbs_dirty(ppgtt);
	return 0;

err_out:
	while (pdpe--) {
		unsigned long temp;

		for_each_set_bit(temp, new_page_tables + pdpe *
				BITS_TO_LONGS(I915_PDES), I915_PDES)
			free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
	}

	for_each_set_bit(pdpe, new_page_dirs, pdpes)
		free_pd(dev, pdp->page_directory[pdpe]);

	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
	mark_tlbs_dirty(ppgtt);
	return ret;
}

static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
				    struct i915_pml4 *pml4,
				    uint64_t start,
				    uint64_t length)
{
	DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
	struct i915_hw_ppgtt *ppgtt =
			container_of(vm, struct i915_hw_ppgtt, base);
	struct i915_page_directory_pointer *pdp;
	uint64_t pml4e;
	int ret = 0;

	/* Do the pml4 allocations first, so we don't need to track the newly
	 * allocated tables below the pdp */
	bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);

	/* The pagedirectory and pagetable allocations are done in the shared 3
	 * and 4 level code. Just allocate the pdps.
	 */
	ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
						new_pdps);
	if (ret)
		return ret;

	WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
	     "The allocation has spanned more than 512GB. "
	     "It is highly likely this is incorrect.");

	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
		WARN_ON(!pdp);

		ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
		if (ret)
			goto err_out;

		gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e);
	}

	bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
		  GEN8_PML4ES_PER_PML4);

	return 0;

err_out:
	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
		gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]);

	return ret;
}

static int gen8_alloc_va_range(struct i915_address_space *vm,
			       uint64_t start, uint64_t length)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);

	if (USES_FULL_48BIT_PPGTT(vm->dev))
		return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
	else
		return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
}

static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
			  uint64_t start, uint64_t length,
			  gen8_pte_t scratch_pte,
			  struct seq_file *m)
{
	struct i915_page_directory *pd;
	uint32_t pdpe;

	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		struct i915_page_table *pt;
		uint64_t pd_len = length;
		uint64_t pd_start = start;
		uint32_t pde;

		if (!test_bit(pdpe, pdp->used_pdpes))
			continue;

		seq_printf(m, "\tPDPE #%d\n", pdpe);
		gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
			uint32_t pte;
			gen8_pte_t *pt_vaddr;

			if (!test_bit(pde, pd->used_pdes))
				continue;

			pt_vaddr = kmap_px(pt);
			for (pte = 0; pte < GEN8_PTES; pte += 4) {
				uint64_t va =
					(pdpe << GEN8_PDPE_SHIFT) |
					(pde << GEN8_PDE_SHIFT) |
					(pte << GEN8_PTE_SHIFT);
				int i;
				bool found = false;

				for (i = 0; i < 4; i++)
					if (pt_vaddr[pte + i] != scratch_pte)
						found = true;
				if (!found)
					continue;

				seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
				for (i = 0; i < 4; i++) {
					if (pt_vaddr[pte + i] != scratch_pte)
						seq_printf(m, " %llx", pt_vaddr[pte + i]);
					else
						seq_puts(m, " SCRATCH ");
				}
				seq_puts(m, "\n");
			}
			/* don't use kunmap_px, it could trigger
			 * an unnecessary flush.
			 */
			kunmap_atomic(pt_vaddr);
		}
	}
}

static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
{
	struct i915_address_space *vm = &ppgtt->base;
	uint64_t start = ppgtt->base.start;
	uint64_t length = ppgtt->base.total;
	gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
						 I915_CACHE_LLC, true);

	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
		gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
	} else {
		uint64_t pml4e;
		struct i915_pml4 *pml4 = &ppgtt->pml4;
		struct i915_page_directory_pointer *pdp;

		gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
			if (!test_bit(pml4e, pml4->used_pml4es))
				continue;

			seq_printf(m, "    PML4E #%llu\n", pml4e);
			gen8_dump_pdp(pdp, start, length, scratch_pte, m);
		}
	}
}

static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
{
	unsigned long *new_page_dirs, *new_page_tables;
	uint32_t pdpes = I915_PDPES_PER_PDP(dev);
	int ret;

	/* We allocate temp bitmap for page tables for no gain
	 * but as this is for init only, let's keep things simple
	 */
	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
	if (ret)
		return ret;

	/* Allocate for all pdps regardless of how the ppgtt
	 * was defined.
	 */
	ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
						0, 1ULL << 32,
						new_page_dirs);
	if (!ret)
		*ppgtt->pdp.used_pdpes = *new_page_dirs;

	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);

	return ret;
}

6084 | serge | 1487 | /* |
5060 | serge | 1488 | * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers |
1489 | * with a net effect resembling a 2-level page table in normal x86 terms. Each |
||
1490 | * PDP represents 1GB of memory 4 * 512 * 512 * 4096 = 4GB legacy 32b address |
||
1491 | * space. |
||
4560 | Serge | 1492 | * |
5060 | serge | 1493 | */ |
static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
	int ret;

	ret = gen8_init_scratch(&ppgtt->base);
	if (ret)
		return ret;

	ppgtt->base.start = 0;
	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
	ppgtt->base.bind_vma = ppgtt_bind_vma;
	ppgtt->debug_dump = gen8_dump_ppgtt;

	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
		ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
		if (ret)
			goto free_scratch;

		gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);

		ppgtt->base.total = 1ULL << 48;
		ppgtt->switch_mm = gen8_48b_mm_switch;
	} else {
		ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp);
		if (ret)
			goto free_scratch;

		ppgtt->base.total = 1ULL << 32;
		ppgtt->switch_mm = gen8_legacy_mm_switch;
		trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
							      0, 0,
							      GEN8_PML4E_SHIFT);

		if (intel_vgpu_active(ppgtt->base.dev)) {
			ret = gen8_preallocate_top_level_pdps(ppgtt);
			if (ret)
				goto free_scratch;
		}
	}

	if (intel_vgpu_active(ppgtt->base.dev))
		gen8_ppgtt_notify_vgt(ppgtt, true);

	return 0;

free_scratch:
	gen8_free_scratch(&ppgtt->base);
	return ret;
}

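/* Worked example for the legacy layout described above (added note, not in
 * the original source): a 32b GPU address splits as 2 bits of PDPE index,
 * 9 bits of PDE index, 9 bits of PTE index and a 12 bit page offset, so the
 * reachable range is 4 * 512 * 512 * 4096 bytes = 4GB, matching the
 * ppgtt->base.total = 1ULL << 32 assignment in gen8_ppgtt_init().
 */
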
static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
{
	struct i915_address_space *vm = &ppgtt->base;
	struct i915_page_table *unused;
	gen6_pte_t scratch_pte;
	uint32_t pd_entry;
	uint32_t pte, pde, temp;
	uint32_t start = ppgtt->base.start, length = ppgtt->base.total;

	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
				     I915_CACHE_LLC, true, 0);

	gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) {
		u32 expected;
		gen6_pte_t *pt_vaddr;
		const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
		pd_entry = readl(ppgtt->pd_addr + pde);
		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);

		if (pd_entry != expected)
			seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
				   pde,
				   pd_entry,
				   expected);
		seq_printf(m, "\tPDE: %x\n", pd_entry);

		pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]);

		for (pte = 0; pte < GEN6_PTES; pte += 4) {
			unsigned long va =
				(pde * PAGE_SIZE * GEN6_PTES) +
				(pte * PAGE_SIZE);
			int i;
			bool found = false;

			for (i = 0; i < 4; i++)
				if (pt_vaddr[pte + i] != scratch_pte)
					found = true;
			if (!found)
				continue;

			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
			for (i = 0; i < 4; i++) {
				if (pt_vaddr[pte + i] != scratch_pte)
					seq_printf(m, " %08x", pt_vaddr[pte + i]);
				else
					seq_puts(m, " SCRATCH ");
			}
			seq_puts(m, "\n");
		}
		kunmap_px(ppgtt, pt_vaddr);
	}
}

/* Write pde (index) from the page directory @pd to the page table @pt */
static void gen6_write_pde(struct i915_page_directory *pd,
			   const int pde, struct i915_page_table *pt)
{
	/* Caller needs to make sure the write completes if necessary */
	struct i915_hw_ppgtt *ppgtt =
		container_of(pd, struct i915_hw_ppgtt, pd);
	u32 pd_entry;

	pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt));
	pd_entry |= GEN6_PDE_VALID;

	writel(pd_entry, ppgtt->pd_addr + pde);
}

/* Write all the page tables found in the ppgtt structure to incrementing page
 * directories. */
static void gen6_write_page_range(struct drm_i915_private *dev_priv,
				  struct i915_page_directory *pd,
				  uint32_t start, uint32_t length)
{
	struct i915_page_table *pt;
	uint32_t pde, temp;

	gen6_for_each_pde(pt, pd, start, length, temp, pde)
		gen6_write_pde(pd, pde, pt);

	/* Make sure the write is complete before other code can use this page
	 * table. Also required for WC mapped PTEs. */
	readl(dev_priv->gtt.gsm);
}

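/* Illustrative usage (added note; it mirrors what gen6_ppgtt_init() does
 * further below): after carving out the PDE range, the whole directory is
 * primed in one go with
 *
 *	gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
 *
 * relying on the readl() barrier above to post the writes. */
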
static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
{
	BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);

	return (ppgtt->pd.base.ggtt_offset / 64) << 16;
}

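/* Example (added note): the directory must be 64 byte aligned (the BUG_ON
 * above), and PP_DIR_BASE takes the offset in cachelines in its high word,
 * so a pd at GGTT offset 0x40000 is programmed as (0x40000 / 64) << 16. */
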
static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
			 struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *ring = req->ring;
	int ret;

	/* NB: TLBs must be flushed and invalidated before a switch */
	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	ret = intel_ring_begin(req, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
	intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(ring));
	intel_ring_emit(ring, PP_DIR_DCLV_2G);
	intel_ring_emit_reg(ring, RING_PP_DIR_BASE(ring));
	intel_ring_emit(ring, get_pd_offset(ppgtt));
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

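/* The six dwords emitted above decode as one MI_LOAD_REGISTER_IMM(2)
 * packet (added note): LRI header, DCLV register, PP_DIR_DCLV_2G,
 * PP_DIR_BASE register, get_pd_offset() value, MI_NOOP pad. Doing the
 * switch from the ring keeps it ordered after the TLB flush emitted just
 * before it. */
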
static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *ring = req->ring;
	struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);

	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
	return 0;
}

static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *ring = req->ring;
	int ret;

	/* NB: TLBs must be flushed and invalidated before a switch */
	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	ret = intel_ring_begin(req, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
	intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(ring));
	intel_ring_emit(ring, PP_DIR_DCLV_2G);
	intel_ring_emit_reg(ring, RING_PP_DIR_BASE(ring));
	intel_ring_emit(ring, get_pd_offset(ppgtt));
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	/* XXX: RCS is the only one to auto invalidate the TLBs? */
	if (ring->id != RCS) {
		ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
		if (ret)
			return ret;
	}

	return 0;
}

static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *ring = req->ring;
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));

	POSTING_READ(RING_PP_DIR_DCLV(ring));

	return 0;
}

static void gen8_ppgtt_enable(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int j;

	for_each_ring(ring, dev_priv, j) {
		u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0;
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
	}
}

static void gen7_ppgtt_enable(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	uint32_t ecochk, ecobits;
	int i;

	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);

	ecochk = I915_READ(GAM_ECOCHK);
	if (IS_HASWELL(dev)) {
		ecochk |= ECOCHK_PPGTT_WB_HSW;
	} else {
		ecochk |= ECOCHK_PPGTT_LLC_IVB;
		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
	}
	I915_WRITE(GAM_ECOCHK, ecochk);

	for_each_ring(ring, dev_priv, i) {
		/* GFX_MODE is per-ring on gen7+ */
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}
}

static void gen6_ppgtt_enable(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t ecochk, gab_ctl, ecobits;

	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
		   ECOBITS_PPGTT_CACHE64B);

	gab_ctl = I915_READ(GAB_CTL);
	I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

	ecochk = I915_READ(GAM_ECOCHK);
	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);

	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
}

/* PPGTT support for Sandybridge/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
				   uint64_t start,
				   uint64_t length,
				   bool use_scratch)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_pte_t *pt_vaddr, scratch_pte;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	unsigned act_pt = first_entry / GEN6_PTES;
	unsigned first_pte = first_entry % GEN6_PTES;
	unsigned last_pte, i;

	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
				     I915_CACHE_LLC, true, 0);

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > GEN6_PTES)
			last_pte = GEN6_PTES;

		pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);

		for (i = first_pte; i < last_pte; i++)
			pt_vaddr[i] = scratch_pte;

		kunmap_px(ppgtt, pt_vaddr);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pt++;
	}
}

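/* Worked example (added note): clearing 8MB at offset 4MB gives
 * first_entry = 1024 and num_entries = 2048; with GEN6_PTES == 1024
 * (4 byte PTEs in a 4K page) that is act_pt = 1, first_pte = 0, and the
 * loop above scrubs exactly two page tables. */
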
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct sg_table *pages,
				      uint64_t start,
				      enum i915_cache_level cache_level, u32 flags)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_pte_t *pt_vaddr;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned act_pt = first_entry / GEN6_PTES;
	unsigned act_pte = first_entry % GEN6_PTES;
	struct sg_page_iter sg_iter;

	pt_vaddr = NULL;
	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
		if (pt_vaddr == NULL)
			pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);

		pt_vaddr[act_pte] =
			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
				       cache_level, true, flags);

		if (++act_pte == GEN6_PTES) {
			kunmap_px(ppgtt, pt_vaddr);
			pt_vaddr = NULL;
			act_pt++;
			act_pte = 0;
		}
	}
	if (pt_vaddr)
		kunmap_px(ppgtt, pt_vaddr);
}

static int gen6_alloc_va_range(struct i915_address_space *vm,
			       uint64_t start_in, uint64_t length_in)
{
	DECLARE_BITMAP(new_page_tables, I915_PDES);
	struct drm_device *dev = vm->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	struct i915_page_table *pt;
	uint32_t start, length, start_save, length_save;
	uint32_t pde, temp;
	int ret;

	if (WARN_ON(start_in + length_in > ppgtt->base.total))
		return -ENODEV;

	start = start_save = start_in;
	length = length_save = length_in;

	bitmap_zero(new_page_tables, I915_PDES);

	/* The allocation is done in two stages so that we can bail out with
	 * minimal amount of pain. The first stage finds new page tables that
	 * need allocation. The second stage marks the PTEs in use within
	 * those page tables.
	 */
	gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
		if (pt != vm->scratch_pt) {
			WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
			continue;
		}

		/* We've already allocated a page table */
		WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));

		pt = alloc_pt(dev);
		if (IS_ERR(pt)) {
			ret = PTR_ERR(pt);
			goto unwind_out;
		}

		gen6_initialize_pt(vm, pt);

		ppgtt->pd.page_table[pde] = pt;
		__set_bit(pde, new_page_tables);
		trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
	}

	start = start_save;
	length = length_save;

	gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
		DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);

		bitmap_zero(tmp_bitmap, GEN6_PTES);
		bitmap_set(tmp_bitmap, gen6_pte_index(start),
			   gen6_pte_count(start, length));

		if (__test_and_clear_bit(pde, new_page_tables))
			gen6_write_pde(&ppgtt->pd, pde, pt);

		trace_i915_page_table_entry_map(vm, pde, pt,
						gen6_pte_index(start),
						gen6_pte_count(start, length),
						GEN6_PTES);
		bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
			  GEN6_PTES);
	}

	WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));

	/* Make sure the write is complete before other code can use this page
	 * table. Also required for WC mapped PTEs. */
	readl(dev_priv->gtt.gsm);

	mark_tlbs_dirty(ppgtt);
	return 0;

unwind_out:
	for_each_set_bit(pde, new_page_tables, I915_PDES) {
		struct i915_page_table *pt = ppgtt->pd.page_table[pde];

		ppgtt->pd.page_table[pde] = vm->scratch_pt;
		free_pt(vm->dev, pt);
	}

	mark_tlbs_dirty(ppgtt);
	return ret;
}

static int gen6_init_scratch(struct i915_address_space *vm)
{
	struct drm_device *dev = vm->dev;

	vm->scratch_page = alloc_scratch_page(dev);
	if (IS_ERR(vm->scratch_page))
		return PTR_ERR(vm->scratch_page);

	vm->scratch_pt = alloc_pt(dev);
	if (IS_ERR(vm->scratch_pt)) {
		free_scratch_page(dev, vm->scratch_page);
		return PTR_ERR(vm->scratch_pt);
	}

	gen6_initialize_pt(vm, vm->scratch_pt);

	return 0;
}

static void gen6_free_scratch(struct i915_address_space *vm)
{
	struct drm_device *dev = vm->dev;

	free_pt(dev, vm->scratch_pt);
	free_scratch_page(dev, vm->scratch_page);
}

static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	struct i915_page_table *pt;
	uint32_t pde;

	drm_mm_remove_node(&ppgtt->node);

	gen6_for_all_pdes(pt, ppgtt, pde) {
		if (pt != vm->scratch_pt)
			free_pt(ppgtt->base.dev, pt);
	}

	gen6_free_scratch(vm);
}

static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
{
	struct i915_address_space *vm = &ppgtt->base;
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool retried = false;
	int ret;

	/* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
	 * allocator works in address space sizes, so it's multiplied by page
	 * size. We allocate at the top of the GTT to avoid fragmentation.
	 */
	BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));

	ret = gen6_init_scratch(vm);
	if (ret)
		return ret;

alloc:
	ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
						  &ppgtt->node, GEN6_PD_SIZE,
						  GEN6_PD_ALIGN, 0,
						  0, dev_priv->gtt.base.total,
						  DRM_MM_TOPDOWN);
	if (ret == -ENOSPC && !retried) {
		ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
					       I915_CACHE_NONE,
					       0, dev_priv->gtt.base.total,
					       0);
		if (ret)
			goto err_out;

		retried = true;
		goto alloc;
	}

	if (ret)
		goto err_out;

	if (ppgtt->node.start < dev_priv->gtt.mappable_end)
		DRM_DEBUG("Forced to use aperture for PDEs\n");

	return 0;

err_out:
	gen6_free_scratch(vm);
	return ret;
}

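/* Sizing note (added): GEN6_PD_SIZE is 512 PDEs' worth of GGTT address
 * space (512 * 4096 = 2MB), reserved top-down so the low, CPU-mappable
 * aperture stays unfragmented; the PDEs themselves live in the GGTT PTE
 * slots that back this node. */
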
static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
{
	return gen6_ppgtt_allocate_page_directories(ppgtt);
}

static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
				  uint64_t start, uint64_t length)
{
	struct i915_page_table *unused;
	uint32_t pde, temp;

	gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde)
		ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
}

static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
	if (IS_GEN6(dev)) {
		ppgtt->switch_mm = gen6_mm_switch;
	} else if (IS_HASWELL(dev)) {
		ppgtt->switch_mm = hsw_mm_switch;
	} else if (IS_GEN7(dev)) {
		ppgtt->switch_mm = gen7_mm_switch;
	} else
		BUG();

	if (intel_vgpu_active(dev))
		ppgtt->switch_mm = vgpu_mm_switch;

	ret = gen6_ppgtt_alloc(ppgtt);
	if (ret)
		return ret;

	ppgtt->base.allocate_va_range = gen6_alloc_va_range;
	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
	ppgtt->base.bind_vma = ppgtt_bind_vma;
	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
	ppgtt->base.start = 0;
	ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
	ppgtt->debug_dump = gen6_dump_ppgtt;

	ppgtt->pd.base.ggtt_offset =
		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);

	ppgtt->pd_addr = (gen6_pte_t __iomem *)dev_priv->gtt.gsm +
		ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);

	gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);

	gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);

	DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
			 ppgtt->node.size >> 20,
			 ppgtt->node.start / PAGE_SIZE);

	DRM_DEBUG("Adding PPGTT at offset %x\n",
		  ppgtt->pd.base.ggtt_offset << 10);

	return 0;
}

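/* Offset arithmetic above, spelled out (added note): the node covers
 * node.start onwards in GGTT space, and each 4K page of it is described by
 * one 4 byte GGTT PTE, so the first PDE slot sits at byte offset
 * node.start / 4096 * 4 inside the gsm mapping; pd_addr is simply the
 * address of that slot. */
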
static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
{
	ppgtt->base.dev = dev;

	if (INTEL_INFO(dev)->gen < 8)
		return gen6_ppgtt_init(ppgtt);
	else
		return gen8_ppgtt_init(ppgtt);
}

static void i915_address_space_init(struct i915_address_space *vm,
				    struct drm_i915_private *dev_priv)
{
	drm_mm_init(&vm->mm, vm->start, vm->total);
	vm->dev = dev_priv->dev;
	INIT_LIST_HEAD(&vm->active_list);
	INIT_LIST_HEAD(&vm->inactive_list);
	list_add_tail(&vm->global_link, &dev_priv->vm_list);
}

int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = 0;

	ret = __hw_ppgtt_init(dev, ppgtt);
	if (ret == 0) {
		kref_init(&ppgtt->ref);
		i915_address_space_init(&ppgtt->base, dev_priv);
	}

	return ret;
}

int i915_ppgtt_init_hw(struct drm_device *dev)
{
	/* In the case of execlists, PPGTT is enabled by the context descriptor
	 * and the PDPs are contained within the context itself. We don't
	 * need to do anything here. */
	if (i915.enable_execlists)
		return 0;

	if (!USES_PPGTT(dev))
		return 0;

	if (IS_GEN6(dev))
		gen6_ppgtt_enable(dev);
	else if (IS_GEN7(dev))
		gen7_ppgtt_enable(dev);
	else if (INTEL_INFO(dev)->gen >= 8)
		gen8_ppgtt_enable(dev);
	else
		MISSING_CASE(INTEL_INFO(dev)->gen);

	return 0;
}

int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
{
	struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;

	if (i915.enable_execlists)
		return 0;

	if (!ppgtt)
		return 0;

	return ppgtt->switch_mm(ppgtt, req);
}

struct i915_hw_ppgtt *
i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
{
	struct i915_hw_ppgtt *ppgtt;
	int ret;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return ERR_PTR(-ENOMEM);

	ret = i915_ppgtt_init(dev, ppgtt);
	if (ret) {
		kfree(ppgtt);
		return ERR_PTR(ret);
	}

	ppgtt->file_priv = fpriv;

	trace_i915_ppgtt_create(&ppgtt->base);

	return ppgtt;
}

void i915_ppgtt_release(struct kref *kref)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(kref, struct i915_hw_ppgtt, ref);

	trace_i915_ppgtt_release(&ppgtt->base);

	/* vmas should already be unbound */
	WARN_ON(!list_empty(&ppgtt->base.active_list));
	WARN_ON(!list_empty(&ppgtt->base.inactive_list));

	list_del(&ppgtt->base.global_link);
	drm_mm_takedown(&ppgtt->base.mm);

	ppgtt->base.cleanup(&ppgtt->base);
	kfree(ppgtt);
}

extern int intel_iommu_gfx_mapped;
/* Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static bool needs_idle_maps(struct drm_device *dev)
{
#ifdef CONFIG_INTEL_IOMMU
	/* Query intel_iommu to see if we need the workaround. Presumably that
	 * was loaded first.
	 */
	if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
		return true;
#endif
	return false;
}

static bool do_idling(struct drm_i915_private *dev_priv)
{
	bool ret = dev_priv->mm.interruptible;

	if (unlikely(dev_priv->gtt.do_idle_maps)) {
		dev_priv->mm.interruptible = false;
		if (i915_gpu_idle(dev_priv->dev)) {
			DRM_ERROR("Couldn't idle GPU\n");
			/* Wait a bit, in hopes it avoids the hang */
			udelay(10);
		}
	}

	return ret;
}

static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
{
	if (unlikely(dev_priv->gtt.do_idle_maps))
		dev_priv->mm.interruptible = interruptible;
}

void i915_check_and_clear_faults(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int i;

	if (INTEL_INFO(dev)->gen < 6)
		return;

	for_each_ring(ring, dev_priv, i) {
		u32 fault_reg;
		fault_reg = I915_READ(RING_FAULT_REG(ring));
		if (fault_reg & RING_FAULT_VALID) {
			DRM_DEBUG_DRIVER("Unexpected fault\n"
					 "\tAddr: 0x%08lx\n"
					 "\tAddress space: %s\n"
					 "\tSource ID: %d\n"
					 "\tType: %d\n",
					 fault_reg & PAGE_MASK,
					 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
					 RING_FAULT_SRCID(fault_reg),
					 RING_FAULT_FAULT_TYPE(fault_reg));
			I915_WRITE(RING_FAULT_REG(ring),
				   fault_reg & ~RING_FAULT_VALID);
		}
	}
	POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
}

static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
{
	if (INTEL_INFO(dev_priv->dev)->gen < 6) {
		intel_gtt_chipset_flush();
	} else {
		I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
		POSTING_READ(GFX_FLSH_CNTL_GEN6);
	}
}

void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* Don't bother messing with faults pre GEN6 as we have little
	 * documentation supporting that it's a good idea.
	 */
	if (INTEL_INFO(dev)->gen < 6)
		return;

	i915_check_and_clear_faults(dev);

	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
				       dev_priv->gtt.base.start,
				       dev_priv->gtt.base.total,
				       true);

	i915_ggtt_flush(dev_priv);
}

int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
{
	if (!dma_map_sg(&obj->base.dev->pdev->dev,
			obj->pages->sgl, obj->pages->nents,
			PCI_DMA_BIDIRECTIONAL))
		return -ENOSPC;

	return 0;
}

static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
#ifdef writeq
	writeq(pte, addr);
#else
	iowrite32((u32)pte, addr);
	iowrite32(pte >> 32, addr + 4);
#endif
}

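/* Note (added, hedged): without a native writeq the 64 bit PTE is written
 * as two 32 bit halves and is transiently inconsistent; callers appear to
 * rely on the GPU not walking the entry concurrently and on the posting
 * read issued after the whole batch of updates. */
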
static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     uint64_t start,
				     enum i915_cache_level level, u32 unused)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	gen8_pte_t __iomem *gtt_entries =
		(gen8_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	struct sg_page_iter sg_iter;
	dma_addr_t addr = 0; /* shut up gcc */
	int rpm_atomic_seq;

	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);

	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		addr = sg_dma_address(sg_iter.sg) +
			(sg_iter.sg_pgoffset << PAGE_SHIFT);
		gen8_set_pte(&gtt_entries[i],
			     gen8_pte_encode(addr, level, true));
		i++;
	}

	/*
	 * XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readq(&gtt_entries[i-1])
			!= gen8_pte_encode(addr, level, true));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);

	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}

struct insert_entries {
	struct i915_address_space *vm;
	struct sg_table *st;
	uint64_t start;
	enum i915_cache_level level;
	u32 flags;
};

static int gen8_ggtt_insert_entries__cb(void *_arg)
{
	struct insert_entries *arg = _arg;
	gen8_ggtt_insert_entries(arg->vm, arg->st,
				 arg->start, arg->level, arg->flags);
	return 0;
}

static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
					  struct sg_table *st,
					  uint64_t start,
					  enum i915_cache_level level,
					  u32 flags)
{
	struct insert_entries arg = { vm, st, start, level, flags };

	/* The upstream kernel runs this through
	 * stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL); this port
	 * lacks stop_machine(), so invoke the callback directly. */
	gen8_ggtt_insert_entries__cb(&arg);
}

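/* Background (added note, hedged): the __BKL variant exists upstream to
 * serialise GGTT PTE updates against concurrent access through the GGTT on
 * affected systems; calling the callback directly, as above, loses that
 * serialisation guarantee. */
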
/*
 * Binds an object into the global gtt with the specified cache level. The object
 * will be accessible to the GPU via commands whose operands reference offsets
 * within the global GTT as well as accessible by the GPU through the GMADR
 * mapped BAR (dev_priv->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     uint64_t start,
				     enum i915_cache_level level, u32 flags)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	gen6_pte_t __iomem *gtt_entries =
		(gen6_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	struct sg_page_iter sg_iter;
	dma_addr_t addr = 0;
	int rpm_atomic_seq;

	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);

	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		addr = sg_page_iter_dma_address(&sg_iter);
		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
		i++;
	}

	/* XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0) {
		unsigned long gtt = readl(&gtt_entries[i-1]);
		WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
	}

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);

	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	gen8_pte_t scratch_pte, __iomem *gtt_base =
		(gen8_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;
	int rpm_atomic_seq;

	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
				      I915_CACHE_LLC,
				      use_scratch);
	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
	readl(gtt_base);

	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	gen6_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;
	int rpm_atomic_seq;

	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
				     I915_CACHE_LLC, use_scratch, 0);

	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
	readl(gtt_base);

	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}

static void i915_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *pages,
				     uint64_t start,
				     enum i915_cache_level cache_level, u32 unused)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
	int rpm_atomic_seq;

	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);

	intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);

	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}

static void i915_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool unused)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	int rpm_atomic_seq;

	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);

	intel_gtt_clear_range(first_entry, num_entries);

	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
}

static int ggtt_bind_vma(struct i915_vma *vma,
			 enum i915_cache_level cache_level,
			 u32 flags)
{
	struct drm_i915_gem_object *obj = vma->obj;
	u32 pte_flags = 0;
	int ret;

	ret = i915_get_ggtt_vma_pages(vma);
	if (ret)
		return ret;

	/* Currently applicable only to VLV */
	if (obj->gt_ro)
		pte_flags |= PTE_READ_ONLY;

	vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages,
				vma->node.start,
				cache_level, pte_flags);

	/*
	 * Without aliasing PPGTT there's no difference between
	 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
	 * upgrade to both bound if we bind either to avoid double-binding.
	 */
	vma->bound |= GLOBAL_BIND | LOCAL_BIND;

	return 0;
}

static int aliasing_gtt_bind_vma(struct i915_vma *vma,
				 enum i915_cache_level cache_level,
				 u32 flags)
{
	struct drm_device *dev = vma->vm->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj = vma->obj;
	struct sg_table *pages = obj->pages;
	u32 pte_flags = 0;
	int ret;

	ret = i915_get_ggtt_vma_pages(vma);
	if (ret)
		return ret;
	pages = vma->ggtt_view.pages;

	/* Currently applicable only to VLV */
	if (obj->gt_ro)
		pte_flags |= PTE_READ_ONLY;

	if (flags & GLOBAL_BIND) {
		vma->vm->insert_entries(vma->vm, pages,
					vma->node.start,
					cache_level, pte_flags);
	}

	if (flags & LOCAL_BIND) {
		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
		appgtt->base.insert_entries(&appgtt->base, pages,
					    vma->node.start,
					    cache_level, pte_flags);
	}

	return 0;
}

static void ggtt_unbind_vma(struct i915_vma *vma)
{
	struct drm_device *dev = vma->vm->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj = vma->obj;
	const uint64_t size = min_t(uint64_t,
				    obj->base.size,
				    vma->node.size);

	if (vma->bound & GLOBAL_BIND) {
		vma->vm->clear_range(vma->vm,
				     vma->node.start,
				     size,
				     true);
	}

	if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;

		appgtt->base.clear_range(&appgtt->base,
					 vma->node.start,
					 size,
					 true);
	}
}

void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible;

	interruptible = do_idling(dev_priv);

	dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents,
		     PCI_DMA_BIDIRECTIONAL);

	undo_idling(dev_priv, interruptible);
}

static void i915_gtt_color_adjust(struct drm_mm_node *node,
				  unsigned long color,
				  u64 *start,
				  u64 *end)
{
	if (node->color != color)
		*start += 4096;

	if (!list_empty(&node->node_list)) {
		node = list_entry(node->node_list.next,
				  struct drm_mm_node,
				  node_list);
		if (node->allocated && node->color != color)
			*end -= 4096;
	}
}

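/* Effect (added note): with cache coloring enabled (!HAS_LLC, see below),
 * a hole bordered by nodes of a different cache level is shrunk by one
 * 4096 byte guard page per mismatched side, so objects with different
 * snooping requirements never sit flush against each other in the GGTT. */
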
static int i915_gem_setup_global_gtt(struct drm_device *dev,
				     u64 start,
				     u64 mappable_end,
				     u64 end)
{
	/* Let GEM Manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture. One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
	struct drm_mm_node *entry;
	struct drm_i915_gem_object *obj;
	unsigned long hole_start, hole_end;
	int ret;

	BUG_ON(mappable_end > end);

	ggtt_vm->start = start;

	/* Subtract the guard page before address space initialization to
	 * shrink the range used by drm_mm */
	ggtt_vm->total = end - start - PAGE_SIZE;
	i915_address_space_init(ggtt_vm, dev_priv);
	ggtt_vm->total += PAGE_SIZE;

	if (intel_vgpu_active(dev)) {
		ret = intel_vgt_balloon(dev);
		if (ret)
			return ret;
	}

	if (!HAS_LLC(dev))
		ggtt_vm->mm.color_adjust = i915_gtt_color_adjust;

	/* Mark any preallocated objects as occupied */
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);

		DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n",
			      i915_gem_obj_ggtt_offset(obj), obj->base.size);

		WARN_ON(i915_gem_obj_ggtt_bound(obj));
		ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
		if (ret) {
			DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
			return ret;
		}
		vma->bound |= GLOBAL_BIND;
		__i915_vma_set_map_and_fenceable(vma);
		list_add_tail(&vma->mm_list, &ggtt_vm->inactive_list);
	}

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
			      hole_start, hole_end);
		ggtt_vm->clear_range(ggtt_vm, hole_start,
				     hole_end - hole_start, true);
	}

	/* And finally clear the reserved guard page */
	ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);

	if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) {
		struct i915_hw_ppgtt *ppgtt;

		ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
		if (!ppgtt)
			return -ENOMEM;

		ret = __hw_ppgtt_init(dev, ppgtt);
		if (ret) {
			ppgtt->base.cleanup(&ppgtt->base);
			kfree(ppgtt);
			return ret;
		}

		if (ppgtt->base.allocate_va_range)
			ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
							    ppgtt->base.total);
		if (ret) {
			ppgtt->base.cleanup(&ppgtt->base);
			kfree(ppgtt);
			return ret;
		}

		ppgtt->base.clear_range(&ppgtt->base,
					ppgtt->base.start,
					ppgtt->base.total,
					true);

		dev_priv->mm.aliasing_ppgtt = ppgtt;
		WARN_ON(dev_priv->gtt.base.bind_vma != ggtt_bind_vma);
		dev_priv->gtt.base.bind_vma = aliasing_gtt_bind_vma;
	}

	return 0;
}

void i915_gem_init_global_gtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u64 gtt_size, mappable_size;

	gtt_size = dev_priv->gtt.base.total;
	mappable_size = dev_priv->gtt.mappable_end;

	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
}

void i915_global_gtt_cleanup(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_address_space *vm = &dev_priv->gtt.base;

	if (dev_priv->mm.aliasing_ppgtt) {
		struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;

		ppgtt->base.cleanup(&ppgtt->base);
	}

	if (drm_mm_initialized(&vm->mm)) {
		if (intel_vgpu_active(dev))
			intel_vgt_deballoon();

		drm_mm_takedown(&vm->mm);
		list_del(&vm->global_link);
	}

	vm->cleanup(vm);
}

static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
	return snb_gmch_ctl << 20;
}

static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
	if (bdw_gmch_ctl)
		bdw_gmch_ctl = 1 << bdw_gmch_ctl;

#ifdef CONFIG_X86_32
	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
	if (bdw_gmch_ctl > 4)
		bdw_gmch_ctl = 4;
#endif

	return bdw_gmch_ctl << 20;
}

static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
{
	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
	gmch_ctrl &= SNB_GMCH_GGMS_MASK;

	if (gmch_ctrl)
		return 1 << (20 + gmch_ctrl);

	return 0;
}

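/* Decode example (added note): on gen6 a GGMS field of 2 means 2MB of GTT
 * entries (2 << 20 bytes); at 4 bytes per PTE that is 512K entries, i.e. a
 * 2GB GGTT. gen8 stores a log2 value instead: a field of 3 gives
 * 1 << 3 = 8MB of 8 byte PTEs, i.e. 1M entries mapping 4GB. */
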
static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
	return snb_gmch_ctl << 25; /* 32 MB units */
}

static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
	return bdw_gmch_ctl << 25; /* 32 MB units */
}

static size_t chv_get_stolen_size(u16 gmch_ctrl)
{
	gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
	gmch_ctrl &= SNB_GMCH_GMS_MASK;

	/*
	 * 0x0 to 0x10: 32MB increments starting at 0MB
	 * 0x11 to 0x16: 4MB increments starting at 8MB
	 * 0x17 to 0x1d: 4MB increments starting at 36MB
	 */
	if (gmch_ctrl < 0x11)
		return gmch_ctrl << 25;
	else if (gmch_ctrl < 0x17)
		return (gmch_ctrl - 0x11 + 2) << 22;
	else
		return (gmch_ctrl - 0x17 + 9) << 22;
}

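/* Worked examples for the table above (added note): gmch_ctrl == 0x10 gives
 * 0x10 << 25 = 512MB; 0x11 gives (0 + 2) << 22 = 8MB; 0x12 gives 12MB;
 * 0x17 gives (0 + 9) << 22 = 36MB. */
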
static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
{
	gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
	gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;

	if (gen9_gmch_ctl < 0xf0)
		return gen9_gmch_ctl << 25; /* 32 MB units */
	else
		/* 4MB increments starting at 0xf0 for 4MB */
		return (gen9_gmch_ctl - 0xf0 + 1) << 22;
}

static int ggtt_probe_common(struct drm_device *dev,
			     size_t gtt_size)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_page_scratch *scratch_page;
	phys_addr_t gtt_phys_addr;

	/* For Modern GENs the PTEs and register space are split in the BAR */
	gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
		(pci_resource_len(dev->pdev, 0) / 2);

	/*
	 * On BXT writes larger than 64 bit to the GTT pagetable range will be
	 * dropped. For WC mappings in general we have 64 byte burst writes
	 * when the WC buffer is flushed, so we can't use it, but have to
	 * resort to an uncached mapping. The WC issue is easily caught by the
	 * readback check when writing GTT PTE entries.
	 */
	if (IS_BROXTON(dev))
		dev_priv->gtt.gsm = ioremap_nocache(gtt_phys_addr, gtt_size);
	else
		dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
	if (!dev_priv->gtt.gsm) {
		DRM_ERROR("Failed to map the gtt page table\n");
		return -ENOMEM;
	}

	scratch_page = alloc_scratch_page(dev);
	if (IS_ERR(scratch_page)) {
		DRM_ERROR("Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
		iounmap(dev_priv->gtt.gsm);
		return PTR_ERR(scratch_page);
	}

	dev_priv->gtt.base.scratch_page = scratch_page;

	return 0;
}

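/*
 * Illustrative note (not in the original source): the GSM (the in-BAR PTE
 * array) occupies the upper half of BAR 0. For example, with an 8MB BAR 0
 * the register space spans the first 4MB and gtt_phys_addr points at the
 * 4MB mark, where gtt_size bytes of PTEs are then mapped.
 */
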
/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases. */
static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
{
	uint64_t pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
	      GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	if (!USES_PPGTT(dev_priv->dev))
		/* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
		 * so RTL will always use the value corresponding to
		 * pat_sel = 000".
		 * So let's disable cache for GGTT to avoid screen corruptions.
		 * MOCS still can be used though.
		 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
		 *   before this patch, i.e. the same uncached + snooping access
		 *   like on gen6/7 seems to be in effect.
		 * - So this just fixes blitter/render access. Again it looks
		 *   like it's not just uncached access, but uncached + snooping.
		 *   So we can still hold onto all our assumptions wrt cpu
		 *   clflushing on LLC machines.
		 */
		pat = GEN8_PPAT(0, GEN8_PPAT_UC);

	/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
	 * write would work. */
	I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
	I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
}

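/*
 * Illustrative note (not in the original source): the PAT is packed as eight
 * 8-bit entries in one 64-bit value, with GEN8_PPAT(i, x) placing entry x at
 * bit position i * 8. A PTE's cache-attribute bits then select one of the
 * eight entries, e.g. index 3 above yields an uncached mapping.
 */
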
static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
{
	uint64_t pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */
	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
	I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
}

static int gen8_gmch_probe(struct drm_device *dev,
			   u64 *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   u64 *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u64 gtt_size;
	u16 snb_gmch_ctl;
	int ret;

	/* TODO: We're not aware of mappable constraints on gen8 yet */
	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);

	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));

	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	if (INTEL_INFO(dev)->gen >= 9) {
		*stolen = gen9_get_stolen_size(snb_gmch_ctl);
		gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
	} else if (IS_CHERRYVIEW(dev)) {
		*stolen = chv_get_stolen_size(snb_gmch_ctl);
		gtt_size = chv_get_total_gtt_size(snb_gmch_ctl);
	} else {
		*stolen = gen8_get_stolen_size(snb_gmch_ctl);
		gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
	}

	*gtt_total = (gtt_size / sizeof(gen8_pte_t)) << PAGE_SHIFT;

	if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
		chv_setup_private_ppat(dev_priv);
	else
		bdw_setup_private_ppat(dev_priv);

	ret = ggtt_probe_common(dev, gtt_size);

	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;

	if (IS_CHERRYVIEW(dev_priv))
		dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries__BKL;

	return ret;
}

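/*
 * Illustrative worked example (not in the original source): with a gtt_size
 * of 8MB and 8-byte gen8 PTEs, *gtt_total becomes (8MB / 8) << PAGE_SHIFT =
 * 1Mi * 4KB = 4GB of GGTT address space.
 */
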
static int gen6_gmch_probe(struct drm_device *dev,
			   u64 *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   u64 *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned int gtt_size;
	u16 snb_gmch_ctl;
	int ret;

	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);

	/* 64/512MB is the current min/max we actually know of, but this is just
	 * a coarse sanity check.
	 */
	if (*mappable_end < (64 << 20) || *mappable_end > (512 << 20)) {
		DRM_ERROR("Unknown GMADR size (%llx)\n",
			  dev_priv->gtt.mappable_end);
		return -ENXIO;
	}

	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	*stolen = gen6_get_stolen_size(snb_gmch_ctl);

	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
	*gtt_total = (gtt_size / sizeof(gen6_pte_t)) << PAGE_SHIFT;

	ret = ggtt_probe_common(dev, gtt_size);

	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;

	return ret;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{
	struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);

	iounmap(gtt->gsm);
	free_scratch_page(vm->dev, vm->scratch_page);
}

static int i915_gmch_probe(struct drm_device *dev,
			   u64 *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   u64 *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
	if (!ret) {
		DRM_ERROR("failed to set up gmch\n");
		return -EIO;
	}

	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);

	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
	dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;

	if (unlikely(dev_priv->gtt.do_idle_maps))
		DRM_INFO("applying Ironlake quirks for intel_iommu\n");

	return 0;
}

static void i915_gmch_remove(struct i915_address_space *vm)
{
//	intel_gmch_remove();
}

int i915_gem_gtt_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_gtt *gtt = &dev_priv->gtt;
	int ret;

	if (INTEL_INFO(dev)->gen <= 5) {
		gtt->gtt_probe = i915_gmch_probe;
		gtt->base.cleanup = i915_gmch_remove;
	} else if (INTEL_INFO(dev)->gen < 8) {
		gtt->gtt_probe = gen6_gmch_probe;
		gtt->base.cleanup = gen6_gmch_remove;
		if (IS_HASWELL(dev) && dev_priv->ellc_size)
			gtt->base.pte_encode = iris_pte_encode;
		else if (IS_HASWELL(dev))
			gtt->base.pte_encode = hsw_pte_encode;
		else if (IS_VALLEYVIEW(dev))
			gtt->base.pte_encode = byt_pte_encode;
		else if (INTEL_INFO(dev)->gen >= 7)
			gtt->base.pte_encode = ivb_pte_encode;
		else
			gtt->base.pte_encode = snb_pte_encode;
	} else {
		dev_priv->gtt.gtt_probe = gen8_gmch_probe;
		dev_priv->gtt.base.cleanup = gen6_gmch_remove;
	}

	gtt->base.dev = dev;

	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
			     &gtt->mappable_base, &gtt->mappable_end);
	if (ret)
		return ret;

	/* GMADR is the PCI mmio aperture into the global GTT. */
	DRM_INFO("Memory usable by graphics device = %lluM\n",
		 gtt->base.total >> 20);
	DRM_DEBUG_DRIVER("GMADR size = %lldM\n", gtt->mappable_end >> 20);
	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
#ifdef CONFIG_INTEL_IOMMU
	if (intel_iommu_gfx_mapped)
		DRM_INFO("VT-d active for gfx access\n");
#endif
	/*
	 * i915.enable_ppgtt is read-only, so do an early pass to validate the
	 * user's requested state against the hardware/driver capabilities. We
	 * do this now so that we can print out any log messages once rather
	 * than every time we check intel_enable_ppgtt().
	 */
	i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
	DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);

	return 0;
}

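/*
 * Illustrative note (not in the original source): pte_encode is the only
 * per-platform hook picked here for gen6/7, and the selection reads
 * directly from the code above: Haswell parts with a nonzero ellc_size get
 * the eLLC-aware iris_pte_encode, plain Haswell gets hsw_pte_encode,
 * Valleyview gets byt_pte_encode (presumably encoding snoop state, as VLV
 * has no LLC), and the remaining gen7/gen6 parts fall back to the ivb/snb
 * variants.
 */
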
void i915_gem_restore_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	struct i915_address_space *vm;
	struct i915_vma *vma;
	bool flush;

	i915_check_and_clear_faults(dev);

	/* First fill our portion of the GTT with scratch pages */
	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
				       dev_priv->gtt.base.start,
				       dev_priv->gtt.base.total,
				       true);

	/* Cache flush objects bound into GGTT and rebind them. */
	vm = &dev_priv->gtt.base;
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		flush = false;
		list_for_each_entry(vma, &obj->vma_list, vma_link) {
			if (vma->vm != vm)
				continue;

			WARN_ON(i915_vma_bind(vma, obj->cache_level,
					      PIN_UPDATE));

			flush = true;
		}

		if (flush)
			i915_gem_clflush_object(obj, obj->pin_display);
	}

	if (INTEL_INFO(dev)->gen >= 8) {
		if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
			chv_setup_private_ppat(dev_priv);
		else
			bdw_setup_private_ppat(dev_priv);

		return;
	}

	if (USES_PPGTT(dev)) {
		list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
			/* TODO: Perhaps it shouldn't be gen6 specific */

			struct i915_hw_ppgtt *ppgtt =
					container_of(vm, struct i915_hw_ppgtt,
						     base);

			if (i915_is_ggtt(vm))
				ppgtt = dev_priv->mm.aliasing_ppgtt;

			gen6_write_page_range(dev_priv, &ppgtt->pd,
					      0, ppgtt->base.total);
		}
	}

	i915_ggtt_flush(dev_priv);
}

static struct i915_vma *
__i915_gem_vma_create(struct drm_i915_gem_object *obj,
		      struct i915_address_space *vm,
		      const struct i915_ggtt_view *ggtt_view)
{
	struct i915_vma *vma;

	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
		return ERR_PTR(-EINVAL);

//	vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
	vma = kzalloc(sizeof(*vma), GFP_KERNEL);
	if (vma == NULL)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&vma->vma_link);
	INIT_LIST_HEAD(&vma->mm_list);
	INIT_LIST_HEAD(&vma->exec_list);
	vma->vm = vm;
	vma->obj = obj;

	if (i915_is_ggtt(vm))
		vma->ggtt_view = *ggtt_view;

	list_add_tail(&vma->vma_link, &obj->vma_list);
	if (!i915_is_ggtt(vm))
		i915_ppgtt_get(i915_vm_to_ppgtt(vm));

	return vma;
}

struct i915_vma *
i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
				  struct i915_address_space *vm)
{
	struct i915_vma *vma;

	vma = i915_gem_obj_to_vma(obj, vm);
	if (!vma)
		vma = __i915_gem_vma_create(obj, vm,
					    i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL);

	return vma;
}

struct i915_vma *
i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
				       const struct i915_ggtt_view *view)
{
	struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
	struct i915_vma *vma;

	if (WARN_ON(!view))
		return ERR_PTR(-EINVAL);

	vma = i915_gem_obj_to_ggtt_view(obj, view);

	if (IS_ERR(vma))
		return vma;

	if (!vma)
		vma = __i915_gem_vma_create(obj, ggtt, view);

	return vma;
}

static struct scatterlist *
rotate_pages(dma_addr_t *in, unsigned int offset,
	     unsigned int width, unsigned int height,
	     struct sg_table *st, struct scatterlist *sg)
{
	unsigned int column, row;
	unsigned int src_idx;

	if (!sg) {
		st->nents = 0;
		sg = st->sgl;
	}

	for (column = 0; column < width; column++) {
		src_idx = width * (height - 1) + column;
		for (row = 0; row < height; row++) {
			st->nents++;
			/* We don't need the pages, but need to initialize
			 * the entries so the sg list can be happily traversed.
			 * The only thing we need are DMA addresses.
			 */
			sg_set_page(sg, NULL, PAGE_SIZE, 0);
			sg_dma_address(sg) = in[offset + src_idx];
			sg_dma_len(sg) = PAGE_SIZE;
			sg = sg_next(sg);
			src_idx -= width;
		}
	}

	return sg;
}

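/*
 * Illustrative worked example (not in the original source): for a 2x2 page
 * source laid out as
 *
 *	0 1
 *	2 3
 *
 * the loops emit pages in the order 2, 0, 3, 1 - each output run walks one
 * source column bottom-to-top, so read back as a 2x2 grid the result is
 *
 *	2 0
 *	3 1
 *
 * which is the original page grid rotated by 90 degrees.
 */
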
static struct sg_table *
intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view,
			  struct drm_i915_gem_object *obj)
{
	struct intel_rotation_info *rot_info = &ggtt_view->params.rotation_info;
	unsigned int size_pages = rot_info->size >> PAGE_SHIFT;
	unsigned int size_pages_uv;
	struct sg_page_iter sg_iter;
	unsigned long i;
	dma_addr_t *page_addr_list;
	struct sg_table *st;
	unsigned int uv_start_page;
	struct scatterlist *sg;
	int ret = -ENOMEM;

	/* Allocate a temporary list of source pages for random access. */
	page_addr_list = drm_malloc_ab(obj->base.size / PAGE_SIZE,
				       sizeof(dma_addr_t));
	if (!page_addr_list)
		return ERR_PTR(ret);

	/* Account for UV plane with NV12. */
	if (rot_info->pixel_format == DRM_FORMAT_NV12)
		size_pages_uv = rot_info->size_uv >> PAGE_SHIFT;
	else
		size_pages_uv = 0;

	/* Allocate target SG list. */
	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	/* Populate source page list from the object. */
	i = 0;
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
		page_addr_list[i] = sg_page_iter_dma_address(&sg_iter);
		i++;
	}

	/* Rotate the pages. */
	sg = rotate_pages(page_addr_list, 0,
			  rot_info->width_pages, rot_info->height_pages,
			  st, NULL);

	/* Append the UV plane if NV12. */
	if (rot_info->pixel_format == DRM_FORMAT_NV12) {
		uv_start_page = size_pages;

		/* Check for tile-row un-alignment. */
		if (offset_in_page(rot_info->uv_offset))
			uv_start_page--;

		rot_info->uv_start_page = uv_start_page;

		rotate_pages(page_addr_list, uv_start_page,
			     rot_info->width_pages_uv,
			     rot_info->height_pages_uv,
			     st, sg);
	}

	DRM_DEBUG_KMS(
		      "Created rotated page mapping for object size %zu (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0)).\n",
		      obj->base.size, rot_info->pitch, rot_info->height,
		      rot_info->pixel_format, rot_info->width_pages,
		      rot_info->height_pages, size_pages + size_pages_uv,
		      size_pages);

	drm_free_large(page_addr_list);

	return st;

err_sg_alloc:
	kfree(st);
err_st_alloc:
	drm_free_large(page_addr_list);

	DRM_DEBUG_KMS(
		      "Failed to create rotated mapping for object size %zu! (%d) (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0))\n",
		      obj->base.size, ret, rot_info->pitch, rot_info->height,
		      rot_info->pixel_format, rot_info->width_pages,
		      rot_info->height_pages, size_pages + size_pages_uv,
		      size_pages);
	return ERR_PTR(ret);
}

static struct sg_table *
intel_partial_pages(const struct i915_ggtt_view *view,
		    struct drm_i915_gem_object *obj)
{
	struct sg_table *st;
	struct scatterlist *sg;
	struct sg_page_iter obj_sg_iter;
	int ret = -ENOMEM;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	sg = st->sgl;
	st->nents = 0;
	for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
			 view->params.partial.offset) {
		if (st->nents >= view->params.partial.size)
			break;

		sg_set_page(sg, NULL, PAGE_SIZE, 0);
		sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
		sg_dma_len(sg) = PAGE_SIZE;

		sg = sg_next(sg);
		st->nents++;
	}

	return st;

err_sg_alloc:
	kfree(st);
err_st_alloc:
	return ERR_PTR(ret);
}

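/*
 * Illustrative note (not in the original source): partial.offset and
 * partial.size are in pages, so a view with offset 4 and size 2 maps only
 * pages 4 and 5 of the object, giving an 8KB GGTT binding regardless of the
 * full object size (compare i915_ggtt_view_size() below, which shifts
 * partial.size by PAGE_SHIFT).
 */
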
static int
i915_get_ggtt_vma_pages(struct i915_vma *vma)
{
	int ret = 0;

	if (vma->ggtt_view.pages)
		return 0;

	if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
		vma->ggtt_view.pages = vma->obj->pages;
	else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
		vma->ggtt_view.pages =
			intel_rotate_fb_obj_pages(&vma->ggtt_view, vma->obj);
	else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
		vma->ggtt_view.pages =
			intel_partial_pages(&vma->ggtt_view, vma->obj);
	else
		WARN_ONCE(1, "GGTT view %u not implemented!\n",
			  vma->ggtt_view.type);

	if (!vma->ggtt_view.pages) {
		DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
			  vma->ggtt_view.type);
		ret = -EINVAL;
	} else if (IS_ERR(vma->ggtt_view.pages)) {
		ret = PTR_ERR(vma->ggtt_view.pages);
		vma->ggtt_view.pages = NULL;
		DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
			  vma->ggtt_view.type, ret);
	}

	return ret;
}

/**
 * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
 * @vma: VMA to map
 * @cache_level: mapping cache level
 * @flags: flags like global or local mapping
 *
 * DMA addresses are taken from the scatter-gather table of this object (or of
 * this VMA in case of non-default GGTT views) and PTE entries set up.
 * Note that DMA addresses are also the only part of the SG table we care about.
 */
int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
		  u32 flags)
{
	int ret;
	u32 bind_flags;

	if (WARN_ON(flags == 0))
		return -EINVAL;

	bind_flags = 0;
	if (flags & PIN_GLOBAL)
		bind_flags |= GLOBAL_BIND;
	if (flags & PIN_USER)
		bind_flags |= LOCAL_BIND;

	if (flags & PIN_UPDATE)
		bind_flags |= vma->bound;
	else
		bind_flags &= ~vma->bound;

	if (bind_flags == 0)
		return 0;

	if (vma->bound == 0 && vma->vm->allocate_va_range) {
		trace_i915_va_alloc(vma->vm,
				    vma->node.start,
				    vma->node.size,
				    VM_TO_TRACE_NAME(vma->vm));

		/* XXX: i915_vma_pin() will fix this +- hack */
		vma->pin_count++;
		ret = vma->vm->allocate_va_range(vma->vm,
						 vma->node.start,
						 vma->node.size);
		vma->pin_count--;
		if (ret)
			return ret;
	}

	ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
	if (ret)
		return ret;

	vma->bound |= bind_flags;

	return 0;
}

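/*
 * Illustrative note (not in the original source): without PIN_UPDATE the
 * masking step drops flags that are already bound, so calling with
 * PIN_GLOBAL on a VMA whose bound field already contains GLOBAL_BIND yields
 * bind_flags == 0 and an early return - rebinding is only forced via
 * PIN_UPDATE, which is exactly what i915_gem_restore_gtt_mappings() above
 * relies on.
 */
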
/**
 * i915_ggtt_view_size - Get the size of a GGTT view.
 * @obj: Object the view is of.
 * @view: The view in question.
 *
 * @return The size of the GGTT view in bytes.
 */
size_t
i915_ggtt_view_size(struct drm_i915_gem_object *obj,
		    const struct i915_ggtt_view *view)
{
	if (view->type == I915_GGTT_VIEW_NORMAL) {
		return obj->base.size;
	} else if (view->type == I915_GGTT_VIEW_ROTATED) {
		return view->params.rotation_info.size;
	} else if (view->type == I915_GGTT_VIEW_PARTIAL) {
		return view->params.partial.size << PAGE_SHIFT;
	} else {
		WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type);
		return obj->base.size;
	}
}