Rev 6935 → Rev 6937 (drm/i915 execbuffer: soft-pinning API and canonical GPU addresses)
@@ -247,29 +247,54 @@
 	return (HAS_LLC(obj->base.dev) ||
 		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
 		obj->cache_level != I915_CACHE_NONE);
 }
+
+/* Used to convert any address to canonical form.
+ * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS,
+ * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the
+ * addresses to be in a canonical form:
+ * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
+ * canonical form [63:48] == [47]."
+ */
+#define GEN8_HIGH_ADDRESS_BIT 47
+static inline uint64_t gen8_canonical_addr(uint64_t address)
+{
+	return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
+}
+
+static inline uint64_t gen8_noncanonical_addr(uint64_t address)
+{
+	return address & ((1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1);
+}
+
+static inline uint64_t
+relocation_target(struct drm_i915_gem_relocation_entry *reloc,
+		  uint64_t target_offset)
+{
+	return gen8_canonical_addr((int)reloc->delta + target_offset);
+}
 
 static int
 relocate_entry_cpu(struct drm_i915_gem_object *obj,
 		   struct drm_i915_gem_relocation_entry *reloc,
 		   uint64_t target_offset)
 {
 	struct drm_device *dev = obj->base.dev;
 	uint32_t page_offset = offset_in_page(reloc->offset);
-	uint64_t delta = reloc->delta + target_offset;
+	uint64_t delta = relocation_target(reloc, target_offset);
 	char *vaddr;
 	int ret;
 
 	ret = i915_gem_object_set_to_cpu_domain(obj, true);
 	if (ret)
 		return ret;
 
-	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
+	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
 				reloc->offset >> PAGE_SHIFT));
 	*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
 
 	if (INTEL_INFO(dev)->gen >= 8) {
 		page_offset = offset_in_page(page_offset + sizeof(uint32_t));
 
 		if (page_offset == 0) {
 			kunmap_atomic(vaddr);
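
The helpers added above are plain 48-bit sign extension and truncation. As a quick sanity check, here is a minimal user-space sketch of the round trip, with sign_extend64() (a kernel-internal helper) reimplemented locally; the values illustrate the PRM rule that bits [63:48] must mirror bit 47:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define GEN8_HIGH_ADDRESS_BIT 47

/* Local equivalent of the kernel's sign_extend64(value, index). */
static uint64_t sign_extend64(uint64_t value, int index)
{
	int shift = 63 - index;
	return (uint64_t)((int64_t)(value << shift) >> shift);
}

static uint64_t gen8_canonical_addr(uint64_t address)
{
	return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);
}

static uint64_t gen8_noncanonical_addr(uint64_t address)
{
	return address & ((1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1);
}

int main(void)
{
	/* Bit 47 clear: the canonical form is the address itself. */
	assert(gen8_canonical_addr(0x00007fffffff1000ULL) ==
	       0x00007fffffff1000ULL);

	/* Bit 47 set: [63:48] are filled with copies of bit 47. */
	assert(gen8_canonical_addr(0x0000800000000000ULL) ==
	       0xffff800000000000ULL);

	/* Truncation undoes the extension. */
	assert(gen8_noncanonical_addr(0xffff800000000000ULL) ==
	       0x0000800000000000ULL);

	printf("canonical-address round trip holds\n");
	return 0;
}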
@@ -290,10 +315,10 @@
 		   struct drm_i915_gem_relocation_entry *reloc,
 		   uint64_t target_offset)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint64_t delta = reloc->delta + target_offset;
+	uint64_t delta = relocation_target(reloc, target_offset);
 	uint64_t offset;
 	void __iomem *reloc_page;
 	int ret;
 
@@ -332,23 +357,23 @@
 		      struct drm_i915_gem_relocation_entry *reloc,
 		      uint64_t target_offset)
 {
 	struct drm_device *dev = obj->base.dev;
 	uint32_t page_offset = offset_in_page(reloc->offset);
-	uint64_t delta = (int)reloc->delta + target_offset;
+	uint64_t delta = relocation_target(reloc, target_offset);
 	char *vaddr;
 	int ret;
 
 	ret = i915_gem_object_set_to_gtt_domain(obj, true);
 	if (ret)
 		return ret;
 
-	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
+	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
 				reloc->offset >> PAGE_SHIFT));
 	clflush_write32(vaddr + page_offset, lower_32_bits(delta));
 
 	if (INTEL_INFO(dev)->gen >= 8) {
 		page_offset = offset_in_page(page_offset + sizeof(uint32_t));
 
 		if (page_offset == 0) {
 			kunmap_atomic(vaddr);
-			vaddr = kmap_atomic(i915_gem_object_get_page(obj,
+			vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
@@ -380,9 +405,9 @@
 	if (unlikely(target_vma == NULL))
 		return -ENOENT;
 	target_i915_obj = target_vma->obj;
 	target_obj = &target_vma->obj->base;
 
-	target_offset = target_vma->node.start;
+	target_offset = gen8_canonical_addr(target_vma->node.start);
 
 	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
 	 * pipe_control writes because the gpu doesn't properly redirect them
@@ -581,9 +606,11 @@
 		flags |= PIN_ZONE_4G;
 	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
 		flags |= PIN_GLOBAL | PIN_MAPPABLE;
 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
 		flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+	if (entry->flags & EXEC_OBJECT_PINNED)
+		flags |= entry->offset | PIN_OFFSET_FIXED;
 	if ((flags & PIN_MAPPABLE) == 0)
 		flags |= PIN_HIGH;
 }
 
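OR-ing entry->offset directly into flags works because a soft-pinned offset is validated as page-aligned before it gets here, leaving the low bits of the word free to carry the PIN_* request flags. A hedged sketch of the packing idea (the flag value and mask below are illustrative stand-ins, not the driver's actual PIN_* layout from i915_drv.h):

#include <assert.h>
#include <stdint.h>

/* Illustrative values only; the real PIN_* bits live in i915_drv.h. */
#define PIN_OFFSET_FIXED (1ULL << 7)
#define PIN_OFFSET_MASK  (~4095ULL)

int main(void)
{
	uint64_t offset = 0x10000;	/* page-aligned, validated earlier */
	uint64_t flags = 0;

	flags |= offset | PIN_OFFSET_FIXED;

	/* The two halves can be split apart again without ambiguity. */
	assert((flags & PIN_OFFSET_MASK) == offset);
	assert(flags & PIN_OFFSET_FIXED);
	return 0;
}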
@@ -652,7 +679,11 @@
 
 	if (entry->alignment &&
 	    vma->node.start & (entry->alignment - 1))
+		return true;
+
+	if (entry->flags & EXEC_OBJECT_PINNED &&
+	    vma->node.start != entry->offset)
 		return true;
 
 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
 	    vma->node.start < BATCH_OFFSET_BIAS)
@@ -677,14 +708,16 @@
 {
 	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	struct i915_address_space *vm;
 	struct list_head ordered_vmas;
+	struct list_head pinned_vmas;
 	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
 	int retry;
 
 	i915_gem_retire_requests_ring(ring);
 
 	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
 
 	INIT_LIST_HEAD(&ordered_vmas);
+	INIT_LIST_HEAD(&pinned_vmas);
 	while (!list_empty(vmas)) {
@@ -703,17 +736,20 @@
 		need_fence =
 			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
 			obj->tiling_mode != I915_TILING_NONE;
 		need_mappable = need_fence || need_reloc_mappable(vma);
 
-		if (need_mappable) {
+		if (entry->flags & EXEC_OBJECT_PINNED)
+			list_move_tail(&vma->exec_list, &pinned_vmas);
+		else if (need_mappable) {
 			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
 			list_move(&vma->exec_list, &ordered_vmas);
 		} else
 			list_move_tail(&vma->exec_list, &ordered_vmas);
 
 		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
 		obj->base.pending_write_domain = 0;
 	}
 	list_splice(&ordered_vmas, vmas);
+	list_splice(&pinned_vmas, vmas);
 
 	/* Attempt to pin all of the buffers into the GTT.
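Ordering detail worth noting: list_splice() inserts at the head of the destination, so splicing pinned_vmas after ordered_vmas leaves the pinned entries first in vmas, and buffers with fixed addresses get reserved before any floating buffer can take their range. A minimal user-space model of that head-splice behaviour (reimplementing just enough of the kernel list API to show the order; not the kernel code itself):

#include <stdio.h>

/* Tiny model of <linux/list.h>: enough to demonstrate splice order. */
struct list_head { struct list_head *prev, *next; };

static void INIT_LIST_HEAD(struct list_head *h) { h->prev = h->next = h; }

static void list_add_tail(struct list_head *n, struct list_head *h)
{
	n->prev = h->prev; n->next = h;
	h->prev->next = n; h->prev = n;
}

/* Insert every entry of @list at the head of @head, like list_splice(). */
static void list_splice(struct list_head *list, struct list_head *head)
{
	struct list_head *first = list->next, *last = list->prev;

	if (first == list)	/* empty source list */
		return;
	first->prev = head; last->next = head->next;
	head->next->prev = last; head->next = first;
}

struct vma { struct list_head link; const char *name; };

int main(void)
{
	struct list_head vmas, ordered, pinned;
	struct vma a = { .name = "ordered-vma" }, b = { .name = "pinned-vma" };

	INIT_LIST_HEAD(&vmas); INIT_LIST_HEAD(&ordered); INIT_LIST_HEAD(&pinned);
	list_add_tail(&a.link, &ordered);
	list_add_tail(&b.link, &pinned);

	list_splice(&ordered, &vmas);	/* vmas: ordered-vma */
	list_splice(&pinned, &vmas);	/* vmas: pinned-vma, ordered-vma */

	for (struct list_head *p = vmas.next; p != &vmas; p = p->next)
		printf("%s\n", ((struct vma *)p)->name);
	return 0;
}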
@@ -965,7 +1001,22 @@
 		int length; /* limited by fault_in_pages_readable() */
 
 		if (exec[i].flags & invalid_flags)
 			return -EINVAL;
 
+		/* Offset can be used as input (EXEC_OBJECT_PINNED), reject
+		 * any non-page-aligned or non-canonical addresses.
+		 */
+		if (exec[i].flags & EXEC_OBJECT_PINNED) {
+			if (exec[i].offset !=
+			    gen8_canonical_addr(exec[i].offset & PAGE_MASK))
+				return -EINVAL;
+
+			/* From drm_mm perspective address space is continuous,
+			 * so from this point we're always using non-canonical
+			 * form internally.
+			 */
+			exec[i].offset = gen8_noncanonical_addr(exec[i].offset);
+		}
+
 		if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
 			return -EINVAL;
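The single comparison above rejects bad alignment and non-canonical addresses at once: masking with PAGE_MASK strips the page-offset bits and gen8_canonical_addr() rewrites [63:48], so only an offset that was already page-aligned and canonical survives unchanged. A few sample values, reusing gen8_canonical_addr() from the first sketch (PAGE_MASK assumed to be ~4095ULL for 4 KiB pages):

/* Reuses gen8_canonical_addr() from the round-trip sketch above. */
#define PAGE_MASK (~4095ULL)

static int pinned_offset_valid(uint64_t offset)
{
	return offset == gen8_canonical_addr(offset & PAGE_MASK);
}

/*
 * pinned_offset_valid(0x0000000000010000) -> 1  (aligned, canonical)
 * pinned_offset_valid(0x0000000000010004) -> 0  (not page-aligned)
 * pinned_offset_valid(0x0000800000000000) -> 0  (bit 47 set, [63:48] clear)
 * pinned_offset_valid(0xffff800000000000) -> 1  (canonical high address)
 */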
@@ -1089,8 +1140,8 @@
 	if (ret)
 		return ret;
 
 	for (i = 0; i < 4; i++) {
 		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
+		intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i));
 		intel_ring_emit(ring, 0);
 	}
@@ -1216,9 +1267,9 @@
 	if (ret)
 		return ret;
 
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
-	intel_ring_emit(ring, INSTPM);
+	intel_ring_emit_reg(ring, INSTPM);
 	intel_ring_emit(ring, instp_mask << 16 | instp_mode);
 	intel_ring_advance(ring);
 
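This hunk and the GEN7_SO_WRITE_OFFSET one above swap intel_ring_emit() for intel_ring_emit_reg() where a register is being emitted, following the driver's move to a typed register handle (i915_reg_t) that cannot be silently confused with a plain 32-bit value. The pattern, in a minimal hedged sketch (the names and the 0x20c0 offset are illustrative):

#include <stdint.h>

/* A register offset wrapped in a struct: no implicit conversion to or
 * from a plain integer. Mirrors the idea behind i915_reg_t. */
typedef struct { uint32_t reg; } example_reg_t;

#define _REG(offset) ((example_reg_t){ .reg = (offset) })
#define EXAMPLE_INSTPM _REG(0x20c0)

static void emit(uint32_t value) { (void)value; }

static void emit_reg(example_reg_t reg)
{
	emit(reg.reg);	/* unwrap exactly once, at the emit boundary */
}

int main(void)
{
	emit_reg(EXAMPLE_INSTPM);	/* ok */
	/* emit_reg(0x20c0);  <- would no longer compile */
	return 0;
}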
@@ -1292,8 +1343,9 @@
 	 * very low in the GTT. Ensure this doesn't happen.
 	 *
 	 * Note that actual hangs have only been observed on gen7, but for
 	 * paranoia do it everywhere.
 	 */
+	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
 	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
 
 	return vma->obj;
@@ -1652,9 +1704,11 @@
 	struct drm_i915_gem_exec_object __user *user_exec_list =
 			   to_user_ptr(args->buffers_ptr);
 
 	/* Copy the new buffer offsets back to the user's exec list. */
 	for (i = 0; i < args->buffer_count; i++) {
+		exec2_list[i].offset =
+			gen8_canonical_addr(exec2_list[i].offset);
 		ret = __copy_to_user(&user_exec_list[i].offset,
 				     &exec2_list[i].offset,
 				     sizeof(user_exec_list[i].offset));
 		if (ret) {
1716 | struct drm_i915_gem_exec_object2 __user *user_exec_list = |
1770 | struct drm_i915_gem_exec_object2 __user *user_exec_list = |
1717 | to_user_ptr(args->buffers_ptr); |
1771 | to_user_ptr(args->buffers_ptr); |
1718 | int i; |
1772 | int i; |
Line 1719... | Line 1773... | ||
1719 | 1773 | ||
- | 1774 | for (i = 0; i < args->buffer_count; i++) { |
|
- | 1775 | exec2_list[i].offset = |
|
1720 | for (i = 0; i < args->buffer_count; i++) { |
1776 | gen8_canonical_addr(exec2_list[i].offset); |
1721 | ret = __copy_to_user(&user_exec_list[i].offset, |
1777 | ret = __copy_to_user(&user_exec_list[i].offset, |
1722 | &exec2_list[i].offset, |
1778 | &exec2_list[i].offset, |
1723 | sizeof(user_exec_list[i].offset)); |
1779 | sizeof(user_exec_list[i].offset)); |
1724 | if (ret) { |
1780 | if (ret) { |
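
Taken together, these hunks expose soft-pinning to userspace: set EXEC_OBJECT_PINNED plus a page-aligned, canonical offset in the exec object, and the kernel places the buffer at exactly that address (or fails) instead of relocating it; on completion the offsets are copied back in canonical form, as the last two hunks show. A hedged user-space sketch against the execbuffer2 uAPI (error handling trimmed; the DRM fd, GEM handles, and batch contents are assumed to be set up elsewhere):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Soft-pin one buffer at a fixed GPU address and execute a batch.
 * Assumptions: fd is an open i915 DRM fd, the handles are valid GEM
 * objects, batch_len matches the batch contents, and the batch needs
 * no relocations precisely because its addresses are pinned. */
static int exec_softpinned(int fd, uint32_t data_handle,
			   uint32_t batch_handle, uint32_t batch_len)
{
	struct drm_i915_gem_exec_object2 objs[2];
	struct drm_i915_gem_execbuffer2 execbuf;

	memset(objs, 0, sizeof(objs));
	objs[0].handle = data_handle;
	objs[0].offset = 0x10000;	/* page-aligned and canonical */
	objs[0].flags = EXEC_OBJECT_PINNED;
	objs[1].handle = batch_handle;	/* batch object goes last */

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)objs;
	execbuf.buffer_count = 2;
	execbuf.batch_len = batch_len;

	return ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}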