Rev 6937
Rev | Author | Line No. | Line |
---|---|---|---|
3263 | Serge | 1 | /* |
2 | * Copyright © 2008,2010 Intel Corporation |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice (including the next |
12 | * paragraph) shall be included in all copies or substantial portions of the |
13 | * Software. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
21 | * IN THE SOFTWARE. |
22 | * |
23 | * Authors: |
24 | * Eric Anholt |
25 | * Chris Wilson |
26 | * |
27 | */ |
28 | |
29 | #include <drm/drmP.h> |
30 | #include <drm/i915_drm.h> |
31 | #include "i915_drv.h" |
32 | #include "i915_trace.h" |
33 | #include "intel_drv.h" |
5060 | serge | 34 | #include <linux/dma_remapping.h> |
6935 | serge | 35 | #include <linux/uaccess.h> |
3263 | Serge | 36 | |
4560 | Serge | 37 | #define __EXEC_OBJECT_HAS_PIN (1<<31) |
38 | #define __EXEC_OBJECT_HAS_FENCE (1<<30) |
||
5354 | serge | 39 | #define __EXEC_OBJECT_NEEDS_MAP (1<<29) |
5060 | serge | 40 | #define __EXEC_OBJECT_NEEDS_BIAS (1<<28) |
3263 | Serge | 41 | |
5060 | serge | 42 | #define BATCH_OFFSET_BIAS (256*1024) |
3263 | Serge | 43 | |
4560 | Serge | 44 | struct eb_vmas { |
45 | struct list_head vmas; |
||
3263 | Serge | 46 | int and; |
3480 | Serge | 47 | union { |
4560 | Serge | 48 | struct i915_vma *lut[0]; |
5060 | serge | 49 | struct hlist_head buckets[0]; |
3480 | Serge | 50 | }; |
3263 | Serge | 51 | }; |
52 | |||
4560 | Serge | 53 | static struct eb_vmas * |
3480 | Serge | 54 | eb_create(struct drm_i915_gem_execbuffer2 *args) |
3263 | Serge | 55 | { |
4560 | Serge | 56 | struct eb_vmas *eb = NULL; |
3480 | Serge | 57 | |
58 | if (args->flags & I915_EXEC_HANDLE_LUT) { |
||
4560 | Serge | 59 | unsigned size = args->buffer_count; |
60 | size *= sizeof(struct i915_vma *); |
||
61 | size += sizeof(struct eb_vmas); |
||
3480 | Serge | 62 | eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); |
63 | } |
||
64 | |||
65 | if (eb == NULL) { |
||
4560 | Serge | 66 | unsigned size = args->buffer_count; |
67 | unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2; |
||
3480 | Serge | 68 | BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head)); |
69 | while (count > 2*size) |
||
6084 | serge | 70 | count >>= 1; |
71 | eb = kzalloc(count*sizeof(struct hlist_head) + |
||
4560 | Serge | 72 | sizeof(struct eb_vmas), |
3480 | Serge | 73 | GFP_TEMPORARY); |
6084 | serge | 74 | if (eb == NULL) |
75 | return eb; |
||
3263 | Serge | 76 | |
6084 | serge | 77 | eb->and = count - 1; |
3480 | Serge | 78 | } else |
79 | eb->and = -args->buffer_count; |
||
80 | |||
4560 | Serge | 81 | INIT_LIST_HEAD(&eb->vmas); |
3263 | Serge | 82 | return eb; |
83 | } |
||
84 | |||
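eb_create() above picks the hash-table size from the buffer count: it starts at half a page worth of hlist heads and keeps halving (so the count stays a power of two) until it is at most twice the number of buffers; `eb->and` then becomes the hash mask. A minimal standalone sketch of just that sizing rule, assuming a 4 KiB page and an 8-byte list head purely for the demo:

```c
#include <stdio.h>

#define DEMO_PAGE_SIZE   4096u
#define DEMO_HLIST_SIZE  8u     /* stand-in for sizeof(struct hlist_head) on 64-bit */

static unsigned bucket_count(unsigned buffer_count)
{
	unsigned count = DEMO_PAGE_SIZE / DEMO_HLIST_SIZE / 2; /* 256, a power of two */

	while (count > 2 * buffer_count)
		count >>= 1;            /* shrink, staying a power of two */

	return count;                   /* eb->and is then count - 1, the hash mask */
}

int main(void)
{
	for (unsigned n = 1; n <= 512; n *= 8)
		printf("%3u buffers -> %3u buckets\n", n, bucket_count(n));
	return 0;
}
```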
85 | static void |
||
4560 | Serge | 86 | eb_reset(struct eb_vmas *eb) |
3263 | Serge | 87 | { |
3480 | Serge | 88 | if (eb->and >= 0) |
6084 | serge | 89 | memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head)); |
3263 | Serge | 90 | } |
91 | |||
3480 | Serge | 92 | static int |
4560 | Serge | 93 | eb_lookup_vmas(struct eb_vmas *eb, |
6084 | serge | 94 | struct drm_i915_gem_exec_object2 *exec, |
95 | const struct drm_i915_gem_execbuffer2 *args, |
||
4560 | Serge | 96 | struct i915_address_space *vm, |
6084 | serge | 97 | struct drm_file *file) |
3263 | Serge | 98 | { |
4560 | Serge | 99 | struct drm_i915_gem_object *obj; |
100 | struct list_head objects; |
||
101 | int i, ret; |
||
3480 | Serge | 102 | |
4560 | Serge | 103 | INIT_LIST_HEAD(&objects); |
3480 | Serge | 104 | spin_lock(&file->table_lock); |
4560 | Serge | 105 | /* Grab a reference to the object and release the lock so we can lookup |
106 | * or create the VMA without using GFP_ATOMIC */ |
||
3480 | Serge | 107 | for (i = 0; i < args->buffer_count; i++) { |
6084 | serge | 108 | obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle)); |
3480 | Serge | 109 | if (obj == NULL) { |
110 | spin_unlock(&file->table_lock); |
||
111 | DRM_DEBUG("Invalid object handle %d at index %d\n", |
||
112 | exec[i].handle, i); |
||
4560 | Serge | 113 | ret = -ENOENT; |
114 | goto err; |
||
3480 | Serge | 115 | } |
116 | |||
4560 | Serge | 117 | if (!list_empty(&obj->obj_exec_link)) { |
3480 | Serge | 118 | spin_unlock(&file->table_lock); |
119 | DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n", |
||
120 | obj, exec[i].handle, i); |
||
4560 | Serge | 121 | ret = -EINVAL; |
122 | goto err; |
||
3480 | Serge | 123 | } |
124 | |||
125 | drm_gem_object_reference(&obj->base); |
||
4560 | Serge | 126 | list_add_tail(&obj->obj_exec_link, &objects); |
127 | } |
||
128 | spin_unlock(&file->table_lock); |
||
3480 | Serge | 129 | |
4560 | Serge | 130 | i = 0; |
131 | while (!list_empty(&objects)) { |
||
132 | struct i915_vma *vma; |
||
133 | |||
134 | obj = list_first_entry(&objects, |
||
135 | struct drm_i915_gem_object, |
||
136 | obj_exec_link); |
||
137 | |||
138 | /* |
||
139 | * NOTE: We can leak any vmas created here when something fails |
||
140 | * later on. But that's no issue since vma_unbind can deal with |
||
141 | * vmas which are not actually bound. And since only |
||
142 | * lookup_or_create exists as an interface to get at the vma |
||
143 | * from the (obj, vm) we don't run the risk of creating |
||
144 | * duplicated vmas for the same vm. |
||
145 | */ |
||
5354 | serge | 146 | vma = i915_gem_obj_lookup_or_create_vma(obj, vm); |
4560 | Serge | 147 | if (IS_ERR(vma)) { |
148 | DRM_DEBUG("Failed to lookup VMA\n"); |
||
149 | ret = PTR_ERR(vma); |
||
150 | goto err; |
||
151 | } |
||
152 | |||
153 | /* Transfer ownership from the objects list to the vmas list. */ |
||
154 | list_add_tail(&vma->exec_list, &eb->vmas); |
||
155 | list_del_init(&obj->obj_exec_link); |
||
156 | |||
157 | vma->exec_entry = &exec[i]; |
||
3480 | Serge | 158 | if (eb->and < 0) { |
4560 | Serge | 159 | eb->lut[i] = vma; |
3480 | Serge | 160 | } else { |
161 | uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle; |
||
4560 | Serge | 162 | vma->exec_handle = handle; |
163 | hlist_add_head(&vma->exec_node, |
||
3480 | Serge | 164 | &eb->buckets[handle & eb->and]); |
165 | } |
||
4560 | Serge | 166 | ++i; |
3480 | Serge | 167 | } |
168 | |||
169 | return 0; |
||
4560 | Serge | 170 | |
171 | |||
172 | err: |
||
173 | while (!list_empty(&objects)) { |
||
174 | obj = list_first_entry(&objects, |
||
175 | struct drm_i915_gem_object, |
||
176 | obj_exec_link); |
||
177 | list_del_init(&obj->obj_exec_link); |
||
178 | drm_gem_object_unreference(&obj->base); |
||
179 | } |
||
180 | /* |
||
181 | * Objects already transfered to the vmas list will be unreferenced by |
||
182 | * eb_destroy. |
||
183 | */ |
||
184 | |||
185 | return ret; |
||
3263 | Serge | 186 | } |
187 | |||
4560 | Serge | 188 | static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle) |
3263 | Serge | 189 | { |
3480 | Serge | 190 | if (eb->and < 0) { |
191 | if (handle >= -eb->and) |
192 | return NULL; |
193 | return eb->lut[handle]; |
194 | } else { |
6084 | serge | 195 | struct hlist_head *head; |
7144 | serge | 196 | struct i915_vma *vma; |
3263 | Serge | 197 | |
6084 | serge | 198 | head = &eb->buckets[handle & eb->and]; |
7144 | serge | 199 | hlist_for_each_entry(vma, head, exec_node) { |
4560 | Serge | 200 | if (vma->exec_handle == handle) |
201 | return vma; |
6084 | serge | 202 | } |
203 | return NULL; |
3263 | Serge | 204 | } |
205 | } |
206 | |
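eb_get_vma() above supports two modes: with I915_EXEC_HANDLE_LUT the handle is a direct index into `lut[]` (signalled by a negative `eb->and`), otherwise the handle is masked into a hash bucket and the chain is walked. A minimal userspace sketch of the same lookup, with a plain singly linked list standing in for `hlist` and all names purely illustrative:

```c
#include <stdio.h>

#define NBUCKETS 8                         /* power of two, mirrors eb->and + 1 */

struct fake_vma { unsigned long handle; struct fake_vma *next; };

struct fake_eb {
	int and;                           /* < 0: direct LUT of -and entries; >= 0: bucket mask */
	struct fake_vma *lut[4];           /* used when and < 0 (handles are indices) */
	struct fake_vma *buckets[NBUCKETS];/* used when and >= 0 (handles are hashed) */
};

static struct fake_vma *fake_get_vma(struct fake_eb *eb, unsigned long handle)
{
	if (eb->and < 0) {
		if (handle >= (unsigned long)-eb->and)
			return NULL;
		return eb->lut[handle];    /* HANDLE_LUT mode: direct index */
	}
	for (struct fake_vma *v = eb->buckets[handle & eb->and]; v; v = v->next)
		if (v->handle == handle)   /* hash mode: mask, then walk the chain */
			return v;
	return NULL;
}

int main(void)
{
	struct fake_vma a = { .handle = 42 };
	struct fake_eb eb = { .and = NBUCKETS - 1 };
	eb.buckets[42 & eb.and] = &a;
	printf("%s\n", fake_get_vma(&eb, 42) == &a ? "hit" : "miss");
	return 0;
}
```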
207 | static void |
||
4560 | Serge | 208 | i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma) |
3263 | Serge | 209 | { |
4560 | Serge | 210 | struct drm_i915_gem_exec_object2 *entry; |
211 | struct drm_i915_gem_object *obj = vma->obj; |
||
3480 | Serge | 212 | |
4560 | Serge | 213 | if (!drm_mm_node_allocated(&vma->node)) |
214 | return; |
||
215 | |||
216 | entry = vma->exec_entry; |
||
217 | |||
218 | if (entry->flags & __EXEC_OBJECT_HAS_FENCE) |
||
219 | i915_gem_object_unpin_fence(obj); |
||
220 | |||
221 | if (entry->flags & __EXEC_OBJECT_HAS_PIN) |
||
5060 | serge | 222 | vma->pin_count--; |
4560 | Serge | 223 | |
224 | entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN); |
||
225 | } |
||
226 | |||
227 | static void eb_destroy(struct eb_vmas *eb) |
||
228 | { |
||
229 | while (!list_empty(&eb->vmas)) { |
||
230 | struct i915_vma *vma; |
||
231 | |||
232 | vma = list_first_entry(&eb->vmas, |
||
233 | struct i915_vma, |
||
3480 | Serge | 234 | exec_list); |
4560 | Serge | 235 | list_del_init(&vma->exec_list); |
236 | i915_gem_execbuffer_unreserve_vma(vma); |
||
237 | drm_gem_object_unreference(&vma->obj->base); |
||
3480 | Serge | 238 | } |
3263 | Serge | 239 | kfree(eb); |
240 | } |
||
241 | |||
242 | static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) |
||
243 | { |
||
4560 | Serge | 244 | return (HAS_LLC(obj->base.dev) || |
245 | obj->base.write_domain == I915_GEM_DOMAIN_CPU || |
||
3263 | Serge | 246 | obj->cache_level != I915_CACHE_NONE); |
247 | } |
||
248 | |||
6937 | serge | 249 | /* Used to convert any address to canonical form. |
250 | * Starting from gen8, some commands (e.g. STATE_BASE_ADDRESS, |
251 | * MI_LOAD_REGISTER_MEM and others, see Broadwell PRM Vol2a) require the |
252 | * addresses to be in a canonical form: |
253 | * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct |
254 | * canonical form [63:48] == [47]." |
255 | */ |
256 | #define GEN8_HIGH_ADDRESS_BIT 47 |
257 | static inline uint64_t gen8_canonical_addr(uint64_t address) |
258 | { |
259 | return sign_extend64(address, GEN8_HIGH_ADDRESS_BIT); |
260 | } |
261 | |
262 | static inline uint64_t gen8_noncanonical_addr(uint64_t address) |
263 | { |
264 | return address & ((1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1); |
265 | } |
266 | |
267 | static inline uint64_t |
268 | relocation_target(struct drm_i915_gem_relocation_entry *reloc, |
269 | uint64_t target_offset) |
270 | { |
271 | return gen8_canonical_addr((int)reloc->delta + target_offset); |
272 | } |
273 | |
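The helpers above boil down to 48-bit sign extension: a canonical gen8 address repeats bit 47 through bits 63:48, and the non-canonical form simply masks back down to 48 bits. A small self-contained check of that behaviour, with the kernel's sign_extend64() open-coded since this sketch lives outside the kernel:

```c
#include <assert.h>
#include <stdint.h>

#define GEN8_HIGH_ADDRESS_BIT 47

/* open-coded equivalent of the kernel's sign_extend64(value, 47) */
static uint64_t canonical(uint64_t addr)
{
	uint64_t sign = 1ULL << GEN8_HIGH_ADDRESS_BIT;
	return (addr ^ sign) - sign;    /* copies bit 47 into bits 63:48 */
}

static uint64_t noncanonical(uint64_t addr)
{
	return addr & ((1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1);
}

int main(void)
{
	/* bit 47 set -> the upper 16 bits must become all ones */
	assert(canonical(0x0000800000000000ULL) == 0xffff800000000000ULL);
	/* bit 47 clear -> the address is already canonical */
	assert(canonical(0x00007fffffffffffULL) == 0x00007fffffffffffULL);
	/* round-trips back to the 48-bit form the driver uses internally */
	assert(noncanonical(canonical(0x0000800000001000ULL)) == 0x0000800000001000ULL);
	return 0;
}
```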
3263 | Serge | 274 | static int |
4371 | Serge | 275 | relocate_entry_cpu(struct drm_i915_gem_object *obj, |
5060 | serge | 276 | struct drm_i915_gem_relocation_entry *reloc, |
277 | uint64_t target_offset) |
||
4371 | Serge | 278 | { |
6084 | serge | 279 | struct drm_device *dev = obj->base.dev; |
280 | uint32_t page_offset = offset_in_page(reloc->offset); |
||
6937 | serge | 281 | uint64_t delta = relocation_target(reloc, target_offset); |
4371 | Serge | 282 | char *vaddr; |
4560 | Serge | 283 | int ret; |
4371 | Serge | 284 | |
4560 | Serge | 285 | ret = i915_gem_object_set_to_cpu_domain(obj, true); |
4371 | Serge | 286 | if (ret) |
287 | return ret; |
||
288 | |||
6937 | serge | 289 | vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, |
5354 | serge | 290 | reloc->offset >> PAGE_SHIFT)); |
5060 | serge | 291 | *(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta); |
4371 | Serge | 292 | |
5060 | serge | 293 | if (INTEL_INFO(dev)->gen >= 8) { |
294 | page_offset = offset_in_page(page_offset + sizeof(uint32_t)); |
||
295 | |||
296 | if (page_offset == 0) { |
||
5354 | serge | 297 | kunmap_atomic(vaddr); |
6937 | serge | 298 | vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, |
5354 | serge | 299 | (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT)); |
5060 | serge | 300 | } |
301 | |||
302 | *(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta); |
||
303 | } |
||
304 | |||
5354 | serge | 305 | kunmap_atomic(vaddr); |
306 | |||
4371 | Serge | 307 | return 0; |
308 | } |
||
309 | |||
310 | static int |
||
311 | relocate_entry_gtt(struct drm_i915_gem_object *obj, |
||
5060 | serge | 312 | struct drm_i915_gem_relocation_entry *reloc, |
313 | uint64_t target_offset) |
||
4371 | Serge | 314 | { |
315 | struct drm_device *dev = obj->base.dev; |
||
316 | struct drm_i915_private *dev_priv = dev->dev_private; |
||
6937 | serge | 317 | uint64_t delta = relocation_target(reloc, target_offset); |
5354 | serge | 318 | uint64_t offset; |
4371 | Serge | 319 | void __iomem *reloc_page; |
4560 | Serge | 320 | int ret; |
4371 | Serge | 321 | |
322 | ret = i915_gem_object_set_to_gtt_domain(obj, true); |
||
323 | if (ret) |
||
324 | return ret; |
||
325 | |||
326 | ret = i915_gem_object_put_fence(obj); |
||
327 | if (ret) |
||
328 | return ret; |
||
329 | |||
330 | /* Map the page containing the relocation we're going to perform. */ |
||
5354 | serge | 331 | offset = i915_gem_obj_ggtt_offset(obj); |
332 | offset += reloc->offset; |
||
7144 | serge | 333 | reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable, |
334 | offset & PAGE_MASK); |
||
5354 | serge | 335 | iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset)); |
4371 | Serge | 336 | |
7144 | serge | 337 | if (INTEL_INFO(dev)->gen >= 8) { |
338 | offset += sizeof(uint32_t); |
||
5060 | serge | 339 | |
7144 | serge | 340 | if (offset_in_page(offset) == 0) { |
341 | io_mapping_unmap_atomic(reloc_page); |
||
342 | reloc_page = |
||
343 | io_mapping_map_atomic_wc(dev_priv->gtt.mappable, |
||
344 | offset); |
||
345 | } |
||
346 | |||
347 | iowrite32(upper_32_bits(delta), |
||
348 | reloc_page + offset_in_page(offset)); |
||
349 | } |
||
350 | |||
351 | io_mapping_unmap_atomic(reloc_page); |
||
352 | |||
6084 | serge | 353 | return 0; |
354 | } |
||
5354 | serge | 355 | |
6084 | serge | 356 | static void |
357 | clflush_write32(void *addr, uint32_t value) |
||
358 | { |
||
359 | /* This is not a fast path, so KISS. */ |
||
360 | drm_clflush_virt_range(addr, sizeof(uint32_t)); |
||
361 | *(uint32_t *)addr = value; |
||
362 | drm_clflush_virt_range(addr, sizeof(uint32_t)); |
||
363 | } |
||
364 | |||
365 | static int |
||
366 | relocate_entry_clflush(struct drm_i915_gem_object *obj, |
||
367 | struct drm_i915_gem_relocation_entry *reloc, |
||
368 | uint64_t target_offset) |
||
369 | { |
||
370 | struct drm_device *dev = obj->base.dev; |
||
371 | uint32_t page_offset = offset_in_page(reloc->offset); |
||
6937 | serge | 372 | uint64_t delta = relocation_target(reloc, target_offset); |
6084 | serge | 373 | char *vaddr; |
374 | int ret; |
||
375 | |||
376 | ret = i915_gem_object_set_to_gtt_domain(obj, true); |
||
377 | if (ret) |
||
378 | return ret; |
||
379 | |||
6937 | serge | 380 | vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, |
6084 | serge | 381 | reloc->offset >> PAGE_SHIFT)); |
382 | clflush_write32(vaddr + page_offset, lower_32_bits(delta)); |
||
383 | |||
384 | if (INTEL_INFO(dev)->gen >= 8) { |
||
385 | page_offset = offset_in_page(page_offset + sizeof(uint32_t)); |
||
386 | |||
387 | if (page_offset == 0) { |
||
388 | kunmap_atomic(vaddr); |
||
6937 | serge | 389 | vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, |
6084 | serge | 390 | (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT)); |
391 | } |
||
392 | |||
393 | clflush_write32(vaddr + page_offset, upper_32_bits(delta)); |
||
394 | } |
||
395 | |||
396 | kunmap_atomic(vaddr); |
||
397 | |||
4371 | Serge | 398 | return 0; |
399 | } |
||
400 | |||
401 | static int |
||
3263 | Serge | 402 | i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, |
4560 | Serge | 403 | struct eb_vmas *eb, |
5060 | serge | 404 | struct drm_i915_gem_relocation_entry *reloc) |
3263 | Serge | 405 | { |
406 | struct drm_device *dev = obj->base.dev; |
||
407 | struct drm_gem_object *target_obj; |
||
408 | struct drm_i915_gem_object *target_i915_obj; |
||
4560 | Serge | 409 | struct i915_vma *target_vma; |
5060 | serge | 410 | uint64_t target_offset; |
4560 | Serge | 411 | int ret; |
3263 | Serge | 412 | |
413 | /* we've already hold a reference to all valid objects */ |
||
4560 | Serge | 414 | target_vma = eb_get_vma(eb, reloc->target_handle); |
415 | if (unlikely(target_vma == NULL)) |
||
3263 | Serge | 416 | return -ENOENT; |
4560 | Serge | 417 | target_i915_obj = target_vma->obj; |
418 | target_obj = &target_vma->obj->base; |
||
3263 | Serge | 419 | |
6937 | serge | 420 | target_offset = gen8_canonical_addr(target_vma->node.start); |
3263 | Serge | 421 | |
422 | /* Sandybridge PPGTT errata: We need a global gtt mapping for MI and |
||
423 | * pipe_control writes because the gpu doesn't properly redirect them |
||
424 | * through the ppgtt for non_secure batchbuffers. */ |
||
425 | if (unlikely(IS_GEN6(dev) && |
||
6084 | serge | 426 | reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) { |
427 | ret = i915_vma_bind(target_vma, target_i915_obj->cache_level, |
||
428 | PIN_GLOBAL); |
||
429 | if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!")) |
||
430 | return ret; |
||
431 | } |
||
3263 | Serge | 432 | |
433 | /* Validate that the target is in a valid r/w GPU domain */ |
||
434 | if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) { |
||
435 | DRM_DEBUG("reloc with multiple write domains: " |
||
436 | "obj %p target %d offset %d " |
||
437 | "read %08x write %08x", |
||
438 | obj, reloc->target_handle, |
||
439 | (int) reloc->offset, |
||
440 | reloc->read_domains, |
||
441 | reloc->write_domain); |
||
4560 | Serge | 442 | return -EINVAL; |
3263 | Serge | 443 | } |
444 | if (unlikely((reloc->write_domain | reloc->read_domains) |
||
445 | & ~I915_GEM_GPU_DOMAINS)) { |
||
446 | DRM_DEBUG("reloc with read/write non-GPU domains: " |
||
447 | "obj %p target %d offset %d " |
||
448 | "read %08x write %08x", |
||
449 | obj, reloc->target_handle, |
||
450 | (int) reloc->offset, |
||
451 | reloc->read_domains, |
||
452 | reloc->write_domain); |
||
4560 | Serge | 453 | return -EINVAL; |
3263 | Serge | 454 | } |
455 | |||
456 | target_obj->pending_read_domains |= reloc->read_domains; |
||
457 | target_obj->pending_write_domain |= reloc->write_domain; |
||
458 | |||
459 | /* If the relocation already has the right value in it, no |
||
460 | * more work needs to be done. |
||
461 | */ |
||
462 | if (target_offset == reloc->presumed_offset) |
||
463 | return 0; |
||
464 | |||
465 | /* Check that the relocation address is valid... */ |
||
4560 | Serge | 466 | if (unlikely(reloc->offset > |
467 | obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) { |
||
3263 | Serge | 468 | DRM_DEBUG("Relocation beyond object bounds: " |
469 | "obj %p target %d offset %d size %d.\n", |
||
470 | obj, reloc->target_handle, |
||
471 | (int) reloc->offset, |
||
472 | (int) obj->base.size); |
||
4560 | Serge | 473 | return -EINVAL; |
3263 | Serge | 474 | } |
475 | if (unlikely(reloc->offset & 3)) { |
||
476 | DRM_DEBUG("Relocation not 4-byte aligned: " |
||
477 | "obj %p target %d offset %d.\n", |
||
478 | obj, reloc->target_handle, |
||
479 | (int) reloc->offset); |
||
4560 | Serge | 480 | return -EINVAL; |
3263 | Serge | 481 | } |
482 | |||
483 | /* We can't wait for rendering with pagefaults disabled */ |
||
484 | |||
4371 | Serge | 485 | if (use_cpu_reloc(obj)) |
5060 | serge | 486 | ret = relocate_entry_cpu(obj, reloc, target_offset); |
6084 | serge | 487 | else if (obj->map_and_fenceable) |
5060 | serge | 488 | ret = relocate_entry_gtt(obj, reloc, target_offset); |
7144 | serge | 489 | else if (1) |
6084 | serge | 490 | ret = relocate_entry_clflush(obj, reloc, target_offset); |
491 | else { |
||
492 | WARN_ONCE(1, "Impossible case in relocation handling\n"); |
||
493 | ret = -ENODEV; |
||
494 | } |
||
3263 | Serge | 495 | |
6084 | serge | 496 | if (ret) |
497 | return ret; |
||
3263 | Serge | 498 | |
499 | /* and update the user's relocation entry */ |
||
500 | reloc->presumed_offset = target_offset; |
||
501 | |||
502 | return 0; |
||
503 | } |
||
504 | |||
505 | static int |
||
4560 | Serge | 506 | i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, |
507 | struct eb_vmas *eb) |
||
3263 | Serge | 508 | { |
509 | #define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry)) |
||
3266 | Serge | 510 | struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(64)]; |
3263 | Serge | 511 | struct drm_i915_gem_relocation_entry __user *user_relocs; |
4560 | Serge | 512 | struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; |
3263 | Serge | 513 | int remain, ret; |
514 | |||
4539 | Serge | 515 | user_relocs = to_user_ptr(entry->relocs_ptr); |
3263 | Serge | 516 | |
517 | remain = entry->relocation_count; |
||
518 | while (remain) { |
||
519 | struct drm_i915_gem_relocation_entry *r = stack_reloc; |
||
520 | int count = remain; |
||
521 | if (count > ARRAY_SIZE(stack_reloc)) |
||
522 | count = ARRAY_SIZE(stack_reloc); |
||
523 | remain -= count; |
||
524 | |||
7144 | serge | 525 | if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) |
526 | return -EFAULT; |
||
3263 | Serge | 527 | |
528 | do { |
||
529 | u64 offset = r->presumed_offset; |
||
530 | |||
5060 | serge | 531 | ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r); |
3263 | Serge | 532 | if (ret) |
533 | return ret; |
||
534 | |||
7144 | serge | 535 | if (r->presumed_offset != offset && |
536 | __copy_to_user_inatomic(&user_relocs->presumed_offset, |
||
537 | &r->presumed_offset, |
||
538 | sizeof(r->presumed_offset))) { |
||
539 | return -EFAULT; |
||
540 | } |
||
3263 | Serge | 541 | |
542 | user_relocs++; |
||
543 | r++; |
||
544 | } while (--count); |
||
545 | } |
||
546 | |||
547 | return 0; |
||
548 | #undef N_RELOC |
||
549 | } |
||
550 | |||
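The loop above never allocates the full relocation array on the fast path; it streams the user's entries through a small on-stack buffer (N_RELOC(64) entries here) and writes updated presumed offsets back as it goes. The same chunking pattern reduced to a hedged userspace sketch, with memcpy standing in for __copy_from_user_inatomic and a tiny callback in place of the real relocation write:

```c
#include <stddef.h>
#include <string.h>

struct reloc { unsigned long long presumed_offset; };

#define STACK_RELOCS 8   /* stand-in for the driver's N_RELOC(64) sizing */

/* Walk src[0..total) through a small stack buffer, chunk by chunk. */
static int process_relocs(const struct reloc *src, size_t total,
			  int (*apply)(struct reloc *r))
{
	struct reloc stack[STACK_RELOCS];
	size_t remain = total;

	while (remain) {
		size_t count = remain < STACK_RELOCS ? remain : STACK_RELOCS;

		/* in the driver this is __copy_from_user_inatomic() */
		memcpy(stack, src, count * sizeof(stack[0]));

		for (size_t i = 0; i < count; i++) {
			int ret = apply(&stack[i]);
			if (ret)
				return ret;
		}

		src += count;
		remain -= count;
	}
	return 0;
}

static int touch(struct reloc *r) { r->presumed_offset = ~0ULL; return 0; }

int main(void)
{
	struct reloc table[20] = {0};
	return process_relocs(table, 20, touch);
}
```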
551 | static int |
||
4560 | Serge | 552 | i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma, |
553 | struct eb_vmas *eb, |
||
554 | struct drm_i915_gem_relocation_entry *relocs) |
||
3263 | Serge | 555 | { |
4560 | Serge | 556 | const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; |
3263 | Serge | 557 | int i, ret; |
558 | |||
559 | for (i = 0; i < entry->relocation_count; i++) { |
||
5060 | serge | 560 | ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]); |
3263 | Serge | 561 | if (ret) |
562 | return ret; |
||
563 | } |
||
564 | |||
565 | return 0; |
||
566 | } |
||
567 | |||
568 | static int |
||
4560 | Serge | 569 | i915_gem_execbuffer_relocate(struct eb_vmas *eb) |
3263 | Serge | 570 | { |
4560 | Serge | 571 | struct i915_vma *vma; |
3263 | Serge | 572 | int ret = 0; |
573 | |||
574 | /* This is the fast path and we cannot handle a pagefault whilst |
||
575 | * holding the struct mutex lest the user pass in the relocations |
||
576 | * contained within a mmaped bo. For in such a case we, the page |
||
577 | * fault handler would call i915_gem_fault() and we would try to |
||
578 | * acquire the struct mutex again. Obviously this is bad and so |
||
579 | * lockdep complains vehemently. |
||
580 | */ |
||
6935 | serge | 581 | pagefault_disable(); |
4560 | Serge | 582 | list_for_each_entry(vma, &eb->vmas, exec_list) { |
583 | ret = i915_gem_execbuffer_relocate_vma(vma, eb); |
||
3263 | Serge | 584 | if (ret) |
585 | break; |
||
586 | } |
||
6935 | serge | 587 | pagefault_enable(); |
3263 | Serge | 588 | |
589 | return ret; |
||
590 | } |
||
591 | |||
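i915_gem_execbuffer_relocate() above is only the fast path: page faults are disabled so relocation lists that live in a mmap'ed BO cannot recurse into the fault handler while struct_mutex is held, and an -EFAULT from the inatomic copies makes the caller fall back to the slow path, which drops the lock and copies everything with faults allowed. A schematic of that two-tier pattern (stubbed, not the driver's code):

```c
#include <errno.h>
#include <stdio.h>

/* Stubs standing in for the driver's fast and slow relocation passes. */
static int relocate_fast(void) { return -EFAULT; } /* pretend an inatomic copy faulted */
static int relocate_slow(void) { return 0; }       /* may fault safely: lock was dropped */

static int do_relocations(void)
{
	int ret = relocate_fast();      /* runs with page faults disabled in the driver */
	if (ret == -EFAULT)
		ret = relocate_slow();  /* copy relocations again with faults allowed */
	return ret;
}

int main(void)
{
	printf("relocation result: %d\n", do_relocations());
	return 0;
}
```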
6084 | serge | 592 | static bool only_mappable_for_reloc(unsigned int flags) |
593 | { |
||
594 | return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) == |
||
595 | __EXEC_OBJECT_NEEDS_MAP; |
||
596 | } |
||
597 | |||
3263 | Serge | 598 | static int |
4560 | Serge | 599 | i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, |
5060 | serge | 600 | struct intel_engine_cs *ring, |
6084 | serge | 601 | bool *need_reloc) |
3263 | Serge | 602 | { |
5060 | serge | 603 | struct drm_i915_gem_object *obj = vma->obj; |
4560 | Serge | 604 | struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; |
5060 | serge | 605 | uint64_t flags; |
3263 | Serge | 606 | int ret; |
607 | |||
6084 | serge | 608 | flags = PIN_USER; |
5060 | serge | 609 | if (entry->flags & EXEC_OBJECT_NEEDS_GTT) |
610 | flags |= PIN_GLOBAL; |
||
611 | |||
6084 | serge | 612 | if (!drm_mm_node_allocated(&vma->node)) { |
613 | /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset, |
||
614 | * limit address to the first 4GBs for unflagged objects. |
||
615 | */ |
||
616 | if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0) |
||
617 | flags |= PIN_ZONE_4G; |
||
618 | if (entry->flags & __EXEC_OBJECT_NEEDS_MAP) |
||
619 | flags |= PIN_GLOBAL | PIN_MAPPABLE; |
||
620 | if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS) |
||
621 | flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS; |
||
6937 | serge | 622 | if (entry->flags & EXEC_OBJECT_PINNED) |
623 | flags |= entry->offset | PIN_OFFSET_FIXED; |
||
6084 | serge | 624 | if ((flags & PIN_MAPPABLE) == 0) |
625 | flags |= PIN_HIGH; |
||
626 | } |
||
627 | |||
5060 | serge | 628 | ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags); |
6084 | serge | 629 | if ((ret == -ENOSPC || ret == -E2BIG) && |
630 | only_mappable_for_reloc(entry->flags)) |
||
631 | ret = i915_gem_object_pin(obj, vma->vm, |
||
632 | entry->alignment, |
||
633 | flags & ~PIN_MAPPABLE); |
||
3263 | Serge | 634 | if (ret) |
635 | return ret; |
||
636 | |||
637 | entry->flags |= __EXEC_OBJECT_HAS_PIN; |
||
638 | |||
6084 | serge | 639 | if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { |
640 | ret = i915_gem_object_get_fence(obj); |
||
641 | if (ret) |
||
642 | return ret; |
||
3263 | Serge | 643 | |
6084 | serge | 644 | if (i915_gem_object_pin_fence(obj)) |
645 | entry->flags |= __EXEC_OBJECT_HAS_FENCE; |
||
3263 | Serge | 646 | } |
647 | |||
4560 | Serge | 648 | if (entry->offset != vma->node.start) { |
649 | entry->offset = vma->node.start; |
||
3480 | Serge | 650 | *need_reloc = true; |
651 | } |
||
3266 | Serge | 652 | |
3480 | Serge | 653 | if (entry->flags & EXEC_OBJECT_WRITE) { |
654 | obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER; |
||
655 | obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER; |
||
656 | } |
||
657 | |||
3263 | Serge | 658 | return 0; |
659 | } |
||
660 | |||
5060 | serge | 661 | static bool |
5354 | serge | 662 | need_reloc_mappable(struct i915_vma *vma) |
5060 | serge | 663 | { |
664 | struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; |
||
5354 | serge | 665 | |
666 | if (entry->relocation_count == 0) |
||
667 | return false; |
||
668 | |||
7144 | serge | 669 | if (!vma->is_ggtt) |
5354 | serge | 670 | return false; |
671 | |||
672 | /* See also use_cpu_reloc() */ |
||
673 | if (HAS_LLC(vma->obj->base.dev)) |
||
674 | return false; |
||
675 | |||
676 | if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU) |
||
677 | return false; |
||
678 | |||
679 | return true; |
||
680 | } |
||
681 | |||
682 | static bool |
||
683 | eb_vma_misplaced(struct i915_vma *vma) |
||
684 | { |
||
685 | struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; |
||
5060 | serge | 686 | struct drm_i915_gem_object *obj = vma->obj; |
687 | |||
7144 | serge | 688 | WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !vma->is_ggtt); |
5060 | serge | 689 | |
690 | if (entry->alignment && |
||
691 | vma->node.start & (entry->alignment - 1)) |
||
692 | return true; |
||
693 | |||
6937 | serge | 694 | if (entry->flags & EXEC_OBJECT_PINNED && |
695 | vma->node.start != entry->offset) |
||
696 | return true; |
||
697 | |||
5060 | serge | 698 | if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS && |
699 | vma->node.start < BATCH_OFFSET_BIAS) |
||
700 | return true; |
||
701 | |||
6084 | serge | 702 | /* avoid costly ping-pong once a batch bo ended up non-mappable */ |
703 | if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable) |
||
704 | return !only_mappable_for_reloc(entry->flags); |
||
705 | |||
706 | if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 && |
||
707 | (vma->node.start + vma->node.size - 1) >> 32) |
||
708 | return true; |
||
709 | |||
5060 | serge | 710 | return false; |
711 | } |
||
712 | |||
3263 | Serge | 713 | static int |
5060 | serge | 714 | i915_gem_execbuffer_reserve(struct intel_engine_cs *ring, |
4560 | Serge | 715 | struct list_head *vmas, |
6084 | serge | 716 | struct intel_context *ctx, |
3480 | Serge | 717 | bool *need_relocs) |
3263 | Serge | 718 | { |
719 | struct drm_i915_gem_object *obj; |
||
4560 | Serge | 720 | struct i915_vma *vma; |
721 | struct i915_address_space *vm; |
||
722 | struct list_head ordered_vmas; |
||
6937 | serge | 723 | struct list_head pinned_vmas; |
3263 | Serge | 724 | bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; |
725 | int retry; |
||
726 | |||
5060 | serge | 727 | i915_gem_retire_requests_ring(ring); |
728 | |||
4560 | Serge | 729 | vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm; |
730 | |||
731 | INIT_LIST_HEAD(&ordered_vmas); |
||
6937 | serge | 732 | INIT_LIST_HEAD(&pinned_vmas); |
4560 | Serge | 733 | while (!list_empty(vmas)) { |
3263 | Serge | 734 | struct drm_i915_gem_exec_object2 *entry; |
735 | bool need_fence, need_mappable; |
||
736 | |||
4560 | Serge | 737 | vma = list_first_entry(vmas, struct i915_vma, exec_list); |
738 | obj = vma->obj; |
||
739 | entry = vma->exec_entry; |
||
3263 | Serge | 740 | |
6084 | serge | 741 | if (ctx->flags & CONTEXT_NO_ZEROMAP) |
742 | entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; |
||
743 | |||
5354 | serge | 744 | if (!has_fenced_gpu_access) |
745 | entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE; |
||
3263 | Serge | 746 | need_fence = |
747 | entry->flags & EXEC_OBJECT_NEEDS_FENCE && |
||
748 | obj->tiling_mode != I915_TILING_NONE; |
||
4560 | Serge | 749 | need_mappable = need_fence || need_reloc_mappable(vma); |
3263 | Serge | 750 | |
6937 | serge | 751 | if (entry->flags & EXEC_OBJECT_PINNED) |
752 | list_move_tail(&vma->exec_list, &pinned_vmas); |
||
753 | else if (need_mappable) { |
||
5354 | serge | 754 | entry->flags |= __EXEC_OBJECT_NEEDS_MAP; |
4560 | Serge | 755 | list_move(&vma->exec_list, &ordered_vmas); |
5354 | serge | 756 | } else |
4560 | Serge | 757 | list_move_tail(&vma->exec_list, &ordered_vmas); |
3263 | Serge | 758 | |
3480 | Serge | 759 | obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND; |
3263 | Serge | 760 | obj->base.pending_write_domain = 0; |
761 | } |
||
4560 | Serge | 762 | list_splice(&ordered_vmas, vmas); |
6937 | serge | 763 | list_splice(&pinned_vmas, vmas); |
3263 | Serge | 764 | |
765 | /* Attempt to pin all of the buffers into the GTT. |
||
766 | * This is done in 3 phases: |
||
767 | * |
||
768 | * 1a. Unbind all objects that do not match the GTT constraints for |
||
769 | * the execbuffer (fenceable, mappable, alignment etc). |
||
770 | * 1b. Increment pin count for already bound objects. |
||
771 | * 2. Bind new objects. |
||
772 | * 3. Decrement pin count. |
||
773 | * |
||
774 | * This avoid unnecessary unbinding of later objects in order to make |
||
775 | * room for the earlier objects *unless* we need to defragment. |
||
776 | */ |
||
777 | retry = 0; |
||
778 | do { |
||
779 | int ret = 0; |
||
780 | |||
781 | /* Unbind any ill-fitting objects or pin. */ |
||
4560 | Serge | 782 | list_for_each_entry(vma, vmas, exec_list) { |
783 | if (!drm_mm_node_allocated(&vma->node)) |
||
3263 | Serge | 784 | continue; |
785 | |||
5354 | serge | 786 | if (eb_vma_misplaced(vma)) |
4560 | Serge | 787 | ret = i915_vma_unbind(vma); |
3263 | Serge | 788 | else |
4560 | Serge | 789 | ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs); |
3263 | Serge | 790 | if (ret) |
791 | goto err; |
||
792 | } |
||
793 | |||
794 | /* Bind fresh objects */ |
||
4560 | Serge | 795 | list_for_each_entry(vma, vmas, exec_list) { |
796 | if (drm_mm_node_allocated(&vma->node)) |
||
3263 | Serge | 797 | continue; |
798 | |||
4560 | Serge | 799 | ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs); |
3263 | Serge | 800 | if (ret) |
801 | goto err; |
||
802 | } |
||
803 | |||
4560 | Serge | 804 | err: |
3263 | Serge | 805 | if (ret != -ENOSPC || retry++) |
806 | return ret; |
||
807 | |||
4560 | Serge | 808 | /* Decrement pin count for bound objects */ |
809 | list_for_each_entry(vma, vmas, exec_list) |
||
810 | i915_gem_execbuffer_unreserve_vma(vma); |
||
811 | |||
5060 | serge | 812 | ret = i915_gem_evict_vm(vm, true); |
3263 | Serge | 813 | if (ret) |
814 | return ret; |
||
815 | } while (1); |
||
816 | } |
||
817 | |||
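The reservation loop above implements the three phases described in its comment and allows itself exactly one full eviction before giving up. Its control flow, stripped of the vma lists and stubbed out so only the skeleton is visible (a sketch, not the driver's code):

```c
#include <errno.h>

static int try_pin_all(void)      { return -ENOSPC; } /* phases 1+2: unbind misplaced, pin/bind */
static int unpin_all(void)        { return 0; }        /* phase 3: drop the temporary pins */
static int evict_everything(void) { return 0; }        /* make room, then retry from scratch */

static int reserve(void)
{
	int retry = 0;

	do {
		int ret = try_pin_all();
		if (ret != -ENOSPC || retry++)
			return ret;     /* success, a hard error, or a second ENOSPC */

		unpin_all();
		ret = evict_everything();
		if (ret)
			return ret;
	} while (1);
}

int main(void)
{
	return reserve() ? 1 : 0;
}
```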
818 | static int |
||
819 | i915_gem_execbuffer_relocate_slow(struct drm_device *dev, |
||
3480 | Serge | 820 | struct drm_i915_gem_execbuffer2 *args, |
3263 | Serge | 821 | struct drm_file *file, |
5060 | serge | 822 | struct intel_engine_cs *ring, |
4560 | Serge | 823 | struct eb_vmas *eb, |
6084 | serge | 824 | struct drm_i915_gem_exec_object2 *exec, |
825 | struct intel_context *ctx) |
||
3263 | Serge | 826 | { |
827 | struct drm_i915_gem_relocation_entry *reloc; |
||
4560 | Serge | 828 | struct i915_address_space *vm; |
829 | struct i915_vma *vma; |
||
3480 | Serge | 830 | bool need_relocs; |
3263 | Serge | 831 | int *reloc_offset; |
832 | int i, total, ret; |
||
4560 | Serge | 833 | unsigned count = args->buffer_count; |
3263 | Serge | 834 | |
4560 | Serge | 835 | vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm; |
836 | |||
3263 | Serge | 837 | /* We may process another execbuffer during the unlock... */ |
4560 | Serge | 838 | while (!list_empty(&eb->vmas)) { |
839 | vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list); |
||
840 | list_del_init(&vma->exec_list); |
||
841 | i915_gem_execbuffer_unreserve_vma(vma); |
||
842 | drm_gem_object_unreference(&vma->obj->base); |
||
3263 | Serge | 843 | } |
844 | |||
845 | mutex_unlock(&dev->struct_mutex); |
||
846 | |||
847 | total = 0; |
||
848 | for (i = 0; i < count; i++) |
||
849 | total += exec[i].relocation_count; |
||
850 | |||
6084 | serge | 851 | reloc_offset = __builtin_malloc(count * sizeof(*reloc_offset)); |
852 | reloc = __builtin_malloc(total * sizeof(*reloc)); |
||
3263 | Serge | 853 | if (reloc == NULL || reloc_offset == NULL) { |
3266 | Serge | 854 | kfree(reloc); |
855 | kfree(reloc_offset); |
||
3263 | Serge | 856 | mutex_lock(&dev->struct_mutex); |
857 | return -ENOMEM; |
||
858 | } |
||
859 | |||
860 | total = 0; |
||
861 | for (i = 0; i < count; i++) { |
||
862 | struct drm_i915_gem_relocation_entry __user *user_relocs; |
||
863 | u64 invalid_offset = (u64)-1; |
||
864 | int j; |
||
865 | |||
4539 | Serge | 866 | user_relocs = to_user_ptr(exec[i].relocs_ptr); |
3263 | Serge | 867 | |
868 | if (copy_from_user(reloc+total, user_relocs, |
||
869 | exec[i].relocation_count * sizeof(*reloc))) { |
||
870 | ret = -EFAULT; |
||
871 | mutex_lock(&dev->struct_mutex); |
||
872 | goto err; |
||
873 | } |
||
874 | |||
875 | /* As we do not update the known relocation offsets after |
||
876 | * relocating (due to the complexities in lock handling), |
||
877 | * we need to mark them as invalid now so that we force the |
||
878 | * relocation processing next time. Just in case the target |
||
879 | * object is evicted and then rebound into its old |
||
880 | * presumed_offset before the next execbuffer - if that |
||
881 | * happened we would make the mistake of assuming that the |
||
882 | * relocations were valid. |
||
883 | */ |
||
884 | for (j = 0; j < exec[i].relocation_count; j++) { |
||
5060 | serge | 885 | if (__copy_to_user(&user_relocs[j].presumed_offset, |
6084 | serge | 886 | &invalid_offset, |
887 | sizeof(invalid_offset))) { |
||
3263 | Serge | 888 | ret = -EFAULT; |
889 | mutex_lock(&dev->struct_mutex); |
||
890 | goto err; |
||
891 | } |
||
892 | } |
||
893 | |||
894 | reloc_offset[i] = total; |
||
895 | total += exec[i].relocation_count; |
||
896 | } |
||
897 | |||
898 | ret = i915_mutex_lock_interruptible(dev); |
||
899 | if (ret) { |
||
900 | mutex_lock(&dev->struct_mutex); |
||
901 | goto err; |
||
902 | } |
||
903 | |||
904 | /* reacquire the objects */ |
||
905 | eb_reset(eb); |
||
4560 | Serge | 906 | ret = eb_lookup_vmas(eb, exec, args, vm, file); |
3480 | Serge | 907 | if (ret) |
6084 | serge | 908 | goto err; |
3263 | Serge | 909 | |
3480 | Serge | 910 | need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; |
6084 | serge | 911 | ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, ctx, &need_relocs); |
3263 | Serge | 912 | if (ret) |
913 | goto err; |
||
914 | |||
4560 | Serge | 915 | list_for_each_entry(vma, &eb->vmas, exec_list) { |
916 | int offset = vma->exec_entry - exec; |
||
917 | ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb, |
||
918 | reloc + reloc_offset[offset]); |
||
3263 | Serge | 919 | if (ret) |
920 | goto err; |
||
921 | } |
||
922 | |||
923 | /* Leave the user relocations as are, this is the painfully slow path, |
||
924 | * and we want to avoid the complication of dropping the lock whilst |
||
925 | * having buffers reserved in the aperture and so causing spurious |
||
926 | * ENOSPC for random operations. |
||
927 | */ |
||
928 | |||
929 | err: |
||
3266 | Serge | 930 | kfree(reloc); |
931 | kfree(reloc_offset); |
||
3263 | Serge | 932 | return ret; |
933 | } |
||
934 | |||
935 | static int |
||
6084 | serge | 936 | i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, |
4560 | Serge | 937 | struct list_head *vmas) |
3263 | Serge | 938 | { |
6084 | serge | 939 | const unsigned other_rings = ~intel_ring_flag(req->ring); |
4560 | Serge | 940 | struct i915_vma *vma; |
3263 | Serge | 941 | uint32_t flush_domains = 0; |
4104 | Serge | 942 | bool flush_chipset = false; |
3263 | Serge | 943 | int ret; |
944 | |||
4560 | Serge | 945 | list_for_each_entry(vma, vmas, exec_list) { |
946 | struct drm_i915_gem_object *obj = vma->obj; |
||
3263 | Serge | 947 | |
6084 | serge | 948 | if (obj->active & other_rings) { |
949 | ret = i915_gem_object_sync(obj, req->ring, &req); |
||
950 | if (ret) |
||
951 | return ret; |
||
952 | } |
||
953 | |||
3263 | Serge | 954 | if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) |
4104 | Serge | 955 | flush_chipset |= i915_gem_clflush_object(obj, false); |
3263 | Serge | 956 | |
957 | flush_domains |= obj->base.write_domain; |
||
958 | } |
||
959 | |||
4104 | Serge | 960 | if (flush_chipset) |
6084 | serge | 961 | i915_gem_chipset_flush(req->ring->dev); |
3263 | Serge | 962 | |
963 | if (flush_domains & I915_GEM_DOMAIN_GTT) |
||
964 | wmb(); |
||
965 | |||
966 | /* Unconditionally invalidate gpu caches and ensure that we do flush |
||
967 | * any residual writes from the previous batch. |
||
968 | */ |
||
6084 | serge | 969 | return intel_ring_invalidate_all_caches(req); |
3263 | Serge | 970 | } |
971 | |||
972 | static bool |
||
973 | i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) |
||
974 | { |
||
3480 | Serge | 975 | if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS) |
976 | return false; |
||
977 | |||
6084 | serge | 978 | /* Kernel clipping was a DRI1 misfeature */ |
979 | if (exec->num_cliprects || exec->cliprects_ptr) |
||
980 | return false; |
||
981 | |||
982 | if (exec->DR4 == 0xffffffff) { |
||
983 | DRM_DEBUG("UXA submitting garbage DR4, fixing up\n"); |
||
984 | exec->DR4 = 0; |
||
985 | } |
||
986 | if (exec->DR1 || exec->DR4) |
||
987 | return false; |
||
988 | |||
989 | if ((exec->batch_start_offset | exec->batch_len) & 0x7) |
||
990 | return false; |
||
991 | |||
992 | return true; |
||
3263 | Serge | 993 | } |
994 | |||
995 | static int |
||
5354 | serge | 996 | validate_exec_list(struct drm_device *dev, |
997 | struct drm_i915_gem_exec_object2 *exec, |
||
3263 | Serge | 998 | int count) |
999 | { |
||
4560 | Serge | 1000 | unsigned relocs_total = 0; |
1001 | unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry); |
||
5354 | serge | 1002 | unsigned invalid_flags; |
1003 | int i; |
||
3263 | Serge | 1004 | |
5354 | serge | 1005 | invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS; |
1006 | if (USES_FULL_PPGTT(dev)) |
||
1007 | invalid_flags |= EXEC_OBJECT_NEEDS_GTT; |
||
1008 | |||
3263 | Serge | 1009 | for (i = 0; i < count; i++) { |
3746 | Serge | 1010 | char __user *ptr = to_user_ptr(exec[i].relocs_ptr); |
3263 | Serge | 1011 | int length; /* limited by fault_in_pages_readable() */ |
1012 | |||
5354 | serge | 1013 | if (exec[i].flags & invalid_flags) |
3263 | Serge | 1014 | return -EINVAL; |
1015 | |||
6937 | serge | 1016 | /* Offset can be used as input (EXEC_OBJECT_PINNED), reject |
1017 | * any non-page-aligned or non-canonical addresses. |
||
1018 | */ |
||
1019 | if (exec[i].flags & EXEC_OBJECT_PINNED) { |
||
1020 | if (exec[i].offset != |
||
1021 | gen8_canonical_addr(exec[i].offset & PAGE_MASK)) |
||
1022 | return -EINVAL; |
||
1023 | |||
1024 | /* From drm_mm perspective address space is continuous, |
||
1025 | * so from this point we're always using non-canonical |
||
1026 | * form internally. |
||
1027 | */ |
||
1028 | exec[i].offset = gen8_noncanonical_addr(exec[i].offset); |
||
1029 | } |
||
1030 | |||
6084 | serge | 1031 | if (exec[i].alignment && !is_power_of_2(exec[i].alignment)) |
1032 | return -EINVAL; |
||
1033 | |||
3480 | Serge | 1034 | /* First check for malicious input causing overflow in |
1035 | * the worst case where we need to allocate the entire |
||
1036 | * relocation tree as a single array. |
||
1037 | */ |
||
1038 | if (exec[i].relocation_count > relocs_max - relocs_total) |
||
1039 | return -EINVAL; |
||
1040 | relocs_total += exec[i].relocation_count; |
||
1041 | |||
3263 | Serge | 1042 | length = exec[i].relocation_count * |
1043 | sizeof(struct drm_i915_gem_relocation_entry); |
||
3746 | Serge | 1044 | /* |
1045 | * We must check that the entire relocation array is safe |
||
1046 | * to read, but since we may need to update the presumed |
||
1047 | * offsets during execution, check for full write access. |
||
1048 | */ |
||
4560 | Serge | 1049 | } |
3263 | Serge | 1050 | |
4560 | Serge | 1051 | return 0; |
1052 | } |
||
1053 | |||
5060 | serge | 1054 | static struct intel_context * |
4560 | Serge | 1055 | i915_gem_validate_context(struct drm_device *dev, struct drm_file *file, |
5060 | serge | 1056 | struct intel_engine_cs *ring, const u32 ctx_id) |
4560 | Serge | 1057 | { |
5060 | serge | 1058 | struct intel_context *ctx = NULL; |
4560 | Serge | 1059 | struct i915_ctx_hang_stats *hs; |
1060 | |||
5060 | serge | 1061 | if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE) |
1062 | return ERR_PTR(-EINVAL); |
||
4560 | Serge | 1063 | |
5060 | serge | 1064 | ctx = i915_gem_context_get(file->driver_priv, ctx_id); |
1065 | if (IS_ERR(ctx)) |
||
1066 | return ctx; |
||
1067 | |||
1068 | hs = &ctx->hang_stats; |
||
4560 | Serge | 1069 | if (hs->banned) { |
1070 | DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id); |
||
5060 | serge | 1071 | return ERR_PTR(-EIO); |
3263 | Serge | 1072 | } |
1073 | |||
5354 | serge | 1074 | if (i915.enable_execlists && !ctx->engine[ring->id].state) { |
6084 | serge | 1075 | int ret = intel_lr_context_deferred_alloc(ctx, ring); |
5354 | serge | 1076 | if (ret) { |
1077 | DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret); |
||
1078 | return ERR_PTR(ret); |
||
1079 | } |
||
1080 | } |
||
1081 | |||
5060 | serge | 1082 | return ctx; |
3263 | Serge | 1083 | } |
1084 | |||
5354 | serge | 1085 | void |
4560 | Serge | 1086 | i915_gem_execbuffer_move_to_active(struct list_head *vmas, |
6084 | serge | 1087 | struct drm_i915_gem_request *req) |
3263 | Serge | 1088 | { |
6084 | serge | 1089 | struct intel_engine_cs *ring = i915_gem_request_get_ring(req); |
4560 | Serge | 1090 | struct i915_vma *vma; |
3263 | Serge | 1091 | |
4560 | Serge | 1092 | list_for_each_entry(vma, vmas, exec_list) { |
5354 | serge | 1093 | struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; |
4560 | Serge | 1094 | struct drm_i915_gem_object *obj = vma->obj; |
3263 | Serge | 1095 | u32 old_read = obj->base.read_domains; |
1096 | u32 old_write = obj->base.write_domain; |
||
1097 | |||
6084 | serge | 1098 | obj->dirty = 1; /* be paranoid */ |
3480 | Serge | 1099 | obj->base.write_domain = obj->base.pending_write_domain; |
1100 | if (obj->base.write_domain == 0) |
||
1101 | obj->base.pending_read_domains |= obj->base.read_domains; |
||
3263 | Serge | 1102 | obj->base.read_domains = obj->base.pending_read_domains; |
1103 | |||
6084 | serge | 1104 | i915_vma_move_to_active(vma, req); |
3263 | Serge | 1105 | if (obj->base.write_domain) { |
6084 | serge | 1106 | i915_gem_request_assign(&obj->last_write_req, req); |
5060 | serge | 1107 | |
6084 | serge | 1108 | intel_fb_obj_invalidate(obj, ORIGIN_CS); |
5060 | serge | 1109 | |
1110 | /* update for the implicit flush after a batch */ |
||
1111 | obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; |
||
3263 | Serge | 1112 | } |
5354 | serge | 1113 | if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { |
6084 | serge | 1114 | i915_gem_request_assign(&obj->last_fenced_req, req); |
5354 | serge | 1115 | if (entry->flags & __EXEC_OBJECT_HAS_FENCE) { |
1116 | struct drm_i915_private *dev_priv = to_i915(ring->dev); |
||
1117 | list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list, |
||
1118 | &dev_priv->mm.fence_list); |
||
1119 | } |
||
1120 | } |
||
3263 | Serge | 1121 | |
1122 | trace_i915_gem_object_change_domain(obj, old_read, old_write); |
||
1123 | } |
||
1124 | } |
||
1125 | |||
5354 | serge | 1126 | void |
6084 | serge | 1127 | i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params) |
3263 | Serge | 1128 | { |
1129 | /* Unconditionally force add_request to emit a full flush. */ |
||
6084 | serge | 1130 | params->ring->gpu_caches_dirty = true; |
3263 | Serge | 1131 | |
1132 | /* Add a breadcrumb for the completion of the batch buffer */ |
||
6084 | serge | 1133 | __i915_add_request(params->request, params->batch_obj, true); |
3263 | Serge | 1134 | } |
1135 | |||
1136 | static int |
||
1137 | i915_reset_gen7_sol_offsets(struct drm_device *dev, |
||
6084 | serge | 1138 | struct drm_i915_gem_request *req) |
3263 | Serge | 1139 | { |
6084 | serge | 1140 | struct intel_engine_cs *ring = req->ring; |
5060 | serge | 1141 | struct drm_i915_private *dev_priv = dev->dev_private; |
3263 | Serge | 1142 | int ret, i; |
1143 | |||
5060 | serge | 1144 | if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) { |
1145 | DRM_DEBUG("sol reset is gen7/rcs only\n"); |
||
1146 | return -EINVAL; |
||
1147 | } |
||
3263 | Serge | 1148 | |
6084 | serge | 1149 | ret = intel_ring_begin(req, 4 * 3); |
3263 | Serge | 1150 | if (ret) |
1151 | return ret; |
||
1152 | |||
1153 | for (i = 0; i < 4; i++) { |
||
1154 | intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); |
||
6937 | serge | 1155 | intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i)); |
3263 | Serge | 1156 | intel_ring_emit(ring, 0); |
1157 | } |
||
1158 | |||
1159 | intel_ring_advance(ring); |
||
1160 | |||
1161 | return 0; |
||
1162 | } |
||
1163 | |||
6084 | serge | 1164 | static struct drm_i915_gem_object* |
1165 | i915_gem_execbuffer_parse(struct intel_engine_cs *ring, |
||
1166 | struct drm_i915_gem_exec_object2 *shadow_exec_entry, |
||
1167 | struct eb_vmas *eb, |
||
1168 | struct drm_i915_gem_object *batch_obj, |
||
1169 | u32 batch_start_offset, |
||
1170 | u32 batch_len, |
||
1171 | bool is_master) |
||
5354 | serge | 1172 | { |
6084 | serge | 1173 | struct drm_i915_gem_object *shadow_batch_obj; |
1174 | struct i915_vma *vma; |
||
5354 | serge | 1175 | int ret; |
1176 | |||
6084 | serge | 1177 | shadow_batch_obj = i915_gem_batch_pool_get(&ring->batch_pool, |
1178 | PAGE_ALIGN(batch_len)); |
||
1179 | if (IS_ERR(shadow_batch_obj)) |
||
1180 | return shadow_batch_obj; |
||
5354 | serge | 1181 | |
6084 | serge | 1182 | ret = i915_parse_cmds(ring, |
1183 | batch_obj, |
||
1184 | shadow_batch_obj, |
||
1185 | batch_start_offset, |
||
1186 | batch_len, |
||
1187 | is_master); |
||
1188 | if (ret) |
||
1189 | goto err; |
||
5354 | serge | 1190 | |
6084 | serge | 1191 | ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 0, 0); |
1192 | if (ret) |
||
1193 | goto err; |
||
5354 | serge | 1194 | |
6084 | serge | 1195 | i915_gem_object_unpin_pages(shadow_batch_obj); |
5354 | serge | 1196 | |
6084 | serge | 1197 | memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry)); |
1198 | |||
1199 | vma = i915_gem_obj_to_ggtt(shadow_batch_obj); |
||
1200 | vma->exec_entry = shadow_exec_entry; |
||
1201 | vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN; |
||
1202 | drm_gem_object_reference(&shadow_batch_obj->base); |
||
1203 | list_add_tail(&vma->exec_list, &eb->vmas); |
||
1204 | |||
1205 | shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND; |
||
1206 | |||
1207 | return shadow_batch_obj; |
||
1208 | |||
1209 | err: |
||
1210 | i915_gem_object_unpin_pages(shadow_batch_obj); |
||
1211 | if (ret == -EACCES) /* unhandled chained batch */ |
||
1212 | return batch_obj; |
||
1213 | else |
||
1214 | return ERR_PTR(ret); |
||
5354 | serge | 1215 | } |
1216 | |||
1217 | int |
||
6084 | serge | 1218 | i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params, |
1219 | struct drm_i915_gem_execbuffer2 *args, |
||
1220 | struct list_head *vmas) |
||
5060 | serge | 1221 | { |
6084 | serge | 1222 | struct drm_device *dev = params->dev; |
1223 | struct intel_engine_cs *ring = params->ring; |
||
5060 | serge | 1224 | struct drm_i915_private *dev_priv = dev->dev_private; |
6084 | serge | 1225 | u64 exec_start, exec_len; |
5060 | serge | 1226 | int instp_mode; |
1227 | u32 instp_mask; |
||
6084 | serge | 1228 | int ret; |
5060 | serge | 1229 | |
6084 | serge | 1230 | ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas); |
5060 | serge | 1231 | if (ret) |
6084 | serge | 1232 | return ret; |
5060 | serge | 1233 | |
6084 | serge | 1234 | ret = i915_switch_context(params->request); |
5060 | serge | 1235 | if (ret) |
6084 | serge | 1236 | return ret; |
5060 | serge | 1237 | |
6084 | serge | 1238 | WARN(params->ctx->ppgtt && params->ctx->ppgtt->pd_dirty_rings & (1<<ring->id), |
1239 | "%s didn't clear reload\n", ring->name); |
1240 | |
1240 | |||
5060 | serge | 1241 | instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK; |
1242 | instp_mask = I915_EXEC_CONSTANTS_MASK; |
||
1243 | switch (instp_mode) { |
||
1244 | case I915_EXEC_CONSTANTS_REL_GENERAL: |
||
1245 | case I915_EXEC_CONSTANTS_ABSOLUTE: |
||
1246 | case I915_EXEC_CONSTANTS_REL_SURFACE: |
||
1247 | if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) { |
||
1248 | DRM_DEBUG("non-0 rel constants mode on non-RCS\n"); |
||
6084 | serge | 1249 | return -EINVAL; |
5060 | serge | 1250 | } |
1251 | |||
1252 | if (instp_mode != dev_priv->relative_constants_mode) { |
||
1253 | if (INTEL_INFO(dev)->gen < 4) { |
||
1254 | DRM_DEBUG("no rel constants on pre-gen4\n"); |
||
6084 | serge | 1255 | return -EINVAL; |
5060 | serge | 1256 | } |
1257 | |||
1258 | if (INTEL_INFO(dev)->gen > 5 && |
||
1259 | instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) { |
||
1260 | DRM_DEBUG("rel surface constants mode invalid on gen5+\n"); |
||
6084 | serge | 1261 | return -EINVAL; |
5060 | serge | 1262 | } |
1263 | |||
1264 | /* The HW changed the meaning on this bit on gen6 */ |
||
1265 | if (INTEL_INFO(dev)->gen >= 6) |
||
1266 | instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE; |
||
1267 | } |
||
1268 | break; |
||
1269 | default: |
||
1270 | DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode); |
||
6084 | serge | 1271 | return -EINVAL; |
5060 | serge | 1272 | } |
1273 | |||
1274 | if (ring == &dev_priv->ring[RCS] && |
||
6084 | serge | 1275 | instp_mode != dev_priv->relative_constants_mode) { |
1276 | ret = intel_ring_begin(params->request, 4); |
||
5060 | serge | 1277 | if (ret) |
6084 | serge | 1278 | return ret; |
5060 | serge | 1279 | |
1280 | intel_ring_emit(ring, MI_NOOP); |
||
1281 | intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); |
||
6937 | serge | 1282 | intel_ring_emit_reg(ring, INSTPM); |
5060 | serge | 1283 | intel_ring_emit(ring, instp_mask << 16 | instp_mode); |
1284 | intel_ring_advance(ring); |
||
1285 | |||
1286 | dev_priv->relative_constants_mode = instp_mode; |
||
1287 | } |
||
1288 | |||
1289 | if (args->flags & I915_EXEC_GEN7_SOL_RESET) { |
||
6084 | serge | 1290 | ret = i915_reset_gen7_sol_offsets(dev, params->request); |
5060 | serge | 1291 | if (ret) |
6084 | serge | 1292 | return ret; |
5060 | serge | 1293 | } |
1294 | |||
6084 | serge | 1295 | exec_len = args->batch_len; |
1296 | exec_start = params->batch_obj_vm_offset + |
||
1297 | params->args_batch_start_offset; |
||
5060 | serge | 1298 | |
7144 | serge | 1299 | if (exec_len == 0) |
1300 | exec_len = params->batch_obj->base.size; |
||
1301 | |||
6084 | serge | 1302 | ret = ring->dispatch_execbuffer(params->request, |
1303 | exec_start, exec_len, |
||
1304 | params->dispatch_flags); |
||
1305 | if (ret) |
||
1306 | return ret; |
||
5060 | serge | 1307 | |
6084 | serge | 1308 | trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags); |
5060 | serge | 1309 | |
6084 | serge | 1310 | i915_gem_execbuffer_move_to_active(vmas, params->request); |
1311 | i915_gem_execbuffer_retire_commands(params); |
||
5060 | serge | 1312 | |
6084 | serge | 1313 | return 0; |
5060 | serge | 1314 | } |
1315 | |||
1316 | /** |
||
1317 | * Find one BSD ring to dispatch the corresponding BSD command. |
||
7144 | serge | 1318 | * The ring index is returned. |
5060 | serge | 1319 | */ |
7144 | serge | 1320 | static unsigned int |
1321 | gen8_dispatch_bsd_ring(struct drm_i915_private *dev_priv, struct drm_file *file) |
||
5060 | serge | 1322 | { |
1323 | struct drm_i915_file_private *file_priv = file->driver_priv; |
||
1324 | |||
7144 | serge | 1325 | /* Check whether the file_priv has already selected one ring. */ |
1326 | if ((int)file_priv->bsd_ring < 0) { |
||
1327 | /* If not, use the ping-pong mechanism to select one. */ |
||
1328 | mutex_lock(&dev_priv->dev->struct_mutex); |
||
1329 | file_priv->bsd_ring = dev_priv->mm.bsd_ring_dispatch_index; |
||
1330 | dev_priv->mm.bsd_ring_dispatch_index ^= 1; |
||
1331 | mutex_unlock(&dev_priv->dev->struct_mutex); |
||
1332 | } |
||
5060 | serge | 1333 | |
7144 | serge | 1334 | return file_priv->bsd_ring; |
5060 | serge | 1335 | } |
1336 | |||
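gen8_dispatch_bsd_ring() above spreads clients across the two BSD (video) rings: a file's first BSD submission takes whichever ring the global ping-pong index selects, the index flips, and later submissions from the same file stay on that ring. A standalone sketch of the same scheme with illustrative types and no locking:

```c
#include <stdio.h>

struct fake_file { int bsd_ring; };        /* -1 means not yet assigned */

static int bsd_dispatch_index;             /* global ping-pong state, 0 or 1 */

static int pick_bsd_ring(struct fake_file *file)
{
	if (file->bsd_ring < 0) {
		file->bsd_ring = bsd_dispatch_index;   /* take the current side... */
		bsd_dispatch_index ^= 1;               /* ...and flip it for the next client */
	}
	return file->bsd_ring;                         /* later calls are sticky */
}

int main(void)
{
	struct fake_file a = { -1 }, b = { -1 };
	int ra = pick_bsd_ring(&a);
	int rb = pick_bsd_ring(&b);

	/* two clients land on different rings; repeat calls keep the assignment */
	printf("a=%d b=%d a_again=%d\n", ra, rb, pick_bsd_ring(&a));
	return 0;
}
```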
1337 | static struct drm_i915_gem_object * |
||
1338 | eb_get_batch(struct eb_vmas *eb) |
||
1339 | { |
||
1340 | struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list); |
||
1341 | |||
1342 | /* |
||
1343 | * SNA is doing fancy tricks with compressing batch buffers, which leads |
||
1344 | * to negative relocation deltas. Usually that works out ok since the |
||
1345 | * relocate address is still positive, except when the batch is placed |
||
1346 | * very low in the GTT. Ensure this doesn't happen. |
||
1347 | * |
||
1348 | * Note that actual hangs have only been observed on gen7, but for |
||
1349 | * paranoia do it everywhere. |
||
1350 | */ |
||
6937 | serge | 1351 | if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0) |
7144 | serge | 1352 | vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS; |
5060 | serge | 1353 | |
1354 | return vma->obj; |
||
1355 | } |
||
1356 | |||
7144 | serge | 1357 | #define I915_USER_RINGS (4) |
1358 | |||
1359 | static const enum intel_ring_id user_ring_map[I915_USER_RINGS + 1] = { |
||
1360 | [I915_EXEC_DEFAULT] = RCS, |
||
1361 | [I915_EXEC_RENDER] = RCS, |
||
1362 | [I915_EXEC_BLT] = BCS, |
||
1363 | [I915_EXEC_BSD] = VCS, |
||
1364 | [I915_EXEC_VEBOX] = VECS |
||
1365 | }; |
||
1366 | |||
5060 | serge | 1367 | static int |
7144 | serge | 1368 | eb_select_ring(struct drm_i915_private *dev_priv, |
1369 | struct drm_file *file, |
||
1370 | struct drm_i915_gem_execbuffer2 *args, |
||
1371 | struct intel_engine_cs **ring) |
||
1372 | { |
||
1373 | unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK; |
||
1374 | |||
1375 | if (user_ring_id > I915_USER_RINGS) { |
||
1376 | DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id); |
||
1377 | return -EINVAL; |
||
1378 | } |
||
1379 | |||
1380 | if ((user_ring_id != I915_EXEC_BSD) && |
||
1381 | ((args->flags & I915_EXEC_BSD_MASK) != 0)) { |
||
1382 | DRM_DEBUG("execbuf with non bsd ring but with invalid " |
||
1383 | "bsd dispatch flags: %d\n", (int)(args->flags)); |
||
1384 | return -EINVAL; |
||
1385 | } |
||
1386 | |||
1387 | if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) { |
||
1388 | unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK; |
||
1389 | |||
1390 | if (bsd_idx == I915_EXEC_BSD_DEFAULT) { |
||
1391 | bsd_idx = gen8_dispatch_bsd_ring(dev_priv, file); |
||
1392 | } else if (bsd_idx >= I915_EXEC_BSD_RING1 && |
||
1393 | bsd_idx <= I915_EXEC_BSD_RING2) { |
||
1394 | bsd_idx >>= I915_EXEC_BSD_SHIFT; |
||
1395 | bsd_idx--; |
||
1396 | } else { |
||
1397 | DRM_DEBUG("execbuf with unknown bsd ring: %u\n", |
||
1398 | bsd_idx); |
||
1399 | return -EINVAL; |
||
1400 | } |
||
1401 | |||
1402 | *ring = &dev_priv->ring[_VCS(bsd_idx)]; |
||
1403 | } else { |
||
1404 | *ring = &dev_priv->ring[user_ring_map[user_ring_id]]; |
||
1405 | } |
||
1406 | |||
1407 | if (!intel_ring_initialized(*ring)) { |
||
1408 | DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id); |
||
1409 | return -EINVAL; |
||
1410 | } |
||
1411 | |||
1412 | return 0; |
||
1413 | } |
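/*
 * Added summary (mirrors the code above): execbuffer flag to engine mapping
 * as implemented by eb_select_ring():
 *
 *	I915_EXEC_DEFAULT / I915_EXEC_RENDER	-> RCS
 *	I915_EXEC_BLT				-> BCS
 *	I915_EXEC_VEBOX				-> VECS
 *	I915_EXEC_BSD				-> VCS, or VCS2 when a second
 *						   BSD engine exists and the
 *						   I915_EXEC_BSD_MASK selector
 *						   asks for it
 *
 * Any other ring id, or BSD selector bits combined with a non-BSD ring, is
 * rejected with -EINVAL before any state is touched.
 */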
||
1414 | |||
1415 | static int |
||
3263 | Serge | 1416 | i915_gem_do_execbuffer(struct drm_device *dev, void *data, |
1417 | struct drm_file *file, |
||
1418 | struct drm_i915_gem_execbuffer2 *args, |
||
5060 | serge | 1419 | struct drm_i915_gem_exec_object2 *exec) |
3263 | Serge | 1420 | { |
5060 | serge | 1421 | struct drm_i915_private *dev_priv = dev->dev_private; |
7144 | serge | 1422 | struct drm_i915_gem_request *req = NULL; |
4560 | Serge | 1423 | struct eb_vmas *eb; |
3263 | Serge | 1424 | struct drm_i915_gem_object *batch_obj; |
6084 | serge | 1425 | struct drm_i915_gem_exec_object2 shadow_exec_entry; |
5060 | serge | 1426 | struct intel_engine_cs *ring; |
1427 | struct intel_context *ctx; |
||
1428 | struct i915_address_space *vm; |
||
6084 | serge | 1429 | struct i915_execbuffer_params params_master; /* XXX: will be removed later */ |
1430 | struct i915_execbuffer_params *params = ¶ms_master; |
||
4560 | Serge | 1431 | const u32 ctx_id = i915_execbuffer2_get_context_id(*args); |
6084 | serge | 1432 | u32 dispatch_flags; |
5060 | serge | 1433 | int ret; |
3480 | Serge | 1434 | bool need_relocs; |
3263 | Serge | 1435 | |
3480 | Serge | 1436 | if (!i915_gem_check_execbuffer(args)) |
3263 | Serge | 1437 | return -EINVAL; |
1438 | |||
5354 | serge | 1439 | ret = validate_exec_list(dev, exec, args->buffer_count); |
3263 | Serge | 1440 | if (ret) |
1441 | return ret; |
||
1442 | |||
6084 | serge | 1443 | dispatch_flags = 0; |
3263 | Serge | 1444 | if (args->flags & I915_EXEC_SECURE) { |
1445 | |||
6084 | serge | 1446 | dispatch_flags |= I915_DISPATCH_SECURE; |
3263 | Serge | 1447 | } |
1448 | if (args->flags & I915_EXEC_IS_PINNED) |
||
6084 | serge | 1449 | dispatch_flags |= I915_DISPATCH_PINNED; |
3263 | Serge | 1450 | |
7144 | serge | 1451 | ret = eb_select_ring(dev_priv, file, args, &ring); |
1452 | if (ret) |
||
1453 | return ret; |
||
5060 | serge | 1454 | |
3263 | Serge | 1455 | if (args->buffer_count < 1) { |
1456 | DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); |
||
1457 | return -EINVAL; |
||
1458 | } |
||
1459 | |||
6084 | serge | 1460 | if (args->flags & I915_EXEC_RESOURCE_STREAMER) { |
1461 | if (!HAS_RESOURCE_STREAMER(dev)) { |
||
1462 | DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n"); |
||
1463 | return -EINVAL; |
||
1464 | } |
||
1465 | if (ring->id != RCS) { |
||
1466 | DRM_DEBUG("RS is not available on %s\n", |
||
1467 | ring->name); |
||
1468 | return -EINVAL; |
||
1469 | } |
||
1470 | |||
1471 | dispatch_flags |= I915_DISPATCH_RS; |
||
1472 | } |
||
1473 | |||
4560 | Serge | 1474 | intel_runtime_pm_get(dev_priv); |
1475 | |||
3263 | Serge | 1476 | ret = i915_mutex_lock_interruptible(dev); |
1477 | if (ret) |
||
1478 | goto pre_mutex_err; |
||
1479 | |||
5060 | serge | 1480 | ctx = i915_gem_validate_context(dev, file, ring, ctx_id); |
1481 | if (IS_ERR(ctx)) { |
||
4560 | Serge | 1482 | mutex_unlock(&dev->struct_mutex); |
5060 | serge | 1483 | ret = PTR_ERR(ctx); |
4560 | Serge | 1484 | goto pre_mutex_err; |
1485 | } |
||
1486 | |||
5060 | serge | 1487 | i915_gem_context_reference(ctx); |
1488 | |||
5354 | serge | 1489 | if (ctx->ppgtt) |
1490 | vm = &ctx->ppgtt->base; |
||
1491 | else |
||
5060 | serge | 1492 | vm = &dev_priv->gtt.base; |
1493 | |||
6084 | serge | 1494 | memset(¶ms_master, 0x00, sizeof(params_master)); |
1495 | |||
3480 | Serge | 1496 | eb = eb_create(args); |
3263 | Serge | 1497 | if (eb == NULL) { |
5060 | serge | 1498 | i915_gem_context_unreference(ctx); |
3263 | Serge | 1499 | mutex_unlock(&dev->struct_mutex); |
1500 | ret = -ENOMEM; |
||
6084 | serge | 1501 | goto pre_mutex_err; |
3263 | Serge | 1502 | } |
1503 | |||
1504 | /* Look up object handles */ |
||
4560 | Serge | 1505 | ret = eb_lookup_vmas(eb, exec, args, vm, file); |
3480 | Serge | 1506 | if (ret) |
6084 | serge | 1507 | goto err; |
3263 | Serge | 1508 | |
1509 | /* take note of the batch buffer before we might reorder the lists */ |
||
5060 | serge | 1510 | batch_obj = eb_get_batch(eb); |
3263 | Serge | 1511 | |
1512 | /* Move the objects en-masse into the GTT, evicting if necessary. */ |
||
3480 | Serge | 1513 | need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0; |
6084 | serge | 1514 | ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, ctx, &need_relocs); |
3263 | Serge | 1515 | if (ret) |
1516 | goto err; |
||
1517 | |||
1518 | /* The objects are in their final locations, apply the relocations. */ |
||
3480 | Serge | 1519 | if (need_relocs) |
4560 | Serge | 1520 | ret = i915_gem_execbuffer_relocate(eb); |
3263 | Serge | 1521 | if (ret) { |
1522 | if (ret == -EFAULT) { |
||
3480 | Serge | 1523 | ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring, |
6084 | serge | 1524 | eb, exec, ctx); |
3263 | Serge | 1525 | BUG_ON(!mutex_is_locked(&dev->struct_mutex)); |
1526 | } |
||
1527 | if (ret) |
||
1528 | goto err; |
||
6084 | serge | 1529 | } |
3263 | Serge | 1530 | |
1531 | /* Set the pending read domains for the batch buffer to COMMAND */ |
||
1532 | if (batch_obj->base.pending_write_domain) { |
||
1533 | DRM_DEBUG("Attempting to use self-modifying batch buffer\n"); |
||
1534 | ret = -EINVAL; |
||
1535 | goto err; |
||
1536 | } |
||
6084 | serge | 1537 | |
1538 | params->args_batch_start_offset = args->batch_start_offset; |
||
1539 | |||
5060 | serge | 1540 | #if 0 |
6084 | serge | 1541 | if (i915_needs_cmd_parser(ring) && args->batch_len) { |
1542 | struct drm_i915_gem_object *parsed_batch_obj; |
||
1543 | |||
1544 | parsed_batch_obj = i915_gem_execbuffer_parse(ring, |
||
1545 | &shadow_exec_entry, |
||
1546 | eb, |
||
1547 | batch_obj, |
||
1548 | args->batch_start_offset, |
||
1549 | args->batch_len, |
||
1550 | file->is_master); |
||
1551 | if (IS_ERR(parsed_batch_obj)) { |
||
1552 | ret = PTR_ERR(parsed_batch_obj); |
||
5060 | serge | 1553 | goto err; |
6084 | serge | 1554 | } |
1555 | |||
5060 | serge | 1556 | /* |
6084 | serge | 1557 | * parsed_batch_obj == batch_obj means batch not fully parsed: |
1558 | * Accept, but don't promote to secure. |
||
5060 | serge | 1559 | */ |
6084 | serge | 1560 | |
1561 | if (parsed_batch_obj != batch_obj) { |
||
1562 | /* |
||
1563 | * Batch parsed and accepted: |
||
1564 | * |
||
1565 | * Set the DISPATCH_SECURE bit to remove the NON_SECURE |
||
1566 | * bit from MI_BATCH_BUFFER_START commands issued in |
||
1567 | * the dispatch_execbuffer implementations. We |
||
1568 | * specifically don't want that set on batches the |
||
1569 | * command parser has accepted. |
||
1570 | */ |
||
1571 | dispatch_flags |= I915_DISPATCH_SECURE; |
||
1572 | params->args_batch_start_offset = 0; |
||
1573 | batch_obj = parsed_batch_obj; |
||
1574 | } |
||
5060 | serge | 1575 | } |
6084 | serge | 1576 | #endif |
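/*
 * Added note (assumption based on the upstream driver): when the command
 * parser block above is compiled in, i915_gem_execbuffer_parse() copies the
 * batch into a shadow buffer object, scans it for disallowed commands and,
 * if the whole batch was parsed, returns the shadow object, which is then
 * dispatched with I915_DISPATCH_SECURE as described in the comments above.
 */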
5367 | serge | 1577 | |
6084 | serge | 1578 | batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND; |
1579 | |||
3263 | Serge | 1580 | /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure |
1581 | * batch" bit. Hence we need to pin secure batches into the global gtt. |
||
4560 | Serge | 1582 | * hsw should have this fixed, but bdw mucks it up again. */ |
6084 | serge | 1583 | if (dispatch_flags & I915_DISPATCH_SECURE) { |
5354 | serge | 1584 | /* |
1585 | * So on first glance it looks freaky that we pin the batch here |
||
1586 | * outside of the reservation loop. But: |
||
1587 | * - The batch is already pinned into the relevant ppgtt, so we |
||
1588 | * already have the backing storage fully allocated. |
||
1589 | * - No other BO uses the global gtt (well contexts, but meh), |
||
6084 | serge | 1590 | * so we don't really have issues with multiple objects not |
5354 | serge | 1591 | * fitting due to fragmentation. |
1592 | * So this is actually safe. |
||
1593 | */ |
||
1594 | ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0); |
||
1595 | if (ret) |
||
1596 | goto err; |
||
3263 | Serge | 1597 | |
6084 | serge | 1598 | params->batch_obj_vm_offset = i915_gem_obj_ggtt_offset(batch_obj); |
5354 | serge | 1599 | } else |
6084 | serge | 1600 | params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm); |
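	/*
	 * Added note: the GGTT pin taken in the secure path above is dropped
	 * again at the err_batch_unpin label below whether or not submission
	 * succeeds, so params->batch_obj_vm_offset refers to the global GTT
	 * for secure batches and to the selected vm otherwise.
	 */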
3263 | Serge | 1601 | |
6084 | serge | 1602 | /* Allocate a request for this batch buffer nice and early. */ |
7144 | serge | 1603 | req = i915_gem_request_alloc(ring, ctx); |
1604 | if (IS_ERR(req)) { |
||
1605 | ret = PTR_ERR(req); |
||
6084 | serge | 1606 | goto err_batch_unpin; |
7144 | serge | 1607 | } |
3263 | Serge | 1608 | |
7144 | serge | 1609 | ret = i915_gem_request_add_to_client(req, file); |
6084 | serge | 1610 | if (ret) |
1611 | goto err_batch_unpin; |
||
1612 | |||
5354 | serge | 1613 | /* |
6084 | serge | 1614 | * Save assorted stuff away to pass through to *_submission(). |
1615 | * NB: This data should be 'persistent' and not local as it will be |
||
1616 | * kept around beyond the duration of the IOCTL once the GPU |
||
1617 | * scheduler arrives. |
||
1618 | */ |
||
1619 | params->dev = dev; |
||
1620 | params->file = file; |
||
1621 | params->ring = ring; |
||
1622 | params->dispatch_flags = dispatch_flags; |
||
1623 | params->batch_obj = batch_obj; |
||
1624 | params->ctx = ctx; |
||
7144 | serge | 1625 | params->request = req; |
6084 | serge | 1626 | |
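	/*
	 * Added note (assumption based on the upstream driver): the
	 * gt.execbuf_submit hook called below is installed at init time and
	 * resolves to either the legacy ringbuffer submission path or the
	 * execlists submission path, depending on whether execlists are
	 * enabled on this platform.
	 */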
1627 | ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas); |
||
1628 | |||
1629 | err_batch_unpin: |
||
1630 | /* |
||
5354 | serge | 1631 | * FIXME: We crucially rely upon the active tracking for the (ppgtt) |
1632 | * batch vma for correctness. To make this less ugly and fragile, it |
||
1633 | * needs to be adjusted to also track the ggtt batch vma properly as |
||
1634 | * active. |
||
1635 | */ |
||
6084 | serge | 1636 | if (dispatch_flags & I915_DISPATCH_SECURE) |
5354 | serge | 1637 | i915_gem_object_ggtt_unpin(batch_obj); |
6084 | serge | 1638 | |
3263 | Serge | 1639 | err: |
5060 | serge | 1640 | /* the request owns the ref now */ |
1641 | i915_gem_context_unreference(ctx); |
||
3263 | Serge | 1642 | eb_destroy(eb); |
1643 | |||
6084 | serge | 1644 | /* |
1645 | * If the request was created but not successfully submitted then it |
||
1646 | * must be freed again. If it was submitted then it is being tracked |
||
1647 | * on the active request list and no clean up is required here. |
||
1648 | */ |
||
7144 | serge | 1649 | if (ret && !IS_ERR_OR_NULL(req)) |
1650 | i915_gem_request_cancel(req); |
||
6084 | serge | 1651 | |
3263 | Serge | 1652 | mutex_unlock(&dev->struct_mutex); |
1653 | |||
1654 | pre_mutex_err: |
||
4560 | Serge | 1655 | /* intel_gpu_busy should also get a ref, so it will free when the device |
1656 | * is really idle. */ |
||
1657 | intel_runtime_pm_put(dev_priv); |
||
3263 | Serge | 1658 | return ret; |
1659 | } |
||
1660 | |||
4246 | Serge | 1661 | #if 0 |
1662 | /* |
||
1663 | * Legacy execbuffer just creates an exec2 list from the original exec object |
||
1664 | * list array and passes it to the real function. |
||
1665 | */ |
||
1666 | int |
||
1667 | i915_gem_execbuffer(struct drm_device *dev, void *data, |
||
1668 | struct drm_file *file) |
||
1669 | { |
||
1670 | struct drm_i915_gem_execbuffer *args = data; |
||
1671 | struct drm_i915_gem_execbuffer2 exec2; |
||
1672 | struct drm_i915_gem_exec_object *exec_list = NULL; |
||
1673 | struct drm_i915_gem_exec_object2 *exec2_list = NULL; |
||
1674 | int ret, i; |
||
3480 | Serge | 1675 | |
4246 | Serge | 1676 | if (args->buffer_count < 1) { |
1677 | DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count); |
||
1678 | return -EINVAL; |
||
1679 | } |
||
3480 | Serge | 1680 | |
4246 | Serge | 1681 | /* Copy in the exec list from userland */ |
1682 | exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count); |
||
1683 | exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count); |
||
1684 | if (exec_list == NULL || exec2_list == NULL) { |
||
1685 | DRM_DEBUG("Failed to allocate exec list for %d buffers\n", |
||
1686 | args->buffer_count); |
||
1687 | drm_free_large(exec_list); |
||
1688 | drm_free_large(exec2_list); |
||
1689 | return -ENOMEM; |
||
1690 | } |
||
1691 | ret = copy_from_user(exec_list, |
||
1692 | to_user_ptr(args->buffers_ptr), |
||
1693 | sizeof(*exec_list) * args->buffer_count); |
||
1694 | if (ret != 0) { |
||
1695 | DRM_DEBUG("copy %d exec entries failed %d\n", |
||
1696 | args->buffer_count, ret); |
||
1697 | drm_free_large(exec_list); |
||
1698 | drm_free_large(exec2_list); |
||
1699 | return -EFAULT; |
||
1700 | } |
||
1701 | |||
1702 | for (i = 0; i < args->buffer_count; i++) { |
||
1703 | exec2_list[i].handle = exec_list[i].handle; |
||
1704 | exec2_list[i].relocation_count = exec_list[i].relocation_count; |
||
1705 | exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr; |
||
1706 | exec2_list[i].alignment = exec_list[i].alignment; |
||
1707 | exec2_list[i].offset = exec_list[i].offset; |
||
1708 | if (INTEL_INFO(dev)->gen < 4) |
||
1709 | exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE; |
||
1710 | else |
||
1711 | exec2_list[i].flags = 0; |
||
1712 | } |
||
1713 | |||
1714 | exec2.buffers_ptr = args->buffers_ptr; |
||
1715 | exec2.buffer_count = args->buffer_count; |
||
1716 | exec2.batch_start_offset = args->batch_start_offset; |
||
1717 | exec2.batch_len = args->batch_len; |
||
1718 | exec2.DR1 = args->DR1; |
||
1719 | exec2.DR4 = args->DR4; |
||
1720 | exec2.num_cliprects = args->num_cliprects; |
||
1721 | exec2.cliprects_ptr = args->cliprects_ptr; |
||
1722 | exec2.flags = I915_EXEC_RENDER; |
||
1723 | i915_execbuffer2_set_context_id(exec2, 0); |
||
1724 | |||
5060 | serge | 1725 | ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list); |
4246 | Serge | 1726 | if (!ret) { |
5060 | serge | 1727 | struct drm_i915_gem_exec_object __user *user_exec_list = |
1728 | to_user_ptr(args->buffers_ptr); |
||
1729 | |||
4246 | Serge | 1730 | /* Copy the new buffer offsets back to the user's exec list. */ |
5060 | serge | 1731 | for (i = 0; i < args->buffer_count; i++) { |
6937 | serge | 1732 | exec2_list[i].offset = |
1733 | gen8_canonical_addr(exec2_list[i].offset); |
||
5060 | serge | 1734 | ret = __copy_to_user(&user_exec_list[i].offset, |
1735 | &exec2_list[i].offset, |
||
1736 | sizeof(user_exec_list[i].offset)); |
||
6084 | serge | 1737 | if (ret) { |
1738 | ret = -EFAULT; |
||
1739 | DRM_DEBUG("failed to copy %d exec entries " |
||
1740 | "back to user (%d)\n", |
||
1741 | args->buffer_count, ret); |
||
5060 | serge | 1742 | break; |
1743 | } |
||
4246 | Serge | 1744 | } |
1745 | } |
||
1746 | |||
1747 | drm_free_large(exec_list); |
||
1748 | drm_free_large(exec2_list); |
||
1749 | return ret; |
||
1750 | } |
||
1751 | #endif |
||
1752 | |||
3263 | Serge | 1753 | int |
1754 | i915_gem_execbuffer2(struct drm_device *dev, void *data, |
||
1755 | struct drm_file *file) |
||
1756 | { |
||
1757 | struct drm_i915_gem_execbuffer2 *args = data; |
||
1758 | struct drm_i915_gem_exec_object2 *exec2_list = NULL; |
||
1759 | int ret; |
||
1760 | |||
1761 | if (args->buffer_count < 1 || |
||
1762 | args->buffer_count > UINT_MAX / sizeof(*exec2_list)) { |
||
1763 | DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count); |
||
1764 | return -EINVAL; |
||
1765 | } |
||
1766 | |||
5060 | serge | 1767 | if (args->rsvd2 != 0) { |
1768 | DRM_DEBUG("dirty rvsd2 field\n"); |
||
1769 | return -EINVAL; |
||
1770 | } |
||
1771 | |||
3480 | Serge | 1772 | exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count, |
1773 | GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY); |
||
3263 | Serge | 1774 | if (exec2_list == NULL) { |
1775 | DRM_DEBUG("Failed to allocate exec list for %d buffers\n", |
||
1776 | args->buffer_count); |
||
1777 | return -ENOMEM; |
||
1778 | } |
||
1779 | ret = copy_from_user(exec2_list, |
||
4539 | Serge | 1780 | to_user_ptr(args->buffers_ptr), |
3263 | Serge | 1781 | sizeof(*exec2_list) * args->buffer_count); |
1782 | if (ret != 0) { |
||
1783 | DRM_DEBUG("copy %d exec entries failed %d\n", |
||
1784 | args->buffer_count, ret); |
||
3266 | Serge | 1785 | kfree(exec2_list); |
1786 | FAIL(); |
||
3263 | Serge | 1787 | return -EFAULT; |
1788 | } |
||
1789 | |||
5060 | serge | 1790 | ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list); |
3263 | Serge | 1791 | if (!ret) { |
1792 | /* Copy the new buffer offsets back to the user's exec list. */ |
||
5060 | serge | 1793 | struct drm_i915_gem_exec_object2 __user *user_exec_list = |
1794 | to_user_ptr(args->buffers_ptr); |
||
1795 | int i; |
||
1796 | |||
1797 | for (i = 0; i < args->buffer_count; i++) { |
||
6937 | serge | 1798 | exec2_list[i].offset = |
1799 | gen8_canonical_addr(exec2_list[i].offset); |
||
5060 | serge | 1800 | ret = __copy_to_user(&user_exec_list[i].offset, |
1801 | &exec2_list[i].offset, |
||
1802 | sizeof(user_exec_list[i].offset)); |
||
6084 | serge | 1803 | if (ret) { |
1804 | ret = -EFAULT; |
||
1805 | DRM_DEBUG("failed to copy %d exec entries " |
||
5060 | serge | 1806 | "back to user\n", |
1807 | args->buffer_count); |
||
1808 | break; |
||
1809 | } |
||
3263 | Serge | 1810 | } |
1811 | } |
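	/*
	 * Added note (sketch, not this file's definition): the
	 * gen8_canonical_addr() calls above put 48-bit GTT offsets into
	 * canonical form before reporting them back to userspace,
	 * essentially by sign-extending bit 47:
	 *
	 *	static inline uint64_t gen8_canonical_addr(uint64_t address)
	 *	{
	 *		return sign_extend64(address, 47);
	 *	}
	 */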
||
1812 | |||
3266 | Serge | 1813 | kfree(exec2_list); |
3263 | Serge | 1814 | return ret; |
1815 | }