3263 Serge 1
/*
2
 * Copyright © 2008,2010 Intel Corporation
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 * IN THE SOFTWARE.
22
 *
23
 * Authors:
24
 *    Eric Anholt <eric@anholt.net>
25
 *    Chris Wilson <chris@chris-wilson.co.uk>
26
 *
27
 */
28
 
29
#include <drm/drmP.h>
30
#include <drm/i915_drm.h>
31
#include "i915_drv.h"
32
#include "i915_trace.h"
33
#include "intel_drv.h"
5060 serge 34
#include <linux/dma_remapping.h>
3263 Serge 35
 
4560 Serge 36
#define  __EXEC_OBJECT_HAS_PIN (1<<31)
37
#define  __EXEC_OBJECT_HAS_FENCE (1<<30)
5354 serge 38
#define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
5060 serge 39
#define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
3263 Serge 40
 
5060 serge 41
#define BATCH_OFFSET_BIAS (256*1024)
3263 Serge 42
 
4560 Serge 43
struct eb_vmas {
44
	struct list_head vmas;
3263 Serge 45
	int and;
3480 Serge 46
	union {
4560 Serge 47
		struct i915_vma *lut[0];
5060 serge 48
		struct hlist_head buckets[0];
3480 Serge 49
	};
3263 Serge 50
};
51
 
4560 Serge 52
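/*
 * Allocate the handle->vma lookup structure for this execbuffer.  When
 * userspace sets I915_EXEC_HANDLE_LUT the handles are dense indices and a
 * flat array is used; otherwise fall back to a hash table whose bucket
 * count is shrunk until it fits comfortably within a page.
 */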
static struct eb_vmas *
3480 Serge 53
eb_create(struct drm_i915_gem_execbuffer2 *args)
3263 Serge 54
{
4560 Serge 55
	struct eb_vmas *eb = NULL;
3480 Serge 56
 
57
	if (args->flags & I915_EXEC_HANDLE_LUT) {
4560 Serge 58
		unsigned size = args->buffer_count;
59
		size *= sizeof(struct i915_vma *);
60
		size += sizeof(struct eb_vmas);
3480 Serge 61
		eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
62
	}
63
 
64
	if (eb == NULL) {
4560 Serge 65
		unsigned size = args->buffer_count;
66
		unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
3480 Serge 67
		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
68
		while (count > 2*size)
6084 serge 69
			count >>= 1;
70
		eb = kzalloc(count*sizeof(struct hlist_head) +
4560 Serge 71
			     sizeof(struct eb_vmas),
3480 Serge 72
			     GFP_TEMPORARY);
6084 serge 73
		if (eb == NULL)
74
			return eb;
3263 Serge 75
 
6084 serge 76
		eb->and = count - 1;
3480 Serge 77
	} else
78
		eb->and = -args->buffer_count;
79
 
4560 Serge 80
	INIT_LIST_HEAD(&eb->vmas);
3263 Serge 81
	return eb;
82
}
83
 
84
static void
4560 Serge 85
eb_reset(struct eb_vmas *eb)
3263 Serge 86
{
3480 Serge 87
	if (eb->and >= 0)
6084 serge 88
		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
3263 Serge 89
}
90
 
3480 Serge 91
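/*
 * Resolve each exec object handle to its GEM object and then to a vma in
 * the target address space.  Object references are taken under
 * file->table_lock; on error, objects not yet moved onto eb->vmas are
 * unreferenced before returning.
 */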
static int
4560 Serge 92
eb_lookup_vmas(struct eb_vmas *eb,
6084 serge 93
	       struct drm_i915_gem_exec_object2 *exec,
94
	       const struct drm_i915_gem_execbuffer2 *args,
4560 Serge 95
	       struct i915_address_space *vm,
6084 serge 96
	       struct drm_file *file)
3263 Serge 97
{
4560 Serge 98
	struct drm_i915_gem_object *obj;
99
	struct list_head objects;
100
	int i, ret;
3480 Serge 101
 
4560 Serge 102
	INIT_LIST_HEAD(&objects);
3480 Serge 103
	spin_lock(&file->table_lock);
4560 Serge 104
	/* Grab a reference to the object and release the lock so we can lookup
105
	 * or create the VMA without using GFP_ATOMIC */
3480 Serge 106
	for (i = 0; i < args->buffer_count; i++) {
6084 serge 107
		obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
3480 Serge 108
		if (obj == NULL) {
109
			spin_unlock(&file->table_lock);
110
			DRM_DEBUG("Invalid object handle %d at index %d\n",
111
				   exec[i].handle, i);
4560 Serge 112
			ret = -ENOENT;
113
			goto err;
3480 Serge 114
		}
115
 
4560 Serge 116
		if (!list_empty(&obj->obj_exec_link)) {
3480 Serge 117
			spin_unlock(&file->table_lock);
118
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
119
				   obj, exec[i].handle, i);
4560 Serge 120
			ret = -EINVAL;
121
			goto err;
3480 Serge 122
		}
123
 
124
		drm_gem_object_reference(&obj->base);
4560 Serge 125
		list_add_tail(&obj->obj_exec_link, &objects);
126
	}
127
	spin_unlock(&file->table_lock);
3480 Serge 128
 
4560 Serge 129
	i = 0;
130
	while (!list_empty(&objects)) {
131
		struct i915_vma *vma;
132
 
133
		obj = list_first_entry(&objects,
134
				       struct drm_i915_gem_object,
135
				       obj_exec_link);
136
 
137
		/*
138
		 * NOTE: We can leak any vmas created here when something fails
139
		 * later on. But that's no issue since vma_unbind can deal with
140
		 * vmas which are not actually bound. And since only
141
		 * lookup_or_create exists as an interface to get at the vma
142
		 * from the (obj, vm) we don't run the risk of creating
143
		 * duplicated vmas for the same vm.
144
		 */
5354 serge 145
		vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
4560 Serge 146
		if (IS_ERR(vma)) {
147
			DRM_DEBUG("Failed to lookup VMA\n");
148
			ret = PTR_ERR(vma);
149
			goto err;
150
		}
151
 
152
		/* Transfer ownership from the objects list to the vmas list. */
153
		list_add_tail(&vma->exec_list, &eb->vmas);
154
		list_del_init(&obj->obj_exec_link);
155
 
156
		vma->exec_entry = &exec[i];
3480 Serge 157
		if (eb->and < 0) {
4560 Serge 158
			eb->lut[i] = vma;
3480 Serge 159
		} else {
160
			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
4560 Serge 161
			vma->exec_handle = handle;
162
			hlist_add_head(&vma->exec_node,
3480 Serge 163
				       &eb->buckets[handle & eb->and]);
164
		}
4560 Serge 165
		++i;
3480 Serge 166
	}
167
 
168
	return 0;
4560 Serge 169
 
170
 
171
err:
172
	while (!list_empty(&objects)) {
173
		obj = list_first_entry(&objects,
174
				       struct drm_i915_gem_object,
175
				       obj_exec_link);
176
		list_del_init(&obj->obj_exec_link);
177
		drm_gem_object_unreference(&obj->base);
178
	}
179
	/*
180
	 * Objects already transfered to the vmas list will be unreferenced by
181
	 * eb_destroy.
182
	 */
183
 
184
	return ret;
3263 Serge 185
}
186
 
4560 Serge 187
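/* Find a previously registered vma via the flat LUT or the handle hash. */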
static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
3263 Serge 188
{
3480 Serge 189
	if (eb->and < 0) {
190
		if (handle >= -eb->and)
191
			return NULL;
192
		return eb->lut[handle];
193
	} else {
6084 serge 194
		struct hlist_head *head;
195
		struct hlist_node *node;
3263 Serge 196
 
6084 serge 197
		head = &eb->buckets[handle & eb->and];
198
		hlist_for_each(node, head) {
4560 Serge 199
			struct i915_vma *vma;
3480 Serge 200
 
4560 Serge 201
			vma = hlist_entry(node, struct i915_vma, exec_node);
202
			if (vma->exec_handle == handle)
203
				return vma;
6084 serge 204
		}
205
		return NULL;
3263 Serge 206
	}
207
}
208
 
209
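/* Release the fence and pin references taken when the vma was reserved. */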
static void
4560 Serge 210
i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
3263 Serge 211
{
4560 Serge 212
	struct drm_i915_gem_exec_object2 *entry;
213
	struct drm_i915_gem_object *obj = vma->obj;
3480 Serge 214
 
4560 Serge 215
	if (!drm_mm_node_allocated(&vma->node))
216
		return;
217
 
218
	entry = vma->exec_entry;
219
 
220
	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
221
		i915_gem_object_unpin_fence(obj);
222
 
223
	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
5060 serge 224
		vma->pin_count--;
4560 Serge 225
 
226
	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
227
}
228
 
229
static void eb_destroy(struct eb_vmas *eb)
230
{
231
	while (!list_empty(&eb->vmas)) {
232
		struct i915_vma *vma;
233
 
234
		vma = list_first_entry(&eb->vmas,
235
				       struct i915_vma,
3480 Serge 236
				       exec_list);
4560 Serge 237
		list_del_init(&vma->exec_list);
238
		i915_gem_execbuffer_unreserve_vma(vma);
239
		drm_gem_object_unreference(&vma->obj->base);
3480 Serge 240
	}
3263 Serge 241
	kfree(eb);
242
}
243
 
244
static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
245
{
4560 Serge 246
	return (HAS_LLC(obj->base.dev) ||
247
		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
3263 Serge 248
		obj->cache_level != I915_CACHE_NONE);
249
}
250
 
251
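/*
 * Write the relocation through a CPU kmap of the backing page.  On gen8+
 * the value is 64 bits wide and the upper dword may fall on the following
 * page, so the mapping is switched when the write crosses a page boundary.
 */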
static int
4371 Serge 252
relocate_entry_cpu(struct drm_i915_gem_object *obj,
5060 serge 253
		   struct drm_i915_gem_relocation_entry *reloc,
254
		   uint64_t target_offset)
4371 Serge 255
{
6084 serge 256
	struct drm_device *dev = obj->base.dev;
257
	uint32_t page_offset = offset_in_page(reloc->offset);
5060 serge 258
	uint64_t delta = reloc->delta + target_offset;
4371 Serge 259
	char *vaddr;
4560 Serge 260
	int ret;
4371 Serge 261
 
4560 Serge 262
	ret = i915_gem_object_set_to_cpu_domain(obj, true);
4371 Serge 263
	if (ret)
264
		return ret;
265
 
5354 serge 266
	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
267
				reloc->offset >> PAGE_SHIFT));
5060 serge 268
	*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
4371 Serge 269
 
5060 serge 270
	if (INTEL_INFO(dev)->gen >= 8) {
271
		page_offset = offset_in_page(page_offset + sizeof(uint32_t));
272
 
273
		if (page_offset == 0) {
5354 serge 274
			kunmap_atomic(vaddr);
275
			vaddr = kmap_atomic(i915_gem_object_get_page(obj,
276
			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
5060 serge 277
		}
278
 
279
		*(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
280
	}
281
 
5354 serge 282
	kunmap_atomic(vaddr);
283
 
4371 Serge 284
	return 0;
285
}
286
 
287
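/*
 * Write the relocation through the GTT aperture.  This port maps the page
 * with MapPage() rather than an atomic io mapping, which is why the
 * io_mapping_unmap_atomic() call below remains commented out.
 */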
static int
288
relocate_entry_gtt(struct drm_i915_gem_object *obj,
5060 serge 289
		   struct drm_i915_gem_relocation_entry *reloc,
290
		   uint64_t target_offset)
4371 Serge 291
{
292
	struct drm_device *dev = obj->base.dev;
293
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 294
	uint64_t delta = reloc->delta + target_offset;
5354 serge 295
	uint64_t offset;
4371 Serge 296
	void __iomem *reloc_page;
4560 Serge 297
	int ret;
4371 Serge 298
 
299
	ret = i915_gem_object_set_to_gtt_domain(obj, true);
300
	if (ret)
301
		return ret;
302
 
303
	ret = i915_gem_object_put_fence(obj);
304
	if (ret)
305
		return ret;
306
 
307
	/* Map the page containing the relocation we're going to perform.  */
5354 serge 308
	offset = i915_gem_obj_ggtt_offset(obj);
309
	offset += reloc->offset;
4539 Serge 310
    MapPage(dev_priv->gtt.mappable,dev_priv->gtt.mappable_base +
5354 serge 311
                                 (offset & PAGE_MASK), PG_SW);
4539 Serge 312
	reloc_page = dev_priv->gtt.mappable;
5354 serge 313
	iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
4371 Serge 314
 
6084 serge 315
//	io_mapping_unmap_atomic(reloc_page);
5060 serge 316
 
6084 serge 317
	return 0;
318
}
5354 serge 319
 
6084 serge 320
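/* Write a dword with cache line flushes on either side so the GPU observes
 * the new value without relying on CPU cache coherency. */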
static void
321
clflush_write32(void *addr, uint32_t value)
322
{
323
	/* This is not a fast path, so KISS. */
324
	drm_clflush_virt_range(addr, sizeof(uint32_t));
325
	*(uint32_t *)addr = value;
326
	drm_clflush_virt_range(addr, sizeof(uint32_t));
327
}
328
 
329
static int
330
relocate_entry_clflush(struct drm_i915_gem_object *obj,
331
		       struct drm_i915_gem_relocation_entry *reloc,
332
		       uint64_t target_offset)
333
{
334
	struct drm_device *dev = obj->base.dev;
335
	uint32_t page_offset = offset_in_page(reloc->offset);
336
	uint64_t delta = (int)reloc->delta + target_offset;
337
	char *vaddr;
338
	int ret;
339
 
340
	ret = i915_gem_object_set_to_gtt_domain(obj, true);
341
	if (ret)
342
		return ret;
343
 
344
	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
345
				reloc->offset >> PAGE_SHIFT));
346
	clflush_write32(vaddr + page_offset, lower_32_bits(delta));
347
 
348
	if (INTEL_INFO(dev)->gen >= 8) {
349
		page_offset = offset_in_page(page_offset + sizeof(uint32_t));
350
 
351
		if (page_offset == 0) {
352
			kunmap_atomic(vaddr);
353
			vaddr = kmap_atomic(i915_gem_object_get_page(obj,
354
			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
355
		}
356
 
357
		clflush_write32(vaddr + page_offset, upper_32_bits(delta));
358
	}
359
 
360
	kunmap_atomic(vaddr);
361
 
4371 Serge 362
	return 0;
363
}
364
 
365
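/*
 * Apply a single relocation: look up the target vma, validate the requested
 * read/write domains and the relocation offset, and rewrite the batch via
 * the CPU, GTT or clflush path unless the presumed offset is already
 * correct.
 */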
static int
3263 Serge 366
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
4560 Serge 367
				   struct eb_vmas *eb,
5060 serge 368
				   struct drm_i915_gem_relocation_entry *reloc)
3263 Serge 369
{
370
	struct drm_device *dev = obj->base.dev;
371
	struct drm_gem_object *target_obj;
372
	struct drm_i915_gem_object *target_i915_obj;
4560 Serge 373
	struct i915_vma *target_vma;
5060 serge 374
	uint64_t target_offset;
4560 Serge 375
	int ret;
3263 Serge 376
 
377
	/* we already hold a reference to all valid objects */
4560 Serge 378
	target_vma = eb_get_vma(eb, reloc->target_handle);
379
	if (unlikely(target_vma == NULL))
3263 Serge 380
		return -ENOENT;
4560 Serge 381
	target_i915_obj = target_vma->obj;
382
	target_obj = &target_vma->obj->base;
3263 Serge 383
 
4560 Serge 384
	target_offset = target_vma->node.start;
3263 Serge 385
 
386
	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
387
	 * pipe_control writes because the gpu doesn't properly redirect them
388
	 * through the ppgtt for non_secure batchbuffers. */
389
	if (unlikely(IS_GEN6(dev) &&
6084 serge 390
	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) {
391
		ret = i915_vma_bind(target_vma, target_i915_obj->cache_level,
392
				    PIN_GLOBAL);
393
		if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!"))
394
			return ret;
395
	}
3263 Serge 396
 
397
	/* Validate that the target is in a valid r/w GPU domain */
398
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
399
		DRM_DEBUG("reloc with multiple write domains: "
400
			  "obj %p target %d offset %d "
401
			  "read %08x write %08x",
402
			  obj, reloc->target_handle,
403
			  (int) reloc->offset,
404
			  reloc->read_domains,
405
			  reloc->write_domain);
4560 Serge 406
		return -EINVAL;
3263 Serge 407
	}
408
	if (unlikely((reloc->write_domain | reloc->read_domains)
409
		     & ~I915_GEM_GPU_DOMAINS)) {
410
		DRM_DEBUG("reloc with read/write non-GPU domains: "
411
			  "obj %p target %d offset %d "
412
			  "read %08x write %08x",
413
			  obj, reloc->target_handle,
414
			  (int) reloc->offset,
415
			  reloc->read_domains,
416
			  reloc->write_domain);
4560 Serge 417
		return -EINVAL;
3263 Serge 418
	}
419
 
420
	target_obj->pending_read_domains |= reloc->read_domains;
421
	target_obj->pending_write_domain |= reloc->write_domain;
422
 
423
	/* If the relocation already has the right value in it, no
424
	 * more work needs to be done.
425
	 */
426
	if (target_offset == reloc->presumed_offset)
427
		return 0;
428
 
429
	/* Check that the relocation address is valid... */
4560 Serge 430
	if (unlikely(reloc->offset >
431
		obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
3263 Serge 432
		DRM_DEBUG("Relocation beyond object bounds: "
433
			  "obj %p target %d offset %d size %d.\n",
434
			  obj, reloc->target_handle,
435
			  (int) reloc->offset,
436
			  (int) obj->base.size);
4560 Serge 437
		return -EINVAL;
3263 Serge 438
	}
439
	if (unlikely(reloc->offset & 3)) {
440
		DRM_DEBUG("Relocation not 4-byte aligned: "
441
			  "obj %p target %d offset %d.\n",
442
			  obj, reloc->target_handle,
443
			  (int) reloc->offset);
4560 Serge 444
		return -EINVAL;
3263 Serge 445
	}
446
 
447
	/* We can't wait for rendering with pagefaults disabled */
448
 
4371 Serge 449
	if (use_cpu_reloc(obj))
5060 serge 450
		ret = relocate_entry_cpu(obj, reloc, target_offset);
6084 serge 451
	else if (obj->map_and_fenceable)
5060 serge 452
		ret = relocate_entry_gtt(obj, reloc, target_offset);
6084 serge 453
    else if (1)
454
		ret = relocate_entry_clflush(obj, reloc, target_offset);
455
	else {
456
		WARN_ONCE(1, "Impossible case in relocation handling\n");
457
		ret = -ENODEV;
458
	}
3263 Serge 459
 
6084 serge 460
	if (ret)
461
		return ret;
3263 Serge 462
 
463
	/* and update the user's relocation entry */
464
	reloc->presumed_offset = target_offset;
465
 
466
	return 0;
467
}
468
 
469
static int
4560 Serge 470
i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
471
				 struct eb_vmas *eb)
3263 Serge 472
{
473
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
3266 Serge 474
	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(64)];
3263 Serge 475
	struct drm_i915_gem_relocation_entry __user *user_relocs;
4560 Serge 476
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
3263 Serge 477
	int remain, ret;
478
 
4539 Serge 479
	user_relocs = to_user_ptr(entry->relocs_ptr);
3263 Serge 480
 
481
	remain = entry->relocation_count;
482
	while (remain) {
483
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
484
		int count = remain;
485
		if (count > ARRAY_SIZE(stack_reloc))
486
			count = ARRAY_SIZE(stack_reloc);
487
		remain -= count;
488
 
489
        memcpy(r, user_relocs, count*sizeof(r[0]));
490
 
491
		do {
492
			u64 offset = r->presumed_offset;
493
 
5060 serge 494
			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r);
3263 Serge 495
			if (ret)
496
				return ret;
497
 
4392 Serge 498
		if (r->presumed_offset != offset)
499
		{
3263 Serge 500
            memcpy(&user_relocs->presumed_offset,
501
                   &r->presumed_offset,
502
                   sizeof(r->presumed_offset));
4392 Serge 503
		}
3263 Serge 504
 
505
			user_relocs++;
506
			r++;
507
		} while (--count);
508
	}
509
 
510
	return 0;
511
#undef N_RELOC
512
}
513
 
514
static int
4560 Serge 515
i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
516
				      struct eb_vmas *eb,
517
				      struct drm_i915_gem_relocation_entry *relocs)
3263 Serge 518
{
4560 Serge 519
	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
3263 Serge 520
	int i, ret;
521
 
522
	for (i = 0; i < entry->relocation_count; i++) {
5060 serge 523
		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]);
3263 Serge 524
		if (ret)
525
			return ret;
526
	}
527
 
528
	return 0;
529
}
530
 
531
static int
4560 Serge 532
i915_gem_execbuffer_relocate(struct eb_vmas *eb)
3263 Serge 533
{
4560 Serge 534
	struct i915_vma *vma;
3263 Serge 535
	int ret = 0;
536
 
537
	/* This is the fast path and we cannot handle a pagefault whilst
538
	 * holding the struct mutex lest the user pass in the relocations
539
	 * contained within a mmaped bo. For in such a case, the page
540
	 * fault handler would call i915_gem_fault() and we would try to
541
	 * acquire the struct mutex again. Obviously this is bad and so
542
	 * lockdep complains vehemently.
543
	 */
4104 Serge 544
//	pagefault_disable();
4560 Serge 545
	list_for_each_entry(vma, &eb->vmas, exec_list) {
546
		ret = i915_gem_execbuffer_relocate_vma(vma, eb);
3263 Serge 547
		if (ret)
548
			break;
549
	}
550
//   pagefault_enable();
551
 
552
	return ret;
553
}
554
 
6084 serge 555
static bool only_mappable_for_reloc(unsigned int flags)
556
{
557
	return (flags & (EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP)) ==
558
		__EXEC_OBJECT_NEEDS_MAP;
559
}
560
 
3263 Serge 561
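/*
 * Pin a vma for execution according to its exec entry flags (mappable,
 * fence, bias), falling back to a non-mappable placement if the aperture is
 * exhausted, and note whether its offset moved so relocations can be rerun.
 */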
static int
4560 Serge 562
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
5060 serge 563
				struct intel_engine_cs *ring,
6084 serge 564
				bool *need_reloc)
3263 Serge 565
{
5060 serge 566
	struct drm_i915_gem_object *obj = vma->obj;
4560 Serge 567
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
5060 serge 568
	uint64_t flags;
3263 Serge 569
	int ret;
570
 
6084 serge 571
	flags = PIN_USER;
5060 serge 572
	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
573
		flags |= PIN_GLOBAL;
574
 
6084 serge 575
	if (!drm_mm_node_allocated(&vma->node)) {
576
		/* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
577
		 * limit address to the first 4GBs for unflagged objects.
578
		 */
579
		if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0)
580
			flags |= PIN_ZONE_4G;
581
		if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
582
			flags |= PIN_GLOBAL | PIN_MAPPABLE;
583
		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
584
			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
585
		if ((flags & PIN_MAPPABLE) == 0)
586
			flags |= PIN_HIGH;
587
	}
588
 
5060 serge 589
	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
6084 serge 590
	if ((ret == -ENOSPC  || ret == -E2BIG) &&
591
	    only_mappable_for_reloc(entry->flags))
592
		ret = i915_gem_object_pin(obj, vma->vm,
593
					  entry->alignment,
594
					  flags & ~PIN_MAPPABLE);
3263 Serge 595
	if (ret)
596
		return ret;
597
 
598
	entry->flags |= __EXEC_OBJECT_HAS_PIN;
599
 
6084 serge 600
	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
601
		ret = i915_gem_object_get_fence(obj);
602
		if (ret)
603
			return ret;
3263 Serge 604
 
6084 serge 605
		if (i915_gem_object_pin_fence(obj))
606
			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
3263 Serge 607
	}
608
 
4560 Serge 609
	if (entry->offset != vma->node.start) {
610
		entry->offset = vma->node.start;
3480 Serge 611
		*need_reloc = true;
612
	}
3266 Serge 613
 
3480 Serge 614
	if (entry->flags & EXEC_OBJECT_WRITE) {
615
		obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
616
		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
617
	}
618
 
3263 Serge 619
	return 0;
620
}
621
 
5060 serge 622
static bool
5354 serge 623
need_reloc_mappable(struct i915_vma *vma)
5060 serge 624
{
625
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
5354 serge 626
 
627
	if (entry->relocation_count == 0)
628
		return false;
629
 
630
	if (!i915_is_ggtt(vma->vm))
631
		return false;
632
 
633
	/* See also use_cpu_reloc() */
634
	if (HAS_LLC(vma->obj->base.dev))
635
		return false;
636
 
637
	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
638
		return false;
639
 
640
	return true;
641
}
642
 
643
static bool
644
eb_vma_misplaced(struct i915_vma *vma)
645
{
646
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
5060 serge 647
	struct drm_i915_gem_object *obj = vma->obj;
648
 
5354 serge 649
	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
5060 serge 650
	       !i915_is_ggtt(vma->vm));
651
 
652
	if (entry->alignment &&
653
	    vma->node.start & (entry->alignment - 1))
654
		return true;
655
 
656
	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
657
	    vma->node.start < BATCH_OFFSET_BIAS)
658
		return true;
659
 
6084 serge 660
	/* avoid costly ping-pong once a batch bo ended up non-mappable */
661
	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
662
		return !only_mappable_for_reloc(entry->flags);
663
 
664
	if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
665
	    (vma->node.start + vma->node.size - 1) >> 32)
666
		return true;
667
 
5060 serge 668
	return false;
669
}
670
 
3263 Serge 671
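/*
 * Reserve address space for every vma in the request.  Objects needing a
 * mappable or fenced placement are sorted to the front, then binding is
 * retried with increasingly aggressive eviction until everything fits or
 * the address space is genuinely full.
 */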
static int
5060 serge 672
i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
4560 Serge 673
			    struct list_head *vmas,
6084 serge 674
			    struct intel_context *ctx,
3480 Serge 675
			    bool *need_relocs)
3263 Serge 676
{
677
	struct drm_i915_gem_object *obj;
4560 Serge 678
	struct i915_vma *vma;
679
	struct i915_address_space *vm;
680
	struct list_head ordered_vmas;
3263 Serge 681
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
682
	int retry;
683
 
5060 serge 684
	i915_gem_retire_requests_ring(ring);
685
 
4560 Serge 686
	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
687
 
688
	INIT_LIST_HEAD(&ordered_vmas);
689
	while (!list_empty(vmas)) {
3263 Serge 690
		struct drm_i915_gem_exec_object2 *entry;
691
		bool need_fence, need_mappable;
692
 
4560 Serge 693
		vma = list_first_entry(vmas, struct i915_vma, exec_list);
694
		obj = vma->obj;
695
		entry = vma->exec_entry;
3263 Serge 696
 
6084 serge 697
		if (ctx->flags & CONTEXT_NO_ZEROMAP)
698
			entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
699
 
5354 serge 700
		if (!has_fenced_gpu_access)
701
			entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
3263 Serge 702
		need_fence =
703
			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
704
			obj->tiling_mode != I915_TILING_NONE;
4560 Serge 705
		need_mappable = need_fence || need_reloc_mappable(vma);
3263 Serge 706
 
5354 serge 707
		if (need_mappable) {
708
			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
4560 Serge 709
			list_move(&vma->exec_list, &ordered_vmas);
5354 serge 710
		} else
4560 Serge 711
			list_move_tail(&vma->exec_list, &ordered_vmas);
3263 Serge 712
 
3480 Serge 713
		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
3263 Serge 714
		obj->base.pending_write_domain = 0;
715
	}
4560 Serge 716
	list_splice(&ordered_vmas, vmas);
3263 Serge 717
 
718
	/* Attempt to pin all of the buffers into the GTT.
719
	 * This is done in 3 phases:
720
	 *
721
	 * 1a. Unbind all objects that do not match the GTT constraints for
722
	 *     the execbuffer (fenceable, mappable, alignment etc).
723
	 * 1b. Increment pin count for already bound objects.
724
	 * 2.  Bind new objects.
725
	 * 3.  Decrement pin count.
726
	 *
727
	 * This avoids unnecessary unbinding of later objects in order to make
728
	 * room for the earlier objects *unless* we need to defragment.
729
	 */
730
	retry = 0;
731
	do {
732
		int ret = 0;
733
 
734
		/* Unbind any ill-fitting objects or pin. */
4560 Serge 735
		list_for_each_entry(vma, vmas, exec_list) {
736
			if (!drm_mm_node_allocated(&vma->node))
3263 Serge 737
				continue;
738
 
5354 serge 739
			if (eb_vma_misplaced(vma))
4560 Serge 740
				ret = i915_vma_unbind(vma);
3263 Serge 741
			else
4560 Serge 742
				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
3263 Serge 743
			if (ret)
744
				goto err;
745
		}
746
 
747
		/* Bind fresh objects */
4560 Serge 748
		list_for_each_entry(vma, vmas, exec_list) {
749
			if (drm_mm_node_allocated(&vma->node))
3263 Serge 750
				continue;
751
 
4560 Serge 752
			ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
3263 Serge 753
			if (ret)
754
				goto err;
755
		}
756
 
4560 Serge 757
err:
3263 Serge 758
		if (ret != -ENOSPC || retry++)
759
			return ret;
760
 
4560 Serge 761
		/* Decrement pin count for bound objects */
762
		list_for_each_entry(vma, vmas, exec_list)
763
			i915_gem_execbuffer_unreserve_vma(vma);
764
 
5060 serge 765
		ret = i915_gem_evict_vm(vm, true);
3263 Serge 766
		if (ret)
767
			return ret;
768
	} while (1);
769
}
770
 
771
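/*
 * Slow path used when the fast relocation pass faults: drop the vmas and
 * struct_mutex, copy all relocation entries into a kernel buffer, then
 * retake the lock, look the objects up again and relocate from the stable
 * copy.
 */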
static int
772
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
3480 Serge 773
				  struct drm_i915_gem_execbuffer2 *args,
3263 Serge 774
				  struct drm_file *file,
5060 serge 775
				  struct intel_engine_cs *ring,
4560 Serge 776
				  struct eb_vmas *eb,
6084 serge 777
				  struct drm_i915_gem_exec_object2 *exec,
778
				  struct intel_context *ctx)
3263 Serge 779
{
780
	struct drm_i915_gem_relocation_entry *reloc;
4560 Serge 781
	struct i915_address_space *vm;
782
	struct i915_vma *vma;
3480 Serge 783
	bool need_relocs;
3263 Serge 784
	int *reloc_offset;
785
	int i, total, ret;
4560 Serge 786
	unsigned count = args->buffer_count;
3263 Serge 787
 
4560 Serge 788
	vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;
789
 
3263 Serge 790
	/* We may process another execbuffer during the unlock... */
4560 Serge 791
	while (!list_empty(&eb->vmas)) {
792
		vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
793
		list_del_init(&vma->exec_list);
794
		i915_gem_execbuffer_unreserve_vma(vma);
795
		drm_gem_object_unreference(&vma->obj->base);
3263 Serge 796
	}
797
 
798
	mutex_unlock(&dev->struct_mutex);
799
 
800
	total = 0;
801
	for (i = 0; i < count; i++)
802
		total += exec[i].relocation_count;
803
 
6084 serge 804
    reloc_offset = __builtin_malloc(count * sizeof(*reloc_offset));
805
    reloc = __builtin_malloc(total * sizeof(*reloc));
3263 Serge 806
	if (reloc == NULL || reloc_offset == NULL) {
3266 Serge 807
        kfree(reloc);
808
        kfree(reloc_offset);
3263 Serge 809
		mutex_lock(&dev->struct_mutex);
810
		return -ENOMEM;
811
	}
812
 
813
	total = 0;
814
	for (i = 0; i < count; i++) {
815
		struct drm_i915_gem_relocation_entry __user *user_relocs;
816
		u64 invalid_offset = (u64)-1;
817
		int j;
818
 
4539 Serge 819
		user_relocs = to_user_ptr(exec[i].relocs_ptr);
3263 Serge 820
 
821
		if (copy_from_user(reloc+total, user_relocs,
822
				   exec[i].relocation_count * sizeof(*reloc))) {
823
			ret = -EFAULT;
824
			mutex_lock(&dev->struct_mutex);
825
			goto err;
826
		}
827
 
828
		/* As we do not update the known relocation offsets after
829
		 * relocating (due to the complexities in lock handling),
830
		 * we need to mark them as invalid now so that we force the
831
		 * relocation processing next time. Just in case the target
832
		 * object is evicted and then rebound into its old
833
		 * presumed_offset before the next execbuffer - if that
834
		 * happened we would make the mistake of assuming that the
835
		 * relocations were valid.
836
		 */
837
		for (j = 0; j < exec[i].relocation_count; j++) {
5060 serge 838
			if (__copy_to_user(&user_relocs[j].presumed_offset,
6084 serge 839
					   &invalid_offset,
840
					   sizeof(invalid_offset))) {
3263 Serge 841
				ret = -EFAULT;
842
				mutex_lock(&dev->struct_mutex);
843
				goto err;
844
			}
845
		}
846
 
847
		reloc_offset[i] = total;
848
		total += exec[i].relocation_count;
849
	}
850
 
851
	ret = i915_mutex_lock_interruptible(dev);
852
	if (ret) {
853
		mutex_lock(&dev->struct_mutex);
854
		goto err;
855
	}
856
 
857
	/* reacquire the objects */
858
	eb_reset(eb);
4560 Serge 859
	ret = eb_lookup_vmas(eb, exec, args, vm, file);
3480 Serge 860
	if (ret)
6084 serge 861
		goto err;
3263 Serge 862
 
3480 Serge 863
	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
6084 serge 864
	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, ctx, &need_relocs);
3263 Serge 865
	if (ret)
866
		goto err;
867
 
4560 Serge 868
	list_for_each_entry(vma, &eb->vmas, exec_list) {
869
		int offset = vma->exec_entry - exec;
870
		ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
871
							    reloc + reloc_offset[offset]);
3263 Serge 872
		if (ret)
873
			goto err;
874
	}
875
 
876
	/* Leave the user relocations as are, this is the painfully slow path,
877
	 * and we want to avoid the complication of dropping the lock whilst
878
	 * having buffers reserved in the aperture and so causing spurious
879
	 * ENOSPC for random operations.
880
	 */
881
 
882
err:
3266 Serge 883
    kfree(reloc);
884
    kfree(reloc_offset);
3263 Serge 885
	return ret;
886
}
887
 
888
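/*
 * Flush pending CPU writes and synchronise with any other ring still using
 * these objects, then invalidate the GPU caches so the batch sees coherent
 * data.
 */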
static int
6084 serge 889
i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
4560 Serge 890
				struct list_head *vmas)
3263 Serge 891
{
6084 serge 892
	const unsigned other_rings = ~intel_ring_flag(req->ring);
4560 Serge 893
	struct i915_vma *vma;
3263 Serge 894
	uint32_t flush_domains = 0;
4104 Serge 895
	bool flush_chipset = false;
3263 Serge 896
	int ret;
897
 
4560 Serge 898
	list_for_each_entry(vma, vmas, exec_list) {
899
		struct drm_i915_gem_object *obj = vma->obj;
3263 Serge 900
 
6084 serge 901
		if (obj->active & other_rings) {
902
			ret = i915_gem_object_sync(obj, req->ring, &req);
903
			if (ret)
904
				return ret;
905
		}
906
 
3263 Serge 907
		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
4104 Serge 908
			flush_chipset |= i915_gem_clflush_object(obj, false);
3263 Serge 909
 
910
		flush_domains |= obj->base.write_domain;
911
	}
912
 
4104 Serge 913
	if (flush_chipset)
6084 serge 914
		i915_gem_chipset_flush(req->ring->dev);
3263 Serge 915
 
916
	if (flush_domains & I915_GEM_DOMAIN_GTT)
917
		wmb();
918
 
919
	/* Unconditionally invalidate gpu caches and ensure that we do flush
920
	 * any residual writes from the previous batch.
921
	 */
6084 serge 922
	return intel_ring_invalidate_all_caches(req);
3263 Serge 923
}
924
 
925
static bool
926
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
927
{
3480 Serge 928
	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
929
		return false;
930
 
6084 serge 931
	/* Kernel clipping was a DRI1 misfeature */
932
	if (exec->num_cliprects || exec->cliprects_ptr)
933
		return false;
934
 
935
	if (exec->DR4 == 0xffffffff) {
936
		DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
937
		exec->DR4 = 0;
938
	}
939
	if (exec->DR1 || exec->DR4)
940
		return false;
941
 
942
	if ((exec->batch_start_offset | exec->batch_len) & 0x7)
943
		return false;
944
 
945
	return true;
3263 Serge 946
}
947
 
948
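/*
 * Sanity-check the user supplied exec object array: reject unknown flags,
 * non-power-of-two alignments and relocation counts that could overflow the
 * single allocation made by the slow path.
 */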
static int
5354 serge 949
validate_exec_list(struct drm_device *dev,
950
		   struct drm_i915_gem_exec_object2 *exec,
3263 Serge 951
		   int count)
952
{
4560 Serge 953
	unsigned relocs_total = 0;
954
	unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
5354 serge 955
	unsigned invalid_flags;
956
	int i;
3263 Serge 957
 
5354 serge 958
	invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
959
	if (USES_FULL_PPGTT(dev))
960
		invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
961
 
3263 Serge 962
	for (i = 0; i < count; i++) {
3746 Serge 963
		char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
3263 Serge 964
		int length; /* limited by fault_in_pages_readable() */
965
 
5354 serge 966
		if (exec[i].flags & invalid_flags)
3263 Serge 967
			return -EINVAL;
968
 
6084 serge 969
		if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
970
			return -EINVAL;
971
 
3480 Serge 972
		/* First check for malicious input causing overflow in
973
		 * the worst case where we need to allocate the entire
974
		 * relocation tree as a single array.
975
		 */
976
		if (exec[i].relocation_count > relocs_max - relocs_total)
977
			return -EINVAL;
978
		relocs_total += exec[i].relocation_count;
979
 
3263 Serge 980
		length = exec[i].relocation_count *
981
			sizeof(struct drm_i915_gem_relocation_entry);
3746 Serge 982
		/*
983
		 * We must check that the entire relocation array is safe
984
		 * to read, but since we may need to update the presumed
985
		 * offsets during execution, check for full write access.
986
		 */
4560 Serge 987
	}
3263 Serge 988
 
4560 Serge 989
	return 0;
990
}
991
 
5060 serge 992
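/*
 * Look up the context for this submission, rejecting banned contexts and
 * lazily allocating the logical ring context when execlists are enabled.
 */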
static struct intel_context *
4560 Serge 993
i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
5060 serge 994
			  struct intel_engine_cs *ring, const u32 ctx_id)
4560 Serge 995
{
5060 serge 996
	struct intel_context *ctx = NULL;
4560 Serge 997
	struct i915_ctx_hang_stats *hs;
998
 
5060 serge 999
	if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
1000
		return ERR_PTR(-EINVAL);
4560 Serge 1001
 
5060 serge 1002
	ctx = i915_gem_context_get(file->driver_priv, ctx_id);
1003
	if (IS_ERR(ctx))
1004
		return ctx;
1005
 
1006
	hs = &ctx->hang_stats;
4560 Serge 1007
	if (hs->banned) {
1008
		DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
5060 serge 1009
		return ERR_PTR(-EIO);
3263 Serge 1010
	}
1011
 
5354 serge 1012
	if (i915.enable_execlists && !ctx->engine[ring->id].state) {
6084 serge 1013
		int ret = intel_lr_context_deferred_alloc(ctx, ring);
5354 serge 1014
		if (ret) {
1015
			DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret);
1016
			return ERR_PTR(ret);
1017
		}
1018
	}
1019
 
5060 serge 1020
	return ctx;
3263 Serge 1021
}
1022
 
5354 serge 1023
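/*
 * Commit the pending read/write domains onto each object and mark its vma
 * active against the request, updating write, fence and frontbuffer
 * tracking along the way.
 */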
void
4560 Serge 1024
i915_gem_execbuffer_move_to_active(struct list_head *vmas,
6084 serge 1025
				   struct drm_i915_gem_request *req)
3263 Serge 1026
{
6084 serge 1027
	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
4560 Serge 1028
	struct i915_vma *vma;
3263 Serge 1029
 
4560 Serge 1030
	list_for_each_entry(vma, vmas, exec_list) {
5354 serge 1031
		struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
4560 Serge 1032
		struct drm_i915_gem_object *obj = vma->obj;
3263 Serge 1033
		u32 old_read = obj->base.read_domains;
1034
		u32 old_write = obj->base.write_domain;
1035
 
6084 serge 1036
		obj->dirty = 1; /* be paranoid  */
3480 Serge 1037
		obj->base.write_domain = obj->base.pending_write_domain;
1038
		if (obj->base.write_domain == 0)
1039
			obj->base.pending_read_domains |= obj->base.read_domains;
3263 Serge 1040
		obj->base.read_domains = obj->base.pending_read_domains;
1041
 
6084 serge 1042
		i915_vma_move_to_active(vma, req);
3263 Serge 1043
		if (obj->base.write_domain) {
6084 serge 1044
			i915_gem_request_assign(&obj->last_write_req, req);
5060 serge 1045
 
6084 serge 1046
			intel_fb_obj_invalidate(obj, ORIGIN_CS);
5060 serge 1047
 
1048
			/* update for the implicit flush after a batch */
1049
			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
3263 Serge 1050
		}
5354 serge 1051
		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
6084 serge 1052
			i915_gem_request_assign(&obj->last_fenced_req, req);
5354 serge 1053
			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
1054
				struct drm_i915_private *dev_priv = to_i915(ring->dev);
1055
				list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
1056
					       &dev_priv->mm.fence_list);
1057
			}
1058
		}
3263 Serge 1059
 
1060
		trace_i915_gem_object_change_domain(obj, old_read, old_write);
1061
	}
1062
}
1063
 
5354 serge 1064
void
6084 serge 1065
i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params)
3263 Serge 1066
{
1067
	/* Unconditionally force add_request to emit a full flush. */
6084 serge 1068
	params->ring->gpu_caches_dirty = true;
3263 Serge 1069
 
1070
	/* Add a breadcrumb for the completion of the batch buffer */
6084 serge 1071
	__i915_add_request(params->request, params->batch_obj, true);
3263 Serge 1072
}
1073
 
1074
static int
1075
i915_reset_gen7_sol_offsets(struct drm_device *dev,
6084 serge 1076
			    struct drm_i915_gem_request *req)
3263 Serge 1077
{
6084 serge 1078
	struct intel_engine_cs *ring = req->ring;
5060 serge 1079
	struct drm_i915_private *dev_priv = dev->dev_private;
3263 Serge 1080
	int ret, i;
1081
 
5060 serge 1082
	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) {
1083
		DRM_DEBUG("sol reset is gen7/rcs only\n");
1084
		return -EINVAL;
1085
	}
3263 Serge 1086
 
6084 serge 1087
	ret = intel_ring_begin(req, 4 * 3);
3263 Serge 1088
	if (ret)
1089
		return ret;
1090
 
1091
	for (i = 0; i < 4; i++) {
1092
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1093
		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
1094
		intel_ring_emit(ring, 0);
1095
	}
1096
 
1097
	intel_ring_advance(ring);
1098
 
1099
	return 0;
1100
}
1101
 
6084 serge 1102
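/*
 * Run the command parser over the user batch into a shadow buffer taken
 * from the ring's batch pool; on success the shadow is what gets executed.
 * Note the call site in i915_gem_do_execbuffer() is compiled out in this
 * port.
 */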
static struct drm_i915_gem_object*
1103
i915_gem_execbuffer_parse(struct intel_engine_cs *ring,
1104
			  struct drm_i915_gem_exec_object2 *shadow_exec_entry,
1105
			  struct eb_vmas *eb,
1106
			  struct drm_i915_gem_object *batch_obj,
1107
			  u32 batch_start_offset,
1108
			  u32 batch_len,
1109
			  bool is_master)
5354 serge 1110
{
6084 serge 1111
	struct drm_i915_gem_object *shadow_batch_obj;
1112
	struct i915_vma *vma;
5354 serge 1113
	int ret;
1114
 
6084 serge 1115
	shadow_batch_obj = i915_gem_batch_pool_get(&ring->batch_pool,
1116
						   PAGE_ALIGN(batch_len));
1117
	if (IS_ERR(shadow_batch_obj))
1118
		return shadow_batch_obj;
5354 serge 1119
 
6084 serge 1120
	ret = i915_parse_cmds(ring,
1121
			      batch_obj,
1122
			      shadow_batch_obj,
1123
			      batch_start_offset,
1124
			      batch_len,
1125
			      is_master);
1126
	if (ret)
1127
		goto err;
5354 serge 1128
 
6084 serge 1129
	ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 0, 0);
1130
	if (ret)
1131
		goto err;
5354 serge 1132
 
6084 serge 1133
	i915_gem_object_unpin_pages(shadow_batch_obj);
5354 serge 1134
 
6084 serge 1135
	memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
1136
 
1137
	vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
1138
	vma->exec_entry = shadow_exec_entry;
1139
	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
1140
	drm_gem_object_reference(&shadow_batch_obj->base);
1141
	list_add_tail(&vma->exec_list, &eb->vmas);
1142
 
1143
	shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;
1144
 
1145
	return shadow_batch_obj;
1146
 
1147
err:
1148
	i915_gem_object_unpin_pages(shadow_batch_obj);
1149
	if (ret == -EACCES) /* unhandled chained batch */
1150
		return batch_obj;
1151
	else
1152
		return ERR_PTR(ret);
5354 serge 1153
}
1154
 
1155
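/*
 * Legacy (ring buffer) submission backend: flush the objects to the GPU,
 * switch contexts, update the relative constants (INSTPM) mode if it
 * changed, then dispatch the batch, mark the vmas active and add the
 * request.
 */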
int
6084 serge 1156
i915_gem_ringbuffer_submission(struct i915_execbuffer_params *params,
1157
			       struct drm_i915_gem_execbuffer2 *args,
1158
			       struct list_head *vmas)
5060 serge 1159
{
6084 serge 1160
	struct drm_device *dev = params->dev;
1161
	struct intel_engine_cs *ring = params->ring;
5060 serge 1162
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 1163
	u64 exec_start, exec_len;
5060 serge 1164
	int instp_mode;
1165
	u32 instp_mask;
6084 serge 1166
	int ret;
5060 serge 1167
 
6084 serge 1168
	ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
5060 serge 1169
	if (ret)
6084 serge 1170
		return ret;
5060 serge 1171
 
6084 serge 1172
	ret = i915_switch_context(params->request);
5060 serge 1173
	if (ret)
6084 serge 1174
		return ret;
5060 serge 1175
 
6084 serge 1176
	WARN(params->ctx->ppgtt && params->ctx->ppgtt->pd_dirty_rings & (1<<ring->id),
1177
	     "%s didn't clear reload\n", ring->name);
1178
 
5060 serge 1179
	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1180
	instp_mask = I915_EXEC_CONSTANTS_MASK;
1181
	switch (instp_mode) {
1182
	case I915_EXEC_CONSTANTS_REL_GENERAL:
1183
	case I915_EXEC_CONSTANTS_ABSOLUTE:
1184
	case I915_EXEC_CONSTANTS_REL_SURFACE:
1185
		if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
1186
			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
6084 serge 1187
			return -EINVAL;
5060 serge 1188
		}
1189
 
1190
		if (instp_mode != dev_priv->relative_constants_mode) {
1191
			if (INTEL_INFO(dev)->gen < 4) {
1192
				DRM_DEBUG("no rel constants on pre-gen4\n");
6084 serge 1193
				return -EINVAL;
5060 serge 1194
			}
1195
 
1196
			if (INTEL_INFO(dev)->gen > 5 &&
1197
			    instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
1198
				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
6084 serge 1199
				return -EINVAL;
5060 serge 1200
			}
1201
 
1202
			/* The HW changed the meaning on this bit on gen6 */
1203
			if (INTEL_INFO(dev)->gen >= 6)
1204
				instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
1205
		}
1206
		break;
1207
	default:
1208
		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
6084 serge 1209
		return -EINVAL;
5060 serge 1210
	}
1211
 
1212
	if (ring == &dev_priv->ring[RCS] &&
6084 serge 1213
	    instp_mode != dev_priv->relative_constants_mode) {
1214
		ret = intel_ring_begin(params->request, 4);
5060 serge 1215
		if (ret)
6084 serge 1216
			return ret;
5060 serge 1217
 
1218
		intel_ring_emit(ring, MI_NOOP);
1219
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1220
		intel_ring_emit(ring, INSTPM);
1221
		intel_ring_emit(ring, instp_mask << 16 | instp_mode);
1222
		intel_ring_advance(ring);
1223
 
1224
		dev_priv->relative_constants_mode = instp_mode;
1225
	}
1226
 
1227
	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
6084 serge 1228
		ret = i915_reset_gen7_sol_offsets(dev, params->request);
5060 serge 1229
		if (ret)
6084 serge 1230
			return ret;
5060 serge 1231
	}
1232
 
6084 serge 1233
	exec_len   = args->batch_len;
1234
	exec_start = params->batch_obj_vm_offset +
1235
		     params->args_batch_start_offset;
5060 serge 1236
 
6084 serge 1237
	ret = ring->dispatch_execbuffer(params->request,
1238
					exec_start, exec_len,
1239
					params->dispatch_flags);
1240
	if (ret)
1241
		return ret;
5060 serge 1242
 
6084 serge 1243
	trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
5060 serge 1244
 
6084 serge 1245
	i915_gem_execbuffer_move_to_active(vmas, params->request);
1246
	i915_gem_execbuffer_retire_commands(params);
5060 serge 1247
 
6084 serge 1248
	return 0;
5060 serge 1249
}
1250
 
1251
/**
1252
 * Find one BSD ring to dispatch the corresponding BSD command.
1253
 * The Ring ID is returned.
1254
 */
1255
static int gen8_dispatch_bsd_ring(struct drm_device *dev,
1256
				  struct drm_file *file)
1257
{
1258
	struct drm_i915_private *dev_priv = dev->dev_private;
1259
	struct drm_i915_file_private *file_priv = file->driver_priv;
1260
 
1261
	/* Check whether the file_priv is using one ring */
1262
	if (file_priv->bsd_ring)
1263
		return file_priv->bsd_ring->id;
1264
	else {
1265
		/* If no, use the ping-pong mechanism to select one ring */
1266
		int ring_id;
1267
 
1268
		mutex_lock(&dev->struct_mutex);
1269
		if (dev_priv->mm.bsd_ring_dispatch_index == 0) {
1270
			ring_id = VCS;
1271
			dev_priv->mm.bsd_ring_dispatch_index = 1;
1272
		} else {
1273
			ring_id = VCS2;
1274
			dev_priv->mm.bsd_ring_dispatch_index = 0;
1275
		}
1276
		file_priv->bsd_ring = &dev_priv->ring[ring_id];
1277
		mutex_unlock(&dev->struct_mutex);
1278
		return ring_id;
1279
	}
1280
}
1281
 
1282
static struct drm_i915_gem_object *
1283
eb_get_batch(struct eb_vmas *eb)
1284
{
1285
	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
1286
 
1287
	/*
1288
	 * SNA is doing fancy tricks with compressing batch buffers, which leads
1289
	 * to negative relocation deltas. Usually that works out ok since the
1290
	 * relocate address is still positive, except when the batch is placed
1291
	 * very low in the GTT. Ensure this doesn't happen.
1292
	 *
1293
	 * Note that actual hangs have only been observed on gen7, but for
1294
	 * paranoia do it everywhere.
1295
	 */
1296
	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
1297
 
1298
	return vma->obj;
1299
}
1300
 
1301
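/*
 * Common execbuffer ioctl implementation: validate the arguments, select
 * the target ring, look up the context and objects, reserve and relocate
 * them, then hand the prepared request to the submission backend.
 */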
static int
3263 Serge 1302
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1303
		       struct drm_file *file,
1304
		       struct drm_i915_gem_execbuffer2 *args,
5060 serge 1305
		       struct drm_i915_gem_exec_object2 *exec)
3263 Serge 1306
{
5060 serge 1307
	struct drm_i915_private *dev_priv = dev->dev_private;
4560 Serge 1308
	struct eb_vmas *eb;
3263 Serge 1309
	struct drm_i915_gem_object *batch_obj;
6084 serge 1310
	struct drm_i915_gem_exec_object2 shadow_exec_entry;
5060 serge 1311
	struct intel_engine_cs *ring;
1312
	struct intel_context *ctx;
1313
	struct i915_address_space *vm;
6084 serge 1314
	struct i915_execbuffer_params params_master; /* XXX: will be removed later */
1315
	struct i915_execbuffer_params *params = &params_master;
4560 Serge 1316
	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
6084 serge 1317
	u32 dispatch_flags;
5060 serge 1318
	int ret;
3480 Serge 1319
	bool need_relocs;
3263 Serge 1320
 
3480 Serge 1321
	if (!i915_gem_check_execbuffer(args))
3263 Serge 1322
		return -EINVAL;
1323
 
5354 serge 1324
	ret = validate_exec_list(dev, exec, args->buffer_count);
3263 Serge 1325
	if (ret)
1326
		return ret;
1327
 
6084 serge 1328
	dispatch_flags = 0;
3263 Serge 1329
	if (args->flags & I915_EXEC_SECURE) {
1330
 
6084 serge 1331
		dispatch_flags |= I915_DISPATCH_SECURE;
3263 Serge 1332
	}
1333
	if (args->flags & I915_EXEC_IS_PINNED)
6084 serge 1334
		dispatch_flags |= I915_DISPATCH_PINNED;
3263 Serge 1335
 
5060 serge 1336
	if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) {
3263 Serge 1337
		DRM_DEBUG("execbuf with unknown ring: %d\n",
1338
			  (int)(args->flags & I915_EXEC_RING_MASK));
1339
		return -EINVAL;
1340
	}
5060 serge 1341
 
6084 serge 1342
	if (((args->flags & I915_EXEC_RING_MASK) != I915_EXEC_BSD) &&
1343
	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
1344
		DRM_DEBUG("execbuf with non bsd ring but with invalid "
1345
			"bsd dispatch flags: %d\n", (int)(args->flags));
1346
		return -EINVAL;
1347
	}
1348
 
5060 serge 1349
	if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
1350
		ring = &dev_priv->ring[RCS];
1351
	else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
1352
		if (HAS_BSD2(dev)) {
1353
			int ring_id;
6084 serge 1354
 
1355
			switch (args->flags & I915_EXEC_BSD_MASK) {
1356
			case I915_EXEC_BSD_DEFAULT:
1357
				ring_id = gen8_dispatch_bsd_ring(dev, file);
1358
				ring = &dev_priv->ring[ring_id];
1359
				break;
1360
			case I915_EXEC_BSD_RING1:
1361
				ring = &dev_priv->ring[VCS];
1362
				break;
1363
			case I915_EXEC_BSD_RING2:
1364
				ring = &dev_priv->ring[VCS2];
1365
				break;
1366
			default:
1367
				DRM_DEBUG("execbuf with unknown bsd ring: %d\n",
1368
					  (int)(args->flags & I915_EXEC_BSD_MASK));
1369
				return -EINVAL;
1370
			}
5060 serge 1371
		} else
1372
			ring = &dev_priv->ring[VCS];
1373
	} else
1374
		ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];
1375
 
3263 Serge 1376
	if (!intel_ring_initialized(ring)) {
1377
		DRM_DEBUG("execbuf with invalid ring: %d\n",
1378
			  (int)(args->flags & I915_EXEC_RING_MASK));
1379
		return -EINVAL;
1380
	}
1381
 
1382
	if (args->buffer_count < 1) {
1383
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1384
		return -EINVAL;
1385
	}
1386
 
6084 serge 1387
	if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
1388
		if (!HAS_RESOURCE_STREAMER(dev)) {
1389
			DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n");
1390
			return -EINVAL;
1391
		}
1392
		if (ring->id != RCS) {
1393
			DRM_DEBUG("RS is not available on %s\n",
1394
				 ring->name);
1395
			return -EINVAL;
1396
		}
1397
 
1398
		dispatch_flags |= I915_DISPATCH_RS;
1399
	}
1400
 
4560 Serge 1401
	intel_runtime_pm_get(dev_priv);
1402
 
3263 Serge 1403
	ret = i915_mutex_lock_interruptible(dev);
1404
	if (ret)
1405
		goto pre_mutex_err;
1406
 
5060 serge 1407
	ctx = i915_gem_validate_context(dev, file, ring, ctx_id);
1408
	if (IS_ERR(ctx)) {
4560 Serge 1409
		mutex_unlock(&dev->struct_mutex);
5060 serge 1410
		ret = PTR_ERR(ctx);
4560 Serge 1411
		goto pre_mutex_err;
1412
	}
1413
 
5060 serge 1414
	i915_gem_context_reference(ctx);
1415
 
5354 serge 1416
	if (ctx->ppgtt)
1417
		vm = &ctx->ppgtt->base;
1418
	else
5060 serge 1419
		vm = &dev_priv->gtt.base;
1420
 
6084 serge 1421
	memset(&params_master, 0x00, sizeof(params_master));
1422
 
3480 Serge 1423
	eb = eb_create(args);
3263 Serge 1424
	if (eb == NULL) {
5060 serge 1425
		i915_gem_context_unreference(ctx);
3263 Serge 1426
		mutex_unlock(&dev->struct_mutex);
1427
		ret = -ENOMEM;
6084 serge 1428
		goto pre_mutex_err;
3263 Serge 1429
	}
1430
 
1431
	/* Look up object handles */
4560 Serge 1432
	ret = eb_lookup_vmas(eb, exec, args, vm, file);
3480 Serge 1433
	if (ret)
6084 serge 1434
		goto err;
3263 Serge 1435
 
1436
	/* take note of the batch buffer before we might reorder the lists */
5060 serge 1437
	batch_obj = eb_get_batch(eb);
3263 Serge 1438
 
1439
	/* Move the objects en-masse into the GTT, evicting if necessary. */
3480 Serge 1440
	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
6084 serge 1441
	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, ctx, &need_relocs);
3263 Serge 1442
	if (ret)
1443
		goto err;
1444
 
1445
	/* The objects are in their final locations, apply the relocations. */
3480 Serge 1446
	if (need_relocs)
4560 Serge 1447
		ret = i915_gem_execbuffer_relocate(eb);
3263 Serge 1448
	if (ret) {
1449
		if (ret == -EFAULT) {
3480 Serge 1450
			ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
6084 serge 1451
								eb, exec, ctx);
3263 Serge 1452
			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1453
		}
1454
		if (ret)
1455
			goto err;
6084 serge 1456
	}
3263 Serge 1457
 
1458
	/* Set the pending read domains for the batch buffer to COMMAND */
1459
	if (batch_obj->base.pending_write_domain) {
1460
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1461
		ret = -EINVAL;
1462
		goto err;
1463
	}
6084 serge 1464
 
1465
	params->args_batch_start_offset = args->batch_start_offset;
1466
 
5060 serge 1467
#if 0
6084 serge 1468
	if (i915_needs_cmd_parser(ring) && args->batch_len) {
1469
		struct drm_i915_gem_object *parsed_batch_obj;
1470
 
1471
		parsed_batch_obj = i915_gem_execbuffer_parse(ring,
1472
						      &shadow_exec_entry,
1473
						      eb,
1474
						      batch_obj,
1475
						      args->batch_start_offset,
1476
						      args->batch_len,
1477
						      file->is_master);
1478
		if (IS_ERR(parsed_batch_obj)) {
1479
			ret = PTR_ERR(parsed_batch_obj);
5060 serge 1480
			goto err;
6084 serge 1481
		}
1482
 
5060 serge 1483
		/*
6084 serge 1484
		 * parsed_batch_obj == batch_obj means batch not fully parsed:
1485
		 * Accept, but don't promote to secure.
5060 serge 1486
		 */
6084 serge 1487
 
1488
		if (parsed_batch_obj != batch_obj) {
1489
			/*
1490
			 * Batch parsed and accepted:
1491
			 *
1492
			 * Set the DISPATCH_SECURE bit to remove the NON_SECURE
1493
			 * bit from MI_BATCH_BUFFER_START commands issued in
1494
			 * the dispatch_execbuffer implementations. We
1495
			 * specifically don't want that set on batches the
1496
			 * command parser has accepted.
1497
			 */
1498
			dispatch_flags |= I915_DISPATCH_SECURE;
1499
			params->args_batch_start_offset = 0;
1500
			batch_obj = parsed_batch_obj;
1501
		}
5060 serge 1502
	}
6084 serge 1503
#endif
5367 serge 1504
 
6084 serge 1505
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
1506
 
3263 Serge 1507
	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
1508
	 * batch" bit. Hence we need to pin secure batches into the global gtt.
4560 Serge 1509
	 * hsw should have this fixed, but bdw mucks it up again. */
6084 serge 1510
	if (dispatch_flags & I915_DISPATCH_SECURE) {
5354 serge 1511
		/*
1512
		 * So on first glance it looks freaky that we pin the batch here
1513
		 * outside of the reservation loop. But:
1514
		 * - The batch is already pinned into the relevant ppgtt, so we
1515
		 *   already have the backing storage fully allocated.
1516
		 * - No other BO uses the global gtt (well contexts, but meh),
6084 serge 1517
		 *   so we don't really have issues with multiple objects not
5354 serge 1518
		 *   fitting due to fragmentation.
1519
		 * So this is actually safe.
1520
		 */
1521
		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
1522
		if (ret)
1523
			goto err;
3263 Serge 1524
 
6084 serge 1525
		params->batch_obj_vm_offset = i915_gem_obj_ggtt_offset(batch_obj);
5354 serge 1526
	} else
6084 serge 1527
		params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm);
3263 Serge 1528
 
6084 serge 1529
	/* Allocate a request for this batch buffer nice and early. */
1530
	ret = i915_gem_request_alloc(ring, ctx, &params->request);
1531
	if (ret)
1532
		goto err_batch_unpin;
3263 Serge 1533
 
6084 serge 1534
	ret = i915_gem_request_add_to_client(params->request, file);
1535
	if (ret)
1536
		goto err_batch_unpin;
1537
 
5354 serge 1538
	/*
6084 serge 1539
	 * Save assorted stuff away to pass through to *_submission().
1540
	 * NB: This data should be 'persistent' and not local as it will
1541
	 * kept around beyond the duration of the IOCTL once the GPU
1542
	 * scheduler arrives.
1543
	 */
1544
	params->dev                     = dev;
1545
	params->file                    = file;
1546
	params->ring                    = ring;
1547
	params->dispatch_flags          = dispatch_flags;
1548
	params->batch_obj               = batch_obj;
1549
	params->ctx                     = ctx;
1550
 
1551
	ret = dev_priv->gt.execbuf_submit(params, args, &eb->vmas);
1552
 
1553
err_batch_unpin:
1554
	/*
5354 serge 1555
	 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
1556
	 * batch vma for correctness. For less ugly and less fragility this
1557
	 * needs to be adjusted to also track the ggtt batch vma properly as
1558
	 * active.
1559
	 */
6084 serge 1560
	if (dispatch_flags & I915_DISPATCH_SECURE)
5354 serge 1561
		i915_gem_object_ggtt_unpin(batch_obj);
6084 serge 1562
 
3263 Serge 1563
err:
5060 serge 1564
	/* the request owns the ref now */
1565
	i915_gem_context_unreference(ctx);
3263 Serge 1566
	eb_destroy(eb);
1567
 
6084 serge 1568
	/*
1569
	 * If the request was created but not successfully submitted then it
1570
	 * must be freed again. If it was submitted then it is being tracked
1571
	 * on the active request list and no clean up is required here.
1572
	 */
1573
	if (ret && params->request)
1574
		i915_gem_request_cancel(params->request);
1575
 
3263 Serge 1576
	mutex_unlock(&dev->struct_mutex);
1577
 
1578
pre_mutex_err:
4560 Serge 1579
	/* intel_gpu_busy should also get a ref, so it will free when the device
1580
	 * is really idle. */
1581
	intel_runtime_pm_put(dev_priv);
3263 Serge 1582
	return ret;
1583
}
1584
 
4246 Serge 1585
#if 0
1586
/*
1587
 * Legacy execbuffer just creates an exec2 list from the original exec object
1588
 * list array and passes it to the real function.
1589
 */
1590
int
1591
i915_gem_execbuffer(struct drm_device *dev, void *data,
1592
		    struct drm_file *file)
1593
{
1594
	struct drm_i915_gem_execbuffer *args = data;
1595
	struct drm_i915_gem_execbuffer2 exec2;
1596
	struct drm_i915_gem_exec_object *exec_list = NULL;
1597
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1598
	int ret, i;
3480 Serge 1599
 
4246 Serge 1600
	if (args->buffer_count < 1) {
1601
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1602
		return -EINVAL;
1603
	}
3480 Serge 1604
 
4246 Serge 1605
	/* Copy in the exec list from userland */
1606
	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1607
	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1608
	if (exec_list == NULL || exec2_list == NULL) {
1609
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1610
			  args->buffer_count);
1611
		drm_free_large(exec_list);
1612
		drm_free_large(exec2_list);
1613
		return -ENOMEM;
1614
	}
1615
	ret = copy_from_user(exec_list,
1616
			     to_user_ptr(args->buffers_ptr),
1617
			     sizeof(*exec_list) * args->buffer_count);
1618
	if (ret != 0) {
1619
		DRM_DEBUG("copy %d exec entries failed %d\n",
1620
			  args->buffer_count, ret);
1621
		drm_free_large(exec_list);
1622
		drm_free_large(exec2_list);
1623
		return -EFAULT;
1624
	}
1625
 
1626
	for (i = 0; i < args->buffer_count; i++) {
1627
		exec2_list[i].handle = exec_list[i].handle;
1628
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
1629
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1630
		exec2_list[i].alignment = exec_list[i].alignment;
1631
		exec2_list[i].offset = exec_list[i].offset;
1632
		if (INTEL_INFO(dev)->gen < 4)
1633
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1634
		else
1635
			exec2_list[i].flags = 0;
1636
	}
1637
 
1638
	exec2.buffers_ptr = args->buffers_ptr;
1639
	exec2.buffer_count = args->buffer_count;
1640
	exec2.batch_start_offset = args->batch_start_offset;
1641
	exec2.batch_len = args->batch_len;
1642
	exec2.DR1 = args->DR1;
1643
	exec2.DR4 = args->DR4;
1644
	exec2.num_cliprects = args->num_cliprects;
1645
	exec2.cliprects_ptr = args->cliprects_ptr;
1646
	exec2.flags = I915_EXEC_RENDER;
1647
	i915_execbuffer2_set_context_id(exec2, 0);
1648
 
5060 serge 1649
	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
4246 Serge 1650
	if (!ret) {
5060 serge 1651
		struct drm_i915_gem_exec_object __user *user_exec_list =
1652
			to_user_ptr(args->buffers_ptr);
1653
 
4246 Serge 1654
		/* Copy the new buffer offsets back to the user's exec list. */
5060 serge 1655
		for (i = 0; i < args->buffer_count; i++) {
1656
			ret = __copy_to_user(&user_exec_list[i].offset,
1657
					     &exec2_list[i].offset,
1658
					     sizeof(user_exec_list[i].offset));
6084 serge 1659
			if (ret) {
1660
				ret = -EFAULT;
1661
				DRM_DEBUG("failed to copy %d exec entries "
1662
					  "back to user (%d)\n",
1663
					  args->buffer_count, ret);
5060 serge 1664
				break;
1665
			}
4246 Serge 1666
		}
1667
	}
1668
 
1669
	drm_free_large(exec_list);
1670
	drm_free_large(exec2_list);
1671
	return ret;
1672
}
1673
#endif
1674
 
3263 Serge 1675
int
1676
i915_gem_execbuffer2(struct drm_device *dev, void *data,
1677
		     struct drm_file *file)
1678
{
1679
	struct drm_i915_gem_execbuffer2 *args = data;
1680
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1681
	int ret;
1682
 
1683
	if (args->buffer_count < 1 ||
1684
	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1685
		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1686
		return -EINVAL;
1687
	}
1688
 
5060 serge 1689
	if (args->rsvd2 != 0) {
1690
		DRM_DEBUG("dirty rvsd2 field\n");
1691
		return -EINVAL;
1692
	}
1693
 
3480 Serge 1694
	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
1695
			     GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
3263 Serge 1696
	if (exec2_list == NULL) {
1697
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1698
			  args->buffer_count);
1699
		return -ENOMEM;
1700
	}
1701
	ret = copy_from_user(exec2_list,
4539 Serge 1702
			     to_user_ptr(args->buffers_ptr),
3263 Serge 1703
			     sizeof(*exec2_list) * args->buffer_count);
1704
	if (ret != 0) {
1705
		DRM_DEBUG("copy %d exec entries failed %d\n",
1706
			  args->buffer_count, ret);
3266 Serge 1707
        kfree(exec2_list);
1708
        FAIL();
3263 Serge 1709
		return -EFAULT;
1710
	}
1711
 
5060 serge 1712
	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
3263 Serge 1713
	if (!ret) {
1714
		/* Copy the new buffer offsets back to the user's exec list. */
5060 serge 1715
		struct drm_i915_gem_exec_object2 __user *user_exec_list =
1716
				   to_user_ptr(args->buffers_ptr);
1717
		int i;
1718
 
1719
		for (i = 0; i < args->buffer_count; i++) {
1720
			ret = __copy_to_user(&user_exec_list[i].offset,
1721
					     &exec2_list[i].offset,
1722
					     sizeof(user_exec_list[i].offset));
6084 serge 1723
			if (ret) {
1724
				ret = -EFAULT;
1725
				DRM_DEBUG("failed to copy %d exec entries "
5060 serge 1726
					  "back to user\n",
1727
					  args->buffer_count);
1728
				break;
1729
			}
3263 Serge 1730
		}
1731
	}
1732
 
3266 Serge 1733
    kfree(exec2_list);
3263 Serge 1734
	return ret;
1735
}