Subversion Repositories Kolibri OS

Rev Author Line No. Line
3263 Serge 1
/*
2
 * Copyright © 2008,2010 Intel Corporation
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 * IN THE SOFTWARE.
22
 *
23
 * Authors:
24
 *    Eric Anholt <eric@anholt.net>
25
 *    Chris Wilson <chris@chris-wilson.co.uk>
26
 *
27
 */
28
 
29
#include <drm/drmP.h>
30
#include <drm/i915_drm.h>
31
#include "i915_drv.h"
32
#include "i915_trace.h"
33
#include "intel_drv.h"
5060 serge 34
#include <linux/dma_remapping.h>
3263 Serge 35
 
4560 Serge 36
#define  __EXEC_OBJECT_HAS_PIN (1<<31)
37
#define  __EXEC_OBJECT_HAS_FENCE (1<<30)
5060 serge 38
#define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
3263 Serge 39
 
5060 serge 40
#define BATCH_OFFSET_BIAS (256*1024)
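/*
 * Note: the __EXEC_OBJECT_* bits above live in the high, kernel-internal
 * part of exec_entry->flags, well away from the userspace-visible
 * EXEC_OBJECT_* flags checked by validate_exec_list().  BATCH_OFFSET_BIAS
 * keeps batch buffers out of the lowest 256 KiB of the address space; see
 * the comment in eb_get_batch() below for why negative relocation deltas
 * make that necessary.
 */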
3263 Serge 41
 
42
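/*
 * KolibriOS port note: there is no separate user address space here, so
 * this local copy_from_user() stub simply memcpy()s the buffer and always
 * reports zero bytes left uncopied, mirroring the Linux return convention.
 */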
static unsigned long
43
copy_from_user(void *to, const void __user *from, unsigned long n)
44
{
45
    memcpy(to, from, n);
46
    return 0;
47
}
48
 
4560 Serge 49
struct eb_vmas {
50
	struct list_head vmas;
3263 Serge 51
	int and;
3480 Serge 52
	union {
4560 Serge 53
		struct i915_vma *lut[0];
5060 serge 54
		struct hlist_head buckets[0];
3480 Serge 55
	};
3263 Serge 56
};
57
 
4560 Serge 58
static struct eb_vmas *
3480 Serge 59
eb_create(struct drm_i915_gem_execbuffer2 *args)
3263 Serge 60
{
4560 Serge 61
	struct eb_vmas *eb = NULL;
3480 Serge 62
 
63
	if (args->flags & I915_EXEC_HANDLE_LUT) {
4560 Serge 64
		unsigned size = args->buffer_count;
65
		size *= sizeof(struct i915_vma *);
66
		size += sizeof(struct eb_vmas);
3480 Serge 67
		eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
68
	}
69
 
70
	if (eb == NULL) {
4560 Serge 71
		unsigned size = args->buffer_count;
72
		unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
3480 Serge 73
		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
74
		while (count > 2*size)
3263 Serge 75
			count >>= 1;
76
		eb = kzalloc(count*sizeof(struct hlist_head) +
4560 Serge 77
			     sizeof(struct eb_vmas),
3480 Serge 78
			     GFP_TEMPORARY);
3263 Serge 79
		if (eb == NULL)
80
			return eb;
81
 
82
		eb->and = count - 1;
3480 Serge 83
	} else
84
		eb->and = -args->buffer_count;
85
 
4560 Serge 86
	INIT_LIST_HEAD(&eb->vmas);
3263 Serge 87
	return eb;
88
}
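/*
 * eb->and encodes which lookup scheme eb_create() picked: a negative value
 * (-buffer_count) means the flat lut[] is indexed directly by the
 * I915_EXEC_HANDLE_LUT position, while a non-negative value is the hash
 * mask used for the buckets[] table consulted by eb_get_vma().
 */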
89
 
90
static void
4560 Serge 91
eb_reset(struct eb_vmas *eb)
3263 Serge 92
{
3480 Serge 93
	if (eb->and >= 0)
3263 Serge 94
		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
95
}
96
 
3480 Serge 97
static int
4560 Serge 98
eb_lookup_vmas(struct eb_vmas *eb,
3480 Serge 99
		  struct drm_i915_gem_exec_object2 *exec,
100
		  const struct drm_i915_gem_execbuffer2 *args,
4560 Serge 101
	       struct i915_address_space *vm,
3480 Serge 102
		  struct drm_file *file)
3263 Serge 103
{
5060 serge 104
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
4560 Serge 105
	struct drm_i915_gem_object *obj;
106
	struct list_head objects;
107
	int i, ret;
3480 Serge 108
 
4560 Serge 109
	INIT_LIST_HEAD(&objects);
3480 Serge 110
	spin_lock(&file->table_lock);
4560 Serge 111
	/* Grab a reference to the object and release the lock so we can look up
112
	 * or create the VMA without using GFP_ATOMIC */
3480 Serge 113
	for (i = 0; i < args->buffer_count; i++) {
5060 serge 114
        obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
3480 Serge 115
		if (obj == NULL) {
116
			spin_unlock(&file->table_lock);
117
			DRM_DEBUG("Invalid object handle %d at index %d\n",
118
				   exec[i].handle, i);
4560 Serge 119
			ret = -ENOENT;
120
			goto err;
3480 Serge 121
		}
122
 
4560 Serge 123
		if (!list_empty(&obj->obj_exec_link)) {
3480 Serge 124
			spin_unlock(&file->table_lock);
125
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
126
				   obj, exec[i].handle, i);
4560 Serge 127
			ret = -EINVAL;
128
			goto err;
3480 Serge 129
		}
130
 
131
		drm_gem_object_reference(&obj->base);
4560 Serge 132
		list_add_tail(&obj->obj_exec_link, &objects);
133
	}
134
	spin_unlock(&file->table_lock);
3480 Serge 135
 
4560 Serge 136
	i = 0;
137
	while (!list_empty(&objects)) {
138
		struct i915_vma *vma;
5060 serge 139
		struct i915_address_space *bind_vm = vm;
4560 Serge 140
 
5060 serge 141
		if (exec[i].flags & EXEC_OBJECT_NEEDS_GTT &&
142
		    USES_FULL_PPGTT(vm->dev)) {
143
			ret = -EINVAL;
144
			goto err;
145
		}
146
 
147
		/* If we have secure dispatch, or the userspace assures us that
148
		 * they know what they're doing, use the GGTT VM.
149
		 */
150
		if (((args->flags & I915_EXEC_SECURE) &&
151
		    (i == (args->buffer_count - 1))))
152
			bind_vm = &dev_priv->gtt.base;
153
 
4560 Serge 154
		obj = list_first_entry(&objects,
155
				       struct drm_i915_gem_object,
156
				       obj_exec_link);
157
 
158
		/*
159
		 * NOTE: We can leak any vmas created here when something fails
160
		 * later on. But that's no issue since vma_unbind can deal with
161
		 * vmas which are not actually bound. And since only
162
		 * lookup_or_create exists as an interface to get at the vma
163
		 * from the (obj, vm) we don't run the risk of creating
164
		 * duplicated vmas for the same vm.
165
		 */
5060 serge 166
		vma = i915_gem_obj_lookup_or_create_vma(obj, bind_vm);
4560 Serge 167
		if (IS_ERR(vma)) {
168
			DRM_DEBUG("Failed to lookup VMA\n");
169
			ret = PTR_ERR(vma);
170
			goto err;
171
		}
172
 
173
		/* Transfer ownership from the objects list to the vmas list. */
174
		list_add_tail(&vma->exec_list, &eb->vmas);
175
		list_del_init(&obj->obj_exec_link);
176
 
177
		vma->exec_entry = &exec[i];
3480 Serge 178
		if (eb->and < 0) {
4560 Serge 179
			eb->lut[i] = vma;
3480 Serge 180
		} else {
181
			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
4560 Serge 182
			vma->exec_handle = handle;
183
			hlist_add_head(&vma->exec_node,
3480 Serge 184
				       &eb->buckets[handle & eb->and]);
185
		}
4560 Serge 186
		++i;
3480 Serge 187
	}
188
 
189
	return 0;
4560 Serge 190
 
191
 
192
err:
193
	while (!list_empty(&objects)) {
194
		obj = list_first_entry(&objects,
195
				       struct drm_i915_gem_object,
196
				       obj_exec_link);
197
		list_del_init(&obj->obj_exec_link);
198
		drm_gem_object_unreference(&obj->base);
199
	}
200
	/*
201
	 * Objects already transfered to the vmas list will be unreferenced by
202
	 * eb_destroy.
203
	 */
204
 
205
	return ret;
3263 Serge 206
}
207
 
4560 Serge 208
static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
3263 Serge 209
{
3480 Serge 210
	if (eb->and < 0) {
211
		if (handle >= -eb->and)
212
			return NULL;
213
		return eb->lut[handle];
214
	} else {
3263 Serge 215
		struct hlist_head *head;
216
		struct hlist_node *node;
217
 
218
		head = &eb->buckets[handle & eb->and];
219
		hlist_for_each(node, head) {
4560 Serge 220
			struct i915_vma *vma;
3480 Serge 221
 
4560 Serge 222
			vma = hlist_entry(node, struct i915_vma, exec_node);
223
			if (vma->exec_handle == handle)
224
				return vma;
3263 Serge 225
		}
226
		return NULL;
3480 Serge 227
	}
3263 Serge 228
}
229
 
230
static void
4560 Serge 231
i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
3263 Serge 232
{
4560 Serge 233
	struct drm_i915_gem_exec_object2 *entry;
234
	struct drm_i915_gem_object *obj = vma->obj;
3480 Serge 235
 
4560 Serge 236
	if (!drm_mm_node_allocated(&vma->node))
237
		return;
238
 
239
	entry = vma->exec_entry;
240
 
241
	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
242
		i915_gem_object_unpin_fence(obj);
243
 
244
	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
5060 serge 245
		vma->pin_count--;
4560 Serge 246
 
247
	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
248
}
249
 
250
static void eb_destroy(struct eb_vmas *eb)
251
{
252
	while (!list_empty(&eb->vmas)) {
253
		struct i915_vma *vma;
254
 
255
		vma = list_first_entry(&eb->vmas,
256
				       struct i915_vma,
3480 Serge 257
				       exec_list);
4560 Serge 258
		list_del_init(&vma->exec_list);
259
		i915_gem_execbuffer_unreserve_vma(vma);
260
		drm_gem_object_unreference(&vma->obj->base);
3480 Serge 261
	}
3263 Serge 262
	kfree(eb);
263
}
264
 
265
static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
266
{
4560 Serge 267
	return (HAS_LLC(obj->base.dev) ||
268
		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
3263 Serge 269
		!obj->map_and_fenceable ||
270
		obj->cache_level != I915_CACHE_NONE);
271
}
272
 
273
static int
4371 Serge 274
relocate_entry_cpu(struct drm_i915_gem_object *obj,
5060 serge 275
		   struct drm_i915_gem_relocation_entry *reloc,
276
		   uint64_t target_offset)
4371 Serge 277
{
4539 Serge 278
    struct drm_device *dev = obj->base.dev;
279
    struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 280
    uint32_t page_offset = offset_in_page(reloc->offset);
281
	uint64_t delta = reloc->delta + target_offset;
4371 Serge 282
	char *vaddr;
4560 Serge 283
	int ret;
4371 Serge 284
 
4560 Serge 285
	ret = i915_gem_object_set_to_cpu_domain(obj, true);
4371 Serge 286
	if (ret)
287
		return ret;
288
 
5060 serge 289
	vaddr = (char*)dev_priv->gtt.mappable+4096;
290
	MapPage(vaddr,(addr_t)i915_gem_object_get_page(obj,reloc->offset >> PAGE_SHIFT), PG_SW);
291
	*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
4371 Serge 292
 
5060 serge 293
	if (INTEL_INFO(dev)->gen >= 8) {
294
		page_offset = offset_in_page(page_offset + sizeof(uint32_t));
295
 
296
		if (page_offset == 0) {
297
			MapPage(vaddr,(addr_t)i915_gem_object_get_page(obj,
298
			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT), PG_SW);
299
		}
300
 
301
		*(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
302
	}
303
 
4371 Serge 304
	return 0;
305
}
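/*
 * Port note: where the upstream Linux driver uses an atomic kmap,
 * relocate_entry_cpu() above maps the object's backing page at a scratch
 * virtual address (gtt.mappable + 4096) via MapPage() and patches the
 * 32-bit value in place; on gen8+ the upper 32 bits may spill onto the
 * following page, hence the second MapPage().
 */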
306
 
307
static int
308
relocate_entry_gtt(struct drm_i915_gem_object *obj,
5060 serge 309
		   struct drm_i915_gem_relocation_entry *reloc,
310
		   uint64_t target_offset)
4371 Serge 311
{
312
	struct drm_device *dev = obj->base.dev;
313
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 314
	uint64_t delta = reloc->delta + target_offset;
4371 Serge 315
	uint32_t __iomem *reloc_entry;
316
	void __iomem *reloc_page;
4560 Serge 317
	int ret;
4371 Serge 318
 
319
	ret = i915_gem_object_set_to_gtt_domain(obj, true);
320
	if (ret)
321
		return ret;
322
 
323
	ret = i915_gem_object_put_fence(obj);
324
	if (ret)
325
		return ret;
326
 
327
	/* Map the page containing the relocation we're going to perform.  */
328
	reloc->offset += i915_gem_obj_ggtt_offset(obj);
4539 Serge 329
    MapPage(dev_priv->gtt.mappable,dev_priv->gtt.mappable_base +
330
                                 (reloc->offset & PAGE_MASK), PG_SW);
331
	reloc_page = dev_priv->gtt.mappable;
4371 Serge 332
	reloc_entry = (uint32_t __iomem *)
333
		(reloc_page + offset_in_page(reloc->offset));
5060 serge 334
	iowrite32(lower_32_bits(delta), reloc_entry);
4371 Serge 335
 
5060 serge 336
 
4371 Serge 337
	return 0;
338
}
339
 
340
static int
3263 Serge 341
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
4560 Serge 342
				   struct eb_vmas *eb,
5060 serge 343
				   struct drm_i915_gem_relocation_entry *reloc)
3263 Serge 344
{
345
	struct drm_device *dev = obj->base.dev;
346
	struct drm_gem_object *target_obj;
347
	struct drm_i915_gem_object *target_i915_obj;
4560 Serge 348
	struct i915_vma *target_vma;
5060 serge 349
	uint64_t target_offset;
4560 Serge 350
	int ret;
3263 Serge 351
 
352
	/* we already hold a reference to all valid objects */
4560 Serge 353
	target_vma = eb_get_vma(eb, reloc->target_handle);
354
	if (unlikely(target_vma == NULL))
3263 Serge 355
		return -ENOENT;
4560 Serge 356
	target_i915_obj = target_vma->obj;
357
	target_obj = &target_vma->obj->base;
3263 Serge 358
 
4560 Serge 359
	target_offset = target_vma->node.start;
3263 Serge 360
 
361
	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
362
	 * pipe_control writes because the gpu doesn't properly redirect them
363
	 * through the ppgtt for non_secure batchbuffers. */
364
	if (unlikely(IS_GEN6(dev) &&
365
	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
366
	    !target_i915_obj->has_global_gtt_mapping)) {
5060 serge 367
		struct i915_vma *vma =
368
			list_first_entry(&target_i915_obj->vma_list,
369
					 typeof(*vma), vma_link);
370
		vma->bind_vma(vma, target_i915_obj->cache_level, GLOBAL_BIND);
3263 Serge 371
	}
372
 
373
	/* Validate that the target is in a valid r/w GPU domain */
374
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
375
		DRM_DEBUG("reloc with multiple write domains: "
376
			  "obj %p target %d offset %d "
377
			  "read %08x write %08x",
378
			  obj, reloc->target_handle,
379
			  (int) reloc->offset,
380
			  reloc->read_domains,
381
			  reloc->write_domain);
4560 Serge 382
		return -EINVAL;
3263 Serge 383
	}
384
	if (unlikely((reloc->write_domain | reloc->read_domains)
385
		     & ~I915_GEM_GPU_DOMAINS)) {
386
		DRM_DEBUG("reloc with read/write non-GPU domains: "
387
			  "obj %p target %d offset %d "
388
			  "read %08x write %08x",
389
			  obj, reloc->target_handle,
390
			  (int) reloc->offset,
391
			  reloc->read_domains,
392
			  reloc->write_domain);
4560 Serge 393
		return -EINVAL;
3263 Serge 394
	}
395
 
396
	target_obj->pending_read_domains |= reloc->read_domains;
397
	target_obj->pending_write_domain |= reloc->write_domain;
398
 
399
	/* If the relocation already has the right value in it, no
400
	 * more work needs to be done.
401
	 */
402
	if (target_offset == reloc->presumed_offset)
403
		return 0;
404
 
405
	/* Check that the relocation address is valid... */
4560 Serge 406
	if (unlikely(reloc->offset >
407
		obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
3263 Serge 408
		DRM_DEBUG("Relocation beyond object bounds: "
409
			  "obj %p target %d offset %d size %d.\n",
410
			  obj, reloc->target_handle,
411
			  (int) reloc->offset,
412
			  (int) obj->base.size);
4560 Serge 413
		return -EINVAL;
3263 Serge 414
	}
415
	if (unlikely(reloc->offset & 3)) {
416
		DRM_DEBUG("Relocation not 4-byte aligned: "
417
			  "obj %p target %d offset %d.\n",
418
			  obj, reloc->target_handle,
419
			  (int) reloc->offset);
4560 Serge 420
		return -EINVAL;
3263 Serge 421
	}
422
 
423
	/* We can't wait for rendering with pagefaults disabled */
424
 
4371 Serge 425
	if (use_cpu_reloc(obj))
5060 serge 426
		ret = relocate_entry_cpu(obj, reloc, target_offset);
4371 Serge 427
	else
5060 serge 428
		ret = relocate_entry_gtt(obj, reloc, target_offset);
3263 Serge 429
 
430
	if (ret)
431
		return ret;
432
 
433
	/* and update the user's relocation entry */
434
	reloc->presumed_offset = target_offset;
435
 
436
	return 0;
437
}
438
 
439
static int
4560 Serge 440
i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
441
				 struct eb_vmas *eb)
3263 Serge 442
{
443
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
3266 Serge 444
	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(64)];
3263 Serge 445
	struct drm_i915_gem_relocation_entry __user *user_relocs;
4560 Serge 446
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
3263 Serge 447
	int remain, ret;
448
 
4539 Serge 449
	user_relocs = to_user_ptr(entry->relocs_ptr);
3263 Serge 450
 
451
	remain = entry->relocation_count;
452
	while (remain) {
453
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
454
		int count = remain;
455
		if (count > ARRAY_SIZE(stack_reloc))
456
			count = ARRAY_SIZE(stack_reloc);
457
		remain -= count;
458
 
459
        memcpy(r, user_relocs, count*sizeof(r[0]));
460
 
461
		do {
462
			u64 offset = r->presumed_offset;
463
 
5060 serge 464
			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r);
3263 Serge 465
			if (ret)
466
				return ret;
467
 
4392 Serge 468
			if (r->presumed_offset != offset)
469
			{
3263 Serge 470
				memcpy(&user_relocs->presumed_offset,
471
				       &r->presumed_offset,
472
				       sizeof(r->presumed_offset));
4392 Serge 473
			}
3263 Serge 474
 
475
			user_relocs++;
476
			r++;
477
		} while (--count);
478
	}
479
 
480
	return 0;
481
#undef N_RELOC
482
}
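/*
 * Relocations are pulled in from userspace in chunks of N_RELOC(64)
 * entries on the stack; presumed_offset is only written back when the
 * relocation actually moved, keeping the common no-op pass cheap.
 */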
483
 
484
static int
4560 Serge 485
i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
486
				      struct eb_vmas *eb,
487
				      struct drm_i915_gem_relocation_entry *relocs)
3263 Serge 488
{
4560 Serge 489
	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
3263 Serge 490
	int i, ret;
491
 
492
	for (i = 0; i < entry->relocation_count; i++) {
5060 serge 493
		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]);
3263 Serge 494
		if (ret)
495
			return ret;
496
	}
497
 
498
	return 0;
499
}
500
 
501
static int
4560 Serge 502
i915_gem_execbuffer_relocate(struct eb_vmas *eb)
3263 Serge 503
{
4560 Serge 504
	struct i915_vma *vma;
3263 Serge 505
	int ret = 0;
506
 
507
	/* This is the fast path and we cannot handle a pagefault whilst
508
	 * holding the struct mutex lest the user pass in the relocations
509
	 * contained within an mmapped bo. In such a case the page
510
	 * fault handler would call i915_gem_fault() and we would try to
511
	 * acquire the struct mutex again. Obviously this is bad and so
512
	 * lockdep complains vehemently.
513
	 */
4104 Serge 514
//	pagefault_disable();
4560 Serge 515
	list_for_each_entry(vma, &eb->vmas, exec_list) {
516
		ret = i915_gem_execbuffer_relocate_vma(vma, eb);
3263 Serge 517
		if (ret)
518
			break;
519
	}
520
//   pagefault_enable();
521
 
522
	return ret;
523
}
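/*
 * The pagefault_disable()/pagefault_enable() pair from the upstream fast
 * path is commented out above; with the memcpy-based copy_from_user() stub
 * in this port the -EFAULT fallback to the slow path is not expected to
 * trigger, but the call site in i915_gem_do_execbuffer() still handles it.
 */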
524
 
525
static int
4560 Serge 526
need_reloc_mappable(struct i915_vma *vma)
3263 Serge 527
{
4560 Serge 528
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
529
	return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
530
		i915_is_ggtt(vma->vm);
3263 Serge 531
}
532
 
533
static int
4560 Serge 534
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
5060 serge 535
				struct intel_engine_cs *ring,
3480 Serge 536
				   bool *need_reloc)
3263 Serge 537
{
5060 serge 538
	struct drm_i915_gem_object *obj = vma->obj;
4560 Serge 539
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
3263 Serge 540
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
5060 serge 541
	bool need_fence;
542
	uint64_t flags;
3263 Serge 543
	int ret;
544
 
5060 serge 545
	flags = 0;
546
 
3263 Serge 547
	need_fence =
548
		has_fenced_gpu_access &&
549
		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
550
		obj->tiling_mode != I915_TILING_NONE;
5060 serge 551
	if (need_fence || need_reloc_mappable(vma))
552
		flags |= PIN_MAPPABLE;
3263 Serge 553
 
5060 serge 554
	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
555
		flags |= PIN_GLOBAL;
556
	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
557
		flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
558
 
559
	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
3263 Serge 560
	if (ret)
561
		return ret;
562
 
563
	entry->flags |= __EXEC_OBJECT_HAS_PIN;
564
 
565
	if (has_fenced_gpu_access) {
566
		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
567
			ret = i915_gem_object_get_fence(obj);
568
			if (ret)
569
				return ret;
570
 
571
			if (i915_gem_object_pin_fence(obj))
572
				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
573
 
574
			obj->pending_fenced_gpu_access = true;
575
		}
576
	}
577
 
4560 Serge 578
	if (entry->offset != vma->node.start) {
579
		entry->offset = vma->node.start;
3480 Serge 580
		*need_reloc = true;
581
	}
3266 Serge 582
 
3480 Serge 583
	if (entry->flags & EXEC_OBJECT_WRITE) {
584
		obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
585
		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
586
	}
587
 
3263 Serge 588
	return 0;
589
}
590
 
5060 serge 591
static bool
592
eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
593
{
594
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
595
	struct drm_i915_gem_object *obj = vma->obj;
596
	bool need_fence, need_mappable;
597
 
598
	need_fence =
599
		has_fenced_gpu_access &&
600
		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
601
		obj->tiling_mode != I915_TILING_NONE;
602
	need_mappable = need_fence || need_reloc_mappable(vma);
603
 
604
	WARN_ON((need_mappable || need_fence) &&
605
	       !i915_is_ggtt(vma->vm));
606
 
607
	if (entry->alignment &&
608
	    vma->node.start & (entry->alignment - 1))
609
		return true;
610
 
611
	if (need_mappable && !obj->map_and_fenceable)
612
		return true;
613
 
614
	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
615
	    vma->node.start < BATCH_OFFSET_BIAS)
616
		return true;
617
 
618
	return false;
619
}
620
 
3263 Serge 621
static int
5060 serge 622
i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
4560 Serge 623
			    struct list_head *vmas,
3480 Serge 624
			    bool *need_relocs)
3263 Serge 625
{
626
	struct drm_i915_gem_object *obj;
4560 Serge 627
	struct i915_vma *vma;
628
	struct i915_address_space *vm;
629
	struct list_head ordered_vmas;
3263 Serge 630
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
631
	int retry;
632
 
4560 Serge 633
	if (list_empty(vmas))
634
		return 0;
635
 
5060 serge 636
	i915_gem_retire_requests_ring(ring);
637
 
4560 Serge 638
	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
639
 
640
	INIT_LIST_HEAD(&ordered_vmas);
641
	while (!list_empty(vmas)) {
3263 Serge 642
		struct drm_i915_gem_exec_object2 *entry;
643
		bool need_fence, need_mappable;
644
 
4560 Serge 645
		vma = list_first_entry(vmas, struct i915_vma, exec_list);
646
		obj = vma->obj;
647
		entry = vma->exec_entry;
3263 Serge 648
 
649
		need_fence =
650
			has_fenced_gpu_access &&
651
			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
652
			obj->tiling_mode != I915_TILING_NONE;
4560 Serge 653
		need_mappable = need_fence || need_reloc_mappable(vma);
3263 Serge 654
 
655
		if (need_mappable)
4560 Serge 656
			list_move(&vma->exec_list, &ordered_vmas);
3263 Serge 657
		else
4560 Serge 658
			list_move_tail(&vma->exec_list, &ordered_vmas);
3263 Serge 659
 
3480 Serge 660
		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
3263 Serge 661
		obj->base.pending_write_domain = 0;
662
		obj->pending_fenced_gpu_access = false;
663
	}
4560 Serge 664
	list_splice(&ordered_vmas, vmas);
3263 Serge 665
 
666
	/* Attempt to pin all of the buffers into the GTT.
667
	 * This is done in 3 phases:
668
	 *
669
	 * 1a. Unbind all objects that do not match the GTT constraints for
670
	 *     the execbuffer (fenceable, mappable, alignment etc).
671
	 * 1b. Increment pin count for already bound objects.
672
	 * 2.  Bind new objects.
673
	 * 3.  Decrement pin count.
674
	 *
675
	 * This avoids unnecessary unbinding of later objects in order to make
676
	 * room for the earlier objects *unless* we need to defragment.
677
	 */
678
	retry = 0;
679
	do {
680
		int ret = 0;
681
 
682
		/* Unbind any ill-fitting objects or pin. */
4560 Serge 683
		list_for_each_entry(vma, vmas, exec_list) {
684
			if (!drm_mm_node_allocated(&vma->node))
3263 Serge 685
				continue;
686
 
5060 serge 687
			if (eb_vma_misplaced(vma, has_fenced_gpu_access))
4560 Serge 688
				ret = i915_vma_unbind(vma);
3263 Serge 689
			else
4560 Serge 690
				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
3263 Serge 691
			if (ret)
692
				goto err;
693
		}
694
 
695
		/* Bind fresh objects */
4560 Serge 696
		list_for_each_entry(vma, vmas, exec_list) {
697
			if (drm_mm_node_allocated(&vma->node))
3263 Serge 698
				continue;
699
 
4560 Serge 700
			ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
3263 Serge 701
			if (ret)
702
				goto err;
703
		}
704
 
4560 Serge 705
err:
3263 Serge 706
		if (ret != -ENOSPC || retry++)
707
			return ret;
708
 
4560 Serge 709
		/* Decrement pin count for bound objects */
710
		list_for_each_entry(vma, vmas, exec_list)
711
			i915_gem_execbuffer_unreserve_vma(vma);
712
 
5060 serge 713
		ret = i915_gem_evict_vm(vm, true);
3263 Serge 714
		if (ret)
715
			return ret;
716
	} while (1);
717
}
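/*
 * Note the single-retry shape above: on the first -ENOSPC everything is
 * unpinned, the whole address space is evicted with i915_gem_evict_vm()
 * and reservation is attempted once more; a second -ENOSPC is returned to
 * the caller.
 */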
718
 
719
static int
720
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
3480 Serge 721
				  struct drm_i915_gem_execbuffer2 *args,
3263 Serge 722
				  struct drm_file *file,
5060 serge 723
				  struct intel_engine_cs *ring,
4560 Serge 724
				  struct eb_vmas *eb,
725
				  struct drm_i915_gem_exec_object2 *exec)
3263 Serge 726
{
727
	struct drm_i915_gem_relocation_entry *reloc;
4560 Serge 728
	struct i915_address_space *vm;
729
	struct i915_vma *vma;
3480 Serge 730
	bool need_relocs;
3263 Serge 731
	int *reloc_offset;
732
	int i, total, ret;
4560 Serge 733
	unsigned count = args->buffer_count;
3263 Serge 734
 
4560 Serge 735
	if (WARN_ON(list_empty(&eb->vmas)))
736
		return 0;
737
 
738
	vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;
739
 
3263 Serge 740
	/* We may process another execbuffer during the unlock... */
4560 Serge 741
	while (!list_empty(&eb->vmas)) {
742
		vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
743
		list_del_init(&vma->exec_list);
744
		i915_gem_execbuffer_unreserve_vma(vma);
745
		drm_gem_object_unreference(&vma->obj->base);
3263 Serge 746
	}
747
 
748
	mutex_unlock(&dev->struct_mutex);
749
 
750
	total = 0;
751
	for (i = 0; i < count; i++)
752
		total += exec[i].relocation_count;
753
 
754
    reloc_offset = malloc(count * sizeof(*reloc_offset));
755
    reloc = malloc(total * sizeof(*reloc));
756
	if (reloc == NULL || reloc_offset == NULL) {
3266 Serge 757
        kfree(reloc);
758
        kfree(reloc_offset);
3263 Serge 759
		mutex_lock(&dev->struct_mutex);
760
		return -ENOMEM;
761
	}
762
 
763
	total = 0;
764
	for (i = 0; i < count; i++) {
765
		struct drm_i915_gem_relocation_entry __user *user_relocs;
766
		u64 invalid_offset = (u64)-1;
767
		int j;
768
 
4539 Serge 769
		user_relocs = to_user_ptr(exec[i].relocs_ptr);
3263 Serge 770
 
771
		if (copy_from_user(reloc+total, user_relocs,
772
				   exec[i].relocation_count * sizeof(*reloc))) {
773
			ret = -EFAULT;
774
			mutex_lock(&dev->struct_mutex);
775
			goto err;
776
		}
777
 
778
		/* As we do not update the known relocation offsets after
779
		 * relocating (due to the complexities in lock handling),
780
		 * we need to mark them as invalid now so that we force the
781
		 * relocation processing next time. Just in case the target
782
		 * object is evicted and then rebound into its old
783
		 * presumed_offset before the next execbuffer - if that
784
		 * happened we would make the mistake of assuming that the
785
		 * relocations were valid.
786
		 */
787
		for (j = 0; j < exec[i].relocation_count; j++) {
5060 serge 788
			if (__copy_to_user(&user_relocs[j].presumed_offset,
3263 Serge 789
					 &invalid_offset,
790
					 sizeof(invalid_offset))) {
791
				ret = -EFAULT;
792
				mutex_lock(&dev->struct_mutex);
793
				goto err;
794
			}
795
		}
796
 
797
		reloc_offset[i] = total;
798
		total += exec[i].relocation_count;
799
	}
800
 
801
	ret = i915_mutex_lock_interruptible(dev);
802
	if (ret) {
803
		mutex_lock(&dev->struct_mutex);
804
		goto err;
805
	}
806
 
807
	/* reacquire the objects */
808
	eb_reset(eb);
4560 Serge 809
	ret = eb_lookup_vmas(eb, exec, args, vm, file);
3480 Serge 810
	if (ret)
3263 Serge 811
		goto err;
812
 
3480 Serge 813
	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
4560 Serge 814
	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
3263 Serge 815
	if (ret)
816
		goto err;
817
 
4560 Serge 818
	list_for_each_entry(vma, &eb->vmas, exec_list) {
819
		int offset = vma->exec_entry - exec;
820
		ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
821
							    reloc + reloc_offset[offset]);
3263 Serge 822
		if (ret)
823
			goto err;
824
	}
825
 
826
	/* Leave the user relocations as they are; this is the painfully slow path,
827
	 * and we want to avoid the complication of dropping the lock whilst
828
	 * having buffers reserved in the aperture and so causing spurious
829
	 * ENOSPC for random operations.
830
	 */
831
 
832
err:
3266 Serge 833
    kfree(reloc);
834
    kfree(reloc_offset);
3263 Serge 835
	return ret;
836
}
837
 
838
static int
5060 serge 839
i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
4560 Serge 840
				struct list_head *vmas)
3263 Serge 841
{
4560 Serge 842
	struct i915_vma *vma;
3263 Serge 843
	uint32_t flush_domains = 0;
4104 Serge 844
	bool flush_chipset = false;
3263 Serge 845
	int ret;
846
 
4560 Serge 847
	list_for_each_entry(vma, vmas, exec_list) {
848
		struct drm_i915_gem_object *obj = vma->obj;
3263 Serge 849
		ret = i915_gem_object_sync(obj, ring);
850
		if (ret)
851
			return ret;
852
 
853
		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
4104 Serge 854
			flush_chipset |= i915_gem_clflush_object(obj, false);
3263 Serge 855
 
856
		flush_domains |= obj->base.write_domain;
857
	}
858
 
4104 Serge 859
	if (flush_chipset)
3263 Serge 860
		i915_gem_chipset_flush(ring->dev);
861
 
862
	if (flush_domains & I915_GEM_DOMAIN_GTT)
863
		wmb();
864
 
865
	/* Unconditionally invalidate gpu caches and ensure that we do flush
866
	 * any residual writes from the previous batch.
867
	 */
868
	return intel_ring_invalidate_all_caches(ring);
869
}
870
 
871
static bool
872
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
873
{
3480 Serge 874
	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
875
		return false;
876
 
3263 Serge 877
	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
878
}
879
 
880
static int
881
validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
882
		   int count)
883
{
884
	int i;
4560 Serge 885
	unsigned relocs_total = 0;
886
	unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
3263 Serge 887
 
888
	for (i = 0; i < count; i++) {
3746 Serge 889
		char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
3263 Serge 890
		int length; /* limited by fault_in_pages_readable() */
891
 
3480 Serge 892
		if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS)
3263 Serge 893
			return -EINVAL;
894
 
3480 Serge 895
		/* First check for malicious input causing overflow in
896
		 * the worst case where we need to allocate the entire
897
		 * relocation tree as a single array.
898
		 */
899
		if (exec[i].relocation_count > relocs_max - relocs_total)
900
			return -EINVAL;
901
		relocs_total += exec[i].relocation_count;
902
 
3263 Serge 903
		length = exec[i].relocation_count *
904
			sizeof(struct drm_i915_gem_relocation_entry);
3746 Serge 905
		/*
906
		 * We must check that the entire relocation array is safe
907
		 * to read, but since we may need to update the presumed
908
		 * offsets during execution, check for full write access.
909
		 */
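		/* (Port note: the access_ok()/fault-in check that follows this
		 * comment in the upstream driver is omitted here, since user
		 * memory is directly addressable in this environment.) */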
4560 Serge 910
	}
3263 Serge 911
 
4560 Serge 912
	return 0;
913
}
914
 
5060 serge 915
static struct intel_context *
4560 Serge 916
i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
5060 serge 917
			  struct intel_engine_cs *ring, const u32 ctx_id)
4560 Serge 918
{
5060 serge 919
	struct intel_context *ctx = NULL;
4560 Serge 920
	struct i915_ctx_hang_stats *hs;
921
 
5060 serge 922
	if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
923
		return ERR_PTR(-EINVAL);
4560 Serge 924
 
5060 serge 925
	ctx = i915_gem_context_get(file->driver_priv, ctx_id);
926
	if (IS_ERR(ctx))
927
		return ctx;
928
 
929
	hs = &ctx->hang_stats;
4560 Serge 930
	if (hs->banned) {
931
		DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
5060 serge 932
		return ERR_PTR(-EIO);
3263 Serge 933
	}
934
 
5060 serge 935
	return ctx;
3263 Serge 936
}
937
 
938
static void
4560 Serge 939
i915_gem_execbuffer_move_to_active(struct list_head *vmas,
5060 serge 940
				   struct intel_engine_cs *ring)
3263 Serge 941
{
4560 Serge 942
	struct i915_vma *vma;
3263 Serge 943
 
4560 Serge 944
	list_for_each_entry(vma, vmas, exec_list) {
945
		struct drm_i915_gem_object *obj = vma->obj;
3263 Serge 946
		u32 old_read = obj->base.read_domains;
947
		u32 old_write = obj->base.write_domain;
948
 
3480 Serge 949
		obj->base.write_domain = obj->base.pending_write_domain;
950
		if (obj->base.write_domain == 0)
951
			obj->base.pending_read_domains |= obj->base.read_domains;
3263 Serge 952
		obj->base.read_domains = obj->base.pending_read_domains;
953
		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
954
 
4560 Serge 955
		i915_vma_move_to_active(vma, ring);
3263 Serge 956
		if (obj->base.write_domain) {
957
			obj->dirty = 1;
958
			obj->last_write_seqno = intel_ring_get_seqno(ring);
5060 serge 959
 
960
			intel_fb_obj_invalidate(obj, ring);
961
 
962
			/* update for the implicit flush after a batch */
963
			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
3263 Serge 964
		}
965
 
966
		trace_i915_gem_object_change_domain(obj, old_read, old_write);
967
	}
968
}
969
 
970
static void
971
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
972
				    struct drm_file *file,
5060 serge 973
				    struct intel_engine_cs *ring,
4104 Serge 974
				    struct drm_i915_gem_object *obj)
3263 Serge 975
{
976
	/* Unconditionally force add_request to emit a full flush. */
977
	ring->gpu_caches_dirty = true;
978
 
979
	/* Add a breadcrumb for the completion of the batch buffer */
4104 Serge 980
	(void)__i915_add_request(ring, file, obj, NULL);
3263 Serge 981
}
982
 
983
static int
984
i915_reset_gen7_sol_offsets(struct drm_device *dev,
5060 serge 985
			    struct intel_engine_cs *ring)
3263 Serge 986
{
5060 serge 987
	struct drm_i915_private *dev_priv = dev->dev_private;
3263 Serge 988
	int ret, i;
989
 
5060 serge 990
	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) {
991
		DRM_DEBUG("sol reset is gen7/rcs only\n");
992
		return -EINVAL;
993
	}
3263 Serge 994
 
995
	ret = intel_ring_begin(ring, 4 * 3);
996
	if (ret)
997
		return ret;
998
 
999
	for (i = 0; i < 4; i++) {
1000
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1001
		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
1002
		intel_ring_emit(ring, 0);
1003
	}
1004
 
1005
	intel_ring_advance(ring);
1006
 
1007
	return 0;
1008
}
1009
 
1010
static int
5060 serge 1011
legacy_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
1012
			     struct intel_engine_cs *ring,
1013
			     struct intel_context *ctx,
1014
			     struct drm_i915_gem_execbuffer2 *args,
1015
			     struct list_head *vmas,
1016
			     struct drm_i915_gem_object *batch_obj,
1017
			     u64 exec_start, u32 flags)
1018
{
1019
	struct drm_clip_rect *cliprects = NULL;
1020
	struct drm_i915_private *dev_priv = dev->dev_private;
1021
	u64 exec_len;
1022
	int instp_mode;
1023
	u32 instp_mask;
1024
	int i, ret = 0;
1025
 
1026
	if (args->num_cliprects != 0) {
1027
		if (ring != &dev_priv->ring[RCS]) {
1028
			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
1029
			return -EINVAL;
1030
		}
1031
 
1032
		if (INTEL_INFO(dev)->gen >= 5) {
1033
			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
1034
			return -EINVAL;
1035
		}
1036
 
1037
		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
1038
			DRM_DEBUG("execbuf with %u cliprects\n",
1039
				  args->num_cliprects);
1040
			return -EINVAL;
1041
		}
1042
 
1043
		cliprects = kcalloc(args->num_cliprects,
1044
				    sizeof(*cliprects),
1045
				    GFP_KERNEL);
1046
		if (cliprects == NULL) {
1047
			ret = -ENOMEM;
1048
			goto error;
1049
		}
1050
 
1051
		if (copy_from_user(cliprects,
1052
				   to_user_ptr(args->cliprects_ptr),
1053
				   sizeof(*cliprects)*args->num_cliprects)) {
1054
			ret = -EFAULT;
1055
			goto error;
1056
		}
1057
	} else {
1058
		if (args->DR4 == 0xffffffff) {
1059
			DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
1060
			args->DR4 = 0;
1061
		}
1062
 
1063
		if (args->DR1 || args->DR4 || args->cliprects_ptr) {
1064
			DRM_DEBUG("0 cliprects but dirt in cliprects fields\n");
1065
			return -EINVAL;
1066
		}
1067
	}
1068
 
1069
	ret = i915_gem_execbuffer_move_to_gpu(ring, vmas);
1070
	if (ret)
1071
		goto error;
1072
 
1073
	ret = i915_switch_context(ring, ctx);
1074
	if (ret)
1075
		goto error;
1076
 
1077
	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1078
	instp_mask = I915_EXEC_CONSTANTS_MASK;
1079
	switch (instp_mode) {
1080
	case I915_EXEC_CONSTANTS_REL_GENERAL:
1081
	case I915_EXEC_CONSTANTS_ABSOLUTE:
1082
	case I915_EXEC_CONSTANTS_REL_SURFACE:
1083
		if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
1084
			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
1085
			ret = -EINVAL;
1086
			goto error;
1087
		}
1088
 
1089
		if (instp_mode != dev_priv->relative_constants_mode) {
1090
			if (INTEL_INFO(dev)->gen < 4) {
1091
				DRM_DEBUG("no rel constants on pre-gen4\n");
1092
				ret = -EINVAL;
1093
				goto error;
1094
			}
1095
 
1096
			if (INTEL_INFO(dev)->gen > 5 &&
1097
			    instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
1098
				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
1099
				ret = -EINVAL;
1100
				goto error;
1101
			}
1102
 
1103
			/* The HW changed the meaning on this bit on gen6 */
1104
			if (INTEL_INFO(dev)->gen >= 6)
1105
				instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
1106
		}
1107
		break;
1108
	default:
1109
		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
1110
		ret = -EINVAL;
1111
		goto error;
1112
	}
1113
 
1114
	if (ring == &dev_priv->ring[RCS] &&
1115
			instp_mode != dev_priv->relative_constants_mode) {
1116
		ret = intel_ring_begin(ring, 4);
1117
		if (ret)
1118
			goto error;
1119
 
1120
		intel_ring_emit(ring, MI_NOOP);
1121
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1122
		intel_ring_emit(ring, INSTPM);
1123
		intel_ring_emit(ring, instp_mask << 16 | instp_mode);
1124
		intel_ring_advance(ring);
1125
 
1126
		dev_priv->relative_constants_mode = instp_mode;
1127
	}
1128
 
1129
	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1130
		ret = i915_reset_gen7_sol_offsets(dev, ring);
1131
		if (ret)
1132
			goto error;
1133
	}
1134
 
1135
	exec_len = args->batch_len;
1136
	if (cliprects) {
1137
		for (i = 0; i < args->num_cliprects; i++) {
1138
			ret = i915_emit_box(dev, &cliprects[i],
1139
					    args->DR1, args->DR4);
1140
			if (ret)
1141
				goto error;
1142
 
1143
			ret = ring->dispatch_execbuffer(ring,
1144
							exec_start, exec_len,
1145
							flags);
1146
			if (ret)
1147
				goto error;
1148
		}
1149
	} else {
1150
		ret = ring->dispatch_execbuffer(ring,
1151
						exec_start, exec_len,
1152
						flags);
1153
		if (ret)
1154
			return ret;
1155
	}
1156
 
1157
	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
1158
 
1159
	i915_gem_execbuffer_move_to_active(vmas, ring);
1160
	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
1161
 
1162
error:
1163
	kfree(cliprects);
1164
	return ret;
1165
}
1166
 
1167
/**
1168
 * Find one BSD ring to dispatch the corresponding BSD command.
1169
 * The Ring ID is returned.
1170
 */
1171
static int gen8_dispatch_bsd_ring(struct drm_device *dev,
1172
				  struct drm_file *file)
1173
{
1174
	struct drm_i915_private *dev_priv = dev->dev_private;
1175
	struct drm_i915_file_private *file_priv = file->driver_priv;
1176
 
1177
	/* Check whether the file_priv is using one ring */
1178
	if (file_priv->bsd_ring)
1179
		return file_priv->bsd_ring->id;
1180
	else {
1181
		/* If no, use the ping-pong mechanism to select one ring */
1182
		int ring_id;
1183
 
1184
		mutex_lock(&dev->struct_mutex);
1185
		if (dev_priv->mm.bsd_ring_dispatch_index == 0) {
1186
			ring_id = VCS;
1187
			dev_priv->mm.bsd_ring_dispatch_index = 1;
1188
		} else {
1189
			ring_id = VCS2;
1190
			dev_priv->mm.bsd_ring_dispatch_index = 0;
1191
		}
1192
		file_priv->bsd_ring = &dev_priv->ring[ring_id];
1193
		mutex_unlock(&dev->struct_mutex);
1194
		return ring_id;
1195
	}
1196
}
1197
 
1198
static struct drm_i915_gem_object *
1199
eb_get_batch(struct eb_vmas *eb)
1200
{
1201
	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
1202
 
1203
	/*
1204
	 * SNA is doing fancy tricks with compressing batch buffers, which leads
1205
	 * to negative relocation deltas. Usually that works out ok since the
1206
	 * relocate address is still positive, except when the batch is placed
1207
	 * very low in the GTT. Ensure this doesn't happen.
1208
	 *
1209
	 * Note that actual hangs have only been observed on gen7, but for
1210
	 * paranoia do it everywhere.
1211
	 */
1212
	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
1213
 
1214
	return vma->obj;
1215
}
1216
 
1217
static int
3263 Serge 1218
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1219
		       struct drm_file *file,
1220
		       struct drm_i915_gem_execbuffer2 *args,
5060 serge 1221
		       struct drm_i915_gem_exec_object2 *exec)
3263 Serge 1222
{
5060 serge 1223
	struct drm_i915_private *dev_priv = dev->dev_private;
4560 Serge 1224
	struct eb_vmas *eb;
3263 Serge 1225
	struct drm_i915_gem_object *batch_obj;
5060 serge 1226
	struct intel_engine_cs *ring;
1227
	struct intel_context *ctx;
1228
	struct i915_address_space *vm;
4560 Serge 1229
	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
5060 serge 1230
	u64 exec_start = args->batch_start_offset;
1231
	u32 flags;
1232
	int ret;
3480 Serge 1233
	bool need_relocs;
3263 Serge 1234
 
3480 Serge 1235
	if (!i915_gem_check_execbuffer(args))
3263 Serge 1236
		return -EINVAL;
1237
 
1238
	ret = validate_exec_list(exec, args->buffer_count);
1239
	if (ret)
1240
		return ret;
1241
 
1242
	flags = 0;
1243
	if (args->flags & I915_EXEC_SECURE) {
1244
 
1245
		flags |= I915_DISPATCH_SECURE;
1246
	}
1247
	if (args->flags & I915_EXEC_IS_PINNED)
1248
		flags |= I915_DISPATCH_PINNED;
1249
 
5060 serge 1250
	if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) {
3263 Serge 1251
		DRM_DEBUG("execbuf with unknown ring: %d\n",
1252
			  (int)(args->flags & I915_EXEC_RING_MASK));
1253
		return -EINVAL;
1254
	}
5060 serge 1255
 
1256
	if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
1257
		ring = &dev_priv->ring[RCS];
1258
	else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
1259
		if (HAS_BSD2(dev)) {
1260
			int ring_id;
1261
			ring_id = gen8_dispatch_bsd_ring(dev, file);
1262
			ring = &dev_priv->ring[ring_id];
1263
		} else
1264
			ring = &dev_priv->ring[VCS];
1265
	} else
1266
		ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];
1267
 
3263 Serge 1268
	if (!intel_ring_initialized(ring)) {
1269
		DRM_DEBUG("execbuf with invalid ring: %d\n",
1270
			  (int)(args->flags & I915_EXEC_RING_MASK));
1271
		return -EINVAL;
1272
	}
1273
 
1274
	if (args->buffer_count < 1) {
1275
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1276
		return -EINVAL;
1277
	}
1278
 
4560 Serge 1279
	intel_runtime_pm_get(dev_priv);
1280
 
3263 Serge 1281
	ret = i915_mutex_lock_interruptible(dev);
1282
	if (ret)
1283
		goto pre_mutex_err;
1284
 
4104 Serge 1285
	if (dev_priv->ums.mm_suspended) {
3263 Serge 1286
		mutex_unlock(&dev->struct_mutex);
1287
		ret = -EBUSY;
1288
		goto pre_mutex_err;
1289
	}
1290
 
5060 serge 1291
	ctx = i915_gem_validate_context(dev, file, ring, ctx_id);
1292
	if (IS_ERR(ctx)) {
4560 Serge 1293
		mutex_unlock(&dev->struct_mutex);
5060 serge 1294
		ret = PTR_ERR(ctx);
4560 Serge 1295
		goto pre_mutex_err;
1296
	}
1297
 
5060 serge 1298
	i915_gem_context_reference(ctx);
1299
 
1300
	vm = ctx->vm;
1301
	if (!USES_FULL_PPGTT(dev))
1302
		vm = &dev_priv->gtt.base;
1303
 
3480 Serge 1304
	eb = eb_create(args);
3263 Serge 1305
	if (eb == NULL) {
5060 serge 1306
		i915_gem_context_unreference(ctx);
3263 Serge 1307
		mutex_unlock(&dev->struct_mutex);
1308
		ret = -ENOMEM;
5060 serge 1309
        goto pre_mutex_err;
3263 Serge 1310
	}
1311
 
1312
	/* Look up object handles */
4560 Serge 1313
	ret = eb_lookup_vmas(eb, exec, args, vm, file);
3480 Serge 1314
	if (ret)
3263 Serge 1315
		goto err;
1316
 
1317
	/* take note of the batch buffer before we might reorder the lists */
5060 serge 1318
	batch_obj = eb_get_batch(eb);
3263 Serge 1319
 
1320
	/* Move the objects en-masse into the GTT, evicting if necessary. */
3480 Serge 1321
	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
4560 Serge 1322
	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
3263 Serge 1323
	if (ret)
1324
		goto err;
1325
 
1326
	/* The objects are in their final locations, apply the relocations. */
3480 Serge 1327
	if (need_relocs)
4560 Serge 1328
		ret = i915_gem_execbuffer_relocate(eb);
3263 Serge 1329
	if (ret) {
1330
		if (ret == -EFAULT) {
3480 Serge 1331
			ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
4560 Serge 1332
								eb, exec);
3263 Serge 1333
			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1334
		}
1335
		if (ret)
1336
			goto err;
5060 serge 1337
        }
3263 Serge 1338
 
1339
	/* Set the pending read domains for the batch buffer to COMMAND */
1340
	if (batch_obj->base.pending_write_domain) {
1341
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1342
		ret = -EINVAL;
1343
		goto err;
1344
	}
1345
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
5060 serge 1346
#if 0
1347
	if (i915_needs_cmd_parser(ring)) {
1348
		ret = i915_parse_cmds(ring,
1349
				      batch_obj,
1350
				      args->batch_start_offset,
1351
				      file->is_master);
1352
		if (ret)
1353
			goto err;
3263 Serge 1354
 
5060 serge 1355
		/*
1356
		 * XXX: Actually do this when enabling batch copy...
1357
		 *
1358
		 * Set the DISPATCH_SECURE bit to remove the NON_SECURE bit
1359
		 * from MI_BATCH_BUFFER_START commands issued in the
1360
		 * dispatch_execbuffer implementations. We specifically don't
1361
		 * want that set when the command parser is enabled.
1362
		 */
1363
	}
1364
#endif
3263 Serge 1365
	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
1366
	 * batch" bit. Hence we need to pin secure batches into the global gtt.
4560 Serge 1367
	 * hsw should have this fixed, but bdw mucks it up again. */
5060 serge 1368
	if (flags & I915_DISPATCH_SECURE &&
1369
	    !batch_obj->has_global_gtt_mapping) {
1370
		/* When we have multiple VMs, we'll need to make sure that we
1371
		 * allocate space first */
1372
		struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
1373
		BUG_ON(!vma);
1374
		vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
1375
	}
3263 Serge 1376
 
5060 serge 1377
	if (flags & I915_DISPATCH_SECURE)
1378
		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
1379
	else
1380
		exec_start += i915_gem_obj_offset(batch_obj, vm);
3263 Serge 1381
 
5060 serge 1382
	ret = legacy_ringbuffer_submission(dev, file, ring, ctx,
1383
			args, &eb->vmas, batch_obj, exec_start, flags);
1384
	if (ret)
3263 Serge 1385
		goto err;
1386
 
1387
err:
5060 serge 1388
	/* the request owns the ref now */
1389
	i915_gem_context_unreference(ctx);
3263 Serge 1390
	eb_destroy(eb);
1391
 
1392
	mutex_unlock(&dev->struct_mutex);
1393
 
1394
pre_mutex_err:
4560 Serge 1395
	/* intel_gpu_busy should also get a ref, so it will free when the device
1396
	 * is really idle. */
1397
	intel_runtime_pm_put(dev_priv);
3263 Serge 1398
	return ret;
1399
}
1400
 
4246 Serge 1401
#if 0
1402
/*
1403
 * Legacy execbuffer just creates an exec2 list from the original exec object
1404
 * list array and passes it to the real function.
1405
 */
1406
int
1407
i915_gem_execbuffer(struct drm_device *dev, void *data,
1408
		    struct drm_file *file)
1409
{
1410
	struct drm_i915_gem_execbuffer *args = data;
1411
	struct drm_i915_gem_execbuffer2 exec2;
1412
	struct drm_i915_gem_exec_object *exec_list = NULL;
1413
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1414
	int ret, i;
3480 Serge 1415
 
4246 Serge 1416
	if (args->buffer_count < 1) {
1417
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1418
		return -EINVAL;
1419
	}
3480 Serge 1420
 
4246 Serge 1421
	/* Copy in the exec list from userland */
1422
	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1423
	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1424
	if (exec_list == NULL || exec2_list == NULL) {
1425
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1426
			  args->buffer_count);
1427
		drm_free_large(exec_list);
1428
		drm_free_large(exec2_list);
1429
		return -ENOMEM;
1430
	}
1431
	ret = copy_from_user(exec_list,
1432
			     to_user_ptr(args->buffers_ptr),
1433
			     sizeof(*exec_list) * args->buffer_count);
1434
	if (ret != 0) {
1435
		DRM_DEBUG("copy %d exec entries failed %d\n",
1436
			  args->buffer_count, ret);
1437
		drm_free_large(exec_list);
1438
		drm_free_large(exec2_list);
1439
		return -EFAULT;
1440
	}
1441
 
1442
	for (i = 0; i < args->buffer_count; i++) {
1443
		exec2_list[i].handle = exec_list[i].handle;
1444
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
1445
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1446
		exec2_list[i].alignment = exec_list[i].alignment;
1447
		exec2_list[i].offset = exec_list[i].offset;
1448
		if (INTEL_INFO(dev)->gen < 4)
1449
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1450
		else
1451
			exec2_list[i].flags = 0;
1452
	}
1453
 
1454
	exec2.buffers_ptr = args->buffers_ptr;
1455
	exec2.buffer_count = args->buffer_count;
1456
	exec2.batch_start_offset = args->batch_start_offset;
1457
	exec2.batch_len = args->batch_len;
1458
	exec2.DR1 = args->DR1;
1459
	exec2.DR4 = args->DR4;
1460
	exec2.num_cliprects = args->num_cliprects;
1461
	exec2.cliprects_ptr = args->cliprects_ptr;
1462
	exec2.flags = I915_EXEC_RENDER;
1463
	i915_execbuffer2_set_context_id(exec2, 0);
1464
 
5060 serge 1465
	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
4246 Serge 1466
	if (!ret) {
5060 serge 1467
		struct drm_i915_gem_exec_object __user *user_exec_list =
1468
			to_user_ptr(args->buffers_ptr);
1469
 
4246 Serge 1470
		/* Copy the new buffer offsets back to the user's exec list. */
5060 serge 1471
		for (i = 0; i < args->buffer_count; i++) {
1472
			ret = __copy_to_user(&user_exec_list[i].offset,
1473
					     &exec2_list[i].offset,
1474
					     sizeof(user_exec_list[i].offset));
4246 Serge 1475
			if (ret) {
1476
				ret = -EFAULT;
1477
				DRM_DEBUG("failed to copy %d exec entries "
1478
					  "back to user (%d)\n",
1479
					  args->buffer_count, ret);
5060 serge 1480
				break;
1481
			}
4246 Serge 1482
		}
1483
	}
1484
 
1485
	drm_free_large(exec_list);
1486
	drm_free_large(exec2_list);
1487
	return ret;
1488
}
1489
#endif
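/*
 * The legacy (exec object v1) wrapper above is compiled out in this port;
 * userspace is expected to go through i915_gem_execbuffer2() below.
 */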
1490
 
3263 Serge 1491
int
1492
i915_gem_execbuffer2(struct drm_device *dev, void *data,
1493
		     struct drm_file *file)
1494
{
1495
	struct drm_i915_gem_execbuffer2 *args = data;
1496
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1497
	int ret;
1498
 
1499
	if (args->buffer_count < 1 ||
1500
	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1501
		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1502
		return -EINVAL;
1503
	}
1504
 
5060 serge 1505
	if (args->rsvd2 != 0) {
1506
		DRM_DEBUG("dirty rvsd2 field\n");
1507
		return -EINVAL;
1508
	}
1509
 
3480 Serge 1510
	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
1511
			     GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
3263 Serge 1512
	if (exec2_list == NULL) {
1513
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1514
			  args->buffer_count);
1515
		return -ENOMEM;
1516
	}
1517
	ret = copy_from_user(exec2_list,
4539 Serge 1518
			     to_user_ptr(args->buffers_ptr),
3263 Serge 1519
			     sizeof(*exec2_list) * args->buffer_count);
1520
	if (ret != 0) {
1521
		DRM_DEBUG("copy %d exec entries failed %d\n",
1522
			  args->buffer_count, ret);
3266 Serge 1523
        kfree(exec2_list);
1524
        FAIL();
3263 Serge 1525
		return -EFAULT;
1526
	}
1527
 
5060 serge 1528
	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
3263 Serge 1529
	if (!ret) {
1530
		/* Copy the new buffer offsets back to the user's exec list. */
5060 serge 1531
		struct drm_i915_gem_exec_object2 __user *user_exec_list =
1532
				   to_user_ptr(args->buffers_ptr);
1533
		int i;
1534
 
1535
		for (i = 0; i < args->buffer_count; i++) {
1536
			ret = __copy_to_user(&user_exec_list[i].offset,
1537
					     &exec2_list[i].offset,
1538
					     sizeof(user_exec_list[i].offset));
3263 Serge 1539
			if (ret) {
1540
				ret = -EFAULT;
1541
				DRM_DEBUG("failed to copy %d exec entries "
5060 serge 1542
					  "back to user\n",
1543
					  args->buffer_count);
1544
				break;
1545
			}
3263 Serge 1546
		}
1547
	}
1548
 
3266 Serge 1549
    kfree(exec2_list);
3263 Serge 1550
	return ret;
1551
}