Subversion Repositories Kolibri OS

Rev Author Line No. Line
3263 Serge 1
/*
2
 * Copyright © 2008,2010 Intel Corporation
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 * IN THE SOFTWARE.
22
 *
23
 * Authors:
24
 *    Eric Anholt <eric@anholt.net>
25
 *    Chris Wilson <chris@chris-wilson.co.uk>
26
 *
27
 */
28
 
29
#include <drm/drmP.h>
30
#include <drm/i915_drm.h>
31
#include "i915_drv.h"
32
#include "i915_trace.h"
33
#include "intel_drv.h"
5060 serge 34
#include <linux/dma_remapping.h>
3263 Serge 35
 
4560 Serge 36
#define  __EXEC_OBJECT_HAS_PIN (1<<31)
37
#define  __EXEC_OBJECT_HAS_FENCE (1<<30)
5354 serge 38
#define  __EXEC_OBJECT_NEEDS_MAP (1<<29)
5060 serge 39
#define  __EXEC_OBJECT_NEEDS_BIAS (1<<28)
3263 Serge 40
 
5060 serge 41
#define BATCH_OFFSET_BIAS (256*1024)
3263 Serge 42
 
43
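/*
 * KolibriOS port note: this local stand-in for the Linux copy_from_user()
 * copies with a plain memcpy() and always reports success, since the port
 * treats userspace pointers as directly addressable.
 */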
static unsigned long
44
copy_from_user(void *to, const void __user *from, unsigned long n)
45
{
46
    memcpy(to, from, n);
47
    return 0;
48
}
49
 
4560 Serge 50
struct eb_vmas {
51
	struct list_head vmas;
3263 Serge 52
	int and;
3480 Serge 53
	union {
4560 Serge 54
		struct i915_vma *lut[0];
5060 serge 55
		struct hlist_head buckets[0];
3480 Serge 56
	};
3263 Serge 57
};
58
 
4560 Serge 59
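/*
 * Allocate the eb_vmas tracking structure: a flat lookup table when
 * userspace passes I915_EXEC_HANDLE_LUT (a negative eb->and flags this
 * case), otherwise a power-of-two hash table sized to fit within roughly
 * half a page of buckets.
 */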
static struct eb_vmas *
3480 Serge 60
eb_create(struct drm_i915_gem_execbuffer2 *args)
3263 Serge 61
{
4560 Serge 62
	struct eb_vmas *eb = NULL;
3480 Serge 63
 
64
	if (args->flags & I915_EXEC_HANDLE_LUT) {
4560 Serge 65
		unsigned size = args->buffer_count;
66
		size *= sizeof(struct i915_vma *);
67
		size += sizeof(struct eb_vmas);
3480 Serge 68
		eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
69
	}
70
 
71
	if (eb == NULL) {
4560 Serge 72
		unsigned size = args->buffer_count;
73
		unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
3480 Serge 74
		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
75
		while (count > 2*size)
3263 Serge 76
		count >>= 1;
77
	eb = kzalloc(count*sizeof(struct hlist_head) +
4560 Serge 78
			     sizeof(struct eb_vmas),
3480 Serge 79
			     GFP_TEMPORARY);
3263 Serge 80
	if (eb == NULL)
81
		return eb;
82
 
83
	eb->and = count - 1;
3480 Serge 84
	} else
85
		eb->and = -args->buffer_count;
86
 
4560 Serge 87
	INIT_LIST_HEAD(&eb->vmas);
3263 Serge 88
	return eb;
89
}
90
 
91
static void
4560 Serge 92
eb_reset(struct eb_vmas *eb)
3263 Serge 93
{
3480 Serge 94
	if (eb->and >= 0)
3263 Serge 95
	memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
96
}
97
 
3480 Serge 98
static int
4560 Serge 99
eb_lookup_vmas(struct eb_vmas *eb,
3480 Serge 100
		  struct drm_i915_gem_exec_object2 *exec,
101
		  const struct drm_i915_gem_execbuffer2 *args,
4560 Serge 102
	       struct i915_address_space *vm,
3480 Serge 103
		  struct drm_file *file)
3263 Serge 104
{
4560 Serge 105
	struct drm_i915_gem_object *obj;
106
	struct list_head objects;
107
	int i, ret;
3480 Serge 108
 
4560 Serge 109
	INIT_LIST_HEAD(&objects);
3480 Serge 110
	spin_lock(&file->table_lock);
4560 Serge 111
	/* Grab a reference to the object and release the lock so we can look up
112
	 * or create the VMA without using GFP_ATOMIC */
3480 Serge 113
	for (i = 0; i < args->buffer_count; i++) {
5060 serge 114
        obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
3480 Serge 115
		if (obj == NULL) {
116
			spin_unlock(&file->table_lock);
117
			DRM_DEBUG("Invalid object handle %d at index %d\n",
118
				   exec[i].handle, i);
4560 Serge 119
			ret = -ENOENT;
120
			goto err;
3480 Serge 121
		}
122
 
4560 Serge 123
		if (!list_empty(&obj->obj_exec_link)) {
3480 Serge 124
			spin_unlock(&file->table_lock);
125
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
126
				   obj, exec[i].handle, i);
4560 Serge 127
			ret = -EINVAL;
128
			goto err;
3480 Serge 129
		}
130
 
131
		drm_gem_object_reference(&obj->base);
4560 Serge 132
		list_add_tail(&obj->obj_exec_link, &objects);
133
	}
134
	spin_unlock(&file->table_lock);
3480 Serge 135
 
4560 Serge 136
	i = 0;
137
	while (!list_empty(&objects)) {
138
		struct i915_vma *vma;
139
 
140
		obj = list_first_entry(&objects,
141
				       struct drm_i915_gem_object,
142
				       obj_exec_link);
143
 
144
		/*
145
		 * NOTE: We can leak any vmas created here when something fails
146
		 * later on. But that's no issue since vma_unbind can deal with
147
		 * vmas which are not actually bound. And since only
148
		 * lookup_or_create exists as an interface to get at the vma
149
		 * from the (obj, vm) we don't run the risk of creating
150
		 * duplicated vmas for the same vm.
151
		 */
5354 serge 152
		vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
4560 Serge 153
		if (IS_ERR(vma)) {
154
			DRM_DEBUG("Failed to lookup VMA\n");
155
			ret = PTR_ERR(vma);
156
			goto err;
157
		}
158
 
159
		/* Transfer ownership from the objects list to the vmas list. */
160
		list_add_tail(&vma->exec_list, &eb->vmas);
161
		list_del_init(&obj->obj_exec_link);
162
 
163
		vma->exec_entry = &exec[i];
3480 Serge 164
		if (eb->and < 0) {
4560 Serge 165
			eb->lut[i] = vma;
3480 Serge 166
		} else {
167
			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
4560 Serge 168
			vma->exec_handle = handle;
169
			hlist_add_head(&vma->exec_node,
3480 Serge 170
				       &eb->buckets[handle & eb->and]);
171
		}
4560 Serge 172
		++i;
3480 Serge 173
	}
174
 
175
	return 0;
4560 Serge 176
 
177
 
178
err:
179
	while (!list_empty(&objects)) {
180
		obj = list_first_entry(&objects,
181
				       struct drm_i915_gem_object,
182
				       obj_exec_link);
183
		list_del_init(&obj->obj_exec_link);
184
		drm_gem_object_unreference(&obj->base);
185
	}
186
	/*
187
	 * Objects already transfered to the vmas list will be unreferenced by
188
	 * eb_destroy.
189
	 */
190
 
191
	return ret;
3263 Serge 192
}
193
 
4560 Serge 194
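/*
 * Translate an execbuffer handle back to its vma: a direct, bounds-checked
 * index into the LUT when eb->and is negative, a hash-bucket walk otherwise.
 */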
static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
3263 Serge 195
{
3480 Serge 196
	if (eb->and < 0) {
197
		if (handle >= -eb->and)
198
			return NULL;
199
		return eb->lut[handle];
200
	} else {
3263 Serge 201
	struct hlist_head *head;
202
	struct hlist_node *node;
203
 
204
	head = &eb->buckets[handle & eb->and];
205
	hlist_for_each(node, head) {
4560 Serge 206
			struct i915_vma *vma;
3480 Serge 207
 
4560 Serge 208
			vma = hlist_entry(node, struct i915_vma, exec_node);
209
			if (vma->exec_handle == handle)
210
				return vma;
3263 Serge 211
	}
212
	return NULL;
3480 Serge 213
	}
3263 Serge 214
}
215
 
216
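/* Drop the fence and pin references taken by i915_gem_execbuffer_reserve_vma(). */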
static void
4560 Serge 217
i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
3263 Serge 218
{
4560 Serge 219
	struct drm_i915_gem_exec_object2 *entry;
220
	struct drm_i915_gem_object *obj = vma->obj;
3480 Serge 221
 
4560 Serge 222
	if (!drm_mm_node_allocated(&vma->node))
223
		return;
224
 
225
	entry = vma->exec_entry;
226
 
227
	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
228
		i915_gem_object_unpin_fence(obj);
229
 
230
	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
5060 serge 231
		vma->pin_count--;
4560 Serge 232
 
233
	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
234
}
235
 
236
static void eb_destroy(struct eb_vmas *eb)
237
{
238
	while (!list_empty(&eb->vmas)) {
239
		struct i915_vma *vma;
240
 
241
		vma = list_first_entry(&eb->vmas,
242
				       struct i915_vma,
3480 Serge 243
				       exec_list);
4560 Serge 244
		list_del_init(&vma->exec_list);
245
		i915_gem_execbuffer_unreserve_vma(vma);
246
		drm_gem_object_unreference(&vma->obj->base);
3480 Serge 247
	}
3263 Serge 248
	kfree(eb);
249
}
250
 
251
static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
252
{
4560 Serge 253
	return (HAS_LLC(obj->base.dev) ||
254
		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
3263 Serge 255
		!obj->map_and_fenceable ||
256
		obj->cache_level != I915_CACHE_NONE);
257
}
258
 
259
static int
4371 Serge 260
relocate_entry_cpu(struct drm_i915_gem_object *obj,
5060 serge 261
		   struct drm_i915_gem_relocation_entry *reloc,
262
		   uint64_t target_offset)
4371 Serge 263
{
4539 Serge 264
    struct drm_device *dev = obj->base.dev;
5060 serge 265
    uint32_t page_offset = offset_in_page(reloc->offset);
266
	uint64_t delta = reloc->delta + target_offset;
4371 Serge 267
	char *vaddr;
4560 Serge 268
	int ret;
4371 Serge 269
 
4560 Serge 270
	ret = i915_gem_object_set_to_cpu_domain(obj, true);
4371 Serge 271
	if (ret)
272
		return ret;
273
 
5354 serge 274
	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
275
				reloc->offset >> PAGE_SHIFT));
5060 serge 276
	*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);
4371 Serge 277
 
5060 serge 278
	if (INTEL_INFO(dev)->gen >= 8) {
279
		page_offset = offset_in_page(page_offset + sizeof(uint32_t));
280
 
281
		if (page_offset == 0) {
5354 serge 282
			kunmap_atomic(vaddr);
283
			vaddr = kmap_atomic(i915_gem_object_get_page(obj,
284
			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
5060 serge 285
		}
286
 
287
		*(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
288
	}
289
 
5354 serge 290
	kunmap_atomic(vaddr);
291
 
4371 Serge 292
	return 0;
293
}
294
 
295
static int
296
relocate_entry_gtt(struct drm_i915_gem_object *obj,
5060 serge 297
		   struct drm_i915_gem_relocation_entry *reloc,
298
		   uint64_t target_offset)
4371 Serge 299
{
300
	struct drm_device *dev = obj->base.dev;
301
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 302
	uint64_t delta = reloc->delta + target_offset;
5354 serge 303
	uint64_t offset;
4371 Serge 304
	void __iomem *reloc_page;
4560 Serge 305
	int ret;
4371 Serge 306
 
307
	ret = i915_gem_object_set_to_gtt_domain(obj, true);
308
	if (ret)
309
		return ret;
310
 
311
	ret = i915_gem_object_put_fence(obj);
312
	if (ret)
313
		return ret;
314
 
315
	/* Map the page containing the relocation we're going to perform.  */
5354 serge 316
	offset = i915_gem_obj_ggtt_offset(obj);
317
	offset += reloc->offset;
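	/*
	 * KolibriOS port note: the relocation page is made CPU-visible by
	 * remapping the driver's single mappable window with MapPage()
	 * (upstream uses io_mapping_map_atomic_wc() for this).
	 */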
4539 Serge 318
    MapPage(dev_priv->gtt.mappable,dev_priv->gtt.mappable_base +
5354 serge 319
                                 (offset & PAGE_MASK), PG_SW);
4539 Serge 320
	reloc_page = dev_priv->gtt.mappable;
5354 serge 321
	iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));
4371 Serge 322
 
5060 serge 323
 
5354 serge 324
 
4371 Serge 325
	return 0;
326
}
327
 
328
static int
3263 Serge 329
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
4560 Serge 330
				   struct eb_vmas *eb,
5060 serge 331
				   struct drm_i915_gem_relocation_entry *reloc)
3263 Serge 332
{
333
	struct drm_device *dev = obj->base.dev;
334
	struct drm_gem_object *target_obj;
335
	struct drm_i915_gem_object *target_i915_obj;
4560 Serge 336
	struct i915_vma *target_vma;
5060 serge 337
	uint64_t target_offset;
4560 Serge 338
	int ret;
3263 Serge 339
 
340
	/* we already hold a reference to all valid objects */
4560 Serge 341
	target_vma = eb_get_vma(eb, reloc->target_handle);
342
	if (unlikely(target_vma == NULL))
3263 Serge 343
		return -ENOENT;
4560 Serge 344
	target_i915_obj = target_vma->obj;
345
	target_obj = &target_vma->obj->base;
3263 Serge 346
 
4560 Serge 347
	target_offset = target_vma->node.start;
3263 Serge 348
 
349
	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
350
	 * pipe_control writes because the gpu doesn't properly redirect them
351
	 * through the ppgtt for non_secure batchbuffers. */
352
	if (unlikely(IS_GEN6(dev) &&
353
	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
5354 serge 354
	    !(target_vma->bound & GLOBAL_BIND)))
355
		target_vma->bind_vma(target_vma, target_i915_obj->cache_level,
356
				GLOBAL_BIND);
3263 Serge 357
 
358
	/* Validate that the target is in a valid r/w GPU domain */
359
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
360
		DRM_DEBUG("reloc with multiple write domains: "
361
			  "obj %p target %d offset %d "
362
			  "read %08x write %08x",
363
			  obj, reloc->target_handle,
364
			  (int) reloc->offset,
365
			  reloc->read_domains,
366
			  reloc->write_domain);
4560 Serge 367
		return -EINVAL;
3263 Serge 368
	}
369
	if (unlikely((reloc->write_domain | reloc->read_domains)
370
		     & ~I915_GEM_GPU_DOMAINS)) {
371
		DRM_DEBUG("reloc with read/write non-GPU domains: "
372
			  "obj %p target %d offset %d "
373
			  "read %08x write %08x",
374
			  obj, reloc->target_handle,
375
			  (int) reloc->offset,
376
			  reloc->read_domains,
377
			  reloc->write_domain);
4560 Serge 378
		return -EINVAL;
3263 Serge 379
	}
380
 
381
	target_obj->pending_read_domains |= reloc->read_domains;
382
	target_obj->pending_write_domain |= reloc->write_domain;
383
 
384
	/* If the relocation already has the right value in it, no
385
	 * more work needs to be done.
386
	 */
387
	if (target_offset == reloc->presumed_offset)
388
		return 0;
389
 
390
	/* Check that the relocation address is valid... */
4560 Serge 391
	if (unlikely(reloc->offset >
392
		obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
3263 Serge 393
		DRM_DEBUG("Relocation beyond object bounds: "
394
			  "obj %p target %d offset %d size %d.\n",
395
			  obj, reloc->target_handle,
396
			  (int) reloc->offset,
397
			  (int) obj->base.size);
4560 Serge 398
		return -EINVAL;
3263 Serge 399
	}
400
	if (unlikely(reloc->offset & 3)) {
401
		DRM_DEBUG("Relocation not 4-byte aligned: "
402
			  "obj %p target %d offset %d.\n",
403
			  obj, reloc->target_handle,
404
			  (int) reloc->offset);
4560 Serge 405
		return -EINVAL;
3263 Serge 406
	}
407
 
408
	/* We can't wait for rendering with pagefaults disabled */
409
 
4371 Serge 410
	if (use_cpu_reloc(obj))
5060 serge 411
		ret = relocate_entry_cpu(obj, reloc, target_offset);
4371 Serge 412
	else
5060 serge 413
		ret = relocate_entry_gtt(obj, reloc, target_offset);
3263 Serge 414
 
415
		if (ret)
416
			return ret;
417
 
418
	/* and update the user's relocation entry */
419
	reloc->presumed_offset = target_offset;
420
 
421
	return 0;
422
}
423
 
424
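/*
 * Apply all relocations of a single vma.  Entries are pulled from userspace
 * in bursts of up to N_RELOC(64) onto the stack, and a presumed_offset is
 * written back only when the relocation actually changed it.
 */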
static int
4560 Serge 425
i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
426
				 struct eb_vmas *eb)
3263 Serge 427
{
428
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
3266 Serge 429
	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(64)];
3263 Serge 430
	struct drm_i915_gem_relocation_entry __user *user_relocs;
4560 Serge 431
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
3263 Serge 432
	int remain, ret;
433
 
4539 Serge 434
	user_relocs = to_user_ptr(entry->relocs_ptr);
3263 Serge 435
 
436
	remain = entry->relocation_count;
437
	while (remain) {
438
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
439
		int count = remain;
440
		if (count > ARRAY_SIZE(stack_reloc))
441
			count = ARRAY_SIZE(stack_reloc);
442
		remain -= count;
443
 
444
        memcpy(r, user_relocs, count*sizeof(r[0]));
445
 
446
		do {
447
			u64 offset = r->presumed_offset;
448
 
5060 serge 449
			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r);
3263 Serge 450
			if (ret)
451
				return ret;
452
 
4392 Serge 453
		if (r->presumed_offset != offset)
454
		{
3263 Serge 455
            memcpy(&user_relocs->presumed_offset,
456
                   &r->presumed_offset,
457
                   sizeof(r->presumed_offset));
4392 Serge 458
		}
3263 Serge 459
 
460
			user_relocs++;
461
			r++;
462
		} while (--count);
463
	}
464
 
465
	return 0;
466
#undef N_RELOC
467
}
468
 
469
static int
4560 Serge 470
i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
471
				      struct eb_vmas *eb,
472
				      struct drm_i915_gem_relocation_entry *relocs)
3263 Serge 473
{
4560 Serge 474
	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
3263 Serge 475
	int i, ret;
476
 
477
	for (i = 0; i < entry->relocation_count; i++) {
5060 serge 478
		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]);
3263 Serge 479
		if (ret)
480
			return ret;
481
	}
482
 
483
	return 0;
484
}
485
 
486
static int
4560 Serge 487
i915_gem_execbuffer_relocate(struct eb_vmas *eb)
3263 Serge 488
{
4560 Serge 489
	struct i915_vma *vma;
3263 Serge 490
	int ret = 0;
491
 
492
	/* This is the fast path and we cannot handle a pagefault whilst
493
	 * holding the struct mutex lest the user pass in the relocations
494
	 * contained within a mmapped bo, for in such a case the page
495
	 * fault handler would call i915_gem_fault() and we would try to
496
	 * acquire the struct mutex again. Obviously this is bad and so
497
	 * lockdep complains vehemently.
498
	 */
4104 Serge 499
//	pagefault_disable();
4560 Serge 500
	list_for_each_entry(vma, &eb->vmas, exec_list) {
501
		ret = i915_gem_execbuffer_relocate_vma(vma, eb);
3263 Serge 502
		if (ret)
503
			break;
504
	}
505
//   pagefault_enable();
506
 
507
	return ret;
508
}
509
 
510
static int
4560 Serge 511
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
5060 serge 512
				struct intel_engine_cs *ring,
3480 Serge 513
				   bool *need_reloc)
3263 Serge 514
{
5060 serge 515
	struct drm_i915_gem_object *obj = vma->obj;
4560 Serge 516
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
5060 serge 517
	uint64_t flags;
3263 Serge 518
	int ret;
519
 
5060 serge 520
	flags = 0;
5354 serge 521
	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
522
		flags |= PIN_GLOBAL | PIN_MAPPABLE;
5060 serge 523
	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
524
		flags |= PIN_GLOBAL;
525
	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
526
		flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
527
 
528
	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
3263 Serge 529
	if (ret)
530
		return ret;
531
 
532
	entry->flags |= __EXEC_OBJECT_HAS_PIN;
533
 
534
		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
535
			ret = i915_gem_object_get_fence(obj);
536
			if (ret)
537
				return ret;
538
 
539
			if (i915_gem_object_pin_fence(obj))
540
				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
541
	}
542
 
4560 Serge 543
	if (entry->offset != vma->node.start) {
544
		entry->offset = vma->node.start;
3480 Serge 545
		*need_reloc = true;
546
	}
3266 Serge 547
 
3480 Serge 548
	if (entry->flags & EXEC_OBJECT_WRITE) {
549
		obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
550
		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
551
	}
552
 
3263 Serge 553
	return 0;
554
}
555
 
5060 serge 556
static bool
5354 serge 557
need_reloc_mappable(struct i915_vma *vma)
5060 serge 558
{
559
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
5354 serge 560
 
561
	if (entry->relocation_count == 0)
562
		return false;
563
 
564
	if (!i915_is_ggtt(vma->vm))
565
		return false;
566
 
567
	/* See also use_cpu_reloc() */
568
	if (HAS_LLC(vma->obj->base.dev))
569
		return false;
570
 
571
	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
572
		return false;
573
 
574
	return true;
575
}
576
 
577
static bool
578
eb_vma_misplaced(struct i915_vma *vma)
579
{
580
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
5060 serge 581
	struct drm_i915_gem_object *obj = vma->obj;
582
 
5354 serge 583
	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
5060 serge 584
	       !i915_is_ggtt(vma->vm));
585
 
586
	if (entry->alignment &&
587
	    vma->node.start & (entry->alignment - 1))
588
		return true;
589
 
5354 serge 590
	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
5060 serge 591
		return true;
592
 
593
	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
594
	    vma->node.start < BATCH_OFFSET_BIAS)
595
		return true;
596
 
597
	return false;
598
}
599
 
3263 Serge 600
static int
5060 serge 601
i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
4560 Serge 602
			    struct list_head *vmas,
3480 Serge 603
			    bool *need_relocs)
3263 Serge 604
{
605
	struct drm_i915_gem_object *obj;
4560 Serge 606
	struct i915_vma *vma;
607
	struct i915_address_space *vm;
608
	struct list_head ordered_vmas;
3263 Serge 609
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
610
	int retry;
611
 
5060 serge 612
	i915_gem_retire_requests_ring(ring);
613
 
4560 Serge 614
	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
615
 
616
	INIT_LIST_HEAD(&ordered_vmas);
617
	while (!list_empty(vmas)) {
3263 Serge 618
		struct drm_i915_gem_exec_object2 *entry;
619
		bool need_fence, need_mappable;
620
 
4560 Serge 621
		vma = list_first_entry(vmas, struct i915_vma, exec_list);
622
		obj = vma->obj;
623
		entry = vma->exec_entry;
3263 Serge 624
 
5354 serge 625
		if (!has_fenced_gpu_access)
626
			entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
3263 Serge 627
		need_fence =
628
			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
629
			obj->tiling_mode != I915_TILING_NONE;
4560 Serge 630
		need_mappable = need_fence || need_reloc_mappable(vma);
3263 Serge 631
 
5354 serge 632
		if (need_mappable) {
633
			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
4560 Serge 634
			list_move(&vma->exec_list, &ordered_vmas);
5354 serge 635
		} else
4560 Serge 636
			list_move_tail(&vma->exec_list, &ordered_vmas);
3263 Serge 637
 
3480 Serge 638
		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
3263 Serge 639
		obj->base.pending_write_domain = 0;
640
	}
4560 Serge 641
	list_splice(&ordered_vmas, vmas);
3263 Serge 642
 
643
	/* Attempt to pin all of the buffers into the GTT.
644
	 * This is done in 3 phases:
645
	 *
646
	 * 1a. Unbind all objects that do not match the GTT constraints for
647
	 *     the execbuffer (fenceable, mappable, alignment etc).
648
	 * 1b. Increment pin count for already bound objects.
649
	 * 2.  Bind new objects.
650
	 * 3.  Decrement pin count.
651
	 *
652
	 * This avoids unnecessary unbinding of later objects in order to make
653
	 * room for the earlier objects *unless* we need to defragment.
654
	 */
655
	retry = 0;
656
	do {
657
		int ret = 0;
658
 
659
		/* Unbind any ill-fitting objects or pin. */
4560 Serge 660
		list_for_each_entry(vma, vmas, exec_list) {
661
			if (!drm_mm_node_allocated(&vma->node))
3263 Serge 662
				continue;
663
 
5354 serge 664
			if (eb_vma_misplaced(vma))
4560 Serge 665
				ret = i915_vma_unbind(vma);
3263 Serge 666
			else
4560 Serge 667
				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
3263 Serge 668
			if (ret)
669
				goto err;
670
		}
671
 
672
		/* Bind fresh objects */
4560 Serge 673
		list_for_each_entry(vma, vmas, exec_list) {
674
			if (drm_mm_node_allocated(&vma->node))
3263 Serge 675
				continue;
676
 
4560 Serge 677
			ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
3263 Serge 678
			if (ret)
679
				goto err;
680
		}
681
 
4560 Serge 682
err:
3263 Serge 683
		if (ret != -ENOSPC || retry++)
684
			return ret;
685
 
4560 Serge 686
		/* Decrement pin count for bound objects */
687
		list_for_each_entry(vma, vmas, exec_list)
688
			i915_gem_execbuffer_unreserve_vma(vma);
689
 
5060 serge 690
		ret = i915_gem_evict_vm(vm, true);
3263 Serge 691
		if (ret)
692
			return ret;
693
	} while (1);
694
}
695
 
696
static int
697
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
3480 Serge 698
				  struct drm_i915_gem_execbuffer2 *args,
3263 Serge 699
				  struct drm_file *file,
5060 serge 700
				  struct intel_engine_cs *ring,
4560 Serge 701
				  struct eb_vmas *eb,
702
				  struct drm_i915_gem_exec_object2 *exec)
3263 Serge 703
{
704
	struct drm_i915_gem_relocation_entry *reloc;
4560 Serge 705
	struct i915_address_space *vm;
706
	struct i915_vma *vma;
3480 Serge 707
	bool need_relocs;
3263 Serge 708
	int *reloc_offset;
709
	int i, total, ret;
4560 Serge 710
	unsigned count = args->buffer_count;
3263 Serge 711
 
4560 Serge 712
	vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;
713
 
3263 Serge 714
	/* We may process another execbuffer during the unlock... */
4560 Serge 715
	while (!list_empty(&eb->vmas)) {
716
		vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
717
		list_del_init(&vma->exec_list);
718
		i915_gem_execbuffer_unreserve_vma(vma);
719
		drm_gem_object_unreference(&vma->obj->base);
3263 Serge 720
	}
721
 
722
	mutex_unlock(&dev->struct_mutex);
723
 
724
	total = 0;
725
	for (i = 0; i < count; i++)
726
		total += exec[i].relocation_count;
727
 
728
    reloc_offset = malloc(count * sizeof(*reloc_offset));
729
    reloc = malloc(total * sizeof(*reloc));
730
	if (reloc == NULL || reloc_offset == NULL) {
3266 Serge 731
        kfree(reloc);
732
        kfree(reloc_offset);
3263 Serge 733
		mutex_lock(&dev->struct_mutex);
734
		return -ENOMEM;
735
	}
736
 
737
	total = 0;
738
	for (i = 0; i < count; i++) {
739
		struct drm_i915_gem_relocation_entry __user *user_relocs;
740
		u64 invalid_offset = (u64)-1;
741
		int j;
742
 
4539 Serge 743
		user_relocs = to_user_ptr(exec[i].relocs_ptr);
3263 Serge 744
 
745
		if (copy_from_user(reloc+total, user_relocs,
746
				   exec[i].relocation_count * sizeof(*reloc))) {
747
			ret = -EFAULT;
748
			mutex_lock(&dev->struct_mutex);
749
			goto err;
750
		}
751
 
752
		/* As we do not update the known relocation offsets after
753
		 * relocating (due to the complexities in lock handling),
754
		 * we need to mark them as invalid now so that we force the
755
		 * relocation processing next time. Just in case the target
756
		 * object is evicted and then rebound into its old
757
		 * presumed_offset before the next execbuffer - if that
758
		 * happened we would make the mistake of assuming that the
759
		 * relocations were valid.
760
		 */
761
		for (j = 0; j < exec[i].relocation_count; j++) {
5060 serge 762
			if (__copy_to_user(&user_relocs[j].presumed_offset,
3263 Serge 763
					 &invalid_offset,
764
					 sizeof(invalid_offset))) {
765
				ret = -EFAULT;
766
				mutex_lock(&dev->struct_mutex);
767
				goto err;
768
			}
769
		}
770
 
771
		reloc_offset[i] = total;
772
		total += exec[i].relocation_count;
773
	}
774
 
775
	ret = i915_mutex_lock_interruptible(dev);
776
	if (ret) {
777
		mutex_lock(&dev->struct_mutex);
778
		goto err;
779
	}
780
 
781
	/* reacquire the objects */
782
	eb_reset(eb);
4560 Serge 783
	ret = eb_lookup_vmas(eb, exec, args, vm, file);
3480 Serge 784
	if (ret)
3263 Serge 785
			goto err;
786
 
3480 Serge 787
	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
4560 Serge 788
	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
3263 Serge 789
	if (ret)
790
		goto err;
791
 
4560 Serge 792
	list_for_each_entry(vma, &eb->vmas, exec_list) {
793
		int offset = vma->exec_entry - exec;
794
		ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
795
							    reloc + reloc_offset[offset]);
3263 Serge 796
		if (ret)
797
			goto err;
798
	}
799
 
800
	/* Leave the user relocations as-is; this is the painfully slow path,
801
	 * and we want to avoid the complication of dropping the lock whilst
802
	 * having buffers reserved in the aperture and so causing spurious
803
	 * ENOSPC for random operations.
804
	 */
805
 
806
err:
3266 Serge 807
    kfree(reloc);
808
    kfree(reloc_offset);
3263 Serge 809
	return ret;
810
}
811
 
812
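/*
 * Serialise every object against the target ring and flush any stale CPU
 * or GTT writes before the batch runs; GPU caches are then invalidated
 * unconditionally.
 */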
static int
5060 serge 813
i915_gem_execbuffer_move_to_gpu(struct intel_engine_cs *ring,
4560 Serge 814
				struct list_head *vmas)
3263 Serge 815
{
4560 Serge 816
	struct i915_vma *vma;
3263 Serge 817
	uint32_t flush_domains = 0;
4104 Serge 818
	bool flush_chipset = false;
3263 Serge 819
	int ret;
820
 
4560 Serge 821
	list_for_each_entry(vma, vmas, exec_list) {
822
		struct drm_i915_gem_object *obj = vma->obj;
3263 Serge 823
		ret = i915_gem_object_sync(obj, ring);
824
		if (ret)
825
			return ret;
826
 
827
		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
4104 Serge 828
			flush_chipset |= i915_gem_clflush_object(obj, false);
3263 Serge 829
 
830
		flush_domains |= obj->base.write_domain;
831
	}
832
 
4104 Serge 833
	if (flush_chipset)
3263 Serge 834
		i915_gem_chipset_flush(ring->dev);
835
 
836
	if (flush_domains & I915_GEM_DOMAIN_GTT)
837
		wmb();
838
 
839
	/* Unconditionally invalidate gpu caches and ensure that we do flush
840
	 * any residual writes from the previous batch.
841
	 */
842
	return intel_ring_invalidate_all_caches(ring);
843
}
844
 
845
static bool
846
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
847
{
3480 Serge 848
	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
849
		return false;
850
 
3263 Serge 851
	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
852
}
853
 
854
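/*
 * Sanity-check the userspace exec object list: reject unknown per-object
 * flags (and EXEC_OBJECT_NEEDS_GTT under full PPGTT), and ensure the total
 * relocation count cannot overflow the single allocation used by the slow
 * path.
 */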
static int
5354 serge 855
validate_exec_list(struct drm_device *dev,
856
		   struct drm_i915_gem_exec_object2 *exec,
3263 Serge 857
		   int count)
858
{
4560 Serge 859
	unsigned relocs_total = 0;
860
	unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
5354 serge 861
	unsigned invalid_flags;
862
	int i;
3263 Serge 863
 
5354 serge 864
	invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
865
	if (USES_FULL_PPGTT(dev))
866
		invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
867
 
3263 Serge 868
	for (i = 0; i < count; i++) {
3746 Serge 869
		char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
3263 Serge 870
		int length; /* limited by fault_in_pages_readable() */
871
 
5354 serge 872
		if (exec[i].flags & invalid_flags)
3263 Serge 873
			return -EINVAL;
874
 
3480 Serge 875
		/* First check for malicious input causing overflow in
876
		 * the worst case where we need to allocate the entire
877
		 * relocation tree as a single array.
878
		 */
879
		if (exec[i].relocation_count > relocs_max - relocs_total)
880
			return -EINVAL;
881
		relocs_total += exec[i].relocation_count;
882
 
3263 Serge 883
		length = exec[i].relocation_count *
884
			sizeof(struct drm_i915_gem_relocation_entry);
3746 Serge 885
		/*
886
		 * We must check that the entire relocation array is safe
887
		 * to read, but since we may need to update the presumed
888
		 * offsets during execution, check for full write access.
889
		 */
4560 Serge 890
	}
3263 Serge 891
 
4560 Serge 892
	return 0;
893
}
894
 
5060 serge 895
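/*
 * Resolve the context handle for this submission: only the default context
 * is allowed on non-render rings, banned contexts are refused, and under
 * execlists the logical ring context is created on demand.
 */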
static struct intel_context *
4560 Serge 896
i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
5060 serge 897
			  struct intel_engine_cs *ring, const u32 ctx_id)
4560 Serge 898
{
5060 serge 899
	struct intel_context *ctx = NULL;
4560 Serge 900
	struct i915_ctx_hang_stats *hs;
901
 
5060 serge 902
	if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
903
		return ERR_PTR(-EINVAL);
4560 Serge 904
 
5060 serge 905
	ctx = i915_gem_context_get(file->driver_priv, ctx_id);
906
	if (IS_ERR(ctx))
907
		return ctx;
908
 
909
	hs = &ctx->hang_stats;
4560 Serge 910
	if (hs->banned) {
911
		DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
5060 serge 912
		return ERR_PTR(-EIO);
3263 Serge 913
	}
914
 
5354 serge 915
	if (i915.enable_execlists && !ctx->engine[ring->id].state) {
916
		int ret = intel_lr_context_deferred_create(ctx, ring);
917
		if (ret) {
918
			DRM_DEBUG("Could not create LRC %u: %d\n", ctx_id, ret);
919
			return ERR_PTR(ret);
920
		}
921
	}
922
 
5060 serge 923
	return ctx;
3263 Serge 924
}
925
 
5354 serge 926
void
4560 Serge 927
i915_gem_execbuffer_move_to_active(struct list_head *vmas,
5060 serge 928
				   struct intel_engine_cs *ring)
3263 Serge 929
{
5354 serge 930
	u32 seqno = intel_ring_get_seqno(ring);
4560 Serge 931
	struct i915_vma *vma;
3263 Serge 932
 
4560 Serge 933
	list_for_each_entry(vma, vmas, exec_list) {
5354 serge 934
		struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
4560 Serge 935
		struct drm_i915_gem_object *obj = vma->obj;
3263 Serge 936
		u32 old_read = obj->base.read_domains;
937
		u32 old_write = obj->base.write_domain;
938
 
3480 Serge 939
		obj->base.write_domain = obj->base.pending_write_domain;
940
		if (obj->base.write_domain == 0)
941
			obj->base.pending_read_domains |= obj->base.read_domains;
3263 Serge 942
		obj->base.read_domains = obj->base.pending_read_domains;
943
 
4560 Serge 944
		i915_vma_move_to_active(vma, ring);
3263 Serge 945
		if (obj->base.write_domain) {
946
			obj->dirty = 1;
5354 serge 947
			obj->last_write_seqno = seqno;
5060 serge 948
 
949
			intel_fb_obj_invalidate(obj, ring);
950
 
951
			/* update for the implicit flush after a batch */
952
			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
3263 Serge 953
		}
5354 serge 954
		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
955
			obj->last_fenced_seqno = seqno;
956
			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
957
				struct drm_i915_private *dev_priv = to_i915(ring->dev);
958
				list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
959
					       &dev_priv->mm.fence_list);
960
			}
961
		}
3263 Serge 962
 
963
		trace_i915_gem_object_change_domain(obj, old_read, old_write);
964
	}
965
}
966
 
5354 serge 967
void
3263 Serge 968
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
969
				    struct drm_file *file,
5060 serge 970
				    struct intel_engine_cs *ring,
4104 Serge 971
				    struct drm_i915_gem_object *obj)
3263 Serge 972
{
973
	/* Unconditionally force add_request to emit a full flush. */
974
	ring->gpu_caches_dirty = true;
975
 
976
	/* Add a breadcrumb for the completion of the batch buffer */
4104 Serge 977
	(void)__i915_add_request(ring, file, obj, NULL);
3263 Serge 978
}
979
 
980
static int
981
i915_reset_gen7_sol_offsets(struct drm_device *dev,
5060 serge 982
			    struct intel_engine_cs *ring)
3263 Serge 983
{
5060 serge 984
	struct drm_i915_private *dev_priv = dev->dev_private;
3263 Serge 985
	int ret, i;
986
 
5060 serge 987
	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS]) {
988
		DRM_DEBUG("sol reset is gen7/rcs only\n");
989
		return -EINVAL;
990
	}
3263 Serge 991
 
992
	ret = intel_ring_begin(ring, 4 * 3);
993
	if (ret)
994
		return ret;
995
 
996
	for (i = 0; i < 4; i++) {
997
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
998
		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
999
		intel_ring_emit(ring, 0);
1000
	}
1001
 
1002
	intel_ring_advance(ring);
1003
 
1004
	return 0;
1005
}
1006
 
1007
static int
5354 serge 1008
i915_emit_box(struct intel_engine_cs *ring,
1009
	      struct drm_clip_rect *box,
1010
	      int DR1, int DR4)
1011
{
1012
	int ret;
1013
 
1014
	if (box->y2 <= box->y1 || box->x2 <= box->x1 ||
1015
	    box->y2 <= 0 || box->x2 <= 0) {
1016
		DRM_ERROR("Bad box %d,%d..%d,%d\n",
1017
			  box->x1, box->y1, box->x2, box->y2);
1018
		return -EINVAL;
1019
	}
1020
 
1021
	if (INTEL_INFO(ring->dev)->gen >= 4) {
1022
		ret = intel_ring_begin(ring, 4);
1023
		if (ret)
1024
			return ret;
1025
 
1026
		intel_ring_emit(ring, GFX_OP_DRAWRECT_INFO_I965);
1027
		intel_ring_emit(ring, (box->x1 & 0xffff) | box->y1 << 16);
1028
		intel_ring_emit(ring, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16);
1029
		intel_ring_emit(ring, DR4);
1030
	} else {
1031
		ret = intel_ring_begin(ring, 6);
1032
		if (ret)
1033
			return ret;
1034
 
1035
		intel_ring_emit(ring, GFX_OP_DRAWRECT_INFO);
1036
		intel_ring_emit(ring, DR1);
1037
		intel_ring_emit(ring, (box->x1 & 0xffff) | box->y1 << 16);
1038
		intel_ring_emit(ring, ((box->x2 - 1) & 0xffff) | (box->y2 - 1) << 16);
1039
		intel_ring_emit(ring, DR4);
1040
		intel_ring_emit(ring, 0);
1041
	}
1042
	intel_ring_advance(ring);
1043
 
1044
	return 0;
1045
}
1046
 
1047
 
1048
int
1049
i915_gem_ringbuffer_submission(struct drm_device *dev, struct drm_file *file,
5060 serge 1050
			     struct intel_engine_cs *ring,
1051
			     struct intel_context *ctx,
1052
			     struct drm_i915_gem_execbuffer2 *args,
1053
			     struct list_head *vmas,
1054
			     struct drm_i915_gem_object *batch_obj,
1055
			     u64 exec_start, u32 flags)
1056
{
1057
	struct drm_clip_rect *cliprects = NULL;
1058
	struct drm_i915_private *dev_priv = dev->dev_private;
1059
	u64 exec_len;
1060
	int instp_mode;
1061
	u32 instp_mask;
1062
	int i, ret = 0;
1063
 
1064
	if (args->num_cliprects != 0) {
1065
		if (ring != &dev_priv->ring[RCS]) {
1066
			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
1067
			return -EINVAL;
1068
		}
1069
 
1070
		if (INTEL_INFO(dev)->gen >= 5) {
1071
			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
1072
			return -EINVAL;
1073
		}
1074
 
1075
		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
1076
			DRM_DEBUG("execbuf with %u cliprects\n",
1077
				  args->num_cliprects);
1078
			return -EINVAL;
1079
		}
1080
 
1081
		cliprects = kcalloc(args->num_cliprects,
1082
				    sizeof(*cliprects),
1083
				    GFP_KERNEL);
1084
		if (cliprects == NULL) {
1085
			ret = -ENOMEM;
1086
			goto error;
1087
		}
1088
 
1089
		if (copy_from_user(cliprects,
1090
				   to_user_ptr(args->cliprects_ptr),
1091
				   sizeof(*cliprects)*args->num_cliprects)) {
1092
			ret = -EFAULT;
1093
			goto error;
1094
		}
1095
	} else {
1096
		if (args->DR4 == 0xffffffff) {
1097
			DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
1098
			args->DR4 = 0;
1099
		}
1100
 
1101
		if (args->DR1 || args->DR4 || args->cliprects_ptr) {
1102
			DRM_DEBUG("0 cliprects but dirt in cliprects fields\n");
1103
			return -EINVAL;
1104
		}
1105
	}
1106
 
1107
	ret = i915_gem_execbuffer_move_to_gpu(ring, vmas);
1108
	if (ret)
1109
		goto error;
1110
 
1111
	ret = i915_switch_context(ring, ctx);
1112
	if (ret)
1113
		goto error;
1114
 
1115
	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1116
	instp_mask = I915_EXEC_CONSTANTS_MASK;
1117
	switch (instp_mode) {
1118
	case I915_EXEC_CONSTANTS_REL_GENERAL:
1119
	case I915_EXEC_CONSTANTS_ABSOLUTE:
1120
	case I915_EXEC_CONSTANTS_REL_SURFACE:
1121
		if (instp_mode != 0 && ring != &dev_priv->ring[RCS]) {
1122
			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
1123
			ret = -EINVAL;
1124
			goto error;
1125
		}
1126
 
1127
		if (instp_mode != dev_priv->relative_constants_mode) {
1128
			if (INTEL_INFO(dev)->gen < 4) {
1129
				DRM_DEBUG("no rel constants on pre-gen4\n");
1130
				ret = -EINVAL;
1131
				goto error;
1132
			}
1133
 
1134
			if (INTEL_INFO(dev)->gen > 5 &&
1135
			    instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
1136
				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
1137
				ret = -EINVAL;
1138
				goto error;
1139
			}
1140
 
1141
			/* The HW changed the meaning on this bit on gen6 */
1142
			if (INTEL_INFO(dev)->gen >= 6)
1143
				instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
1144
		}
1145
		break;
1146
	default:
1147
		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
1148
		ret = -EINVAL;
1149
		goto error;
1150
	}
1151
 
1152
	if (ring == &dev_priv->ring[RCS] &&
1153
			instp_mode != dev_priv->relative_constants_mode) {
1154
		ret = intel_ring_begin(ring, 4);
1155
		if (ret)
1156
			goto error;
1157
 
1158
		intel_ring_emit(ring, MI_NOOP);
1159
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
1160
		intel_ring_emit(ring, INSTPM);
1161
		intel_ring_emit(ring, instp_mask << 16 | instp_mode);
1162
		intel_ring_advance(ring);
1163
 
1164
		dev_priv->relative_constants_mode = instp_mode;
1165
	}
1166
 
1167
	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1168
		ret = i915_reset_gen7_sol_offsets(dev, ring);
1169
		if (ret)
1170
			goto error;
1171
	}
1172
 
1173
	exec_len = args->batch_len;
1174
	if (cliprects) {
1175
		for (i = 0; i < args->num_cliprects; i++) {
5354 serge 1176
			ret = i915_emit_box(ring, &cliprects[i],
5060 serge 1177
					    args->DR1, args->DR4);
1178
			if (ret)
1179
				goto error;
1180
 
1181
			ret = ring->dispatch_execbuffer(ring,
1182
							exec_start, exec_len,
1183
							flags);
1184
			if (ret)
1185
				goto error;
1186
		}
1187
	} else {
1188
		ret = ring->dispatch_execbuffer(ring,
1189
						exec_start, exec_len,
1190
						flags);
1191
		if (ret)
1192
			return ret;
1193
	}
1194
 
1195
	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
1196
 
1197
	i915_gem_execbuffer_move_to_active(vmas, ring);
1198
	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
1199
 
1200
error:
1201
	kfree(cliprects);
1202
	return ret;
1203
}
1204
 
1205
/**
1206
 * Find one BSD ring to dispatch the corresponding BSD command.
1207
 * The Ring ID is returned.
1208
 */
1209
static int gen8_dispatch_bsd_ring(struct drm_device *dev,
1210
				  struct drm_file *file)
1211
{
1212
	struct drm_i915_private *dev_priv = dev->dev_private;
1213
	struct drm_i915_file_private *file_priv = file->driver_priv;
1214
 
1215
	/* Check whether the file_priv is using one ring */
1216
	if (file_priv->bsd_ring)
1217
		return file_priv->bsd_ring->id;
1218
	else {
1219
		/* If not, use the ping-pong mechanism to select one ring */
1220
		int ring_id;
1221
 
1222
		mutex_lock(&dev->struct_mutex);
1223
		if (dev_priv->mm.bsd_ring_dispatch_index == 0) {
1224
			ring_id = VCS;
1225
			dev_priv->mm.bsd_ring_dispatch_index = 1;
1226
		} else {
1227
			ring_id = VCS2;
1228
			dev_priv->mm.bsd_ring_dispatch_index = 0;
1229
		}
1230
		file_priv->bsd_ring = &dev_priv->ring[ring_id];
1231
		mutex_unlock(&dev->struct_mutex);
1232
		return ring_id;
1233
	}
1234
}
1235
 
1236
static struct drm_i915_gem_object *
1237
eb_get_batch(struct eb_vmas *eb)
1238
{
1239
	struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
1240
 
1241
	/*
1242
	 * SNA is doing fancy tricks with compressing batch buffers, which leads
1243
	 * to negative relocation deltas. Usually that works out ok since the
1244
	 * relocate address is still positive, except when the batch is placed
1245
	 * very low in the GTT. Ensure this doesn't happen.
1246
	 *
1247
	 * Note that actual hangs have only been observed on gen7, but for
1248
	 * paranoia do it everywhere.
1249
	 */
1250
	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
1251
 
1252
	return vma->obj;
1253
}
1254
 
1255
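/*
 * Common execbuffer entry point: validate the exec list, pick the target
 * ring and context, reserve and relocate every vma, then hand the batch to
 * dev_priv->gt.do_execbuf() for submission.
 */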
static int
3263 Serge 1256
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
1257
		       struct drm_file *file,
1258
		       struct drm_i915_gem_execbuffer2 *args,
5060 serge 1259
		       struct drm_i915_gem_exec_object2 *exec)
3263 Serge 1260
{
5060 serge 1261
	struct drm_i915_private *dev_priv = dev->dev_private;
4560 Serge 1262
	struct eb_vmas *eb;
3263 Serge 1263
	struct drm_i915_gem_object *batch_obj;
5060 serge 1264
	struct intel_engine_cs *ring;
1265
	struct intel_context *ctx;
1266
	struct i915_address_space *vm;
4560 Serge 1267
	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
5060 serge 1268
	u64 exec_start = args->batch_start_offset;
1269
	u32 flags;
1270
	int ret;
3480 Serge 1271
	bool need_relocs;
3263 Serge 1272
 
3480 Serge 1273
	if (!i915_gem_check_execbuffer(args))
3263 Serge 1274
		return -EINVAL;
1275
 
5354 serge 1276
	ret = validate_exec_list(dev, exec, args->buffer_count);
3263 Serge 1277
	if (ret)
1278
		return ret;
1279
 
1280
	flags = 0;
1281
	if (args->flags & I915_EXEC_SECURE) {
1282
 
1283
		flags |= I915_DISPATCH_SECURE;
1284
	}
1285
	if (args->flags & I915_EXEC_IS_PINNED)
1286
		flags |= I915_DISPATCH_PINNED;
1287
 
5060 serge 1288
	if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) {
3263 Serge 1289
		DRM_DEBUG("execbuf with unknown ring: %d\n",
1290
			  (int)(args->flags & I915_EXEC_RING_MASK));
1291
		return -EINVAL;
1292
	}
5060 serge 1293
 
1294
	if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_DEFAULT)
1295
		ring = &dev_priv->ring[RCS];
1296
	else if ((args->flags & I915_EXEC_RING_MASK) == I915_EXEC_BSD) {
1297
		if (HAS_BSD2(dev)) {
1298
			int ring_id;
1299
			ring_id = gen8_dispatch_bsd_ring(dev, file);
1300
			ring = &dev_priv->ring[ring_id];
1301
		} else
1302
			ring = &dev_priv->ring[VCS];
1303
	} else
1304
		ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];
1305
 
3263 Serge 1306
	if (!intel_ring_initialized(ring)) {
1307
		DRM_DEBUG("execbuf with invalid ring: %d\n",
1308
			  (int)(args->flags & I915_EXEC_RING_MASK));
1309
		return -EINVAL;
1310
	}
1311
 
1312
	if (args->buffer_count < 1) {
1313
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1314
		return -EINVAL;
1315
	}
1316
 
4560 Serge 1317
	intel_runtime_pm_get(dev_priv);
1318
 
3263 Serge 1319
	ret = i915_mutex_lock_interruptible(dev);
1320
	if (ret)
1321
		goto pre_mutex_err;
1322
 
5060 serge 1323
	ctx = i915_gem_validate_context(dev, file, ring, ctx_id);
1324
	if (IS_ERR(ctx)) {
4560 Serge 1325
		mutex_unlock(&dev->struct_mutex);
5060 serge 1326
		ret = PTR_ERR(ctx);
4560 Serge 1327
		goto pre_mutex_err;
1328
	}
1329
 
5060 serge 1330
	i915_gem_context_reference(ctx);
1331
 
5354 serge 1332
	if (ctx->ppgtt)
1333
		vm = &ctx->ppgtt->base;
1334
	else
5060 serge 1335
		vm = &dev_priv->gtt.base;
1336
 
3480 Serge 1337
	eb = eb_create(args);
3263 Serge 1338
	if (eb == NULL) {
5060 serge 1339
		i915_gem_context_unreference(ctx);
3263 Serge 1340
		mutex_unlock(&dev->struct_mutex);
1341
		ret = -ENOMEM;
5060 serge 1342
        goto pre_mutex_err;
3263 Serge 1343
	}
1344
 
1345
	/* Look up object handles */
4560 Serge 1346
	ret = eb_lookup_vmas(eb, exec, args, vm, file);
3480 Serge 1347
	if (ret)
3263 Serge 1348
			goto err;
1349
 
1350
	/* take note of the batch buffer before we might reorder the lists */
5060 serge 1351
	batch_obj = eb_get_batch(eb);
3263 Serge 1352
 
1353
	/* Move the objects en-masse into the GTT, evicting if necessary. */
3480 Serge 1354
	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
4560 Serge 1355
	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
3263 Serge 1356
	if (ret)
1357
		goto err;
1358
 
1359
	/* The objects are in their final locations, apply the relocations. */
3480 Serge 1360
	if (need_relocs)
4560 Serge 1361
		ret = i915_gem_execbuffer_relocate(eb);
3263 Serge 1362
	if (ret) {
1363
		if (ret == -EFAULT) {
3480 Serge 1364
			ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
4560 Serge 1365
								eb, exec);
3263 Serge 1366
			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1367
		}
1368
		if (ret)
1369
			goto err;
5060 serge 1370
        }
3263 Serge 1371
 
1372
	/* Set the pending read domains for the batch buffer to COMMAND */
1373
	if (batch_obj->base.pending_write_domain) {
1374
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1375
		ret = -EINVAL;
1376
		goto err;
1377
	}
1378
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
5060 serge 1379
#if 0
1380
	if (i915_needs_cmd_parser(ring)) {
1381
		ret = i915_parse_cmds(ring,
1382
				      batch_obj,
1383
				      args->batch_start_offset,
1384
				      file->is_master);
1385
		if (ret)
1386
			goto err;
3263 Serge 1387
 
5060 serge 1388
		/*
1389
		 * XXX: Actually do this when enabling batch copy...
1390
		 *
1391
		 * Set the DISPATCH_SECURE bit to remove the NON_SECURE bit
1392
		 * from MI_BATCH_BUFFER_START commands issued in the
1393
		 * dispatch_execbuffer implementations. We specifically don't
1394
		 * want that set when the command parser is enabled.
1395
		 */
1396
	}
1397
#endif
3263 Serge 1398
	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
1399
	 * batch" bit. Hence we need to pin secure batches into the global gtt.
4560 Serge 1400
	 * hsw should have this fixed, but bdw mucks it up again. */
5354 serge 1401
	if (flags & I915_DISPATCH_SECURE) {
1402
		/*
1403
		 * So on first glance it looks freaky that we pin the batch here
1404
		 * outside of the reservation loop. But:
1405
		 * - The batch is already pinned into the relevant ppgtt, so we
1406
		 *   already have the backing storage fully allocated.
1407
		 * - No other BO uses the global gtt (well contexts, but meh),
1408
		 *   so we don't really have issues with multiple objects not
1409
		 *   fitting due to fragmentation.
1410
		 * So this is actually safe.
1411
		 */
1412
		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
1413
		if (ret)
1414
			goto err;
3263 Serge 1415
 
5060 serge 1416
		exec_start += i915_gem_obj_ggtt_offset(batch_obj);
5354 serge 1417
	} else
5060 serge 1418
		exec_start += i915_gem_obj_offset(batch_obj, vm);
3263 Serge 1419
 
5354 serge 1420
	ret = dev_priv->gt.do_execbuf(dev, file, ring, ctx, args,
1421
				      &eb->vmas, batch_obj, exec_start, flags);
3263 Serge 1422
 
5354 serge 1423
	/*
1424
	 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
1425
	 * batch vma for correctness. For less ugliness and fragility this
1426
	 * needs to be adjusted to also track the ggtt batch vma properly as
1427
	 * active.
1428
	 */
1429
	if (flags & I915_DISPATCH_SECURE)
1430
		i915_gem_object_ggtt_unpin(batch_obj);
3263 Serge 1431
err:
5060 serge 1432
	/* the request owns the ref now */
1433
	i915_gem_context_unreference(ctx);
3263 Serge 1434
	eb_destroy(eb);
1435
 
1436
	mutex_unlock(&dev->struct_mutex);
1437
 
1438
pre_mutex_err:
4560 Serge 1439
	/* intel_gpu_busy should also get a ref, so it will free when the device
1440
	 * is really idle. */
1441
	intel_runtime_pm_put(dev_priv);
3263 Serge 1442
	return ret;
1443
}
1444
 
4246 Serge 1445
#if 0
1446
/*
1447
 * Legacy execbuffer just creates an exec2 list from the original exec object
1448
 * list array and passes it to the real function.
1449
 */
1450
int
1451
i915_gem_execbuffer(struct drm_device *dev, void *data,
1452
		    struct drm_file *file)
1453
{
1454
	struct drm_i915_gem_execbuffer *args = data;
1455
	struct drm_i915_gem_execbuffer2 exec2;
1456
	struct drm_i915_gem_exec_object *exec_list = NULL;
1457
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1458
	int ret, i;
3480 Serge 1459
 
4246 Serge 1460
	if (args->buffer_count < 1) {
1461
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1462
		return -EINVAL;
1463
	}
3480 Serge 1464
 
4246 Serge 1465
	/* Copy in the exec list from userland */
1466
	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
1467
	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
1468
	if (exec_list == NULL || exec2_list == NULL) {
1469
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1470
			  args->buffer_count);
1471
		drm_free_large(exec_list);
1472
		drm_free_large(exec2_list);
1473
		return -ENOMEM;
1474
	}
1475
	ret = copy_from_user(exec_list,
1476
			     to_user_ptr(args->buffers_ptr),
1477
			     sizeof(*exec_list) * args->buffer_count);
1478
	if (ret != 0) {
1479
		DRM_DEBUG("copy %d exec entries failed %d\n",
1480
			  args->buffer_count, ret);
1481
		drm_free_large(exec_list);
1482
		drm_free_large(exec2_list);
1483
		return -EFAULT;
1484
	}
1485
 
1486
	for (i = 0; i < args->buffer_count; i++) {
1487
		exec2_list[i].handle = exec_list[i].handle;
1488
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
1489
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
1490
		exec2_list[i].alignment = exec_list[i].alignment;
1491
		exec2_list[i].offset = exec_list[i].offset;
1492
		if (INTEL_INFO(dev)->gen < 4)
1493
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
1494
		else
1495
			exec2_list[i].flags = 0;
1496
	}
1497
 
1498
	exec2.buffers_ptr = args->buffers_ptr;
1499
	exec2.buffer_count = args->buffer_count;
1500
	exec2.batch_start_offset = args->batch_start_offset;
1501
	exec2.batch_len = args->batch_len;
1502
	exec2.DR1 = args->DR1;
1503
	exec2.DR4 = args->DR4;
1504
	exec2.num_cliprects = args->num_cliprects;
1505
	exec2.cliprects_ptr = args->cliprects_ptr;
1506
	exec2.flags = I915_EXEC_RENDER;
1507
	i915_execbuffer2_set_context_id(exec2, 0);
1508
 
5060 serge 1509
	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
4246 Serge 1510
	if (!ret) {
5060 serge 1511
		struct drm_i915_gem_exec_object __user *user_exec_list =
1512
			to_user_ptr(args->buffers_ptr);
1513
 
4246 Serge 1514
		/* Copy the new buffer offsets back to the user's exec list. */
5060 serge 1515
		for (i = 0; i < args->buffer_count; i++) {
1516
			ret = __copy_to_user(&user_exec_list[i].offset,
1517
					     &exec2_list[i].offset,
1518
					     sizeof(user_exec_list[i].offset));
4246 Serge 1519
		if (ret) {
1520
			ret = -EFAULT;
1521
			DRM_DEBUG("failed to copy %d exec entries "
1522
				  "back to user (%d)\n",
1523
				  args->buffer_count, ret);
5060 serge 1524
				break;
1525
			}
4246 Serge 1526
		}
1527
	}
1528
 
1529
	drm_free_large(exec_list);
1530
	drm_free_large(exec2_list);
1531
	return ret;
1532
}
1533
#endif
1534
 
3263 Serge 1535
int
1536
i915_gem_execbuffer2(struct drm_device *dev, void *data,
1537
		     struct drm_file *file)
1538
{
1539
	struct drm_i915_gem_execbuffer2 *args = data;
1540
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
1541
	int ret;
1542
 
1543
	if (args->buffer_count < 1 ||
1544
	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1545
		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1546
		return -EINVAL;
1547
	}
1548
 
5060 serge 1549
	if (args->rsvd2 != 0) {
1550
		DRM_DEBUG("dirty rvsd2 field\n");
1551
		return -EINVAL;
1552
	}
1553
 
3480 Serge 1554
	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
1555
			     GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
3263 Serge 1556
	if (exec2_list == NULL) {
1557
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1558
			  args->buffer_count);
1559
		return -ENOMEM;
1560
	}
1561
	ret = copy_from_user(exec2_list,
4539 Serge 1562
			     to_user_ptr(args->buffers_ptr),
3263 Serge 1563
			     sizeof(*exec2_list) * args->buffer_count);
1564
	if (ret != 0) {
1565
		DRM_DEBUG("copy %d exec entries failed %d\n",
1566
			  args->buffer_count, ret);
3266 Serge 1567
        kfree(exec2_list);
1568
        FAIL();
3263 Serge 1569
		return -EFAULT;
1570
	}
1571
 
5060 serge 1572
	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
3263 Serge 1573
	if (!ret) {
1574
		/* Copy the new buffer offsets back to the user's exec list. */
5060 serge 1575
		struct drm_i915_gem_exec_object2 __user *user_exec_list =
1576
				   to_user_ptr(args->buffers_ptr);
1577
		int i;
1578
 
1579
		for (i = 0; i < args->buffer_count; i++) {
1580
			ret = __copy_to_user(&user_exec_list[i].offset,
1581
					     &exec2_list[i].offset,
1582
					     sizeof(user_exec_list[i].offset));
3263 Serge 1583
		if (ret) {
1584
			ret = -EFAULT;
1585
			DRM_DEBUG("failed to copy %d exec entries "
5060 serge 1586
					  "back to user\n",
1587
					  args->buffer_count);
1588
				break;
1589
			}
3263 Serge 1590
		}
1591
	}
1592
 
3266 Serge 1593
    kfree(exec2_list);
3263 Serge 1594
	return ret;
1595
}