/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */
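
/*
 * Port note: the definitions below appear to be local stand-ins for Linux
 * kernel facilities that are not available in this KolibriOS build:
 * iowrite32() is forwarded to writel(), wmb() becomes an SFENCE, the
 * I915_EXEC_SECURE/I915_EXEC_IS_PINNED flag bits are defined here rather
 * than taken from the uapi header, and the copy_to_user()/copy_from_user()
 * helpers further down collapse to plain memcpy() since buffers appear to
 * live in a single flat address space.
 */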

#define iowrite32(v, addr)      writel((v), (addr))

#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
//#include <linux/dma_remapping.h>

#define I915_EXEC_SECURE        (1<<9)
#define I915_EXEC_IS_PINNED     (1<<10)

#define wmb() asm volatile ("sfence")

struct drm_i915_gem_object *get_fb_obj();
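
/* An exec object handle of -2 appears to be reserved in this port for the
 * current framebuffer: eb_lookup_objects() resolves it through this hook
 * instead of the per-file GEM handle table.
 */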

static inline __attribute__((const))
bool is_power_of_2(unsigned long n)
{
    return (n != 0 && ((n & (n - 1)) == 0));
}

static unsigned long
copy_to_user(void __user *to, const void *from, unsigned long n)
{
    memcpy(to, from, n);
    return 0;
}

static unsigned long
copy_from_user(void *to, const void __user *from, unsigned long n)
{
    memcpy(to, from, n);
    return 0;
}
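
/* Like the Linux originals, these report the number of bytes left uncopied,
 * so the constant 0 means "everything copied". Because no fault handling is
 * possible here, the -EFAULT paths in the callers below are effectively
 * unreachable in this port.
 */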

struct eb_objects {
	struct list_head objects;
	int and;
	union {
		struct drm_i915_gem_object *lut[0];
		struct hlist_head buckets[0];
	};
};
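
/* The 'and' field does double duty: when the caller passed
 * I915_EXEC_HANDLE_LUT it holds -buffer_count and objects are looked up by
 * indexing lut[] directly; otherwise it is the mask for the buckets[] hash
 * table keyed by (handle & eb->and). See eb_create() and eb_get_object().
 */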

static struct eb_objects *
eb_create(struct drm_i915_gem_execbuffer2 *args)
{
	struct eb_objects *eb = NULL;

	if (args->flags & I915_EXEC_HANDLE_LUT) {
		int size = args->buffer_count;
		size *= sizeof(struct drm_i915_gem_object *);
		size += sizeof(struct eb_objects);
		eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
	}

	if (eb == NULL) {
		int size = args->buffer_count;
		int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
		while (count > 2*size)
			count >>= 1;
		eb = kzalloc(count*sizeof(struct hlist_head) +
			     sizeof(struct eb_objects),
			     GFP_TEMPORARY);
		if (eb == NULL)
			return eb;

		eb->and = count - 1;
	} else
		eb->and = -args->buffer_count;

	INIT_LIST_HEAD(&eb->objects);
	return eb;
}

static void
eb_reset(struct eb_objects *eb)
{
	if (eb->and >= 0)
		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
}

static int
eb_lookup_objects(struct eb_objects *eb,
		  struct drm_i915_gem_exec_object2 *exec,
		  const struct drm_i915_gem_execbuffer2 *args,
		  struct drm_file *file)
{
	int i;

	spin_lock(&file->table_lock);
	for (i = 0; i < args->buffer_count; i++) {
		struct drm_i915_gem_object *obj;

		if (exec[i].handle == -2)
			obj = get_fb_obj();
		else
			obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
		if (obj == NULL) {
			spin_unlock(&file->table_lock);
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				   exec[i].handle, i);
			return -ENOENT;
		}

		if (!list_empty(&obj->exec_list)) {
			spin_unlock(&file->table_lock);
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
				   obj, exec[i].handle, i);
			return -EINVAL;
		}

		drm_gem_object_reference(&obj->base);
		list_add_tail(&obj->exec_list, &eb->objects);

		obj->exec_entry = &exec[i];
		if (eb->and < 0) {
			eb->lut[i] = obj;
		} else {
			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
			obj->exec_handle = handle;
			hlist_add_head(&obj->exec_node,
				       &eb->buckets[handle & eb->and]);
		}
	}
	spin_unlock(&file->table_lock);

	return 0;
}

static struct drm_i915_gem_object *
eb_get_object(struct eb_objects *eb, unsigned long handle)
{
	if (eb->and < 0) {
		if (handle >= -eb->and)
			return NULL;
		return eb->lut[handle];
	} else {
		struct hlist_head *head;
		struct hlist_node *node;

		head = &eb->buckets[handle & eb->and];
		hlist_for_each(node, head) {
			struct drm_i915_gem_object *obj;

			obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
			if (obj->exec_handle == handle)
				return obj;
		}
		return NULL;
	}
}

static void
eb_destroy(struct eb_objects *eb)
{
	while (!list_empty(&eb->objects)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&eb->objects,
				       struct drm_i915_gem_object,
				       exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}
	kfree(eb);
}

static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
	return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
		!obj->map_and_fenceable ||
		obj->cache_level != I915_CACHE_NONE);
}

static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
				   struct eb_objects *eb,
				   struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_object *target_obj;
	struct drm_i915_gem_object *target_i915_obj;
	uint32_t target_offset;
	int ret = -EINVAL;

	/* we already hold a reference to all valid objects */
	target_obj = &eb_get_object(eb, reloc->target_handle)->base;
	if (unlikely(target_obj == NULL))
		return -ENOENT;

	target_i915_obj = to_intel_bo(target_obj);
	target_offset = target_i915_obj->gtt_offset;

	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
	 * pipe_control writes because the gpu doesn't properly redirect them
	 * through the ppgtt for non_secure batchbuffers. */
	if (unlikely(IS_GEN6(dev) &&
	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
	    !target_i915_obj->has_global_gtt_mapping)) {
		i915_gem_gtt_bind_object(target_i915_obj,
					 target_i915_obj->cache_level);
	}

	/* Validate that the target is in a valid r/w GPU domain */
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
		DRM_DEBUG("reloc with multiple write domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return ret;
	}
	if (unlikely((reloc->write_domain | reloc->read_domains)
		     & ~I915_GEM_GPU_DOMAINS)) {
		DRM_DEBUG("reloc with read/write non-GPU domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return ret;
	}

	target_obj->pending_read_domains |= reloc->read_domains;
	target_obj->pending_write_domain |= reloc->write_domain;

	/* If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (target_offset == reloc->presumed_offset)
		return 0;

	/* Check that the relocation address is valid... */
	if (unlikely(reloc->offset > obj->base.size - 4)) {
		DRM_DEBUG("Relocation beyond object bounds: "
			  "obj %p target %d offset %d size %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  (int) obj->base.size);
		return ret;
	}
	if (unlikely(reloc->offset & 3)) {
		DRM_DEBUG("Relocation not 4-byte aligned: "
			  "obj %p target %d offset %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset);
		return ret;
	}

	/* We can't wait for rendering with pagefaults disabled */

	reloc->delta += target_offset;
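
	/* The relocation value is written either through a CPU mapping of the
	 * object's backing page or through the GTT aperture; in this port both
	 * paths use the KolibriOS MapIoMem()/FreeKernelSpace() helpers rather
	 * than the atomic kmap/io-mapping routines of the upstream driver.
	 */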
	if (use_cpu_reloc(obj)) {
		uint32_t page_offset = reloc->offset & ~PAGE_MASK;
		char *vaddr;

		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
		if (ret)
			return ret;

		vaddr = (char *)MapIoMem((addr_t)i915_gem_object_get_page(obj,
					 reloc->offset >> PAGE_SHIFT), 4096, 3);
		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
		FreeKernelSpace(vaddr);
	} else {
		struct drm_i915_private *dev_priv = dev->dev_private;
		uint32_t __iomem *reloc_entry;
		void __iomem *reloc_page;

		ret = i915_gem_object_set_to_gtt_domain(obj, true);
		if (ret)
			return ret;

		ret = i915_gem_object_put_fence(obj);
		if (ret)
			return ret;

		/* Map the page containing the relocation we're going to perform.  */
		reloc->offset += obj->gtt_offset;
		reloc_page = (void *)MapIoMem(reloc->offset & PAGE_MASK, 4096, 3);
		reloc_entry = (uint32_t __iomem *)
			(reloc_page + (reloc->offset & ~PAGE_MASK));
		iowrite32(reloc->delta, reloc_entry);
		FreeKernelSpace(reloc_page);
	}

	/* and update the user's relocation entry */
	reloc->presumed_offset = target_offset;

	return 0;
}

static int
i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
				    struct eb_objects *eb)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(64)];
	struct drm_i915_gem_relocation_entry __user *user_relocs;
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int remain, ret;

	user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;

	remain = entry->relocation_count;
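
	/* Relocations are pulled from the caller's list in bursts of at most
	 * N_RELOC(64) entries into stack_reloc[], applied, and the updated
	 * presumed_offset values are copied straight back with memcpy(); this
	 * port assumes the relocation list is directly addressable where the
	 * upstream driver would go through the *_user() accessors.
	 */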
	while (remain) {
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
		int count = remain;
		if (count > ARRAY_SIZE(stack_reloc))
			count = ARRAY_SIZE(stack_reloc);
		remain -= count;

		memcpy(r, user_relocs, count*sizeof(r[0]));

		do {
			u64 offset = r->presumed_offset;

			ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
			if (ret)
				return ret;

			memcpy(&user_relocs->presumed_offset,
			       &r->presumed_offset,
			       sizeof(r->presumed_offset));

			user_relocs++;
			r++;
		} while (--count);
	}

	return 0;
#undef N_RELOC
}

static int
i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
					 struct eb_objects *eb,
					 struct drm_i915_gem_relocation_entry *relocs)
{
	const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int i, ret;

	for (i = 0; i < entry->relocation_count; i++) {
		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int
i915_gem_execbuffer_relocate(struct drm_device *dev,
			     struct eb_objects *eb)
{
	struct drm_i915_gem_object *obj;
	int ret = 0;

	/* This is the fast path and we cannot handle a pagefault whilst
	 * holding the struct mutex lest the user pass in the relocations
	 * contained within a mmaped bo. For in such a case the page
	 * fault handler would call i915_gem_fault() and we would try to
	 * acquire the struct mutex again. Obviously this is bad and so
	 * lockdep complains vehemently.
	 */
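	/* In this port the pagefault_disable()/enable() guards are stubbed out
	 * (see the commented calls below); relocation data is fetched with
	 * plain memcpy(), so the faulting scenario described above does not
	 * arise in the same way.
	 */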
//   pagefault_disable();
	list_for_each_entry(obj, &eb->objects, exec_list) {
		ret = i915_gem_execbuffer_relocate_object(obj, eb);
		if (ret)
			break;
	}
//   pagefault_enable();

	return ret;
}

#define  __EXEC_OBJECT_HAS_PIN (1<<31)
#define  __EXEC_OBJECT_HAS_FENCE (1<<30)

static int
need_reloc_mappable(struct drm_i915_gem_object *obj)
{
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	return entry->relocation_count && !use_cpu_reloc(obj);
}

static int
i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
				   struct intel_ring_buffer *ring,
				   bool *need_reloc)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	bool need_fence, need_mappable;
	int ret;

//    ENTER();

	need_fence =
		has_fenced_gpu_access &&
		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
		obj->tiling_mode != I915_TILING_NONE;
	need_mappable = need_fence || need_reloc_mappable(obj);

	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false);
	if (ret) {
		FAIL();
		return ret;
	}

	entry->flags |= __EXEC_OBJECT_HAS_PIN;

	if (has_fenced_gpu_access) {
		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
			ret = i915_gem_object_get_fence(obj);
			if (ret) {
				FAIL();
				return ret;
			}

			if (i915_gem_object_pin_fence(obj))
				entry->flags |= __EXEC_OBJECT_HAS_FENCE;

			obj->pending_fenced_gpu_access = true;
		}
	}

	/* Ensure ppgtt mapping exists if needed */
	if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
		i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
				       obj, obj->cache_level);

		obj->has_aliasing_ppgtt_mapping = 1;
	}

	if (entry->offset != obj->gtt_offset) {
		entry->offset = obj->gtt_offset;
		*need_reloc = true;
	}

	if (entry->flags & EXEC_OBJECT_WRITE) {
		obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
	}

	if (entry->flags & EXEC_OBJECT_NEEDS_GTT &&
	    !obj->has_global_gtt_mapping)
		i915_gem_gtt_bind_object(obj, obj->cache_level);

	return 0;
}

static void
i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
{
	struct drm_i915_gem_exec_object2 *entry;

	if (!obj->gtt_space)
		return;

	entry = obj->exec_entry;

	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
		i915_gem_object_unpin_fence(obj);

	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
		i915_gem_object_unpin(obj);

	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
}

static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
			    struct drm_file *file,
			    struct list_head *objects,
			    bool *need_relocs)
{
	struct drm_i915_gem_object *obj;
	struct list_head ordered_objects;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	int retry;

//    ENTER();
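
	/* Objects that need a fence or a mappable binding are moved to the
	 * front of the list below, presumably so they are placed while the
	 * mappable half of the GTT is still unfragmented; everything else is
	 * appended behind them.
	 */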

	INIT_LIST_HEAD(&ordered_objects);
	while (!list_empty(objects)) {
		struct drm_i915_gem_exec_object2 *entry;
		bool need_fence, need_mappable;

		obj = list_first_entry(objects,
				       struct drm_i915_gem_object,
				       exec_list);
		entry = obj->exec_entry;

		need_fence =
			has_fenced_gpu_access &&
			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
			obj->tiling_mode != I915_TILING_NONE;
		need_mappable = need_fence || need_reloc_mappable(obj);

		if (need_mappable)
			list_move(&obj->exec_list, &ordered_objects);
		else
			list_move_tail(&obj->exec_list, &ordered_objects);

		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
		obj->base.pending_write_domain = 0;
		obj->pending_fenced_gpu_access = false;
	}
	list_splice(&ordered_objects, objects);

	/* Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
	 * This avoids unnecessary unbinding of later objects in order to make
	 * room for the earlier objects *unless* we need to defragment.
	 */
	retry = 0;
	do {
		int ret = 0;

		/* Unbind any ill-fitting objects or pin. */
		list_for_each_entry(obj, objects, exec_list) {
			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
			bool need_fence, need_mappable;

			if (!obj->gtt_space)
				continue;

			need_fence =
				has_fenced_gpu_access &&
				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
				obj->tiling_mode != I915_TILING_NONE;
			need_mappable = need_fence || need_reloc_mappable(obj);

			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
			    (need_mappable && !obj->map_and_fenceable))
				ret = i915_gem_object_unbind(obj);
			else
				ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
			if (ret)
				goto err;
		}

		/* Bind fresh objects */
		list_for_each_entry(obj, objects, exec_list) {
			if (obj->gtt_space)
				continue;

			ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
			if (ret)
				goto err;
		}

err:		/* Decrement pin count for bound objects */
		list_for_each_entry(obj, objects, exec_list)
			i915_gem_execbuffer_unreserve_object(obj);

		if (ret != -ENOSPC || retry++) {
//            LEAVE();
			return ret;
		}
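
		/* Upstream would call i915_gem_evict_everything() here and
		 * retry the reservation; with that call stubbed out in this
		 * port, a first -ENOSPC simply falls through to the check
		 * below and is returned to the caller.
		 */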

//       ret = i915_gem_evict_everything(ring->dev);
		if (ret)
			return ret;
	} while (1);
}

static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
				  struct drm_i915_gem_execbuffer2 *args,
				  struct drm_file *file,
				  struct intel_ring_buffer *ring,
				  struct eb_objects *eb,
				  struct drm_i915_gem_exec_object2 *exec)
{
	struct drm_i915_gem_relocation_entry *reloc;
	struct drm_i915_gem_object *obj;
	bool need_relocs;
	int *reloc_offset;
	int i, total, ret;
	int count = args->buffer_count;

	/* We may process another execbuffer during the unlock... */
	while (!list_empty(&eb->objects)) {
		obj = list_first_entry(&eb->objects,
				       struct drm_i915_gem_object,
				       exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}

	mutex_unlock(&dev->struct_mutex);

	total = 0;
	for (i = 0; i < count; i++)
		total += exec[i].relocation_count;

	reloc_offset = malloc(count * sizeof(*reloc_offset));
	reloc = malloc(total * sizeof(*reloc));
	if (reloc == NULL || reloc_offset == NULL) {
		kfree(reloc);
		kfree(reloc_offset);
		mutex_lock(&dev->struct_mutex);
		return -ENOMEM;
	}

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry __user *user_relocs;
		u64 invalid_offset = (u64)-1;
		int j;

		user_relocs = (void __user *)(uintptr_t)exec[i].relocs_ptr;

		if (copy_from_user(reloc+total, user_relocs,
				   exec[i].relocation_count * sizeof(*reloc))) {
			ret = -EFAULT;
			mutex_lock(&dev->struct_mutex);
			goto err;
		}

		/* As we do not update the known relocation offsets after
		 * relocating (due to the complexities in lock handling),
		 * we need to mark them as invalid now so that we force the
		 * relocation processing next time. Just in case the target
		 * object is evicted and then rebound into its old
		 * presumed_offset before the next execbuffer - if that
		 * happened we would make the mistake of assuming that the
		 * relocations were valid.
		 */
		for (j = 0; j < exec[i].relocation_count; j++) {
			if (copy_to_user(&user_relocs[j].presumed_offset,
					 &invalid_offset,
					 sizeof(invalid_offset))) {
				ret = -EFAULT;
				mutex_lock(&dev->struct_mutex);
				goto err;
			}
		}

		reloc_offset[i] = total;
		total += exec[i].relocation_count;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		mutex_lock(&dev->struct_mutex);
		goto err;
	}

	/* reacquire the objects */
	eb_reset(eb);
	ret = eb_lookup_objects(eb, exec, args, file);
	if (ret)
		goto err;

	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
	ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects, &need_relocs);
	if (ret)
		goto err;

	list_for_each_entry(obj, &eb->objects, exec_list) {
		int offset = obj->exec_entry - exec;
		ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
							       reloc + reloc_offset[offset]);
		if (ret)
			goto err;
	}

	/* Leave the user relocations as are, this is the painfully slow path,
	 * and we want to avoid the complication of dropping the lock whilst
	 * having buffers reserved in the aperture and so causing spurious
	 * ENOSPC for random operations.
	 */

err:
	kfree(reloc);
	kfree(reloc_offset);
	return ret;
}

static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
				struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	uint32_t flush_domains = 0;
	int ret;

	list_for_each_entry(obj, objects, exec_list) {
		ret = i915_gem_object_sync(obj, ring);
		if (ret)
			return ret;

		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
			i915_gem_clflush_object(obj);

		flush_domains |= obj->base.write_domain;
	}

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		i915_gem_chipset_flush(ring->dev);

	if (flush_domains & I915_GEM_DOMAIN_GTT)
		wmb();

	/* Unconditionally invalidate gpu caches and ensure that we do flush
	 * any residual writes from the previous batch.
	 */
	return intel_ring_invalidate_all_caches(ring);
}

static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
		return false;

	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
}

static int
validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
		   int count)
{
	int i;
	int relocs_total = 0;
	int relocs_max = INT_MAX / sizeof(struct drm_i915_gem_relocation_entry);

	for (i = 0; i < count; i++) {
		char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
		int length; /* limited by fault_in_pages_readable() */

		if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS)
			return -EINVAL;

		/* First check for malicious input causing overflow in
		 * the worst case where we need to allocate the entire
		 * relocation tree as a single array.
		 */
		if (exec[i].relocation_count > relocs_max - relocs_total)
			return -EINVAL;
		relocs_total += exec[i].relocation_count;

		length = exec[i].relocation_count *
			sizeof(struct drm_i915_gem_relocation_entry);
		/* we may also need to update the presumed offsets */
//       if (!access_ok(VERIFY_WRITE, ptr, length))
//           return -EFAULT;

//       if (fault_in_multipages_readable(ptr, length))
//           return -EFAULT;
	}

	return 0;
}

static void
i915_gem_execbuffer_move_to_active(struct list_head *objects,
				   struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_object *obj;

	list_for_each_entry(obj, objects, exec_list) {
		u32 old_read = obj->base.read_domains;
		u32 old_write = obj->base.write_domain;

		obj->base.write_domain = obj->base.pending_write_domain;
		if (obj->base.write_domain == 0)
			obj->base.pending_read_domains |= obj->base.read_domains;
		obj->base.read_domains = obj->base.pending_read_domains;
		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;

		i915_gem_object_move_to_active(obj, ring);
		if (obj->base.write_domain) {
			obj->dirty = 1;
			obj->last_write_seqno = intel_ring_get_seqno(ring);
			if (obj->pin_count) /* check for potential scanout */
				intel_mark_fb_busy(obj);
		}

		trace_i915_gem_object_change_domain(obj, old_read, old_write);
	}
}

static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
				    struct drm_file *file,
				    struct intel_ring_buffer *ring)
{
	/* Unconditionally force add_request to emit a full flush. */
	ring->gpu_caches_dirty = true;

	/* Add a breadcrumb for the completion of the batch buffer */
	(void)i915_add_request(ring, file, NULL);
}

static int
i915_reset_gen7_sol_offsets(struct drm_device *dev,
			    struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret, i;

	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS])
		return 0;

	ret = intel_ring_begin(ring, 4 * 3);
	if (ret)
		return ret;

	for (i = 0; i < 4; i++) {
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
		intel_ring_emit(ring, 0);
	}

	intel_ring_advance(ring);

	return 0;
}

static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
		       struct drm_i915_gem_exec_object2 *exec)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct eb_objects *eb;
	struct drm_i915_gem_object *batch_obj;
	struct drm_clip_rect *cliprects = NULL;
	struct intel_ring_buffer *ring;
	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
	u32 exec_start, exec_len;
	u32 mask, flags;
	int ret, mode, i;
	bool need_relocs;

	if (!i915_gem_check_execbuffer(args)) {
		FAIL();
		return -EINVAL;
	}

	ret = validate_exec_list(exec, args->buffer_count);
	if (ret) {
		FAIL();
		return ret;
	}

	flags = 0;
	if (args->flags & I915_EXEC_SECURE) {

		flags |= I915_DISPATCH_SECURE;
	}
	if (args->flags & I915_EXEC_IS_PINNED)
		flags |= I915_DISPATCH_PINNED;

	switch (args->flags & I915_EXEC_RING_MASK) {
	case I915_EXEC_DEFAULT:
	case I915_EXEC_RENDER:
		ring = &dev_priv->ring[RCS];
		break;
	case I915_EXEC_BSD:
		ring = &dev_priv->ring[VCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			FAIL();
			return -EPERM;
		}
		break;
	case I915_EXEC_BLT:
		ring = &dev_priv->ring[BCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			return -EPERM;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		return -EINVAL;
	}
	if (!intel_ring_initialized(ring)) {
		DRM_DEBUG("execbuf with invalid ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		return -EINVAL;
	}

	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
	mask = I915_EXEC_CONSTANTS_MASK;
	switch (mode) {
	case I915_EXEC_CONSTANTS_REL_GENERAL:
	case I915_EXEC_CONSTANTS_ABSOLUTE:
	case I915_EXEC_CONSTANTS_REL_SURFACE:
		if (ring == &dev_priv->ring[RCS] &&
		    mode != dev_priv->relative_constants_mode) {
			if (INTEL_INFO(dev)->gen < 4)
				return -EINVAL;

			if (INTEL_INFO(dev)->gen > 5 &&
			    mode == I915_EXEC_CONSTANTS_REL_SURFACE)
				return -EINVAL;

			/* The HW changed the meaning on this bit on gen6 */
			if (INTEL_INFO(dev)->gen >= 6)
				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
		return -EINVAL;
	}

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	if (args->num_cliprects != 0) {
		if (ring != &dev_priv->ring[RCS]) {
			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
			return -EINVAL;
		}

		if (INTEL_INFO(dev)->gen >= 5) {
			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
			return -EINVAL;
		}

		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
			DRM_DEBUG("execbuf with %u cliprects\n",
				  args->num_cliprects);
			return -EINVAL;
		}

		cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects),
				    GFP_KERNEL);
		if (cliprects == NULL) {
			ret = -ENOMEM;
			goto pre_mutex_err;
		}

		if (copy_from_user(cliprects,
				     (struct drm_clip_rect __user *)(uintptr_t)
				     args->cliprects_ptr,
				     sizeof(*cliprects)*args->num_cliprects)) {
			ret = -EFAULT;
			goto pre_mutex_err;
		}
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto pre_mutex_err;

	if (dev_priv->mm.suspended) {
		mutex_unlock(&dev->struct_mutex);
		ret = -EBUSY;
		goto pre_mutex_err;
	}

	eb = eb_create(args);
	if (eb == NULL) {
		mutex_unlock(&dev->struct_mutex);
		ret = -ENOMEM;
		goto pre_mutex_err;
	}

	/* Look up object handles */
	ret = eb_lookup_objects(eb, exec, args, file);
	if (ret)
		goto err;

	/* take note of the batch buffer before we might reorder the lists */
	batch_obj = list_entry(eb->objects.prev,
			       struct drm_i915_gem_object,
			       exec_list);

	/* Move the objects en-masse into the GTT, evicting if necessary. */
	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
	ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects, &need_relocs);
	if (ret)
		goto err;

	/* The objects are in their final locations, apply the relocations. */
	if (need_relocs)
		ret = i915_gem_execbuffer_relocate(dev, eb);
	if (ret) {
		if (ret == -EFAULT) {
			ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
								eb, exec);
			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
		}
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	if (batch_obj->base.pending_write_domain) {
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
		ret = -EINVAL;
		goto err;
	}
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
	 * batch" bit. Hence we need to pin secure batches into the global gtt.
	 * hsw should have this fixed, but let's be paranoid and do it
	 * unconditionally for now. */
	if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
		i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);

	ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->objects);
	if (ret)
		goto err;

	ret = i915_switch_context(ring, file, ctx_id);
	if (ret)
		goto err;

	if (ring == &dev_priv->ring[RCS] &&
	    mode != dev_priv->relative_constants_mode) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			goto err;

		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, INSTPM);
		intel_ring_emit(ring, mask << 16 | mode);
		intel_ring_advance(ring);

		dev_priv->relative_constants_mode = mode;
	}

	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
		ret = i915_reset_gen7_sol_offsets(dev, ring);
		if (ret)
			goto err;
	}

	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
	exec_len = args->batch_len;
	if (cliprects) {
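		/* The cliprect-based dispatch path of the upstream driver is
		 * not implemented in this port: a batch submitted with
		 * cliprects falls through here without being dispatched.
		 */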
	} else {
		ret = ring->dispatch_execbuffer(ring,
						exec_start, exec_len,
						flags);
		if (ret)
			goto err;
	}

	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);

	i915_gem_execbuffer_move_to_active(&eb->objects, ring);
	i915_gem_execbuffer_retire_commands(dev, file, ring);

err:
	eb_destroy(eb);

	mutex_unlock(&dev->struct_mutex);

pre_mutex_err:
	kfree(cliprects);
	return ret;
}

int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

//    ENTER();

	if (args->buffer_count < 1 ||
	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
		FAIL();
		return -EINVAL;
	}

	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
			     GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
	if (exec2_list == NULL) {
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		FAIL();
		return -ENOMEM;
	}
	ret = copy_from_user(exec2_list,
			     (struct drm_i915_relocation_entry __user *)
			     (uintptr_t) args->buffers_ptr,
			     sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		kfree(exec2_list);
		FAIL();
		return -EFAULT;
	}

	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
				   exec2_list,
				   sizeof(*exec2_list) * args->buffer_count);
		if (ret) {
			ret = -EFAULT;
			DRM_DEBUG("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	kfree(exec2_list);

//    LEAVE();

	return ret;
}