/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */
 
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
//#include <linux/dma_remapping.h>
 
#define I915_EXEC_SECURE        (1<<9)
#define I915_EXEC_IS_PINNED     (1<<10)

#define wmb() asm volatile ("sfence")
 
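/*
 * get_fb_obj() is provided elsewhere in the KolibriOS port and appears to
 * return the GEM object backing the system framebuffer; eb_lookup_objects()
 * below maps the magic handle value -2 onto it so clients can target the
 * screen without owning a handle for it.
 */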
struct drm_i915_gem_object *get_fb_obj();

static inline __attribute__((const))
bool is_power_of_2(unsigned long n)
{
    return (n != 0 && ((n & (n - 1)) == 0));
}
 
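/*
 * Local stand-ins for the kernel's user-copy helpers. In this port user
 * buffers are assumed to be directly addressable from driver context, so
 * both helpers reduce to memcpy() and always report zero bytes uncopied.
 */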
static unsigned long
copy_to_user(void __user *to, const void *from, unsigned long n)
{
    memcpy(to, from, n);
    return 0;
}

static unsigned long
copy_from_user(void *to, const void __user *from, unsigned long n)
{
    memcpy(to, from, n);
    return 0;
}
 
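/*
 * Bookkeeping for one execbuffer call. 'and' encodes the lookup strategy:
 * a negative value means the objects are stored in a flat per-index LUT
 * (I915_EXEC_HANDLE_LUT), while a non-negative value is the mask used to
 * hash GEM handles into 'buckets'.
 */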
struct eb_objects {
	struct list_head objects;
	int and;
	union {
		struct drm_i915_gem_object *lut[0];
		struct hlist_head buckets[0];
	};
};

static struct eb_objects *
eb_create(struct drm_i915_gem_execbuffer2 *args)
{
	struct eb_objects *eb = NULL;

	if (args->flags & I915_EXEC_HANDLE_LUT) {
		int size = args->buffer_count;
		size *= sizeof(struct drm_i915_gem_object *);
		size += sizeof(struct eb_objects);
		eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
	}

	if (eb == NULL) {
		int size = args->buffer_count;
		int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
		while (count > 2*size)
			count >>= 1;
		eb = kzalloc(count*sizeof(struct hlist_head) +
			     sizeof(struct eb_objects),
			     GFP_TEMPORARY);
		if (eb == NULL)
			return eb;

		eb->and = count - 1;
	} else
		eb->and = -args->buffer_count;

	INIT_LIST_HEAD(&eb->objects);
	return eb;
}

static void
eb_reset(struct eb_objects *eb)
{
	if (eb->and >= 0)
		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
}
 
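/*
 * Resolve every exec entry to a GEM object under file->table_lock, take a
 * reference, and make it findable via eb_get_object(): by position when
 * I915_EXEC_HANDLE_LUT is used, otherwise by handle in the hash buckets.
 * Handle -2 is the port-specific shortcut for the framebuffer object.
 */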
static int
eb_lookup_objects(struct eb_objects *eb,
		  struct drm_i915_gem_exec_object2 *exec,
		  const struct drm_i915_gem_execbuffer2 *args,
		  struct drm_file *file)
{
	int i;

	spin_lock(&file->table_lock);
	for (i = 0; i < args->buffer_count; i++) {
		struct drm_i915_gem_object *obj;

		if (exec[i].handle == -2)
			obj = get_fb_obj();
		else
			obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
		if (obj == NULL) {
			spin_unlock(&file->table_lock);
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				   exec[i].handle, i);
			return -ENOENT;
		}

		if (!list_empty(&obj->exec_list)) {
			spin_unlock(&file->table_lock);
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
				   obj, exec[i].handle, i);
			return -EINVAL;
		}

		drm_gem_object_reference(&obj->base);
		list_add_tail(&obj->exec_list, &eb->objects);

		obj->exec_entry = &exec[i];
		if (eb->and < 0) {
			eb->lut[i] = obj;
		} else {
			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
			obj->exec_handle = handle;
			hlist_add_head(&obj->exec_node,
				       &eb->buckets[handle & eb->and]);
		}
	}
	spin_unlock(&file->table_lock);

	return 0;
}

static struct drm_i915_gem_object *
eb_get_object(struct eb_objects *eb, unsigned long handle)
{
	if (eb->and < 0) {
		if (handle >= -eb->and)
			return NULL;
		return eb->lut[handle];
	} else {
		struct hlist_head *head;
		struct hlist_node *node;

		head = &eb->buckets[handle & eb->and];
		hlist_for_each(node, head) {
			struct drm_i915_gem_object *obj;

			obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
			if (obj->exec_handle == handle)
				return obj;
		}
		return NULL;
	}
}

static void
eb_destroy(struct eb_objects *eb)
{
	while (!list_empty(&eb->objects)) {
		struct drm_i915_gem_object *obj;

		obj = list_first_entry(&eb->objects,
				       struct drm_i915_gem_object,
				       exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}
	kfree(eb);
}

static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
	return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
		!obj->map_and_fenceable ||
		obj->cache_level != I915_CACHE_NONE);
}
 
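/*
 * Apply a single relocation: validate the target and the requested domains,
 * then write target_offset + delta into the batch object, either through a
 * CPU mapping of the backing page or through the GTT aperture. In this port
 * MapIoMem()/FreeKernelSpace() stand in for the kernel mapping primitives
 * used by the upstream driver.
 */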
static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
				   struct eb_objects *eb,
				   struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_object *target_obj;
	struct drm_i915_gem_object *target_i915_obj;
	uint32_t target_offset;
	int ret = -EINVAL;

	/* we already hold a reference to all valid objects */
	target_obj = &eb_get_object(eb, reloc->target_handle)->base;
	if (unlikely(target_obj == NULL))
		return -ENOENT;

	target_i915_obj = to_intel_bo(target_obj);
	target_offset = target_i915_obj->gtt_offset;

	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
	 * pipe_control writes because the gpu doesn't properly redirect them
	 * through the ppgtt for non_secure batchbuffers. */
	if (unlikely(IS_GEN6(dev) &&
	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
	    !target_i915_obj->has_global_gtt_mapping)) {
		i915_gem_gtt_bind_object(target_i915_obj,
					 target_i915_obj->cache_level);
	}

	/* Validate that the target is in a valid r/w GPU domain */
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
		DRM_DEBUG("reloc with multiple write domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return ret;
	}
	if (unlikely((reloc->write_domain | reloc->read_domains)
		     & ~I915_GEM_GPU_DOMAINS)) {
		DRM_DEBUG("reloc with read/write non-GPU domains: "
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
		return ret;
	}

	target_obj->pending_read_domains |= reloc->read_domains;
	target_obj->pending_write_domain |= reloc->write_domain;

	/* If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (target_offset == reloc->presumed_offset)
		return 0;

	/* Check that the relocation address is valid... */
	if (unlikely(reloc->offset > obj->base.size - 4)) {
		DRM_DEBUG("Relocation beyond object bounds: "
			  "obj %p target %d offset %d size %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  (int) obj->base.size);
		return ret;
	}
	if (unlikely(reloc->offset & 3)) {
		DRM_DEBUG("Relocation not 4-byte aligned: "
			  "obj %p target %d offset %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset);
		return ret;
	}

	/* We can't wait for rendering with pagefaults disabled */

	reloc->delta += target_offset;
	if (use_cpu_reloc(obj)) {
		uint32_t page_offset = reloc->offset & ~PAGE_MASK;
		char *vaddr;

		ret = i915_gem_object_set_to_cpu_domain(obj, 1);
		if (ret)
			return ret;

		vaddr = (char *)MapIoMem((addr_t)i915_gem_object_get_page(obj,
					 reloc->offset >> PAGE_SHIFT), 4096, 3);
		*(uint32_t *)(vaddr + page_offset) = reloc->delta;
		FreeKernelSpace(vaddr);
	} else {
		struct drm_i915_private *dev_priv = dev->dev_private;
		uint32_t __iomem *reloc_entry;
		void __iomem *reloc_page;

		ret = i915_gem_object_set_to_gtt_domain(obj, true);
		if (ret)
			return ret;

		ret = i915_gem_object_put_fence(obj);
		if (ret)
			return ret;

		/* Map the page containing the relocation we're going to perform.  */
		reloc->offset += obj->gtt_offset;
		reloc_page = (void *)MapIoMem(reloc->offset & PAGE_MASK, 4096, 3);
		reloc_entry = (uint32_t __iomem *)
			(reloc_page + (reloc->offset & ~PAGE_MASK));
		iowrite32(reloc->delta, reloc_entry);
		FreeKernelSpace(reloc_page);
	}

	/* and update the user's relocation entry */
	reloc->presumed_offset = target_offset;

	return 0;
}
 
static int
i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
				    struct eb_objects *eb)
{
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(64)];
	struct drm_i915_gem_relocation_entry __user *user_relocs;
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int remain, ret;

	user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;

	remain = entry->relocation_count;
	while (remain) {
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
		int count = remain;
		if (count > ARRAY_SIZE(stack_reloc))
			count = ARRAY_SIZE(stack_reloc);
		remain -= count;

		memcpy(r, user_relocs, count*sizeof(r[0]));

		do {
			u64 offset = r->presumed_offset;

			ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
			if (ret)
				return ret;

			memcpy(&user_relocs->presumed_offset,
			       &r->presumed_offset,
			       sizeof(r->presumed_offset));

			user_relocs++;
			r++;
		} while (--count);
	}

	return 0;
#undef N_RELOC
}

static int
i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
					 struct eb_objects *eb,
					 struct drm_i915_gem_relocation_entry *relocs)
{
	const struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	int i, ret;

	for (i = 0; i < entry->relocation_count; i++) {
		ret = i915_gem_execbuffer_relocate_entry(obj, eb, &relocs[i]);
		if (ret)
			return ret;
	}

	return 0;
}

static int
i915_gem_execbuffer_relocate(struct eb_objects *eb)
{
	struct drm_i915_gem_object *obj;
	int ret = 0;

	/* This is the fast path and we cannot handle a pagefault whilst
	 * holding the struct mutex lest the user pass in the relocations
	 * contained within a mmaped bo. For in such a case, the page
	 * fault handler would call i915_gem_fault() and we would try to
	 * acquire the struct mutex again. Obviously this is bad and so
	 * lockdep complains vehemently.
	 */
//   pagefault_disable();
	list_for_each_entry(obj, &eb->objects, exec_list) {
		ret = i915_gem_execbuffer_relocate_object(obj, eb);
		if (ret)
			break;
	}
//   pagefault_enable();

	return ret;
}
 
#define  __EXEC_OBJECT_HAS_PIN (1<<31)
#define  __EXEC_OBJECT_HAS_FENCE (1<<30)

static int
need_reloc_mappable(struct drm_i915_gem_object *obj)
{
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	return entry->relocation_count && !use_cpu_reloc(obj);
}

static int
i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
				   struct intel_ring_buffer *ring,
				   bool *need_reloc)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	bool need_fence, need_mappable;
	int ret;

//    ENTER();

	need_fence =
		has_fenced_gpu_access &&
		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
		obj->tiling_mode != I915_TILING_NONE;
	need_mappable = need_fence || need_reloc_mappable(obj);

	ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false);
	if (ret)
	{
		FAIL();
		return ret;
	};

	entry->flags |= __EXEC_OBJECT_HAS_PIN;

	if (has_fenced_gpu_access) {
		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
			ret = i915_gem_object_get_fence(obj);
			if (ret)
			{
				FAIL();
				return ret;
			};

			if (i915_gem_object_pin_fence(obj))
				entry->flags |= __EXEC_OBJECT_HAS_FENCE;

			obj->pending_fenced_gpu_access = true;
		}
	}

	/* Ensure ppgtt mapping exists if needed */
	if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
		i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
				       obj, obj->cache_level);

		obj->has_aliasing_ppgtt_mapping = 1;
	}

	if (entry->offset != obj->gtt_offset) {
		entry->offset = obj->gtt_offset;
		*need_reloc = true;
	}

	if (entry->flags & EXEC_OBJECT_WRITE) {
		obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
	}

	if (entry->flags & EXEC_OBJECT_NEEDS_GTT &&
	    !obj->has_global_gtt_mapping)
		i915_gem_gtt_bind_object(obj, obj->cache_level);

	return 0;
}

static void
i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
{
	struct drm_i915_gem_exec_object2 *entry;

	if (!obj->gtt_space)
		return;

	entry = obj->exec_entry;

	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
		i915_gem_object_unpin_fence(obj);

	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
		i915_gem_object_unpin(obj);

	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
}
 
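/*
 * Reserve GTT space for every object in the request. Objects that need a
 * fence or a mappable-aperture relocation are moved to the front of the
 * list, then everything is pinned in up to two passes (re-pin what already
 * fits, bind the rest). On -ENOSPC the upstream driver evicts and retries;
 * the eviction call is currently stubbed out in this port.
 */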
static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
			    struct list_head *objects,
			    bool *need_relocs)
{
	struct drm_i915_gem_object *obj;
	struct list_head ordered_objects;
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	int retry;

//    ENTER();

	INIT_LIST_HEAD(&ordered_objects);
	while (!list_empty(objects)) {
		struct drm_i915_gem_exec_object2 *entry;
		bool need_fence, need_mappable;

		obj = list_first_entry(objects,
				       struct drm_i915_gem_object,
				       exec_list);
		entry = obj->exec_entry;

		need_fence =
			has_fenced_gpu_access &&
			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
			obj->tiling_mode != I915_TILING_NONE;
		need_mappable = need_fence || need_reloc_mappable(obj);

		if (need_mappable)
			list_move(&obj->exec_list, &ordered_objects);
		else
			list_move_tail(&obj->exec_list, &ordered_objects);

		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
		obj->base.pending_write_domain = 0;
		obj->pending_fenced_gpu_access = false;
	}
	list_splice(&ordered_objects, objects);

	/* Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
	 * This avoids unnecessary unbinding of later objects in order to make
	 * room for the earlier objects *unless* we need to defragment.
	 */
	retry = 0;
	do {
		int ret = 0;

		/* Unbind any ill-fitting objects or pin. */
		list_for_each_entry(obj, objects, exec_list) {
			struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
			bool need_fence, need_mappable;

			if (!obj->gtt_space)
				continue;

			need_fence =
				has_fenced_gpu_access &&
				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
				obj->tiling_mode != I915_TILING_NONE;
			need_mappable = need_fence || need_reloc_mappable(obj);

			if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) ||
			    (need_mappable && !obj->map_and_fenceable))
				ret = i915_gem_object_unbind(obj);
			else
				ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
			if (ret)
				goto err;
		}

		/* Bind fresh objects */
		list_for_each_entry(obj, objects, exec_list) {
			if (obj->gtt_space)
				continue;

			ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
			if (ret)
				goto err;
		}

err:		/* Decrement pin count for bound objects */
		list_for_each_entry(obj, objects, exec_list)
			i915_gem_execbuffer_unreserve_object(obj);

		if (ret != -ENOSPC || retry++)
		{
//            LEAVE();
			return ret;
		};

//       ret = i915_gem_evict_everything(ring->dev);
		if (ret)
			return ret;
	} while (1);
}
 
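/*
 * Slow path taken when the fast relocation pass fails with -EFAULT: drop
 * every object reference, release struct_mutex, copy all relocation arrays
 * from the caller into one kernel buffer (invalidating the caller's
 * presumed offsets so a later pass cannot trust them), then re-acquire the
 * lock, look the objects up again and replay the relocations from the copy.
 */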
static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
				  struct drm_i915_gem_execbuffer2 *args,
				  struct drm_file *file,
				  struct intel_ring_buffer *ring,
				  struct eb_objects *eb,
				  struct drm_i915_gem_exec_object2 *exec)
{
	struct drm_i915_gem_relocation_entry *reloc;
	struct drm_i915_gem_object *obj;
	bool need_relocs;
	int *reloc_offset;
	int i, total, ret;
	int count = args->buffer_count;

	/* We may process another execbuffer during the unlock... */
	while (!list_empty(&eb->objects)) {
		obj = list_first_entry(&eb->objects,
				       struct drm_i915_gem_object,
				       exec_list);
		list_del_init(&obj->exec_list);
		drm_gem_object_unreference(&obj->base);
	}

	mutex_unlock(&dev->struct_mutex);

	total = 0;
	for (i = 0; i < count; i++)
		total += exec[i].relocation_count;

	reloc_offset = malloc(count * sizeof(*reloc_offset));
	reloc = malloc(total * sizeof(*reloc));
	if (reloc == NULL || reloc_offset == NULL) {
		kfree(reloc);
		kfree(reloc_offset);
		mutex_lock(&dev->struct_mutex);
		return -ENOMEM;
	}

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry __user *user_relocs;
		u64 invalid_offset = (u64)-1;
		int j;

		user_relocs = (void __user *)(uintptr_t)exec[i].relocs_ptr;

		if (copy_from_user(reloc+total, user_relocs,
				   exec[i].relocation_count * sizeof(*reloc))) {
			ret = -EFAULT;
			mutex_lock(&dev->struct_mutex);
			goto err;
		}

		/* As we do not update the known relocation offsets after
		 * relocating (due to the complexities in lock handling),
		 * we need to mark them as invalid now so that we force the
		 * relocation processing next time. Just in case the target
		 * object is evicted and then rebound into its old
		 * presumed_offset before the next execbuffer - if that
		 * happened we would make the mistake of assuming that the
		 * relocations were valid.
		 */
		for (j = 0; j < exec[i].relocation_count; j++) {
			if (copy_to_user(&user_relocs[j].presumed_offset,
					 &invalid_offset,
					 sizeof(invalid_offset))) {
				ret = -EFAULT;
				mutex_lock(&dev->struct_mutex);
				goto err;
			}
		}

		reloc_offset[i] = total;
		total += exec[i].relocation_count;
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		mutex_lock(&dev->struct_mutex);
		goto err;
	}

	/* reacquire the objects */
	eb_reset(eb);
	ret = eb_lookup_objects(eb, exec, args, file);
	if (ret)
		goto err;

	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
	ret = i915_gem_execbuffer_reserve(ring, &eb->objects, &need_relocs);
	if (ret)
		goto err;

	list_for_each_entry(obj, &eb->objects, exec_list) {
		int offset = obj->exec_entry - exec;
		ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
							       reloc + reloc_offset[offset]);
		if (ret)
			goto err;
	}

	/* Leave the user relocations as are, this is the painfully slow path,
	 * and we want to avoid the complication of dropping the lock whilst
	 * having buffers reserved in the aperture and so causing spurious
	 * ENOSPC for random operations.
	 */

err:
	kfree(reloc);
	kfree(reloc_offset);
	return ret;
}
 
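/*
 * Flush outstanding CPU writes (clflush) and GTT writes (sfence) for every
 * object in the request, then invalidate the ring's caches so the batch
 * starts with a clean view of memory.
 */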
static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
				struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	uint32_t flush_domains = 0;
	int ret;

	list_for_each_entry(obj, objects, exec_list) {
		ret = i915_gem_object_sync(obj, ring);
		if (ret)
			return ret;

		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
			i915_gem_clflush_object(obj);

		flush_domains |= obj->base.write_domain;
	}

	if (flush_domains & I915_GEM_DOMAIN_CPU)
		i915_gem_chipset_flush(ring->dev);

	if (flush_domains & I915_GEM_DOMAIN_GTT)
		wmb();

	/* Unconditionally invalidate gpu caches and ensure that we do flush
	 * any residual writes from the previous batch.
	 */
	return intel_ring_invalidate_all_caches(ring);
}

static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
{
	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
		return false;

	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
}
 
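/*
 * Reject unknown per-object flags and guard the summed relocation count
 * against integer overflow before any allocation is sized from it. The
 * access_ok()/fault_in_* checks performed upstream are commented out in
 * this port, which copies user memory with plain memcpy().
 */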
static int
validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
		   int count)
{
	int i;
	int relocs_total = 0;
	int relocs_max = INT_MAX / sizeof(struct drm_i915_gem_relocation_entry);

	for (i = 0; i < count; i++) {
		char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
		int length; /* limited by fault_in_pages_readable() */

		if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS)
			return -EINVAL;

		/* First check for malicious input causing overflow in
		 * the worst case where we need to allocate the entire
		 * relocation tree as a single array.
		 */
		if (exec[i].relocation_count > relocs_max - relocs_total)
			return -EINVAL;
		relocs_total += exec[i].relocation_count;

		length = exec[i].relocation_count *
			sizeof(struct drm_i915_gem_relocation_entry);
		/*
		 * We must check that the entire relocation array is safe
		 * to read, but since we may need to update the presumed
		 * offsets during execution, check for full write access.
		 */
//       if (!access_ok(VERIFY_WRITE, ptr, length))
//           return -EFAULT;

//       if (fault_in_multipages_readable(ptr, length))
//           return -EFAULT;
	}

	return 0;
}

static void
i915_gem_execbuffer_move_to_active(struct list_head *objects,
				   struct intel_ring_buffer *ring)
{
	struct drm_i915_gem_object *obj;

	list_for_each_entry(obj, objects, exec_list) {
		u32 old_read = obj->base.read_domains;
		u32 old_write = obj->base.write_domain;

		obj->base.write_domain = obj->base.pending_write_domain;
		if (obj->base.write_domain == 0)
			obj->base.pending_read_domains |= obj->base.read_domains;
		obj->base.read_domains = obj->base.pending_read_domains;
		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;

		i915_gem_object_move_to_active(obj, ring);
		if (obj->base.write_domain) {
			obj->dirty = 1;
			obj->last_write_seqno = intel_ring_get_seqno(ring);
			if (obj->pin_count) /* check for potential scanout */
				intel_mark_fb_busy(obj);
		}

		trace_i915_gem_object_change_domain(obj, old_read, old_write);
	}
}

static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
				    struct drm_file *file,
				    struct intel_ring_buffer *ring)
{
	/* Unconditionally force add_request to emit a full flush. */
	ring->gpu_caches_dirty = true;

	/* Add a breadcrumb for the completion of the batch buffer */
	(void)i915_add_request(ring, file, NULL);
}
 
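/*
 * When userspace sets I915_EXEC_GEN7_SOL_RESET, zero the four streamout
 * write-offset registers with MI_LOAD_REGISTER_IMM. Only meaningful on the
 * gen7 render ring; other rings return immediately.
 */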
static int
i915_reset_gen7_sol_offsets(struct drm_device *dev,
			    struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int ret, i;

	if (!IS_GEN7(dev) || ring != &dev_priv->ring[RCS])
		return 0;

	ret = intel_ring_begin(ring, 4 * 3);
	if (ret)
		return ret;

	for (i = 0; i < 4; i++) {
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
		intel_ring_emit(ring, 0);
	}

	intel_ring_advance(ring);

	return 0;
}
 
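/*
 * Common execbuffer path: validate flags and the exec list, pick the target
 * ring, look up all objects, reserve and relocate them (falling back to the
 * slow path on -EFAULT), flush caches, switch context, update the constants
 * mode if requested, then dispatch the batch and mark every object active.
 */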
static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
		       struct drm_i915_gem_exec_object2 *exec)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	struct eb_objects *eb;
	struct drm_i915_gem_object *batch_obj;
	struct drm_clip_rect *cliprects = NULL;
	struct intel_ring_buffer *ring;
	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
	u32 exec_start, exec_len;
	u32 mask, flags;
	int ret, mode, i;
	bool need_relocs;

	if (!i915_gem_check_execbuffer(args))
	{
		FAIL();
		return -EINVAL;
	}

	ret = validate_exec_list(exec, args->buffer_count);
	if (ret)
	{
		FAIL();
		return ret;
	};

	flags = 0;
	if (args->flags & I915_EXEC_SECURE) {

		flags |= I915_DISPATCH_SECURE;
	}
	if (args->flags & I915_EXEC_IS_PINNED)
		flags |= I915_DISPATCH_PINNED;

	switch (args->flags & I915_EXEC_RING_MASK) {
	case I915_EXEC_DEFAULT:
	case I915_EXEC_RENDER:
		ring = &dev_priv->ring[RCS];
		break;
	case I915_EXEC_BSD:
		ring = &dev_priv->ring[VCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			FAIL();
			return -EPERM;
		}
		break;
	case I915_EXEC_BLT:
		ring = &dev_priv->ring[BCS];
		if (ctx_id != 0) {
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			return -EPERM;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		return -EINVAL;
	}
	if (!intel_ring_initialized(ring)) {
		DRM_DEBUG("execbuf with invalid ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		return -EINVAL;
	}

	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
	mask = I915_EXEC_CONSTANTS_MASK;
	switch (mode) {
	case I915_EXEC_CONSTANTS_REL_GENERAL:
	case I915_EXEC_CONSTANTS_ABSOLUTE:
	case I915_EXEC_CONSTANTS_REL_SURFACE:
		if (ring == &dev_priv->ring[RCS] &&
		    mode != dev_priv->relative_constants_mode) {
			if (INTEL_INFO(dev)->gen < 4)
				return -EINVAL;

			if (INTEL_INFO(dev)->gen > 5 &&
			    mode == I915_EXEC_CONSTANTS_REL_SURFACE)
				return -EINVAL;

			/* The HW changed the meaning on this bit on gen6 */
			if (INTEL_INFO(dev)->gen >= 6)
				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
		return -EINVAL;
	}

	if (args->buffer_count < 1) {
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
		return -EINVAL;
	}

	if (args->num_cliprects != 0) {
		if (ring != &dev_priv->ring[RCS]) {
			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
			return -EINVAL;
		}

		if (INTEL_INFO(dev)->gen >= 5) {
			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
			return -EINVAL;
		}

		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
			DRM_DEBUG("execbuf with %u cliprects\n",
				  args->num_cliprects);
			return -EINVAL;
		}

		cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects),
				    GFP_KERNEL);
		if (cliprects == NULL) {
			ret = -ENOMEM;
			goto pre_mutex_err;
		}

		if (copy_from_user(cliprects,
				   to_user_ptr(args->cliprects_ptr),
				   sizeof(*cliprects)*args->num_cliprects)) {
			ret = -EFAULT;
			goto pre_mutex_err;
		}
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto pre_mutex_err;

	if (dev_priv->mm.suspended) {
		mutex_unlock(&dev->struct_mutex);
		ret = -EBUSY;
		goto pre_mutex_err;
	}

	eb = eb_create(args);
	if (eb == NULL) {
		mutex_unlock(&dev->struct_mutex);
		ret = -ENOMEM;
		goto pre_mutex_err;
	}

	/* Look up object handles */
	ret = eb_lookup_objects(eb, exec, args, file);
	if (ret)
		goto err;

	/* take note of the batch buffer before we might reorder the lists */
	batch_obj = list_entry(eb->objects.prev,
			       struct drm_i915_gem_object,
			       exec_list);

	/* Move the objects en-masse into the GTT, evicting if necessary. */
	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
	ret = i915_gem_execbuffer_reserve(ring, &eb->objects, &need_relocs);
	if (ret)
		goto err;

	/* The objects are in their final locations, apply the relocations. */
	if (need_relocs)
		ret = i915_gem_execbuffer_relocate(eb);
	if (ret) {
		if (ret == -EFAULT) {
			ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
								eb, exec);
			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
		}
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	if (batch_obj->base.pending_write_domain) {
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
		ret = -EINVAL;
		goto err;
	}
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
	 * batch" bit. Hence we need to pin secure batches into the global gtt.
	 * hsw should have this fixed, but let's be paranoid and do it
	 * unconditionally for now. */
	if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
		i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);

	ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->objects);
	if (ret)
		goto err;

	ret = i915_switch_context(ring, file, ctx_id);
	if (ret)
		goto err;

	if (ring == &dev_priv->ring[RCS] &&
	    mode != dev_priv->relative_constants_mode) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
			goto err;

		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, INSTPM);
		intel_ring_emit(ring, mask << 16 | mode);
		intel_ring_advance(ring);

		dev_priv->relative_constants_mode = mode;
	}

	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
		ret = i915_reset_gen7_sol_offsets(dev, ring);
		if (ret)
			goto err;
	}

	exec_start = batch_obj->gtt_offset + args->batch_start_offset;
	exec_len = args->batch_len;
	if (cliprects) {

	} else {
		ret = ring->dispatch_execbuffer(ring,
						exec_start, exec_len,
						flags);
		if (ret)
			goto err;
	}

	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);

	i915_gem_execbuffer_move_to_active(&eb->objects, ring);
	i915_gem_execbuffer_retire_commands(dev, file, ring);

err:
	eb_destroy(eb);

	mutex_unlock(&dev->struct_mutex);

pre_mutex_err:
	kfree(cliprects);
	return ret;
}
 
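/*
 * DRM_IOCTL_I915_GEM_EXECBUFFER2 entry point: copy the exec list in, run the
 * common path above, and write the updated offsets back so userspace can
 * cache them as presumed offsets. The legacy execbuffer (v1) wrapper from
 * the upstream driver is not present in this file.
 */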
int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

//    ENTER();

	if (args->buffer_count < 1 ||
	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
		FAIL();
		return -EINVAL;
	}

	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
			     GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
	if (exec2_list == NULL) {
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
			  args->buffer_count);
		FAIL();
		return -ENOMEM;
	}
	ret = copy_from_user(exec2_list,
			     (struct drm_i915_relocation_entry __user *)
			     (uintptr_t) args->buffers_ptr,
			     sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
		DRM_DEBUG("copy %d exec entries failed %d\n",
			  args->buffer_count, ret);
		kfree(exec2_list);
		FAIL();
		return -EFAULT;
	}

	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
				   exec2_list,
				   sizeof(*exec2_list) * args->buffer_count);
		if (ret) {
			ret = -EFAULT;
			DRM_DEBUG("failed to copy %d exec entries "
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	kfree(exec2_list);

//    LEAVE();

	return ret;
}