Subversion Repositories Kolibri OS

Rev Author Line No. Line
2326 Serge 1
/*
6084 serge 2
 * Copyright © 2008-2015 Intel Corporation
2326 Serge 3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 * IN THE SOFTWARE.
22
 *
23
 * Authors:
24
 *    Eric Anholt <eric@anholt.net>
25
 *
26
 */
27
 
3031 serge 28
#include 
4280 Serge 29
#include 
3031 serge 30
#include 
2326 Serge 31
#include "i915_drv.h"
6084 serge 32
#include "i915_vgpu.h"
2351 Serge 33
#include "i915_trace.h"
2326 Serge 34
#include "intel_drv.h"
3260 Serge 35
#include 
2330 Serge 36
#include 
2326 Serge 37
//#include 
3746 Serge 38
#include 
2326 Serge 39
#include 
6084 serge 40
#define RQ_BUG_ON(expr)
2326 Serge 41
 
2344 Serge 42
extern int x86_clflush_size;
2332 Serge 43
 
3263 Serge 44
#define PROT_READ       0x1             /* page can be read */
45
#define PROT_WRITE      0x2             /* page can be written */
46
#define MAP_SHARED      0x01            /* Share changes */
47
 
2344 Serge 48
 
5060 serge 49
 
3266 Serge 50
struct drm_i915_gem_object *get_fb_obj();
51
 
3263 Serge 52
unsigned long vm_mmap(struct file *file, unsigned long addr,
53
         unsigned long len, unsigned long prot,
54
         unsigned long flag, unsigned long offset);
55
 
2344 Serge 56
 
2332 Serge 57
#define MAX_ERRNO       4095
58
 
59
#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)
60
 
61
 
62
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
6084 serge 63
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
5060 serge 64
static void
6084 serge 65
i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
66
static void
67
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
2326 Serge 68
 
4104 Serge 69
static bool cpu_cache_is_coherent(struct drm_device *dev,
70
				  enum i915_cache_level level)
71
{
72
	return HAS_LLC(dev) || level != I915_CACHE_NONE;
73
}
74
 
75
static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
76
{
77
	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
78
		return true;
79
 
80
	return obj->pin_display;
81
}
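/* Informational note (added example, not from the original source): on an
 * LLC platform HAS_LLC(dev) is true, so cpu_cache_is_coherent() reports
 * coherency for every cache level and cpu_write_needs_clflush() only
 * returns true for scanout objects (obj->pin_display). On a non-LLC part
 * with I915_CACHE_NONE both helpers report that an explicit clflush is
 * needed before the GPU or the display engine samples CPU-written data.
 */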
82
 
2332 Serge 83
/* some bookkeeping */
84
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
85
				  size_t size)
86
{
4104 Serge 87
	spin_lock(&dev_priv->mm.object_stat_lock);
2332 Serge 88
	dev_priv->mm.object_count++;
89
	dev_priv->mm.object_memory += size;
4104 Serge 90
	spin_unlock(&dev_priv->mm.object_stat_lock);
2332 Serge 91
}
92
 
93
static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
94
				     size_t size)
95
{
4104 Serge 96
	spin_lock(&dev_priv->mm.object_stat_lock);
2332 Serge 97
	dev_priv->mm.object_count--;
98
	dev_priv->mm.object_memory -= size;
4104 Serge 99
	spin_unlock(&dev_priv->mm.object_stat_lock);
2332 Serge 100
}
101
 
102
static int
3480 Serge 103
i915_gem_wait_for_error(struct i915_gpu_error *error)
2332 Serge 104
{
105
	int ret;
106
 
3480 Serge 107
#define EXIT_COND (!i915_reset_in_progress(error))
108
	if (EXIT_COND)
2332 Serge 109
		return 0;
3255 Serge 110
#if 0
3031 serge 111
	/*
112
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
113
	 * userspace. If it takes that long something really bad is going on and
114
	 * we should simply try to bail out and fail as gracefully as possible.
115
	 */
3480 Serge 116
	ret = wait_event_interruptible_timeout(error->reset_queue,
117
					       EXIT_COND,
118
					       10*HZ);
3031 serge 119
	if (ret == 0) {
120
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
121
		return -EIO;
122
	} else if (ret < 0) {
2332 Serge 123
		return ret;
3031 serge 124
	}
2332 Serge 125
 
3255 Serge 126
#endif
3480 Serge 127
#undef EXIT_COND
3255 Serge 128
 
2332 Serge 129
	return 0;
130
}
131
 
132
int i915_mutex_lock_interruptible(struct drm_device *dev)
133
{
3480 Serge 134
	struct drm_i915_private *dev_priv = dev->dev_private;
2332 Serge 135
	int ret;
136
 
3480 Serge 137
	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
2332 Serge 138
	if (ret)
139
		return ret;
140
 
3480 Serge 141
	ret = mutex_lock_interruptible(&dev->struct_mutex);
142
	if (ret)
143
		return ret;
2332 Serge 144
 
145
	WARN_ON(i915_verify_lists(dev));
146
	return 0;
147
}
148
 
149
int
150
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
151
			    struct drm_file *file)
152
{
153
	struct drm_i915_private *dev_priv = dev->dev_private;
154
	struct drm_i915_gem_get_aperture *args = data;
6084 serge 155
	struct i915_gtt *ggtt = &dev_priv->gtt;
156
	struct i915_vma *vma;
2332 Serge 157
	size_t pinned;
158
 
159
	pinned = 0;
160
	mutex_lock(&dev->struct_mutex);
6084 serge 161
	list_for_each_entry(vma, &ggtt->base.active_list, mm_list)
162
		if (vma->pin_count)
163
			pinned += vma->node.size;
164
	list_for_each_entry(vma, &ggtt->base.inactive_list, mm_list)
165
		if (vma->pin_count)
166
			pinned += vma->node.size;
2332 Serge 167
	mutex_unlock(&dev->struct_mutex);
168
 
4104 Serge 169
	args->aper_size = dev_priv->gtt.base.total;
2342 Serge 170
	args->aper_available_size = args->aper_size - pinned;
2332 Serge 171
 
172
	return 0;
173
}
174
 
3480 Serge 175
void *i915_gem_object_alloc(struct drm_device *dev)
176
{
177
	struct drm_i915_private *dev_priv = dev->dev_private;
5367 serge 178
    return kzalloc(sizeof(struct drm_i915_gem_object), 0);
3480 Serge 179
}
180
 
181
void i915_gem_object_free(struct drm_i915_gem_object *obj)
182
{
183
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
184
	kfree(obj);
185
}
186
 
3031 serge 187
static int
188
i915_gem_create(struct drm_file *file,
2332 Serge 189
		struct drm_device *dev,
190
		uint64_t size,
191
		uint32_t *handle_p)
192
{
193
	struct drm_i915_gem_object *obj;
194
	int ret;
195
	u32 handle;
196
 
197
	size = roundup(size, PAGE_SIZE);
2342 Serge 198
	if (size == 0)
199
		return -EINVAL;
2332 Serge 200
 
201
	/* Allocate the new object */
202
	obj = i915_gem_alloc_object(dev, size);
203
	if (obj == NULL)
204
		return -ENOMEM;
205
 
206
	ret = drm_gem_handle_create(file, &obj->base, &handle);
4104 Serge 207
	/* drop reference from allocate - handle holds it now */
208
	drm_gem_object_unreference_unlocked(&obj->base);
209
	if (ret)
2332 Serge 210
		return ret;
211
 
212
	*handle_p = handle;
213
	return 0;
214
}
215
 
216
int
217
i915_gem_dumb_create(struct drm_file *file,
218
		     struct drm_device *dev,
219
		     struct drm_mode_create_dumb *args)
220
{
221
	/* have to work out size/pitch and return them */
4560 Serge 222
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
2332 Serge 223
	args->size = args->pitch * args->height;
224
	return i915_gem_create(file, dev,
225
			       args->size, &args->handle);
226
}
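/* Worked example (added for illustration, not part of the original file):
 * for a 1024x768 dumb buffer with bpp = 32 the calculation above yields
 *
 *	pitch = ALIGN(1024 * DIV_ROUND_UP(32, 8), 64) = ALIGN(4096, 64) = 4096
 *	size  = 4096 * 768 = 3145728 bytes (already a whole number of pages)
 *
 * and i915_gem_create() rounds any non-page-multiple size up via
 * roundup(size, PAGE_SIZE).
 */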
227
 
2326 Serge 228
/**
2332 Serge 229
 * Creates a new mm object and returns a handle to it.
230
 */
231
int
232
i915_gem_create_ioctl(struct drm_device *dev, void *data,
233
		      struct drm_file *file)
234
{
235
	struct drm_i915_gem_create *args = data;
3031 serge 236
 
2332 Serge 237
	return i915_gem_create(file, dev,
238
			       args->size, &args->handle);
239
}
240
 
241
 
3260 Serge 242
#if 0
2332 Serge 243
 
3031 serge 244
static inline int
245
__copy_to_user_swizzled(char __user *cpu_vaddr,
246
			const char *gpu_vaddr, int gpu_offset,
6084 serge 247
			int length)
2332 Serge 248
{
3031 serge 249
	int ret, cpu_offset = 0;
2332 Serge 250
 
3031 serge 251
	while (length > 0) {
252
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
253
		int this_length = min(cacheline_end - gpu_offset, length);
254
		int swizzled_gpu_offset = gpu_offset ^ 64;
2332 Serge 255
 
3031 serge 256
		ret = __copy_to_user(cpu_vaddr + cpu_offset,
257
				     gpu_vaddr + swizzled_gpu_offset,
258
				     this_length);
259
		if (ret)
260
			return ret + length;
2332 Serge 261
 
3031 serge 262
		cpu_offset += this_length;
263
		gpu_offset += this_length;
264
		length -= this_length;
265
	}
266
 
267
	return 0;
2332 Serge 268
}
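/* Worked example (added for illustration): the loop above copies in
 * cacheline-sized chunks and flips bit 6 of the GPU offset, so reading
 * 96 bytes starting at gpu_offset 32 becomes two copies: 32 bytes taken
 * from offset 96 (32 ^ 64) followed by 64 bytes taken from offset 0
 * (64 ^ 64).
 */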
269
 
3031 serge 270
static inline int
271
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
272
			  const char __user *cpu_vaddr,
273
			  int length)
2332 Serge 274
{
3031 serge 275
	int ret, cpu_offset = 0;
2332 Serge 276
 
277
	while (length > 0) {
278
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
279
		int this_length = min(cacheline_end - gpu_offset, length);
280
		int swizzled_gpu_offset = gpu_offset ^ 64;
281
 
3031 serge 282
		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
6084 serge 283
				       cpu_vaddr + cpu_offset,
284
				       this_length);
3031 serge 285
		if (ret)
286
			return ret + length;
287
 
2332 Serge 288
		cpu_offset += this_length;
289
		gpu_offset += this_length;
290
		length -= this_length;
291
	}
292
 
3031 serge 293
	return 0;
2332 Serge 294
}
295
 
3031 serge 296
/* Per-page copy function for the shmem pread fastpath.
297
 * Flushes invalid cachelines before reading the target if
298
 * needs_clflush is set. */
2332 Serge 299
static int
3031 serge 300
shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
301
		 char __user *user_data,
302
		 bool page_do_bit17_swizzling, bool needs_clflush)
303
{
6084 serge 304
	char *vaddr;
305
	int ret;
3031 serge 306
 
307
	if (unlikely(page_do_bit17_swizzling))
308
		return -EINVAL;
309
 
6084 serge 310
	vaddr = kmap_atomic(page);
3031 serge 311
	if (needs_clflush)
312
		drm_clflush_virt_range(vaddr + shmem_page_offset,
313
				       page_length);
6084 serge 314
	ret = __copy_to_user_inatomic(user_data,
3031 serge 315
				      vaddr + shmem_page_offset,
6084 serge 316
				      page_length);
317
	kunmap_atomic(vaddr);
3031 serge 318
 
319
	return ret ? -EFAULT : 0;
320
}
321
 
322
static void
323
shmem_clflush_swizzled_range(char *addr, unsigned long length,
324
			     bool swizzled)
325
{
326
	if (unlikely(swizzled)) {
327
		unsigned long start = (unsigned long) addr;
328
		unsigned long end = (unsigned long) addr + length;
329
 
330
		/* For swizzling simply ensure that we always flush both
331
		 * channels. Lame, but simple and it works. Swizzled
332
		 * pwrite/pread is far from a hotpath - current userspace
333
		 * doesn't use it at all. */
334
		start = round_down(start, 128);
335
		end = round_up(end, 128);
336
 
337
		drm_clflush_virt_range((void *)start, end - start);
338
	} else {
339
		drm_clflush_virt_range(addr, length);
340
	}
341
 
342
}
343
 
344
/* Only difference to the fast-path function is that this can handle bit17
345
 * and uses non-atomic copy and kmap functions. */
346
static int
347
shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
348
		 char __user *user_data,
349
		 bool page_do_bit17_swizzling, bool needs_clflush)
350
{
351
	char *vaddr;
352
	int ret;
353
 
354
	vaddr = kmap(page);
355
	if (needs_clflush)
356
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
357
					     page_length,
358
					     page_do_bit17_swizzling);
359
 
360
	if (page_do_bit17_swizzling)
361
		ret = __copy_to_user_swizzled(user_data,
362
					      vaddr, shmem_page_offset,
363
					      page_length);
364
	else
365
		ret = __copy_to_user(user_data,
366
				     vaddr + shmem_page_offset,
367
				     page_length);
368
	kunmap(page);
369
 
370
	return ret ? - EFAULT : 0;
371
}
372
 
373
static int
374
i915_gem_shmem_pread(struct drm_device *dev,
6084 serge 375
		     struct drm_i915_gem_object *obj,
376
		     struct drm_i915_gem_pread *args,
377
		     struct drm_file *file)
2332 Serge 378
{
3031 serge 379
	char __user *user_data;
2332 Serge 380
	ssize_t remain;
381
	loff_t offset;
3031 serge 382
	int shmem_page_offset, page_length, ret = 0;
383
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
384
	int prefaulted = 0;
385
	int needs_clflush = 0;
3746 Serge 386
	struct sg_page_iter sg_iter;
2332 Serge 387
 
3746 Serge 388
	user_data = to_user_ptr(args->data_ptr);
2332 Serge 389
	remain = args->size;
390
 
3031 serge 391
	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
392
 
5060 serge 393
	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
3031 serge 394
	if (ret)
395
		return ret;
396
 
2332 Serge 397
	offset = args->offset;
398
 
3746 Serge 399
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
400
			 offset >> PAGE_SHIFT) {
401
		struct page *page = sg_page_iter_page(&sg_iter);
2332 Serge 402
 
3031 serge 403
		if (remain <= 0)
404
			break;
405
 
2332 Serge 406
		/* Operation in this page
407
		 *
3031 serge 408
		 * shmem_page_offset = offset within page in shmem file
2332 Serge 409
		 * page_length = bytes to copy for this page
410
		 */
3031 serge 411
		shmem_page_offset = offset_in_page(offset);
2332 Serge 412
		page_length = remain;
3031 serge 413
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
414
			page_length = PAGE_SIZE - shmem_page_offset;
2332 Serge 415
 
3031 serge 416
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
417
			(page_to_phys(page) & (1 << 17)) != 0;
2332 Serge 418
 
3031 serge 419
		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
420
				       user_data, page_do_bit17_swizzling,
421
				       needs_clflush);
422
		if (ret == 0)
423
			goto next_page;
2332 Serge 424
 
3031 serge 425
		mutex_unlock(&dev->struct_mutex);
426
 
5060 serge 427
		if (likely(!i915.prefault_disable) && !prefaulted) {
3031 serge 428
			ret = fault_in_multipages_writeable(user_data, remain);
429
			/* Userspace is tricking us, but we've already clobbered
430
			 * its pages with the prefault and promised to write the
431
			 * data up to the first fault. Hence ignore any errors
432
			 * and just continue. */
433
			(void)ret;
434
			prefaulted = 1;
435
		}
436
 
437
		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
438
				       user_data, page_do_bit17_swizzling,
439
				       needs_clflush);
440
 
441
		mutex_lock(&dev->struct_mutex);
442
 
2332 Serge 443
		if (ret)
3031 serge 444
			goto out;
2332 Serge 445
 
5060 serge 446
next_page:
2332 Serge 447
		remain -= page_length;
448
		user_data += page_length;
449
		offset += page_length;
450
	}
451
 
3031 serge 452
out:
453
	i915_gem_object_unpin_pages(obj);
454
 
455
	return ret;
2332 Serge 456
}
457
 
458
/**
3031 serge 459
 * Reads data from the object referenced by handle.
460
 *
461
 * On error, the contents of *data are undefined.
2332 Serge 462
 */
3031 serge 463
int
464
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
465
		     struct drm_file *file)
466
{
467
	struct drm_i915_gem_pread *args = data;
468
	struct drm_i915_gem_object *obj;
469
	int ret = 0;
470
 
471
	if (args->size == 0)
472
		return 0;
473
 
474
	if (!access_ok(VERIFY_WRITE,
3746 Serge 475
		       to_user_ptr(args->data_ptr),
3031 serge 476
		       args->size))
477
		return -EFAULT;
478
 
479
	ret = i915_mutex_lock_interruptible(dev);
480
	if (ret)
481
		return ret;
482
 
483
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
484
	if (&obj->base == NULL) {
485
		ret = -ENOENT;
486
		goto unlock;
487
	}
488
 
489
	/* Bounds check source.  */
490
	if (args->offset > obj->base.size ||
491
	    args->size > obj->base.size - args->offset) {
492
		ret = -EINVAL;
493
		goto out;
494
	}
495
 
496
	/* prime objects have no backing filp to GEM pread/pwrite
497
	 * pages from.
498
	 */
499
	if (!obj->base.filp) {
500
		ret = -EINVAL;
501
		goto out;
502
	}
503
 
504
	trace_i915_gem_object_pread(obj, args->offset, args->size);
505
 
506
	ret = i915_gem_shmem_pread(dev, obj, args, file);
507
 
508
out:
509
	drm_gem_object_unreference(&obj->base);
510
unlock:
511
	mutex_unlock(&dev->struct_mutex);
512
	return ret;
513
}
514
 
515
/* This is the fast write path which cannot handle
516
 * page faults in the source data
517
 */
518
 
519
static inline int
520
fast_user_write(struct io_mapping *mapping,
521
		loff_t page_base, int page_offset,
522
		char __user *user_data,
523
		int length)
524
{
525
	void __iomem *vaddr_atomic;
526
	void *vaddr;
527
	unsigned long unwritten;
528
 
529
	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
530
	/* We can use the cpu mem copy function because this is X86. */
531
	vaddr = (void __force*)vaddr_atomic + page_offset;
532
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
533
						      user_data, length);
534
	io_mapping_unmap_atomic(vaddr_atomic);
535
	return unwritten;
536
}
3260 Serge 537
#endif
3031 serge 538
 
3260 Serge 539
#define offset_in_page(p)       ((unsigned long)(p) & ~PAGE_MASK)
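/* Example (added for illustration): with 4 KiB pages PAGE_MASK is ~0xFFF,
 * so offset_in_page(0x12345) evaluates to 0x345 and the matching page base
 * is 0x12345 & PAGE_MASK = 0x12000; this is exactly the split used by the
 * GTT pwrite fast path below.
 */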
3031 serge 540
/**
541
 * This is the fast pwrite path, where we copy the data directly from the
542
 * user into the GTT, uncached.
543
 */
2332 Serge 544
static int
3031 serge 545
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
546
			 struct drm_i915_gem_object *obj,
547
			 struct drm_i915_gem_pwrite *args,
548
			 struct drm_file *file)
2332 Serge 549
{
5060 serge 550
	struct drm_i915_private *dev_priv = dev->dev_private;
2332 Serge 551
	ssize_t remain;
3031 serge 552
	loff_t offset, page_base;
553
	char __user *user_data;
554
	int page_offset, page_length, ret;
2332 Serge 555
 
5060 serge 556
	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
3031 serge 557
	if (ret)
558
		goto out;
559
 
560
	ret = i915_gem_object_set_to_gtt_domain(obj, true);
561
	if (ret)
562
		goto out_unpin;
563
 
564
	ret = i915_gem_object_put_fence(obj);
565
	if (ret)
566
		goto out_unpin;
567
 
4539 Serge 568
	user_data = to_user_ptr(args->data_ptr);
2332 Serge 569
	remain = args->size;
570
 
4104 Serge 571
	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
2332 Serge 572
 
6084 serge 573
	intel_fb_obj_invalidate(obj, ORIGIN_GTT);
574
 
3031 serge 575
	while (remain > 0) {
576
		/* Operation in this page
577
		 *
578
		 * page_base = page offset within aperture
579
		 * page_offset = offset within page
580
		 * page_length = bytes to copy for this page
581
		 */
582
		page_base = offset & PAGE_MASK;
583
		page_offset = offset_in_page(offset);
584
		page_length = remain;
585
		if ((page_offset + remain) > PAGE_SIZE)
586
			page_length = PAGE_SIZE - page_offset;
2332 Serge 587
 
4539 Serge 588
        MapPage(dev_priv->gtt.mappable, dev_priv->gtt.mappable_base+page_base, PG_SW);
3031 serge 589
 
5060 serge 590
        memcpy((char*)dev_priv->gtt.mappable+page_offset, user_data, page_length);
3260 Serge 591
 
3031 serge 592
		remain -= page_length;
593
		user_data += page_length;
594
		offset += page_length;
2332 Serge 595
	}
596
 
6084 serge 597
out_flush:
598
	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
3031 serge 599
out_unpin:
5060 serge 600
	i915_gem_object_ggtt_unpin(obj);
3031 serge 601
out:
6084 serge 602
	return ret;
3031 serge 603
}
604
 
605
/* Per-page copy function for the shmem pwrite fastpath.
606
 * Flushes invalid cachelines before writing to the target if
607
 * needs_clflush_before is set and flushes out any written cachelines after
608
 * writing if needs_clflush is set. */
609
static int
610
shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
611
		  char __user *user_data,
612
		  bool page_do_bit17_swizzling,
613
		  bool needs_clflush_before,
614
		  bool needs_clflush_after)
615
{
616
	char *vaddr;
5354 serge 617
	int ret = 0; /* the memcpy-based fast path below cannot fail */
3031 serge 618
 
619
	if (unlikely(page_do_bit17_swizzling))
620
		return -EINVAL;
621
 
5354 serge 622
	vaddr = kmap_atomic(page);
3031 serge 623
	if (needs_clflush_before)
624
		drm_clflush_virt_range(vaddr + shmem_page_offset,
625
				       page_length);
3260 Serge 626
	memcpy(vaddr + shmem_page_offset,
3031 serge 627
						user_data,
628
						page_length);
629
	if (needs_clflush_after)
630
		drm_clflush_virt_range(vaddr + shmem_page_offset,
631
				       page_length);
5354 serge 632
	kunmap_atomic(vaddr);
3031 serge 633
 
634
	return ret ? -EFAULT : 0;
635
}
3260 Serge 636
#if 0
3031 serge 637
 
638
/* Only difference to the fast-path function is that this can handle bit17
639
 * and uses non-atomic copy and kmap functions. */
640
static int
641
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
642
		  char __user *user_data,
643
		  bool page_do_bit17_swizzling,
644
		  bool needs_clflush_before,
645
		  bool needs_clflush_after)
646
{
647
	char *vaddr;
648
	int ret;
649
 
650
	vaddr = kmap(page);
651
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
652
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
653
					     page_length,
654
					     page_do_bit17_swizzling);
655
	if (page_do_bit17_swizzling)
656
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
657
						user_data,
658
						page_length);
659
	else
660
		ret = __copy_from_user(vaddr + shmem_page_offset,
661
				       user_data,
662
				       page_length);
663
	if (needs_clflush_after)
664
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
665
					     page_length,
666
					     page_do_bit17_swizzling);
667
	kunmap(page);
668
 
669
	return ret ? -EFAULT : 0;
670
}
3260 Serge 671
#endif
3031 serge 672
 
3260 Serge 673
 
3031 serge 674
static int
675
i915_gem_shmem_pwrite(struct drm_device *dev,
676
		      struct drm_i915_gem_object *obj,
677
		      struct drm_i915_gem_pwrite *args,
678
		      struct drm_file *file)
679
{
680
	ssize_t remain;
681
	loff_t offset;
682
	char __user *user_data;
683
	int shmem_page_offset, page_length, ret = 0;
684
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
685
	int hit_slowpath = 0;
686
	int needs_clflush_after = 0;
687
	int needs_clflush_before = 0;
3746 Serge 688
	struct sg_page_iter sg_iter;
3031 serge 689
 
3746 Serge 690
	user_data = to_user_ptr(args->data_ptr);
3031 serge 691
	remain = args->size;
692
 
693
	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
694
 
695
	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
696
		/* If we're not in the cpu write domain, set ourself into the gtt
697
		 * write domain and manually flush cachelines (if required). This
698
		 * optimizes for the case when the gpu will use the data
699
		 * right away and we therefore have to clflush anyway. */
4104 Serge 700
		needs_clflush_after = cpu_write_needs_clflush(obj);
4560 Serge 701
		ret = i915_gem_object_wait_rendering(obj, false);
6084 serge 702
		if (ret)
703
			return ret;
704
	}
4104 Serge 705
	/* Same trick applies to invalidate partially written cachelines read
706
	 * before writing. */
707
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
708
		needs_clflush_before =
709
			!cpu_cache_is_coherent(dev, obj->cache_level);
3031 serge 710
 
711
	ret = i915_gem_object_get_pages(obj);
2332 Serge 712
	if (ret)
3031 serge 713
		return ret;
2332 Serge 714
 
6084 serge 715
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
716
 
3031 serge 717
	i915_gem_object_pin_pages(obj);
2332 Serge 718
 
719
	offset = args->offset;
3031 serge 720
	obj->dirty = 1;
2332 Serge 721
 
3746 Serge 722
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
723
			 offset >> PAGE_SHIFT) {
724
		struct page *page = sg_page_iter_page(&sg_iter);
3031 serge 725
		int partial_cacheline_write;
2332 Serge 726
 
3031 serge 727
		if (remain <= 0)
728
			break;
729
 
2332 Serge 730
		/* Operation in this page
731
		 *
732
		 * shmem_page_offset = offset within page in shmem file
733
		 * page_length = bytes to copy for this page
734
		 */
735
		shmem_page_offset = offset_in_page(offset);
736
 
737
		page_length = remain;
738
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
739
			page_length = PAGE_SIZE - shmem_page_offset;
740
 
3031 serge 741
		/* If we don't overwrite a cacheline completely we need to be
742
		 * careful to have up-to-date data by first clflushing. Don't
743
		 * overcomplicate things and flush the entire patch. */
744
		partial_cacheline_write = needs_clflush_before &&
745
			((shmem_page_offset | page_length)
3260 Serge 746
				& (x86_clflush_size - 1));
2332 Serge 747
 
3031 serge 748
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
749
			(page_to_phys(page) & (1 << 17)) != 0;
2332 Serge 750
 
3031 serge 751
		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
752
					user_data, page_do_bit17_swizzling,
753
					partial_cacheline_write,
754
					needs_clflush_after);
755
		if (ret == 0)
756
			goto next_page;
757
 
758
		hit_slowpath = 1;
759
		mutex_unlock(&dev->struct_mutex);
3260 Serge 760
		dbgprintf("%s need shmem_pwrite_slow\n",__FUNCTION__);
3031 serge 761
 
3260 Serge 762
//		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
763
//					user_data, page_do_bit17_swizzling,
764
//					partial_cacheline_write,
765
//					needs_clflush_after);
766
 
3031 serge 767
		mutex_lock(&dev->struct_mutex);
768
 
769
		if (ret)
770
			goto out;
771
 
5354 serge 772
next_page:
2332 Serge 773
		remain -= page_length;
3031 serge 774
		user_data += page_length;
2332 Serge 775
		offset += page_length;
776
	}
777
 
778
out:
3031 serge 779
	i915_gem_object_unpin_pages(obj);
780
 
781
	if (hit_slowpath) {
3480 Serge 782
		/*
783
		 * Fixup: Flush cpu caches in case we didn't flush the dirty
784
		 * cachelines in-line while writing and the object moved
785
		 * out of the cpu write domain while we've dropped the lock.
786
		 */
787
		if (!needs_clflush_after &&
788
		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
4104 Serge 789
			if (i915_gem_clflush_object(obj, obj->pin_display))
6084 serge 790
				needs_clflush_after = true;
3031 serge 791
		}
2332 Serge 792
	}
793
 
3031 serge 794
	if (needs_clflush_after)
3243 Serge 795
		i915_gem_chipset_flush(dev);
6084 serge 796
	else
797
		obj->cache_dirty = true;
3031 serge 798
 
6084 serge 799
	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
2332 Serge 800
	return ret;
801
}
3031 serge 802
 
803
/**
804
 * Writes data to the object referenced by handle.
805
 *
806
 * On error, the contents of the buffer that were to be modified are undefined.
807
 */
808
int
809
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
810
		      struct drm_file *file)
811
{
6084 serge 812
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 813
	struct drm_i915_gem_pwrite *args = data;
814
	struct drm_i915_gem_object *obj;
815
	int ret;
816
 
4104 Serge 817
	if (args->size == 0)
818
		return 0;
819
 
6084 serge 820
	intel_runtime_pm_get(dev_priv);
3480 Serge 821
 
3031 serge 822
	ret = i915_mutex_lock_interruptible(dev);
823
	if (ret)
6084 serge 824
		goto put_rpm;
3031 serge 825
 
826
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
827
	if (&obj->base == NULL) {
828
		ret = -ENOENT;
829
		goto unlock;
830
	}
831
 
832
	/* Bounds check destination. */
833
	if (args->offset > obj->base.size ||
834
	    args->size > obj->base.size - args->offset) {
835
		ret = -EINVAL;
836
		goto out;
837
	}
838
 
839
	/* prime objects have no backing filp to GEM pread/pwrite
840
	 * pages from.
841
	 */
842
	if (!obj->base.filp) {
843
		ret = -EINVAL;
844
		goto out;
845
	}
846
 
847
	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
848
 
849
	ret = -EFAULT;
850
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
851
	 * it would end up going through the fenced access, and we'll get
852
	 * different detiling behavior between reading and writing.
853
	 * pread/pwrite currently are reading and writing from the CPU
854
	 * perspective, requiring manual detiling by the client.
855
	 */
4104 Serge 856
	if (obj->tiling_mode == I915_TILING_NONE &&
857
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
858
	    cpu_write_needs_clflush(obj)) {
3031 serge 859
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
860
		/* Note that the gtt paths might fail with non-page-backed user
861
		 * pointers (e.g. gtt mappings when moving data between
862
		 * textures). Fallback to the shmem path in that case. */
863
	}
864
 
865
	if (ret == -EFAULT || ret == -ENOSPC)
6084 serge 866
			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
3031 serge 867
 
868
out:
869
	drm_gem_object_unreference(&obj->base);
870
unlock:
871
	mutex_unlock(&dev->struct_mutex);
6084 serge 872
put_rpm:
873
	intel_runtime_pm_put(dev_priv);
874
 
3031 serge 875
	return ret;
876
}
877
 
878
int
3480 Serge 879
i915_gem_check_wedge(struct i915_gpu_error *error,
3031 serge 880
		     bool interruptible)
881
{
3480 Serge 882
	if (i915_reset_in_progress(error)) {
3031 serge 883
		/* Non-interruptible callers can't handle -EAGAIN, hence return
884
		 * -EIO unconditionally for these. */
885
		if (!interruptible)
886
			return -EIO;
2332 Serge 887
 
3480 Serge 888
		/* Recovery complete, but the reset failed ... */
889
		if (i915_terminally_wedged(error))
3031 serge 890
			return -EIO;
2332 Serge 891
 
6084 serge 892
		/*
893
		 * Check if GPU Reset is in progress - we need intel_ring_begin
894
		 * to work properly to reinit the hw state while the gpu is
895
		 * still marked as reset-in-progress. Handle this with a flag.
896
		 */
897
		if (!error->reload_in_reset)
898
			return -EAGAIN;
3031 serge 899
	}
2332 Serge 900
 
3031 serge 901
	return 0;
902
}
2332 Serge 903
 
4560 Serge 904
static void fake_irq(unsigned long data)
905
{
906
//	wake_up_process((struct task_struct *)data);
907
}
908
 
909
static bool missed_irq(struct drm_i915_private *dev_priv,
5060 serge 910
		       struct intel_engine_cs *ring)
4560 Serge 911
{
912
	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
913
}
914
 
6084 serge 915
static unsigned long local_clock_us(unsigned *cpu)
4560 Serge 916
{
6084 serge 917
	unsigned long t;
918
 
919
	/* Cheaply and approximately convert from nanoseconds to microseconds.
920
	 * The result and subsequent calculations are also defined in the same
921
	 * approximate microseconds units. The principal source of timing
922
	 * error here is from the simple truncation.
923
	 *
924
	 * Note that local_clock() is only defined wrt the current CPU;
925
	 * the comparisons are no longer valid if we switch CPUs. Instead of
926
	 * blocking preemption for the entire busywait, we can detect the CPU
927
	 * switch and use that as indicator of system load and a reason to
928
	 * stop busywaiting, see busywait_stop().
929
	 */
930
	t = GetClockNs() >> 10;
931
 
932
	return t;
933
}
934
 
935
static bool busywait_stop(unsigned long timeout, unsigned cpu)
936
{
937
	unsigned this_cpu = 0;
938
 
939
	if (time_after(local_clock_us(&this_cpu), timeout))
4560 Serge 940
		return true;
941
 
6084 serge 942
	return this_cpu != cpu;
4560 Serge 943
}
944
 
6084 serge 945
static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
946
{
947
	unsigned long timeout;
948
	unsigned cpu;
949
 
950
	/* When waiting for high frequency requests, e.g. during synchronous
951
	 * rendering split between the CPU and GPU, the finite amount of time
952
	 * required to set up the irq and wait upon it limits the response
953
	 * rate. By busywaiting on the request completion for a short while we
954
	 * can service the high frequency waits as quick as possible. However,
955
	 * if it is a slow request, we want to sleep as quickly as possible.
956
	 * The tradeoff between waiting and sleeping is roughly the time it
957
	 * takes to sleep on a request, on the order of a microsecond.
958
	 */
959
 
960
	if (req->ring->irq_refcount)
961
		return -EBUSY;
962
 
963
	/* Only spin if we know the GPU is processing this request */
964
	if (!i915_gem_request_started(req, true))
965
		return -EAGAIN;
966
 
967
	timeout = local_clock_us(&cpu) + 5;
968
	while (1 /*!need_resched()*/) {
969
		if (i915_gem_request_completed(req, true))
970
			return 0;
971
 
972
		if (busywait_stop(timeout, cpu))
973
			break;
974
 
975
		cpu_relax_lowlatency();
976
	}
977
 
978
	if (i915_gem_request_completed(req, false))
979
		return 0;
980
 
981
	return -EAGAIN;
982
}
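/* Usage sketch (added for illustration; it mirrors what __i915_wait_request()
 * below actually does): the busywait is tried first and the ring interrupt is
 * only armed when the spin times out:
 *
 *	ret = __i915_spin_request(req, state);
 *	if (ret == 0)
 *		goto out;		// completed within the ~5us busywait budget
 *	if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring)))
 *		...			// otherwise arm the irq and sleep on ring->irq_queue
 */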
983
 
3031 serge 984
/**
6084 serge 985
 * __i915_wait_request - wait until execution of request has finished
986
 * @req: duh!
987
 * @reset_counter: reset sequence associated with the given request
3031 serge 988
 * @interruptible: do an interruptible wait (normally yes)
989
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
990
 *
3480 Serge 991
 * Note: It is of utmost importance that the passed in seqno and reset_counter
992
 * values have been read by the caller in an smp safe manner. Where read-side
993
 * locks are involved, it is sufficient to read the reset_counter before
994
 * unlocking the lock that protects the seqno. For lockless tricks, the
995
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
996
 * inserted.
997
 *
6084 serge 998
 * Returns 0 if the request was found within the allotted time. Else returns the
3031 serge 999
 * errno with remaining time filled in timeout argument.
1000
 */
6084 serge 1001
int __i915_wait_request(struct drm_i915_gem_request *req,
3480 Serge 1002
			unsigned reset_counter,
4560 Serge 1003
			bool interruptible,
5060 serge 1004
			s64 *timeout,
6084 serge 1005
			struct intel_rps_client *rps)
3031 serge 1006
{
6084 serge 1007
	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
5060 serge 1008
	struct drm_device *dev = ring->dev;
1009
	struct drm_i915_private *dev_priv = dev->dev_private;
4560 Serge 1010
	const bool irq_test_in_progress =
1011
		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
6084 serge 1012
	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
6088 serge 1013
	wait_queue_t wait;
5060 serge 1014
	unsigned long timeout_expire;
1015
	s64 before, now;
3031 serge 1016
	int ret;
2332 Serge 1017
 
5060 serge 1018
	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
4104 Serge 1019
 
6084 serge 1020
	if (list_empty(&req->list))
3031 serge 1021
		return 0;
2332 Serge 1022
 
6084 serge 1023
	if (i915_gem_request_completed(req, true))
1024
		return 0;
2332 Serge 1025
 
6084 serge 1026
	timeout_expire = 0;
1027
	if (timeout) {
1028
		if (WARN_ON(*timeout < 0))
1029
			return -EINVAL;
1030
 
1031
		if (*timeout == 0)
1032
			return -ETIME;
1033
 
1034
		timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
3031 serge 1035
	}
2332 Serge 1036
 
6084 serge 1037
	if (INTEL_INFO(dev_priv)->gen >= 6)
1038
		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
2332 Serge 1039
 
6084 serge 1040
	/* Record current time in case interrupted by signal, or wedged */
1041
	trace_i915_gem_request_wait_begin(req);
1042
	before = ktime_get_raw_ns();
1043
 
1044
	/* Optimistic spin for the next jiffie before touching IRQs */
1045
	ret = __i915_spin_request(req, state);
1046
	if (ret == 0)
1047
		goto out;
1048
 
1049
	if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
1050
		ret = -ENODEV;
1051
		goto out;
1052
	}
1053
 
6088 serge 1054
	INIT_LIST_HEAD(&wait.task_list);
1055
	wait.evnt = CreateEvent(NULL, MANUAL_DESTROY);
2332 Serge 1056
 
4560 Serge 1057
	for (;;) {
1058
        unsigned long flags;
1059
 
3480 Serge 1060
		/* We need to check whether any gpu reset happened in between
1061
		 * the caller grabbing the seqno and now ... */
4560 Serge 1062
		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
1063
			/* ... but upgrade the -EAGAIN to an -EIO if the gpu
1064
			 * is truly gone. */
1065
			ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1066
			if (ret == 0)
1067
				ret = -EAGAIN;
1068
			break;
1069
		}
3480 Serge 1070
 
6084 serge 1071
		if (i915_gem_request_completed(req, false)) {
4560 Serge 1072
			ret = 0;
1073
			break;
1074
		}
2332 Serge 1075
 
6088 serge 1076
		if (timeout && time_after_eq(jiffies, timeout_expire)) {
4560 Serge 1077
			ret = -ETIME;
1078
			break;
1079
		}
2332 Serge 1080
 
4560 Serge 1081
        spin_lock_irqsave(&ring->irq_queue.lock, flags);
6088 serge 1082
        if (list_empty(&wait.task_list))
1083
            __add_wait_queue(&ring->irq_queue, &wait);
4560 Serge 1084
        spin_unlock_irqrestore(&ring->irq_queue.lock, flags);
1085
 
6088 serge 1086
            WaitEventTimeout(wait.evnt, 1);
4560 Serge 1087
 
6088 serge 1088
        if (!list_empty(&wait.task_list)) {
4560 Serge 1089
            spin_lock_irqsave(&ring->irq_queue.lock, flags);
6088 serge 1090
            list_del_init(&wait.task_list);
4560 Serge 1091
            spin_unlock_irqrestore(&ring->irq_queue.lock, flags);
1092
        }
1093
 
6088 serge 1094
	};
4560 Serge 1095
 
1096
	if (!irq_test_in_progress)
6084 serge 1097
		ring->irq_put(ring);
2332 Serge 1098
 
6088 serge 1099
    DestroyEvent(wait.evnt);
1100
 
6084 serge 1101
out:
1102
	now = ktime_get_raw_ns();
1103
	trace_i915_gem_request_wait_end(req);
1104
 
1105
	if (timeout) {
1106
		s64 tres = *timeout - (now - before);
1107
 
1108
		*timeout = tres < 0 ? 0 : tres;
1109
 
1110
		/*
1111
		 * Apparently ktime isn't accurate enough and occasionally has a
1112
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
1113
		 * things up to make the test happy. We allow up to 1 jiffy.
1114
		 *
1115
		 * This is a regression from the timespec->ktime conversion.
1116
		 */
1117
		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
1118
			*timeout = 0;
1119
	}
1120
 
4560 Serge 1121
	return ret;
3031 serge 1122
}
2332 Serge 1123
 
6084 serge 1124
int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
1125
				   struct drm_file *file)
1126
{
1127
	struct drm_i915_private *dev_private;
1128
	struct drm_i915_file_private *file_priv;
1129
 
1130
	WARN_ON(!req || !file || req->file_priv);
1131
 
1132
	if (!req || !file)
1133
		return -EINVAL;
1134
 
1135
	if (req->file_priv)
1136
		return -EINVAL;
1137
 
1138
	dev_private = req->ring->dev->dev_private;
1139
	file_priv = file->driver_priv;
1140
 
1141
	spin_lock(&file_priv->mm.lock);
1142
	req->file_priv = file_priv;
1143
	list_add_tail(&req->client_list, &file_priv->mm.request_list);
1144
	spin_unlock(&file_priv->mm.lock);
1145
 
1146
	req->pid = 1;
1147
 
1148
	return 0;
1149
}
1150
 
1151
static inline void
1152
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1153
{
1154
	struct drm_i915_file_private *file_priv = request->file_priv;
1155
 
1156
	if (!file_priv)
1157
		return;
1158
 
1159
	spin_lock(&file_priv->mm.lock);
1160
	list_del(&request->client_list);
1161
	request->file_priv = NULL;
1162
	spin_unlock(&file_priv->mm.lock);
1163
}
1164
 
1165
static void i915_gem_request_retire(struct drm_i915_gem_request *request)
1166
{
1167
	trace_i915_gem_request_retire(request);
1168
 
1169
	/* We know the GPU must have read the request to have
1170
	 * sent us the seqno + interrupt, so use the position
1171
	 * of tail of the request to update the last known position
1172
	 * of the GPU head.
1173
	 *
1174
	 * Note this requires that we are always called in request
1175
	 * completion order.
1176
	 */
1177
	request->ringbuf->last_retired_head = request->postfix;
1178
 
1179
	list_del_init(&request->list);
1180
	i915_gem_request_remove_from_client(request);
1181
 
1182
	i915_gem_request_unreference(request);
1183
}
1184
 
1185
static void
1186
__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
1187
{
1188
	struct intel_engine_cs *engine = req->ring;
1189
	struct drm_i915_gem_request *tmp;
1190
 
1191
 
1192
	if (list_empty(&req->list))
1193
		return;
1194
 
1195
	do {
1196
		tmp = list_first_entry(&engine->request_list,
1197
				       typeof(*tmp), list);
1198
 
1199
		i915_gem_request_retire(tmp);
1200
	} while (tmp != req);
1201
 
1202
	WARN_ON(i915_verify_lists(engine->dev));
1203
}
1204
 
3031 serge 1205
/**
6084 serge 1206
 * Waits for a request to be signaled, and cleans up the
3031 serge 1207
 * request and object lists appropriately for that event.
1208
 */
1209
int
6084 serge 1210
i915_wait_request(struct drm_i915_gem_request *req)
3031 serge 1211
{
6084 serge 1212
	struct drm_device *dev;
1213
	struct drm_i915_private *dev_priv;
1214
	bool interruptible;
3031 serge 1215
	int ret;
2332 Serge 1216
 
6084 serge 1217
	BUG_ON(req == NULL);
1218
 
1219
	dev = req->ring->dev;
1220
	dev_priv = dev->dev_private;
1221
	interruptible = dev_priv->mm.interruptible;
1222
 
3031 serge 1223
	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
2332 Serge 1224
 
3480 Serge 1225
	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
3031 serge 1226
	if (ret)
1227
		return ret;
2332 Serge 1228
 
6084 serge 1229
	ret = __i915_wait_request(req,
1230
				  atomic_read(&dev_priv->gpu_error.reset_counter),
1231
				  interruptible, NULL, NULL);
3031 serge 1232
	if (ret)
1233
		return ret;
2332 Serge 1234
 
6084 serge 1235
	__i915_gem_request_retire__upto(req);
4104 Serge 1236
	return 0;
1237
}
1238
 
3031 serge 1239
/**
1240
 * Ensures that all rendering to the object has completed and the object is
1241
 * safe to unbind from the GTT or access from the CPU.
1242
 */
6084 serge 1243
int
3031 serge 1244
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1245
			       bool readonly)
1246
{
6084 serge 1247
	int ret, i;
2332 Serge 1248
 
6084 serge 1249
	if (!obj->active)
3031 serge 1250
		return 0;
2332 Serge 1251
 
6084 serge 1252
	if (readonly) {
1253
		if (obj->last_write_req != NULL) {
1254
			ret = i915_wait_request(obj->last_write_req);
1255
			if (ret)
1256
				return ret;
2332 Serge 1257
 
6084 serge 1258
			i = obj->last_write_req->ring->id;
1259
			if (obj->last_read_req[i] == obj->last_write_req)
1260
				i915_gem_object_retire__read(obj, i);
1261
			else
1262
				i915_gem_object_retire__write(obj);
1263
		}
1264
	} else {
1265
		for (i = 0; i < I915_NUM_RINGS; i++) {
1266
			if (obj->last_read_req[i] == NULL)
1267
				continue;
1268
 
1269
			ret = i915_wait_request(obj->last_read_req[i]);
1270
			if (ret)
1271
				return ret;
1272
 
1273
			i915_gem_object_retire__read(obj, i);
1274
		}
1275
		RQ_BUG_ON(obj->active);
1276
	}
1277
 
1278
	return 0;
3031 serge 1279
}
2332 Serge 1280
 
6084 serge 1281
static void
1282
i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
1283
			       struct drm_i915_gem_request *req)
1284
{
1285
	int ring = req->ring->id;
1286
 
1287
	if (obj->last_read_req[ring] == req)
1288
		i915_gem_object_retire__read(obj, ring);
1289
	else if (obj->last_write_req == req)
1290
		i915_gem_object_retire__write(obj);
1291
 
1292
	__i915_gem_request_retire__upto(req);
1293
}
1294
 
3260 Serge 1295
/* A nonblocking variant of the above wait. This is a highly dangerous routine
1296
 * as the object state may change during this call.
1297
 */
1298
static __must_check int
1299
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
6084 serge 1300
					    struct intel_rps_client *rps,
3260 Serge 1301
					    bool readonly)
1302
{
1303
	struct drm_device *dev = obj->base.dev;
1304
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 1305
	struct drm_i915_gem_request *requests[I915_NUM_RINGS];
3480 Serge 1306
	unsigned reset_counter;
6084 serge 1307
	int ret, i, n = 0;
2332 Serge 1308
 
3260 Serge 1309
	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1310
	BUG_ON(!dev_priv->mm.interruptible);
2332 Serge 1311
 
6084 serge 1312
	if (!obj->active)
3260 Serge 1313
		return 0;
2332 Serge 1314
 
3480 Serge 1315
	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
3260 Serge 1316
	if (ret)
1317
		return ret;
2332 Serge 1318
 
6084 serge 1319
	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
2332 Serge 1320
 
6084 serge 1321
	if (readonly) {
1322
		struct drm_i915_gem_request *req;
1323
 
1324
		req = obj->last_write_req;
1325
		if (req == NULL)
1326
			return 0;
1327
 
1328
		requests[n++] = i915_gem_request_reference(req);
1329
	} else {
1330
		for (i = 0; i < I915_NUM_RINGS; i++) {
1331
			struct drm_i915_gem_request *req;
1332
 
1333
			req = obj->last_read_req[i];
1334
			if (req == NULL)
1335
				continue;
1336
 
1337
			requests[n++] = i915_gem_request_reference(req);
1338
		}
1339
	}
1340
 
3260 Serge 1341
	mutex_unlock(&dev->struct_mutex);
6084 serge 1342
	for (i = 0; ret == 0 && i < n; i++)
1343
		ret = __i915_wait_request(requests[i], reset_counter, true,
1344
					  NULL, rps);
3260 Serge 1345
	mutex_lock(&dev->struct_mutex);
2332 Serge 1346
 
6084 serge 1347
	for (i = 0; i < n; i++) {
1348
		if (ret == 0)
1349
			i915_gem_object_retire_request(obj, requests[i]);
1350
		i915_gem_request_unreference(requests[i]);
1351
	}
1352
 
1353
	return ret;
3260 Serge 1354
}
2332 Serge 1355
 
6084 serge 1356
static struct intel_rps_client *to_rps_client(struct drm_file *file)
1357
{
1358
	struct drm_i915_file_private *fpriv = file->driver_priv;
1359
	return &fpriv->rps;
1360
}
1361
 
3260 Serge 1362
/**
1363
 * Called when user space prepares to use an object with the CPU, either
1364
 * through the mmap ioctl's mapping or a GTT mapping.
1365
 */
1366
int
1367
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1368
			  struct drm_file *file)
1369
{
1370
	struct drm_i915_gem_set_domain *args = data;
1371
	struct drm_i915_gem_object *obj;
1372
	uint32_t read_domains = args->read_domains;
1373
	uint32_t write_domain = args->write_domain;
1374
	int ret;
2332 Serge 1375
 
3260 Serge 1376
	/* Only handle setting domains to types used by the CPU. */
1377
	if (write_domain & I915_GEM_GPU_DOMAINS)
1378
		return -EINVAL;
2332 Serge 1379
 
3260 Serge 1380
	if (read_domains & I915_GEM_GPU_DOMAINS)
1381
		return -EINVAL;
2332 Serge 1382
 
3260 Serge 1383
	/* Having something in the write domain implies it's in the read
1384
	 * domain, and only that read domain.  Enforce that in the request.
1385
	 */
1386
	if (write_domain != 0 && read_domains != write_domain)
1387
		return -EINVAL;
2332 Serge 1388
 
3260 Serge 1389
	ret = i915_mutex_lock_interruptible(dev);
1390
	if (ret)
1391
		return ret;
2332 Serge 1392
 
3260 Serge 1393
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1394
	if (&obj->base == NULL) {
1395
		ret = -ENOENT;
1396
		goto unlock;
1397
	}
2332 Serge 1398
 
3260 Serge 1399
	/* Try to flush the object off the GPU without holding the lock.
1400
	 * We will repeat the flush holding the lock in the normal manner
1401
	 * to catch cases where we are gazumped.
1402
	 */
5060 serge 1403
	ret = i915_gem_object_wait_rendering__nonblocking(obj,
6084 serge 1404
							  to_rps_client(file),
5060 serge 1405
							  !write_domain);
3260 Serge 1406
	if (ret)
1407
		goto unref;
2332 Serge 1408
 
6084 serge 1409
	if (read_domains & I915_GEM_DOMAIN_GTT)
3260 Serge 1410
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
6084 serge 1411
	else
3260 Serge 1412
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
2332 Serge 1413
 
6084 serge 1414
	if (write_domain != 0)
1415
		intel_fb_obj_invalidate(obj,
1416
					write_domain == I915_GEM_DOMAIN_GTT ?
1417
					ORIGIN_GTT : ORIGIN_CPU);
1418
 
3260 Serge 1419
unref:
1420
	drm_gem_object_unreference(&obj->base);
1421
unlock:
1422
	mutex_unlock(&dev->struct_mutex);
1423
	return ret;
1424
}
2332 Serge 1425
 
4293 Serge 1426
/**
1427
 * Called when user space has done writes to this buffer
1428
 */
1429
int
1430
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1431
			 struct drm_file *file)
1432
{
1433
	struct drm_i915_gem_sw_finish *args = data;
1434
	struct drm_i915_gem_object *obj;
1435
	int ret = 0;
2332 Serge 1436
 
4293 Serge 1437
	ret = i915_mutex_lock_interruptible(dev);
1438
	if (ret)
1439
		return ret;
2332 Serge 1440
 
4293 Serge 1441
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1442
	if (&obj->base == NULL) {
1443
		ret = -ENOENT;
1444
		goto unlock;
1445
	}
2332 Serge 1446
 
4293 Serge 1447
	/* Pinned buffers may be scanout, so flush the cache */
1448
	if (obj->pin_display)
6084 serge 1449
		i915_gem_object_flush_cpu_write_domain(obj);
2332 Serge 1450
 
4293 Serge 1451
	drm_gem_object_unreference(&obj->base);
1452
unlock:
1453
	mutex_unlock(&dev->struct_mutex);
1454
	return ret;
1455
}
1456
 
3260 Serge 1457
/**
1458
 * Maps the contents of an object, returning the address it is mapped
1459
 * into.
1460
 *
1461
 * While the mapping holds a reference on the contents of the object, it doesn't
1462
 * imply a ref on the object itself.
5354 serge 1463
 *
1464
 * IMPORTANT:
1465
 *
1466
 * DRM driver writers who look a this function as an example for how to do GEM
1467
 * mmap support, please don't implement mmap support like here. The modern way
1468
 * to implement DRM mmap support is with an mmap offset ioctl (like
1469
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1470
 * That way debug tooling like valgrind will understand what's going on, hiding
1471
 * the mmap call in a driver private ioctl will break that. The i915 driver only
1472
 * does cpu mmaps this way because we didn't know better.
3260 Serge 1473
 */
1474
int
1475
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1476
		    struct drm_file *file)
1477
{
1478
	struct drm_i915_gem_mmap *args = data;
1479
	struct drm_gem_object *obj;
4392 Serge 1480
	unsigned long addr;
2332 Serge 1481
 
6084 serge 1482
//	if (args->flags & ~(I915_MMAP_WC))
1483
//		return -EINVAL;
3260 Serge 1484
	obj = drm_gem_object_lookup(dev, file, args->handle);
1485
	if (obj == NULL)
1486
		return -ENOENT;
4104 Serge 1487
 
3260 Serge 1488
	/* prime objects have no backing filp to GEM mmap
1489
	 * pages from.
1490
	 */
1491
	if (!obj->filp) {
1492
		drm_gem_object_unreference_unlocked(obj);
1493
		return -EINVAL;
1494
	}
2332 Serge 1495
 
6084 serge 1496
	addr = vm_mmap(obj->filp, 0, args->size,
1497
		       PROT_READ | PROT_WRITE, MAP_SHARED,
1498
		       args->offset);
3260 Serge 1499
	drm_gem_object_unreference_unlocked(obj);
6084 serge 1500
	if (IS_ERR((void *)addr))
1501
		return addr;
2332 Serge 1502
 
3260 Serge 1503
	args->addr_ptr = (uint64_t) addr;
2332 Serge 1504
 
6084 serge 1505
	return 0;
3260 Serge 1506
}
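/* Illustrative userspace usage (an assumption added for clarity, not part of
 * this file): on a stock libdrm stack the ioctl above is reached roughly as
 *
 *	struct drm_i915_gem_mmap arg = { .handle = handle, .size = size };
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP, &arg) == 0)
 *		ptr = (void *)(uintptr_t)arg.addr_ptr;
 *
 * Under this KolibriOS port the returned addr_ptr comes from the vm_mmap()
 * helper declared near the top of the file rather than a real mmap syscall.
 */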
2332 Serge 1507
 
1508
 
1509
 
1510
 
1511
 
1512
 
1513
 
1514
 
3031 serge 1515
 
1516
 
1517
 
1518
 
1519
 
1520
/**
1521
 * i915_gem_release_mmap - remove physical page mappings
1522
 * @obj: obj in question
1523
 *
1524
 * Preserve the reservation of the mmapping with the DRM core code, but
1525
 * relinquish ownership of the pages back to the system.
1526
 *
1527
 * It is vital that we remove the page mapping if we have mapped a tiled
1528
 * object through the GTT and then lose the fence register due to
1529
 * resource pressure. Similarly if the object has been moved out of the
1530
 * aperture, then pages mapped into userspace must be revoked. Removing the
1531
 * mapping will then trigger a page fault on the next user access, allowing
1532
 * fixup by i915_gem_fault().
1533
 */
1534
void
1535
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1536
{
1537
	if (!obj->fault_mappable)
1538
		return;
1539
 
4104 Serge 1540
//	drm_vma_node_unmap(&obj->base.vma_node, obj->base.dev->dev_mapping);
3031 serge 1541
	obj->fault_mappable = false;
1542
}
1543
 
6084 serge 1544
void
1545
i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
1546
{
1547
	struct drm_i915_gem_object *obj;
1548
 
1549
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
1550
		i915_gem_release_mmap(obj);
1551
}
1552
 
3480 Serge 1553
uint32_t
2332 Serge 1554
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
1555
{
1556
	uint32_t gtt_size;
1557
 
1558
	if (INTEL_INFO(dev)->gen >= 4 ||
1559
	    tiling_mode == I915_TILING_NONE)
1560
		return size;
1561
 
1562
	/* Previous chips need a power-of-two fence region when tiling */
1563
	if (INTEL_INFO(dev)->gen == 3)
1564
		gtt_size = 1024*1024;
1565
	else
1566
		gtt_size = 512*1024;
1567
 
1568
	while (gtt_size < size)
1569
		gtt_size <<= 1;
1570
 
1571
	return gtt_size;
1572
}
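/* Worked example (added for illustration): pre-gen4 tiling needs a
 * power-of-two fence region, so a 1.5 MiB tiled object is padded to 2 MiB
 * (the 1 MiB gen3 minimum doubled once, or the 512 KiB minimum of older
 * chips doubled twice), while gen4+ and untiled objects keep their exact
 * size.
 */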
1573
 
1574
/**
1575
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1576
 * @obj: object to check
1577
 *
1578
 * Return the required GTT alignment for an object, taking into account
1579
 * potential fence register mapping.
1580
 */
3480 Serge 1581
uint32_t
1582
i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
1583
			   int tiling_mode, bool fenced)
2332 Serge 1584
{
1585
	/*
1586
	 * Minimum alignment is 4k (GTT page size), but might be greater
1587
	 * if a fence register is needed for the object.
1588
	 */
3480 Serge 1589
	if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
2332 Serge 1590
	    tiling_mode == I915_TILING_NONE)
1591
		return 4096;
1592
 
1593
	/*
1594
	 * Previous chips need to be aligned to the size of the smallest
1595
	 * fence register that can contain the object.
1596
	 */
1597
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
1598
}
1599
 
1600
 
1601
 
3480 Serge 1602
int
1603
i915_gem_mmap_gtt(struct drm_file *file,
1604
          struct drm_device *dev,
6084 serge 1605
		  uint32_t handle,
3480 Serge 1606
          uint64_t *offset)
1607
{
1608
    struct drm_i915_private *dev_priv = dev->dev_private;
1609
    struct drm_i915_gem_object *obj;
1610
    unsigned long pfn;
1611
    char *mem, *ptr;
1612
    int ret;
1613
 
1614
    ret = i915_mutex_lock_interruptible(dev);
1615
    if (ret)
1616
        return ret;
1617
 
1618
    obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
1619
    if (&obj->base == NULL) {
1620
        ret = -ENOENT;
1621
        goto unlock;
1622
    }
1623
 
1624
    if (obj->madv != I915_MADV_WILLNEED) {
5060 serge 1625
		DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
1626
		ret = -EFAULT;
3480 Serge 1627
        goto out;
1628
    }
1629
    /* Now bind it into the GTT if needed */
5060 serge 1630
	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
3480 Serge 1631
    if (ret)
1632
        goto out;
1633
 
1634
    ret = i915_gem_object_set_to_gtt_domain(obj, 1);
1635
    if (ret)
1636
        goto unpin;
1637
 
1638
    ret = i915_gem_object_get_fence(obj);
1639
    if (ret)
1640
        goto unpin;
1641
 
1642
    obj->fault_mappable = true;
1643
 
4104 Serge 1644
    pfn = dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj);
3480 Serge 1645
 
1646
    /* Finally, remap it using the new GTT offset */
1647
 
1648
    mem = UserAlloc(obj->base.size);
1649
    if(unlikely(mem == NULL))
1650
    {
1651
        ret = -ENOMEM;
1652
        goto unpin;
1653
    }
1654
 
1655
    for(ptr = mem; ptr < mem + obj->base.size; ptr+= 4096, pfn+= 4096)
1656
        MapPage(ptr, pfn, PG_SHARED|PG_UW);
1657
 
1658
unpin:
5060 serge 1659
    i915_gem_object_unpin_pages(obj);
3480 Serge 1660
 
1661
 
5367 serge 1662
    *offset = (uint32_t)mem;
3480 Serge 1663
 
1664
out:
6088 serge 1665
	drm_gem_object_unreference(&obj->base);
3480 Serge 1666
unlock:
6088 serge 1667
	mutex_unlock(&dev->struct_mutex);
1668
	return ret;
3480 Serge 1669
}
1670
 
1671
/**
1672
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1673
 * @dev: DRM device
1674
 * @data: GTT mapping ioctl data
1675
 * @file: GEM object info
1676
 *
1677
 * Simply returns the fake offset to userspace so it can mmap it.
1678
 * The mmap call will end up in drm_gem_mmap(), which will set things
1679
 * up so we can get faults in the handler above.
1680
 *
1681
 * The fault handler will take care of binding the object into the GTT
1682
 * (since it may have been evicted to make room for something), allocating
1683
 * a fence register, and mapping the appropriate aperture address into
1684
 * userspace.
1685
 */
1686
int
1687
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
6084 serge 1688
			struct drm_file *file)
3480 Serge 1689
{
6084 serge 1690
	struct drm_i915_gem_mmap_gtt *args = data;
3480 Serge 1691
 
6084 serge 1692
	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
3480 Serge 1693
}
1694
 
3031 serge 1695
/* Immediately discard the backing storage */
1696
static void
1697
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
1698
{
1699
//	i915_gem_object_free_mmap_offset(obj);
2332 Serge 1700
 
3263 Serge 1701
	if (obj->base.filp == NULL)
1702
		return;
2332 Serge 1703
 
3031 serge 1704
	/* Our goal here is to return as much of the memory as
1705
	 * is possible back to the system as we are called from OOM.
1706
	 * To do this we must instruct the shmfs to drop all of its
1707
	 * backing pages, *now*.
1708
	 */
5060 serge 1709
//	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
3031 serge 1710
	obj->madv = __I915_MADV_PURGED;
1711
}
2332 Serge 1712
 
5060 serge 1713
/* Try to discard unwanted pages */
1714
static void
1715
i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
3031 serge 1716
{
5060 serge 1717
	struct address_space *mapping;
1718
 
1719
	switch (obj->madv) {
1720
	case I915_MADV_DONTNEED:
1721
		i915_gem_object_truncate(obj);
1722
	case __I915_MADV_PURGED:
1723
		return;
1724
	}
1725
 
1726
	if (obj->base.filp == NULL)
1727
		return;
1728
 
3031 serge 1729
}
2332 Serge 1730
 
3031 serge 1731
static void
1732
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
1733
{
3746 Serge 1734
	struct sg_page_iter sg_iter;
1735
	int ret;
2332 Serge 1736
 
3031 serge 1737
	BUG_ON(obj->madv == __I915_MADV_PURGED);
2332 Serge 1738
 
3031 serge 1739
	ret = i915_gem_object_set_to_cpu_domain(obj, true);
1740
	if (ret) {
1741
		/* In the event of a disaster, abandon all caches and
1742
		 * hope for the best.
1743
		 */
1744
		WARN_ON(ret != -EIO);
4104 Serge 1745
		i915_gem_clflush_object(obj, true);
3031 serge 1746
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
1747
	}
2332 Serge 1748
 
6084 serge 1749
	i915_gem_gtt_finish_object(obj);
3031 serge 1750
	if (obj->madv == I915_MADV_DONTNEED)
1751
		obj->dirty = 0;
2332 Serge 1752
 
3746 Serge 1753
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
1754
		struct page *page = sg_page_iter_page(&sg_iter);
2332 Serge 1755
 
6084 serge 1756
		page_cache_release(page);
3243 Serge 1757
	}
6084 serge 1758
	obj->dirty = 0;
3243 Serge 1759
 
1760
	sg_free_table(obj->pages);
1761
	kfree(obj->pages);
3031 serge 1762
}
2332 Serge 1763
 
3480 Serge 1764
int
3031 serge 1765
i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
1766
{
1767
	const struct drm_i915_gem_object_ops *ops = obj->ops;
2332 Serge 1768
 
3243 Serge 1769
	if (obj->pages == NULL)
3031 serge 1770
		return 0;
2332 Serge 1771
 
3031 serge 1772
	if (obj->pages_pin_count)
1773
		return -EBUSY;
1774
 
4104 Serge 1775
	BUG_ON(i915_gem_obj_bound_any(obj));
1776
 
3243 Serge 1777
	/* ->put_pages might need to allocate memory for the bit17 swizzle
1778
	 * array, hence protect them from being reaped by removing them from gtt
1779
	 * lists early. */
4104 Serge 1780
	list_del(&obj->global_list);
3243 Serge 1781
 
3031 serge 1782
	ops->put_pages(obj);
3243 Serge 1783
	obj->pages = NULL;
3031 serge 1784
 
5060 serge 1785
	i915_gem_object_invalidate(obj);
3031 serge 1786
 
1787
	return 0;
1788
}
1789
 
2332 Serge 1790
static int
3031 serge 1791
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2332 Serge 1792
{
3260 Serge 1793
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
6084 serge 1794
	int page_count, i;
1795
	struct address_space *mapping;
1796
	struct sg_table *st;
3243 Serge 1797
	struct scatterlist *sg;
3746 Serge 1798
	struct sg_page_iter sg_iter;
3243 Serge 1799
	struct page *page;
3746 Serge 1800
	unsigned long last_pfn = 0;	/* suppress gcc warning */
6084 serge 1801
	int ret;
3243 Serge 1802
	gfp_t gfp = GFP_KERNEL; /* the shmem shim may ignore this, but do not pass an uninitialized value */
2332 Serge 1803
 
3243 Serge 1804
	/* Assert that the object is not currently in any GPU domain. As it
1805
	 * wasn't in the GTT, there shouldn't be any way it could have been in
1806
	 * a GPU cache
2332 Serge 1807
	 */
3243 Serge 1808
	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
1809
	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
1810
 
1811
	st = kmalloc(sizeof(*st), GFP_KERNEL);
1812
	if (st == NULL)
1813
		return -ENOMEM;
1814
 
2332 Serge 1815
	page_count = obj->base.size / PAGE_SIZE;
3243 Serge 1816
	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
1817
		kfree(st);
2332 Serge 1818
		return -ENOMEM;
3243 Serge 1819
	}
2332 Serge 1820
 
3243 Serge 1821
	/* Get the list of pages out of our struct file.  They'll be pinned
1822
	 * at this point until we release them.
1823
	 *
1824
	 * Fail silently without starting the shrinker
1825
	 */
3746 Serge 1826
	sg = st->sgl;
1827
	st->nents = 0;
1828
	for (i = 0; i < page_count; i++) {
4104 Serge 1829
        page = shmem_read_mapping_page_gfp(obj->base.filp, i, gfp);
3260 Serge 1830
		if (IS_ERR(page)) {
1831
            dbgprintf("%s invalid page %p\n", __FUNCTION__, page);
2332 Serge 1832
			goto err_pages;
3260 Serge 1833
		}
5354 serge 1834
#ifdef CONFIG_SWIOTLB
1835
		if (swiotlb_nr_tbl()) {
1836
			st->nents++;
1837
			sg_set_page(sg, page, PAGE_SIZE, 0);
1838
			sg = sg_next(sg);
1839
			continue;
1840
		}
1841
#endif
3746 Serge 1842
		if (!i || page_to_pfn(page) != last_pfn + 1) {
1843
			if (i)
1844
				sg = sg_next(sg);
1845
			st->nents++;
6084 serge 1846
			sg_set_page(sg, page, PAGE_SIZE, 0);
3746 Serge 1847
		} else {
1848
			sg->length += PAGE_SIZE;
1849
		}
1850
		last_pfn = page_to_pfn(page);
3243 Serge 1851
	}
5354 serge 1852
#ifdef CONFIG_SWIOTLB
1853
	if (!swiotlb_nr_tbl())
1854
#endif
3746 Serge 1855
		sg_mark_end(sg);
3243 Serge 1856
	obj->pages = st;
3031 serge 1857
 
6084 serge 1858
	ret = i915_gem_gtt_prepare_object(obj);
1859
	if (ret)
1860
		goto err_pages;
5367 serge 1861
 
1862
	if (obj->tiling_mode != I915_TILING_NONE &&
1863
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
1864
		i915_gem_object_pin_pages(obj);
1865
 
2332 Serge 1866
	return 0;
1867
 
1868
err_pages:
3746 Serge 1869
	sg_mark_end(sg);
1870
	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
1871
		page_cache_release(sg_page_iter_page(&sg_iter));
3243 Serge 1872
	sg_free_table(st);
1873
	kfree(st);
6084 serge 1874
 
3243 Serge 1875
	return PTR_ERR(page);
2332 Serge 1876
}
1877
 
3031 serge 1878
/* Ensure that the associated pages are gathered from the backing storage
1879
 * and pinned into our object. i915_gem_object_get_pages() may be called
1880
 * multiple times before they are released by a single call to
1881
 * i915_gem_object_put_pages() - once the pages are no longer referenced
1882
 * either as a result of memory pressure (reaping pages under the shrinker)
1883
 * or as the object is itself released.
1884
 */
1885
int
1886
i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2332 Serge 1887
{
3031 serge 1888
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
1889
	const struct drm_i915_gem_object_ops *ops = obj->ops;
1890
	int ret;
2332 Serge 1891
 
3243 Serge 1892
	if (obj->pages)
3031 serge 1893
		return 0;
2332 Serge 1894
 
4392 Serge 1895
	if (obj->madv != I915_MADV_WILLNEED) {
5060 serge 1896
		DRM_DEBUG("Attempting to obtain a purgeable object\n");
1897
		return -EFAULT;
4392 Serge 1898
	}
1899
 
3031 serge 1900
	BUG_ON(obj->pages_pin_count);
2332 Serge 1901
 
3031 serge 1902
	ret = ops->get_pages(obj);
1903
	if (ret)
1904
		return ret;
2344 Serge 1905
 
4104 Serge 1906
	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
6084 serge 1907
 
1908
	obj->get_page.sg = obj->pages->sgl;
1909
	obj->get_page.last = 0;
1910
 
1911
	return 0;
2332 Serge 1912
}
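/*
 * Illustrative sketch, not part of this file: a minimal in-driver user of
 * the backing pages, restating the calling convention documented above.
 * The pin keeps i915_gem_object_put_pages() from reaping the pages while
 * they are being touched.  The function name is made up for the example;
 * error handling is deliberately minimal.
 */
#if 0
static int i915_gem_example_touch_first_page(struct drm_i915_gem_object *obj)
{
	struct page *page;
	int ret;

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);		/* bump pages_pin_count */
	page = sg_page(obj->pages->sgl);	/* first backing page */
	/* ... read or write the page contents here ... */
	i915_gem_object_unpin_pages(obj);	/* allow put_pages() again */

	return 0;
}
#endif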
1913
 
6084 serge 1914
void i915_vma_move_to_active(struct i915_vma *vma,
1915
			     struct drm_i915_gem_request *req)
2332 Serge 1916
{
6084 serge 1917
	struct drm_i915_gem_object *obj = vma->obj;
1918
	struct intel_engine_cs *ring;
2332 Serge 1919
 
6084 serge 1920
	ring = i915_gem_request_get_ring(req);
2332 Serge 1921
 
1922
	/* Add a reference if we're newly entering the active list. */
6084 serge 1923
	if (obj->active == 0)
2344 Serge 1924
		drm_gem_object_reference(&obj->base);
6084 serge 1925
	obj->active |= intel_ring_flag(ring);
2332 Serge 1926
 
6084 serge 1927
	list_move_tail(&obj->ring_list[ring->id], &ring->active_list);
1928
	i915_gem_request_assign(&obj->last_read_req[ring->id], req);
2332 Serge 1929
 
6084 serge 1930
	list_move_tail(&vma->mm_list, &vma->vm->active_list);
2332 Serge 1931
}
1932
 
6084 serge 1933
static void
1934
i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
4560 Serge 1935
{
6084 serge 1936
	RQ_BUG_ON(obj->last_write_req == NULL);
1937
	RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring)));
1938
 
1939
	i915_gem_request_assign(&obj->last_write_req, NULL);
1940
	intel_fb_obj_flush(obj, true, ORIGIN_CS);
4560 Serge 1941
}
1942
 
2344 Serge 1943
static void
6084 serge 1944
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
2344 Serge 1945
{
5060 serge 1946
	struct i915_vma *vma;
2332 Serge 1947
 
6084 serge 1948
	RQ_BUG_ON(obj->last_read_req[ring] == NULL);
1949
	RQ_BUG_ON(!(obj->active & (1 << ring)));
2332 Serge 1950
 
6084 serge 1951
	list_del_init(&obj->ring_list[ring]);
1952
	i915_gem_request_assign(&obj->last_read_req[ring], NULL);
2344 Serge 1953
 
6084 serge 1954
	if (obj->last_write_req && obj->last_write_req->ring->id == ring)
1955
		i915_gem_object_retire__write(obj);
5354 serge 1956
 
6084 serge 1957
	obj->active &= ~(1 << ring);
1958
	if (obj->active)
1959
		return;
2344 Serge 1960
 
6084 serge 1961
	/* Bump our place on the bound list to keep it roughly in LRU order
1962
	 * so that we don't steal from recently used but inactive objects
1963
	 * (unless we are forced to ofc!)
1964
	 */
1965
	list_move_tail(&obj->global_list,
1966
		       &to_i915(obj->base.dev)->mm.bound_list);
3031 serge 1967
 
6084 serge 1968
	list_for_each_entry(vma, &obj->vma_list, vma_link) {
1969
		if (!list_empty(&vma->mm_list))
1970
			list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
1971
	}
2344 Serge 1972
 
6084 serge 1973
	i915_gem_request_assign(&obj->last_fenced_req, NULL);
2352 Serge 1974
	drm_gem_object_unreference(&obj->base);
1975
}
1976
 
3243 Serge 1977
static int
3480 Serge 1978
i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
2344 Serge 1979
{
3243 Serge 1980
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 1981
	struct intel_engine_cs *ring;
3243 Serge 1982
	int ret, i, j;
2344 Serge 1983
 
3480 Serge 1984
	/* Carefully retire all requests without writing to the rings */
3243 Serge 1985
	for_each_ring(ring, dev_priv, i) {
3480 Serge 1986
		ret = intel_ring_idle(ring);
6084 serge 1987
		if (ret)
1988
			return ret;
3480 Serge 1989
	}
1990
	i915_gem_retire_requests(dev);
3243 Serge 1991
 
3480 Serge 1992
	/* Finally reset hw state */
3243 Serge 1993
	for_each_ring(ring, dev_priv, i) {
3480 Serge 1994
		intel_ring_init_seqno(ring, seqno);
1995
 
5060 serge 1996
		for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++)
1997
			ring->semaphore.sync_seqno[j] = 0;
3243 Serge 1998
	}
1999
 
2000
	return 0;
2344 Serge 2001
}
2002
 
3480 Serge 2003
int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
2004
{
2005
	struct drm_i915_private *dev_priv = dev->dev_private;
2006
	int ret;
2007
 
2008
	if (seqno == 0)
2009
		return -EINVAL;
2010
 
2011
	/* HWS page needs to be set less than what we
2012
	 * will inject to ring
2013
	 */
2014
	ret = i915_gem_init_seqno(dev, seqno - 1);
2015
	if (ret)
2016
		return ret;
2017
 
2018
	/* Carefully set the last_seqno value so that wrap
2019
	 * detection still works
2020
	 */
2021
	dev_priv->next_seqno = seqno;
2022
	dev_priv->last_seqno = seqno - 1;
2023
	if (dev_priv->last_seqno == 0)
2024
		dev_priv->last_seqno--;
2025
 
2026
	return 0;
2027
}
2028
 
3243 Serge 2029
int
2030
i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
2344 Serge 2031
{
3243 Serge 2032
	struct drm_i915_private *dev_priv = dev->dev_private;
2344 Serge 2033
 
3243 Serge 2034
	/* reserve 0 for non-seqno */
2035
	if (dev_priv->next_seqno == 0) {
3480 Serge 2036
		int ret = i915_gem_init_seqno(dev, 0);
3243 Serge 2037
		if (ret)
2038
			return ret;
2039
 
2040
		dev_priv->next_seqno = 1;
2041
	}
2042
 
3480 Serge 2043
	*seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
3243 Serge 2044
	return 0;
2332 Serge 2045
}
2046
 
6084 serge 2047
/*
2048
 * NB: This function is not allowed to fail. Doing so would mean the
2049
 * request is not being tracked for completion but the work itself is
2050
 * going to happen on the hardware. This would be a Bad Thing(tm).
2051
 */
2052
void __i915_add_request(struct drm_i915_gem_request *request,
2053
			struct drm_i915_gem_object *obj,
2054
			bool flush_caches)
2352 Serge 2055
{
6084 serge 2056
	struct intel_engine_cs *ring;
2057
	struct drm_i915_private *dev_priv;
5354 serge 2058
	struct intel_ringbuffer *ringbuf;
6084 serge 2059
	u32 request_start;
2352 Serge 2060
	int ret;
2332 Serge 2061
 
5354 serge 2062
	if (WARN_ON(request == NULL))
6084 serge 2063
		return;
5354 serge 2064
 
6084 serge 2065
	ring = request->ring;
2066
	dev_priv = ring->dev->dev_private;
2067
	ringbuf = request->ringbuf;
5354 serge 2068
 
6084 serge 2069
	/*
2070
	 * To ensure that this call will not fail, space for its emissions
2071
	 * should already have been reserved in the ring buffer. Let the ring
2072
	 * know that it is time to use that space up.
2073
	 */
2074
	intel_ring_reserved_space_use(ringbuf);
2075
 
5354 serge 2076
	request_start = intel_ring_get_tail(ringbuf);
3031 serge 2077
	/*
2078
	 * Emit any outstanding flushes - execbuf can fail to emit the flush
2079
	 * after having emitted the batchbuffer command. Hence we need to fix
2080
	 * things up similar to emitting the lazy request. The difference here
2081
	 * is that the flush _must_ happen before the next request, no matter
2082
	 * what.
2083
	 */
6084 serge 2084
	if (flush_caches) {
2085
		if (i915.enable_execlists)
2086
			ret = logical_ring_flush_all_caches(request);
2087
		else
2088
			ret = intel_ring_flush_all_caches(request);
2089
		/* Not allowed to fail! */
2090
		WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
5354 serge 2091
	}
2332 Serge 2092
 
3031 serge 2093
	/* Record the position of the start of the request so that
2094
	 * should we detect the updated seqno part-way through the
6084 serge 2095
	 * GPU processing the request, we never over-estimate the
3031 serge 2096
	 * position of the head.
2097
	 */
6084 serge 2098
	request->postfix = intel_ring_get_tail(ringbuf);
3031 serge 2099
 
6084 serge 2100
	if (i915.enable_execlists)
2101
		ret = ring->emit_request(request);
2102
	else {
2103
		ret = ring->add_request(request);
2104
 
2105
		request->tail = intel_ring_get_tail(ringbuf);
5354 serge 2106
	}
6084 serge 2107
	/* Not allowed to fail! */
2108
	WARN(ret, "emit|add_request failed: %d!\n", ret);
2332 Serge 2109
 
4104 Serge 2110
	request->head = request_start;
2111
 
2112
	/* Whilst this request exists, batch_obj will be on the
2113
	 * active_list, and so will hold the active reference. Only when this
2114
	 * request is retired will the batch_obj be moved onto the
2115
	 * inactive_list and lose its active reference. Hence we do not need
2116
	 * to explicitly hold another reference here.
2117
	 */
4560 Serge 2118
	request->batch_obj = obj;
4104 Serge 2119
 
5060 serge 2120
	request->emitted_jiffies = jiffies;
6084 serge 2121
	request->previous_seqno = ring->last_submitted_seqno;
2122
	ring->last_submitted_seqno = request->seqno;
2352 Serge 2123
	list_add_tail(&request->list, &ring->request_list);
2332 Serge 2124
 
6084 serge 2125
	trace_i915_gem_request_add(request);
2332 Serge 2126
 
6084 serge 2127
//	i915_queue_hangcheck(ring->dev);
3263 Serge 2128
 
6084 serge 2129
	queue_delayed_work(dev_priv->wq,
2130
			   &dev_priv->mm.retire_work,
2131
			   round_jiffies_up_relative(HZ));
2132
	intel_mark_busy(dev_priv->dev);
2332 Serge 2133
 
6084 serge 2134
	/* Sanity check that the reserved size was large enough. */
2135
	intel_ring_reserved_space_end(ringbuf);
2352 Serge 2136
}
2332 Serge 2137
 
5060 serge 2138
static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
2139
				   const struct intel_context *ctx)
4104 Serge 2140
{
5060 serge 2141
	unsigned long elapsed;
4104 Serge 2142
 
5060 serge 2143
    elapsed = GetTimerTicks()/100 - ctx->hang_stats.guilty_ts;
4104 Serge 2144
 
5060 serge 2145
	if (ctx->hang_stats.banned)
2146
		return true;
4104 Serge 2147
 
6084 serge 2148
	if (ctx->hang_stats.ban_period_seconds &&
2149
	    elapsed <= ctx->hang_stats.ban_period_seconds) {
5060 serge 2150
		if (!i915_gem_context_is_default(ctx)) {
2151
			DRM_DEBUG("context hanging too fast, banning!\n");
4104 Serge 2152
			return true;
5060 serge 2153
		} else if (i915_stop_ring_allow_ban(dev_priv)) {
2154
			if (i915_stop_ring_allow_warn(dev_priv))
6084 serge 2155
				DRM_ERROR("gpu hanging too fast, banning!\n");
4104 Serge 2156
			return true;
6084 serge 2157
		}
4104 Serge 2158
	}
2159
 
2160
	return false;
2161
}
2162
 
5060 serge 2163
static void i915_set_reset_status(struct drm_i915_private *dev_priv,
2164
				  struct intel_context *ctx,
2165
				  const bool guilty)
4560 Serge 2166
{
5060 serge 2167
	struct i915_ctx_hang_stats *hs;
4560 Serge 2168
 
5060 serge 2169
	if (WARN_ON(!ctx))
2170
		return;
4560 Serge 2171
 
5060 serge 2172
	hs = &ctx->hang_stats;
4560 Serge 2173
 
5060 serge 2174
	if (guilty) {
2175
		hs->banned = i915_context_is_banned(dev_priv, ctx);
2176
		hs->batch_active++;
2177
        hs->guilty_ts = GetTimerTicks()/100;
2178
	} else {
2179
		hs->batch_pending++;
4104 Serge 2180
	}
2181
}
2182
 
6084 serge 2183
void i915_gem_request_free(struct kref *req_ref)
4104 Serge 2184
{
6084 serge 2185
	struct drm_i915_gem_request *req = container_of(req_ref,
2186
						 typeof(*req), ref);
2187
	struct intel_context *ctx = req->ctx;
5354 serge 2188
 
6084 serge 2189
	if (req->file_priv)
2190
		i915_gem_request_remove_from_client(req);
4104 Serge 2191
 
5354 serge 2192
	if (ctx) {
2193
		if (i915.enable_execlists) {
6084 serge 2194
			if (ctx != req->ring->default_context)
2195
				intel_lr_context_unpin(req);
2196
		}
4104 Serge 2197
 
5354 serge 2198
		i915_gem_context_unreference(ctx);
2199
	}
6084 serge 2200
 
2201
	kfree(req);
4104 Serge 2202
}
2203
 
6084 serge 2204
int i915_gem_request_alloc(struct intel_engine_cs *ring,
2205
			   struct intel_context *ctx,
2206
			   struct drm_i915_gem_request **req_out)
2207
{
2208
	struct drm_i915_private *dev_priv = to_i915(ring->dev);
2209
	struct drm_i915_gem_request *req;
2210
	int ret;
2211
 
2212
	if (!req_out)
2213
		return -EINVAL;
2214
 
2215
	*req_out = NULL;
2216
 
2217
//	req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
2218
	req = kzalloc(sizeof(*req), 0);
2219
	if (req == NULL)
2220
		return -ENOMEM;
2221
 
2222
	ret = i915_gem_get_seqno(ring->dev, &req->seqno);
2223
	if (ret)
2224
		goto err;
2225
 
2226
	kref_init(&req->ref);
2227
	req->i915 = dev_priv;
2228
	req->ring = ring;
2229
	req->ctx  = ctx;
2230
	i915_gem_context_reference(req->ctx);
2231
 
2232
	if (i915.enable_execlists)
2233
		ret = intel_logical_ring_alloc_request_extras(req);
2234
	else
2235
		ret = intel_ring_alloc_request_extras(req);
2236
	if (ret) {
2237
		i915_gem_context_unreference(req->ctx);
2238
		goto err;
2239
	}
2240
 
2241
	/*
2242
	 * Reserve space in the ring buffer for all the commands required to
2243
	 * eventually emit this request. This is to guarantee that the
2244
	 * i915_add_request() call can't fail. Note that the reserve may need
2245
	 * to be redone if the request is not actually submitted straight
2246
	 * away, e.g. because a GPU scheduler has deferred it.
2247
	 */
2248
	if (i915.enable_execlists)
2249
		ret = intel_logical_ring_reserve_space(req);
2250
	else
2251
		ret = intel_ring_reserve_space(req);
2252
	if (ret) {
2253
		/*
2254
		 * At this point, the request is fully allocated even if not
2255
		 * fully prepared. Thus it can be cleaned up using the proper
2256
		 * free code.
2257
		 */
2258
		i915_gem_request_cancel(req);
2259
		return ret;
2260
	}
2261
 
2262
	*req_out = req;
2263
	return 0;
2264
 
2265
err:
2266
	kfree(req);
2267
	return ret;
2268
}
2269
 
2270
void i915_gem_request_cancel(struct drm_i915_gem_request *req)
2271
{
2272
	intel_ring_reserved_space_cancel(req->ringbuf);
2273
 
2274
	i915_gem_request_unreference(req);
2275
}
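/*
 * Illustrative sketch, not part of this file: the request life cycle as
 * used by callers in this file (see i915_gpu_idle() further down for the
 * real thing).  i915_gem_request_alloc() reserves ring space so that the
 * final add-request step cannot fail; a caller that bails out before
 * submitting must call i915_gem_request_cancel() to release that
 * reservation.  The function name is made up for the example.
 */
#if 0
static int i915_gem_example_submit_switch(struct intel_engine_cs *ring)
{
	struct drm_i915_gem_request *req;
	int ret;

	ret = i915_gem_request_alloc(ring, ring->default_context, &req);
	if (ret)
		return ret;

	ret = i915_switch_context(req);		/* any work that may fail */
	if (ret) {
		i915_gem_request_cancel(req);	/* give the reservation back */
		return ret;
	}

	i915_add_request_no_flush(req);		/* must not fail past this point */
	return 0;
}
#endif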
2276
 
5060 serge 2277
struct drm_i915_gem_request *
2278
i915_gem_find_active_request(struct intel_engine_cs *ring)
3031 serge 2279
{
4539 Serge 2280
	struct drm_i915_gem_request *request;
4104 Serge 2281
 
4539 Serge 2282
	list_for_each_entry(request, &ring->request_list, list) {
6084 serge 2283
		if (i915_gem_request_completed(request, false))
4539 Serge 2284
			continue;
4104 Serge 2285
 
5060 serge 2286
		return request;
4539 Serge 2287
	}
5060 serge 2288
 
2289
	return NULL;
4539 Serge 2290
}
2291
 
5060 serge 2292
static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
2293
				       struct intel_engine_cs *ring)
2294
{
2295
	struct drm_i915_gem_request *request;
2296
	bool ring_hung;
2297
 
2298
	request = i915_gem_find_active_request(ring);
2299
 
2300
	if (request == NULL)
2301
		return;
2302
 
2303
	ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
2304
 
2305
	i915_set_reset_status(dev_priv, request->ctx, ring_hung);
2306
 
2307
	list_for_each_entry_continue(request, &ring->request_list, list)
2308
		i915_set_reset_status(dev_priv, request->ctx, false);
2309
}
2310
 
4539 Serge 2311
static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
5060 serge 2312
					struct intel_engine_cs *ring)
4539 Serge 2313
{
4560 Serge 2314
	while (!list_empty(&ring->active_list)) {
2315
		struct drm_i915_gem_object *obj;
2316
 
2317
		obj = list_first_entry(&ring->active_list,
2318
				       struct drm_i915_gem_object,
6084 serge 2319
				       ring_list[ring->id]);
4560 Serge 2320
 
6084 serge 2321
		i915_gem_object_retire__read(obj, ring->id);
4560 Serge 2322
	}
2323
 
2324
	/*
5354 serge 2325
	 * Clear the execlists queue up before freeing the requests, as those
2326
	 * are the ones that keep the context and ringbuffer backing objects
2327
	 * pinned in place.
2328
	 */
2329
	while (!list_empty(&ring->execlist_queue)) {
6084 serge 2330
		struct drm_i915_gem_request *submit_req;
5354 serge 2331
 
2332
		submit_req = list_first_entry(&ring->execlist_queue,
6084 serge 2333
				struct drm_i915_gem_request,
5354 serge 2334
				execlist_link);
2335
		list_del(&submit_req->execlist_link);
6084 serge 2336
 
2337
		if (submit_req->ctx != ring->default_context)
2338
			intel_lr_context_unpin(submit_req);
2339
 
2340
		i915_gem_request_unreference(submit_req);
5354 serge 2341
	}
2342
 
2343
	/*
4560 Serge 2344
	 * We must free the requests after all the corresponding objects have
2345
	 * been moved off active lists. Which is the same order as the normal
2346
	 * retire_requests function does. This is important if object hold
2347
	 * implicit references on things like e.g. ppgtt address spaces through
2348
	 * the request.
2349
	 */
3031 serge 2350
	while (!list_empty(&ring->request_list)) {
2351
		struct drm_i915_gem_request *request;
2332 Serge 2352
 
3031 serge 2353
		request = list_first_entry(&ring->request_list,
2354
					   struct drm_i915_gem_request,
2355
					   list);
2332 Serge 2356
 
6084 serge 2357
		i915_gem_request_retire(request);
3031 serge 2358
	}
2359
}
2332 Serge 2360
 
3031 serge 2361
void i915_gem_reset(struct drm_device *dev)
2362
{
2363
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 2364
	struct intel_engine_cs *ring;
3031 serge 2365
	int i;
2360 Serge 2366
 
4539 Serge 2367
	/*
2368
	 * Before we free the objects from the requests, we need to inspect
2369
	 * them for finding the guilty party. As the requests only borrow
2370
	 * their reference to the objects, the inspection must be done first.
2371
	 */
3031 serge 2372
	for_each_ring(ring, dev_priv, i)
4539 Serge 2373
		i915_gem_reset_ring_status(dev_priv, ring);
2360 Serge 2374
 
4539 Serge 2375
	for_each_ring(ring, dev_priv, i)
2376
		i915_gem_reset_ring_cleanup(dev_priv, ring);
2377
 
5060 serge 2378
	i915_gem_context_reset(dev);
4560 Serge 2379
 
3746 Serge 2380
	i915_gem_restore_fences(dev);
6084 serge 2381
 
2382
	WARN_ON(i915_verify_lists(dev));
3031 serge 2383
}
2360 Serge 2384
 
2352 Serge 2385
/**
2386
 * This function clears the request list as sequence numbers are passed.
2387
 */
3031 serge 2388
void
5060 serge 2389
i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
2352 Serge 2390
{
6084 serge 2391
	WARN_ON(i915_verify_lists(ring->dev));
2332 Serge 2392
 
6084 serge 2393
	/* Retire requests first as we use it above for the early return.
2394
	 * If we retire requests last, we may use a later seqno and so clear
2395
	 * the requests lists without clearing the active list, leading to
2396
	 * confusion.
2397
	 */
2398
	while (!list_empty(&ring->request_list)) {
2399
		struct drm_i915_gem_request *request;
2332 Serge 2400
 
6084 serge 2401
		request = list_first_entry(&ring->request_list,
2402
					   struct drm_i915_gem_request,
2403
					   list);
2332 Serge 2404
 
6084 serge 2405
		if (!i915_gem_request_completed(request, true))
2406
			break;
2332 Serge 2407
 
6084 serge 2408
		i915_gem_request_retire(request);
2409
	}
2410
 
5060 serge 2411
	/* Move any buffers on the active list that are no longer referenced
2412
	 * by the ringbuffer to the flushing/inactive lists as appropriate,
2413
	 * before we free the context associated with the requests.
2414
	 */
2415
	while (!list_empty(&ring->active_list)) {
2416
		struct drm_i915_gem_object *obj;
2417
 
2418
		obj = list_first_entry(&ring->active_list,
2419
				      struct drm_i915_gem_object,
6084 serge 2420
				      ring_list[ring->id]);
5060 serge 2421
 
6084 serge 2422
		if (!list_empty(&obj->last_read_req[ring->id]->list))
5060 serge 2423
			break;
2424
 
6084 serge 2425
		i915_gem_object_retire__read(obj, ring->id);
5060 serge 2426
	}
2427
 
6084 serge 2428
	if (unlikely(ring->trace_irq_req &&
2429
		     i915_gem_request_completed(ring->trace_irq_req, true))) {
2352 Serge 2430
		ring->irq_put(ring);
6084 serge 2431
		i915_gem_request_assign(&ring->trace_irq_req, NULL);
2352 Serge 2432
	}
2332 Serge 2433
 
2352 Serge 2434
	WARN_ON(i915_verify_lists(ring->dev));
2435
}
2332 Serge 2436
 
4560 Serge 2437
bool
2352 Serge 2438
i915_gem_retire_requests(struct drm_device *dev)
2439
{
5060 serge 2440
	struct drm_i915_private *dev_priv = dev->dev_private;
2441
	struct intel_engine_cs *ring;
4560 Serge 2442
	bool idle = true;
2352 Serge 2443
	int i;
2332 Serge 2444
 
4560 Serge 2445
	for_each_ring(ring, dev_priv, i) {
3031 serge 2446
		i915_gem_retire_requests_ring(ring);
4560 Serge 2447
		idle &= list_empty(&ring->request_list);
5354 serge 2448
		if (i915.enable_execlists) {
2449
			unsigned long flags;
2450
 
2451
			spin_lock_irqsave(&ring->execlist_lock, flags);
2452
			idle &= list_empty(&ring->execlist_queue);
2453
			spin_unlock_irqrestore(&ring->execlist_lock, flags);
2454
 
2455
			intel_execlists_retire_requests(ring);
2456
		}
4560 Serge 2457
	}
2458
 
2459
	if (idle)
2460
		mod_delayed_work(dev_priv->wq,
2461
				   &dev_priv->mm.idle_work,
2462
				   msecs_to_jiffies(100));
2463
 
2464
	return idle;
2352 Serge 2465
}
2466
 
2360 Serge 2467
static void
2468
i915_gem_retire_work_handler(struct work_struct *work)
2469
{
4560 Serge 2470
	struct drm_i915_private *dev_priv =
2471
		container_of(work, typeof(*dev_priv), mm.retire_work.work);
2472
	struct drm_device *dev = dev_priv->dev;
2360 Serge 2473
	bool idle;
2352 Serge 2474
 
2360 Serge 2475
	/* Come back later if the device is busy... */
4560 Serge 2476
	idle = false;
2477
	if (mutex_trylock(&dev->struct_mutex)) {
2478
		idle = i915_gem_retire_requests(dev);
2479
		mutex_unlock(&dev->struct_mutex);
2480
	}
2481
	if (!idle)
3482 Serge 2482
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
2483
				   round_jiffies_up_relative(HZ));
4560 Serge 2484
}
2352 Serge 2485
 
4560 Serge 2486
static void
2487
i915_gem_idle_work_handler(struct work_struct *work)
2488
{
2489
	struct drm_i915_private *dev_priv =
2490
		container_of(work, typeof(*dev_priv), mm.idle_work.work);
6084 serge 2491
	struct drm_device *dev = dev_priv->dev;
2492
	struct intel_engine_cs *ring;
2493
	int i;
2352 Serge 2494
 
6084 serge 2495
	for_each_ring(ring, dev_priv, i)
2496
		if (!list_empty(&ring->request_list))
2497
			return;
2498
 
2499
	intel_mark_idle(dev);
2500
 
2501
	if (mutex_trylock(&dev->struct_mutex)) {
2502
		struct intel_engine_cs *ring;
2503
		int i;
2504
 
2505
		for_each_ring(ring, dev_priv, i)
2506
			i915_gem_batch_pool_fini(&ring->batch_pool);
2507
 
2508
		mutex_unlock(&dev->struct_mutex);
2509
	}
2360 Serge 2510
}
2511
 
2344 Serge 2512
/**
3031 serge 2513
 * Ensures that an object will eventually get non-busy by flushing any required
2514
 * write domains, emitting any outstanding lazy request and retiring and
2515
 * completed requests.
2352 Serge 2516
 */
3031 serge 2517
static int
2518
i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
2352 Serge 2519
{
6084 serge 2520
	int i;
2352 Serge 2521
 
6084 serge 2522
	if (!obj->active)
2523
		return 0;
2352 Serge 2524
 
6084 serge 2525
	for (i = 0; i < I915_NUM_RINGS; i++) {
2526
		struct drm_i915_gem_request *req;
2527
 
2528
		req = obj->last_read_req[i];
2529
		if (req == NULL)
2530
			continue;
2531
 
2532
		if (list_empty(&req->list))
2533
			goto retire;
2534
 
2535
		if (i915_gem_request_completed(req, true)) {
2536
			__i915_gem_request_retire__upto(req);
2537
retire:
2538
			i915_gem_object_retire__read(obj, i);
2539
		}
3031 serge 2540
	}
2352 Serge 2541
 
3031 serge 2542
	return 0;
2543
}
2352 Serge 2544
 
3243 Serge 2545
/**
2546
 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2547
 * @DRM_IOCTL_ARGS: standard ioctl arguments
2548
 *
2549
 * Returns 0 if successful, else an error is returned with the remaining time in
2550
 * the timeout parameter.
2551
 *  -ETIME: object is still busy after timeout
2552
 *  -ERESTARTSYS: signal interrupted the wait
2553
 *  -ENOENT: object doesn't exist
2554
 * Also possible, but rare:
2555
 *  -EAGAIN: GPU wedged
2556
 *  -ENOMEM: damn
2557
 *  -ENODEV: Internal IRQ fail
2558
 *  -E?: The add request failed
2559
 *
2560
 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2561
 * non-zero timeout parameter the wait ioctl will wait for the given number of
2562
 * nanoseconds on an object becoming unbusy. Since the wait itself does so
2563
 * without holding struct_mutex the object may become re-busied before this
2564
 * function completes. A similar but shorter race condition exists in the busy
2565
 * ioctl
2566
 */
4246 Serge 2567
int
2568
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2569
{
5060 serge 2570
	struct drm_i915_private *dev_priv = dev->dev_private;
4246 Serge 2571
	struct drm_i915_gem_wait *args = data;
2572
	struct drm_i915_gem_object *obj;
6084 serge 2573
	struct drm_i915_gem_request *req[I915_NUM_RINGS];
4246 Serge 2574
	unsigned reset_counter;
6084 serge 2575
	int i, n = 0;
2576
	int ret;
2352 Serge 2577
 
5354 serge 2578
	if (args->flags != 0)
2579
		return -EINVAL;
2580
 
4246 Serge 2581
	ret = i915_mutex_lock_interruptible(dev);
2582
	if (ret)
2583
		return ret;
2352 Serge 2584
 
4246 Serge 2585
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
2586
	if (&obj->base == NULL) {
2587
		mutex_unlock(&dev->struct_mutex);
2588
		return -ENOENT;
2589
	}
2352 Serge 2590
 
4246 Serge 2591
	/* Need to make sure the object gets inactive eventually. */
2592
	ret = i915_gem_object_flush_active(obj);
2593
	if (ret)
2594
		goto out;
2352 Serge 2595
 
6084 serge 2596
	if (!obj->active)
2597
		goto out;
2352 Serge 2598
 
4246 Serge 2599
	/* Do this after OLR check to make sure we make forward progress polling
6084 serge 2600
	 * on this IOCTL with a timeout == 0 (like busy ioctl)
4246 Serge 2601
	 */
6084 serge 2602
	if (args->timeout_ns == 0) {
4246 Serge 2603
		ret = -ETIME;
2604
		goto out;
2605
	}
2352 Serge 2606
 
4246 Serge 2607
	drm_gem_object_unreference(&obj->base);
2608
	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
6084 serge 2609
 
2610
	for (i = 0; i < I915_NUM_RINGS; i++) {
2611
		if (obj->last_read_req[i] == NULL)
2612
			continue;
2613
 
2614
		req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
2615
	}
2616
 
4246 Serge 2617
	mutex_unlock(&dev->struct_mutex);
2352 Serge 2618
 
6084 serge 2619
	for (i = 0; i < n; i++) {
2620
		if (ret == 0)
2621
			ret = __i915_wait_request(req[i], reset_counter, true,
2622
						  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
2623
						  file->driver_priv);
2624
		i915_gem_request_unreference__unlocked(req[i]);
2625
	}
2626
	return ret;
3243 Serge 2627
 
4246 Serge 2628
out:
2629
	drm_gem_object_unreference(&obj->base);
2630
	mutex_unlock(&dev->struct_mutex);
2631
	return ret;
2632
}
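/*
 * Illustrative userspace sketch, not part of this file: driving the wait
 * ioctl described above.  A timeout of 0 acts as a non-blocking busy
 * check (-ETIME if still busy), a positive value waits for at most that
 * many nanoseconds, and a negative value waits until the object is idle.
 * "fd" and "handle" are assumed to be supplied by the caller.
 */
#if 0
#include <stdint.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int wait_bo(int fd, uint32_t handle, int64_t timeout_ns)
{
	struct drm_i915_gem_wait wait = {
		.bo_handle  = handle,
		.flags      = 0,		/* must be zero */
		.timeout_ns = timeout_ns,
	};

	return drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
}
#endif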
3243 Serge 2633
 
6084 serge 2634
static int
2635
__i915_gem_object_sync(struct drm_i915_gem_object *obj,
2636
		       struct intel_engine_cs *to,
2637
		       struct drm_i915_gem_request *from_req,
2638
		       struct drm_i915_gem_request **to_req)
2639
{
2640
	struct intel_engine_cs *from;
2641
	int ret;
2642
 
2643
	from = i915_gem_request_get_ring(from_req);
2644
	if (to == from)
2645
		return 0;
2646
 
2647
	if (i915_gem_request_completed(from_req, true))
2648
		return 0;
2649
 
2650
	if (!i915_semaphore_is_enabled(obj->base.dev)) {
2651
		struct drm_i915_private *i915 = to_i915(obj->base.dev);
2652
		ret = __i915_wait_request(from_req,
2653
					  atomic_read(&i915->gpu_error.reset_counter),
2654
					  i915->mm.interruptible,
2655
					  NULL,
2656
					  &i915->rps.semaphores);
2657
		if (ret)
2658
			return ret;
2659
 
2660
		i915_gem_object_retire_request(obj, from_req);
2661
	} else {
2662
		int idx = intel_ring_sync_index(from, to);
2663
		u32 seqno = i915_gem_request_get_seqno(from_req);
2664
 
2665
		WARN_ON(!to_req);
2666
 
2667
		if (seqno <= from->semaphore.sync_seqno[idx])
2668
			return 0;
2669
 
2670
		if (*to_req == NULL) {
2671
			ret = i915_gem_request_alloc(to, to->default_context, to_req);
2672
			if (ret)
2673
				return ret;
2674
		}
2675
 
2676
		trace_i915_gem_ring_sync_to(*to_req, from, from_req);
2677
		ret = to->semaphore.sync_to(*to_req, from, seqno);
2678
		if (ret)
2679
			return ret;
2680
 
2681
		/* We use last_read_req because sync_to()
2682
		 * might have just caused seqno wrap under
2683
		 * the radar.
2684
		 */
2685
		from->semaphore.sync_seqno[idx] =
2686
			i915_gem_request_get_seqno(obj->last_read_req[from->id]);
2687
	}
2688
 
2689
	return 0;
2690
}
2691
 
2352 Serge 2692
/**
3031 serge 2693
 * i915_gem_object_sync - sync an object to a ring.
2694
 *
2695
 * @obj: object which may be in use on another ring.
2696
 * @to: ring we wish to use the object on. May be NULL.
6084 serge 2697
 * @to_req: request we wish to use the object for. See below.
2698
 *          This will be allocated and returned if a request is
2699
 *          required but not passed in.
3031 serge 2700
 *
2701
 * This code is meant to abstract object synchronization with the GPU.
2702
 * Calling with NULL implies synchronizing the object with the CPU
6084 serge 2703
 * rather than a particular GPU ring. Conceptually we serialise writes
2704
 * between engines inside the GPU. We only allow one engine to write
2705
 * into a buffer at any time, but multiple readers. To ensure each has
2706
 * a coherent view of memory, we must:
3031 serge 2707
 *
6084 serge 2708
 * - If there is an outstanding write request to the object, the new
2709
 *   request must wait for it to complete (either CPU or in hw, requests
2710
 *   on the same ring will be naturally ordered).
2711
 *
2712
 * - If we are a write request (pending_write_domain is set), the new
2713
 *   request must wait for outstanding read requests to complete.
2714
 *
2715
 * For CPU synchronisation (NULL to) no request is required. For syncing with
2716
 * rings to_req must be non-NULL. However, a request does not have to be
2717
 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a
2718
 * request will be allocated automatically and returned through *to_req. Note
2719
 * that it is not guaranteed that commands will be emitted (because the system
2720
 * might already be idle). Hence there is no need to create a request that
2721
 * might never have any work submitted. Note further that if a request is
2722
 * returned in *to_req, it is the responsibility of the caller to submit
2723
 * that request (after potentially adding more work to it).
2724
 *
3031 serge 2725
 * Returns 0 if successful, else propagates up the lower layer error.
2344 Serge 2726
 */
2727
int
3031 serge 2728
i915_gem_object_sync(struct drm_i915_gem_object *obj,
6084 serge 2729
		     struct intel_engine_cs *to,
2730
		     struct drm_i915_gem_request **to_req)
2344 Serge 2731
{
6084 serge 2732
	const bool readonly = obj->base.pending_write_domain == 0;
2733
	struct drm_i915_gem_request *req[I915_NUM_RINGS];
2734
	int ret, i, n;
2332 Serge 2735
 
6084 serge 2736
	if (!obj->active)
3031 serge 2737
		return 0;
2332 Serge 2738
 
6084 serge 2739
	if (to == NULL)
2740
		return i915_gem_object_wait_rendering(obj, readonly);
2332 Serge 2741
 
6084 serge 2742
	n = 0;
2743
	if (readonly) {
2744
		if (obj->last_write_req)
2745
			req[n++] = obj->last_write_req;
2746
	} else {
2747
		for (i = 0; i < I915_NUM_RINGS; i++)
2748
			if (obj->last_read_req[i])
2749
				req[n++] = obj->last_read_req[i];
2750
	}
2751
	for (i = 0; i < n; i++) {
2752
		ret = __i915_gem_object_sync(obj, to, req[i], to_req);
2753
		if (ret)
2754
			return ret;
2755
	}
3031 serge 2756
 
6084 serge 2757
	return 0;
2344 Serge 2758
}
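/*
 * Illustrative sketch, not part of this file: how a caller that is about
 * to use an object on ring "to" serialises against outstanding work, per
 * the contract documented above.  If *to_req is allocated on demand by
 * i915_gem_object_sync(), the caller remains responsible for submitting
 * (or cancelling) that request afterwards.  The function name is made up
 * for the example.
 */
#if 0
static int i915_gem_example_use_on_ring(struct drm_i915_gem_object *obj,
					struct intel_engine_cs *to,
					struct drm_i915_gem_request **to_req)
{
	int ret;

	ret = i915_gem_object_sync(obj, to, to_req);
	if (ret)
		return ret;

	/* ... emit the commands that read from or write to obj on "to" ... */
	return 0;
}
#endif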
2332 Serge 2759
 
2344 Serge 2760
static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2761
{
2762
	u32 old_write_domain, old_read_domains;
2332 Serge 2763
 
2344 Serge 2764
	/* Force a pagefault for domain tracking on next user access */
6084 serge 2765
	i915_gem_release_mmap(obj);
2332 Serge 2766
 
2344 Serge 2767
	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2768
		return;
2332 Serge 2769
 
3480 Serge 2770
	/* Wait for any direct GTT access to complete */
2771
	mb();
2772
 
2344 Serge 2773
	old_read_domains = obj->base.read_domains;
2774
	old_write_domain = obj->base.write_domain;
2351 Serge 2775
 
2344 Serge 2776
	obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2777
	obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
2332 Serge 2778
 
2351 Serge 2779
	trace_i915_gem_object_change_domain(obj,
2780
					    old_read_domains,
2781
					    old_write_domain);
2344 Serge 2782
}
2332 Serge 2783
 
6084 serge 2784
static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
2344 Serge 2785
{
4104 Serge 2786
	struct drm_i915_gem_object *obj = vma->obj;
5060 serge 2787
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3480 Serge 2788
	int ret;
3263 Serge 2789
    if(obj == get_fb_obj())
5367 serge 2790
    {
2791
        WARN(1,"attempt to unbind fb object\n");
3263 Serge 2792
        return 0;
5367 serge 2793
    };
3263 Serge 2794
 
4104 Serge 2795
	if (list_empty(&vma->vma_link))
2344 Serge 2796
		return 0;
2332 Serge 2797
 
4560 Serge 2798
	if (!drm_mm_node_allocated(&vma->node)) {
2799
		i915_gem_vma_destroy(vma);
2800
		return 0;
2801
	}
2802
 
5060 serge 2803
	if (vma->pin_count)
3031 serge 2804
		return -EBUSY;
2332 Serge 2805
 
3243 Serge 2806
	BUG_ON(obj->pages == NULL);
3031 serge 2807
 
6084 serge 2808
	if (wait) {
2809
		ret = i915_gem_object_wait_rendering(obj, false);
2810
		if (ret)
2811
			return ret;
2812
	}
2332 Serge 2813
 
6084 serge 2814
	if (i915_is_ggtt(vma->vm) &&
2815
	    vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
2816
		i915_gem_object_finish_gtt(obj);
5354 serge 2817
 
6084 serge 2818
		/* release the fence reg _after_ flushing */
2819
		ret = i915_gem_object_put_fence(obj);
2820
		if (ret)
2821
			return ret;
5060 serge 2822
	}
2332 Serge 2823
 
4104 Serge 2824
	trace_i915_vma_unbind(vma);
2332 Serge 2825
 
6084 serge 2826
	vma->vm->unbind_vma(vma);
2827
	vma->bound = 0;
2332 Serge 2828
 
5060 serge 2829
	list_del_init(&vma->mm_list);
6084 serge 2830
	if (i915_is_ggtt(vma->vm)) {
2831
		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
2832
			obj->map_and_fenceable = false;
2833
		} else if (vma->ggtt_view.pages) {
2834
			sg_free_table(vma->ggtt_view.pages);
2835
			kfree(vma->ggtt_view.pages);
2836
		}
2837
		vma->ggtt_view.pages = NULL;
2838
	}
2332 Serge 2839
 
4104 Serge 2840
	drm_mm_remove_node(&vma->node);
2841
	i915_gem_vma_destroy(vma);
2842
 
2843
	/* Since the unbound list is global, only move to that list if
4560 Serge 2844
	 * no more VMAs exist. */
6084 serge 2845
	if (list_empty(&obj->vma_list))
4104 Serge 2846
		list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
2847
 
4560 Serge 2848
	/* And finally now the object is completely decoupled from this vma,
2849
	 * we can drop its hold on the backing storage and allow it to be
2850
	 * reaped by the shrinker.
2851
	 */
2852
	i915_gem_object_unpin_pages(obj);
2853
 
2344 Serge 2854
	return 0;
2855
}
2332 Serge 2856
 
6084 serge 2857
int i915_vma_unbind(struct i915_vma *vma)
2858
{
2859
	return __i915_vma_unbind(vma, true);
2860
}
2861
 
2862
int __i915_vma_unbind_no_wait(struct i915_vma *vma)
2863
{
2864
	return __i915_vma_unbind(vma, false);
2865
}
2866
 
3031 serge 2867
int i915_gpu_idle(struct drm_device *dev)
2344 Serge 2868
{
5060 serge 2869
	struct drm_i915_private *dev_priv = dev->dev_private;
2870
	struct intel_engine_cs *ring;
2344 Serge 2871
	int ret, i;
2332 Serge 2872
 
2344 Serge 2873
	/* Flush everything onto the inactive list. */
3031 serge 2874
	for_each_ring(ring, dev_priv, i) {
5354 serge 2875
		if (!i915.enable_execlists) {
6084 serge 2876
			struct drm_i915_gem_request *req;
3031 serge 2877
 
6084 serge 2878
			ret = i915_gem_request_alloc(ring, ring->default_context, &req);
2352 Serge 2879
			if (ret)
2880
				return ret;
2344 Serge 2881
 
6084 serge 2882
			ret = i915_switch_context(req);
2883
			if (ret) {
2884
				i915_gem_request_cancel(req);
2885
				return ret;
2886
			}
2344 Serge 2887
 
6084 serge 2888
			i915_add_request_no_flush(req);
2889
		}
2332 Serge 2890
 
6084 serge 2891
		ret = intel_ring_idle(ring);
3031 serge 2892
		if (ret)
2893
			return ret;
2894
	}
2332 Serge 2895
 
6084 serge 2896
	WARN_ON(i915_verify_lists(dev));
3031 serge 2897
	return 0;
2898
}
2332 Serge 2899
 
5354 serge 2900
static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
3031 serge 2901
				     unsigned long cache_level)
2902
{
5354 serge 2903
	struct drm_mm_node *gtt_space = &vma->node;
3031 serge 2904
	struct drm_mm_node *other;
2332 Serge 2905
 
5354 serge 2906
	/*
2907
	 * On some machines we have to be careful when putting differing types
2908
	 * of snoopable memory together to avoid the prefetcher crossing memory
2909
	 * domains and dying. During vm initialisation, we decide whether or not
2910
	 * these constraints apply and set the drm_mm.color_adjust
2911
	 * appropriately.
3031 serge 2912
	 */
5354 serge 2913
	if (vma->vm->mm.color_adjust == NULL)
3031 serge 2914
		return true;
2332 Serge 2915
 
4104 Serge 2916
	if (!drm_mm_node_allocated(gtt_space))
3031 serge 2917
		return true;
2332 Serge 2918
 
3031 serge 2919
	if (list_empty(&gtt_space->node_list))
2920
		return true;
2332 Serge 2921
 
3031 serge 2922
	other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
2923
	if (other->allocated && !other->hole_follows && other->color != cache_level)
2924
		return false;
2344 Serge 2925
 
3031 serge 2926
	other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
2927
	if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
2928
		return false;
2344 Serge 2929
 
3031 serge 2930
	return true;
2931
}
2344 Serge 2932
 
2332 Serge 2933
/**
6084 serge 2934
 * Finds free space in the GTT aperture and binds the object or a view of it
2935
 * there.
2332 Serge 2936
 */
5060 serge 2937
static struct i915_vma *
4104 Serge 2938
i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
2939
			   struct i915_address_space *vm,
6084 serge 2940
			   const struct i915_ggtt_view *ggtt_view,
2941
			   unsigned alignment,
5060 serge 2942
			   uint64_t flags)
2332 Serge 2943
{
2944
	struct drm_device *dev = obj->base.dev;
5060 serge 2945
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 2946
	u32 fence_alignment, unfenced_alignment;
2947
	u32 search_flag, alloc_flag;
2948
	u64 start, end;
2949
	u64 size, fence_size;
4104 Serge 2950
	struct i915_vma *vma;
2332 Serge 2951
	int ret;
2326 Serge 2952
 
6084 serge 2953
	if (i915_is_ggtt(vm)) {
2954
		u32 view_size;
2332 Serge 2955
 
6084 serge 2956
		if (WARN_ON(!ggtt_view))
2957
			return ERR_PTR(-EINVAL);
2958
 
2959
		view_size = i915_ggtt_view_size(obj, ggtt_view);
2960
 
2961
		fence_size = i915_gem_get_gtt_size(dev,
2962
						   view_size,
2963
						   obj->tiling_mode);
2964
		fence_alignment = i915_gem_get_gtt_alignment(dev,
2965
							     view_size,
2966
							     obj->tiling_mode,
2967
							     true);
2968
		unfenced_alignment = i915_gem_get_gtt_alignment(dev,
2969
								view_size,
2970
								obj->tiling_mode,
2971
								false);
2972
		size = flags & PIN_MAPPABLE ? fence_size : view_size;
2973
	} else {
2974
		fence_size = i915_gem_get_gtt_size(dev,
2975
						   obj->base.size,
2976
						   obj->tiling_mode);
2977
		fence_alignment = i915_gem_get_gtt_alignment(dev,
2978
							     obj->base.size,
2979
							     obj->tiling_mode,
2980
							     true);
2981
		unfenced_alignment =
2982
			i915_gem_get_gtt_alignment(dev,
2983
						   obj->base.size,
2984
						   obj->tiling_mode,
2985
						   false);
2986
		size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
2987
	}
2988
 
2989
	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
2990
	end = vm->total;
2991
	if (flags & PIN_MAPPABLE)
2992
		end = min_t(u64, end, dev_priv->gtt.mappable_end);
2993
	if (flags & PIN_ZONE_4G)
2994
		end = min_t(u64, end, (1ULL << 32));
2995
 
2332 Serge 2996
	if (alignment == 0)
5060 serge 2997
		alignment = flags & PIN_MAPPABLE ? fence_alignment :
2332 Serge 2998
						unfenced_alignment;
5060 serge 2999
	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
6084 serge 3000
		DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
3001
			  ggtt_view ? ggtt_view->type : 0,
3002
			  alignment);
5060 serge 3003
		return ERR_PTR(-EINVAL);
2332 Serge 3004
	}
3005
 
6084 serge 3006
	/* If binding the object/GGTT view requires more space than the entire
3007
	 * aperture has, reject it early before evicting everything in a vain
3008
	 * attempt to find space.
2332 Serge 3009
	 */
6084 serge 3010
	if (size > end) {
3011
		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
3012
			  ggtt_view ? ggtt_view->type : 0,
3013
			  size,
5060 serge 3014
			  flags & PIN_MAPPABLE ? "mappable" : "total",
3015
			  end);
3016
		return ERR_PTR(-E2BIG);
2332 Serge 3017
	}
3018
 
3031 serge 3019
	ret = i915_gem_object_get_pages(obj);
3020
	if (ret)
5060 serge 3021
		return ERR_PTR(ret);
3031 serge 3022
 
3243 Serge 3023
	i915_gem_object_pin_pages(obj);
3024
 
6084 serge 3025
	vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
3026
			  i915_gem_obj_lookup_or_create_vma(obj, vm);
3027
 
5060 serge 3028
	if (IS_ERR(vma))
4104 Serge 3029
		goto err_unpin;
3243 Serge 3030
 
6084 serge 3031
	if (flags & PIN_HIGH) {
3032
		search_flag = DRM_MM_SEARCH_BELOW;
3033
		alloc_flag = DRM_MM_CREATE_TOP;
3034
	} else {
3035
		search_flag = DRM_MM_SEARCH_DEFAULT;
3036
		alloc_flag = DRM_MM_CREATE_DEFAULT;
3037
	}
3038
 
4104 Serge 3039
search_free:
3040
	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
3041
						  size, alignment,
5060 serge 3042
						  obj->cache_level,
3043
						  start, end,
6084 serge 3044
						  search_flag,
3045
						  alloc_flag);
3243 Serge 3046
	if (ret) {
2332 Serge 3047
 
4104 Serge 3048
		goto err_free_vma;
2332 Serge 3049
	}
5354 serge 3050
	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
4104 Serge 3051
		ret = -EINVAL;
3052
		goto err_remove_node;
3031 serge 3053
	}
2332 Serge 3054
 
6084 serge 3055
	trace_i915_vma_bind(vma, flags);
3056
	ret = i915_vma_bind(vma, obj->cache_level, flags);
4104 Serge 3057
	if (ret)
3058
		goto err_remove_node;
2332 Serge 3059
 
4104 Serge 3060
	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
3061
	list_add_tail(&vma->mm_list, &vm->inactive_list);
2332 Serge 3062
 
5060 serge 3063
	return vma;
4104 Serge 3064
 
3065
err_remove_node:
3066
	drm_mm_remove_node(&vma->node);
3067
err_free_vma:
3068
	i915_gem_vma_destroy(vma);
5060 serge 3069
	vma = ERR_PTR(ret);
4104 Serge 3070
err_unpin:
3071
	i915_gem_object_unpin_pages(obj);
5060 serge 3072
	return vma;
2332 Serge 3073
}
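/*
 * Illustrative sketch, not part of this file: the binding above is
 * normally reached through the pin helpers rather than called directly.
 * This mirrors the flags used by the mmap_gtt path earlier in this file;
 * i915_gem_object_ggtt_unpin() is assumed to be the matching release
 * helper for i915_gem_obj_ggtt_pin(), and the function name is made up
 * for the example.
 */
#if 0
static int i915_gem_example_pin_mappable(struct drm_i915_gem_object *obj)
{
	int ret;

	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
	if (ret)
		return ret;

	/* ... access the object through the mappable aperture ... */

	i915_gem_object_ggtt_unpin(obj);
	return 0;
}
#endif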
3074
 
4104 Serge 3075
bool
3076
i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3077
			bool force)
2332 Serge 3078
{
3079
	/* If we don't have a page list set up, then we're not pinned
3080
	 * to GPU, and we can ignore the cache flush because it'll happen
3081
	 * again at bind time.
3082
	 */
3243 Serge 3083
	if (obj->pages == NULL)
4104 Serge 3084
		return false;
2332 Serge 3085
 
3480 Serge 3086
	/*
3087
	 * Stolen memory is always coherent with the GPU as it is explicitly
3088
	 * marked as wc by the system, or the system is cache-coherent.
3089
	 */
5354 serge 3090
	if (obj->stolen || obj->phys_handle)
4104 Serge 3091
		return false;
3480 Serge 3092
 
2332 Serge 3093
	/* If the GPU is snooping the contents of the CPU cache,
3094
	 * we do not need to manually clear the CPU cache lines.  However,
3095
	 * the caches are only snooped when the render cache is
3096
	 * flushed/invalidated.  As we always have to emit invalidations
3097
	 * and flushes when moving into and out of the RENDER domain, correct
3098
	 * snooping behaviour occurs naturally as the result of our domain
3099
	 * tracking.
3100
	 */
6084 serge 3101
	if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3102
		obj->cache_dirty = true;
4104 Serge 3103
		return false;
6084 serge 3104
	}
2332 Serge 3105
 
4293 Serge 3106
	trace_i915_gem_object_clflush(obj);
3107
	drm_clflush_sg(obj->pages);
6084 serge 3108
	obj->cache_dirty = false;
2344 Serge 3109
 
4104 Serge 3110
	return true;
2332 Serge 3111
}
3112
 
2344 Serge 3113
/** Flushes the GTT write domain for the object if it's dirty. */
3114
static void
3115
i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3116
{
3117
	uint32_t old_write_domain;
2332 Serge 3118
 
2344 Serge 3119
	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3120
		return;
2332 Serge 3121
 
2344 Serge 3122
	/* No actual flushing is required for the GTT write domain.  Writes
3123
	 * to it immediately go to main memory as far as we know, so there's
3124
	 * no chipset flush.  It also doesn't land in render cache.
3125
	 *
3126
	 * However, we do have to enforce the order so that all writes through
3127
	 * the GTT land before any writes to the device, such as updates to
3128
	 * the GATT itself.
3129
	 */
3130
	wmb();
2332 Serge 3131
 
2344 Serge 3132
	old_write_domain = obj->base.write_domain;
3133
	obj->base.write_domain = 0;
2332 Serge 3134
 
6084 serge 3135
	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
5354 serge 3136
 
2351 Serge 3137
	trace_i915_gem_object_change_domain(obj,
3138
					    obj->base.read_domains,
3139
					    old_write_domain);
2344 Serge 3140
}
2332 Serge 3141
 
3142
/** Flushes the CPU write domain for the object if it's dirty. */
2326 Serge 3143
static void
6084 serge 3144
i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
2332 Serge 3145
{
3146
	uint32_t old_write_domain;
3147
 
3148
	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
3149
		return;
3150
 
6084 serge 3151
	if (i915_gem_clflush_object(obj, obj->pin_display))
3152
		i915_gem_chipset_flush(obj->base.dev);
4104 Serge 3153
 
2332 Serge 3154
	old_write_domain = obj->base.write_domain;
3155
	obj->base.write_domain = 0;
3156
 
6084 serge 3157
	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
5354 serge 3158
 
2351 Serge 3159
	trace_i915_gem_object_change_domain(obj,
3160
					    obj->base.read_domains,
3161
					    old_write_domain);
2332 Serge 3162
}
3163
 
3164
/**
3165
 * Moves a single object to the GTT read, and possibly write domain.
3166
 *
3167
 * This function returns when the move is complete, including waiting on
3168
 * flushes to occur.
3169
 */
3170
int
3171
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3172
{
3173
	uint32_t old_write_domain, old_read_domains;
6084 serge 3174
	struct i915_vma *vma;
2332 Serge 3175
	int ret;
3176
 
3177
	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3178
		return 0;
3179
 
3031 serge 3180
	ret = i915_gem_object_wait_rendering(obj, !write);
6084 serge 3181
	if (ret)
3182
		return ret;
2332 Serge 3183
 
6084 serge 3184
	/* Flush and acquire obj->pages so that we are coherent through
3185
	 * direct access in memory with previous cached writes through
3186
	 * shmemfs and that our cache domain tracking remains valid.
3187
	 * For example, if the obj->filp was moved to swap without us
3188
	 * being notified and releasing the pages, we would mistakenly
3189
	 * continue to assume that the obj remained out of the CPU cached
3190
	 * domain.
3191
	 */
3192
	ret = i915_gem_object_get_pages(obj);
3193
	if (ret)
3194
		return ret;
2332 Serge 3195
 
6084 serge 3196
	i915_gem_object_flush_cpu_write_domain(obj);
3197
 
3480 Serge 3198
	/* Serialise direct access to this object with the barriers for
3199
	 * coherent writes from the GPU, by effectively invalidating the
3200
	 * GTT domain upon first access.
3201
	 */
3202
	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3203
		mb();
3204
 
2332 Serge 3205
	old_write_domain = obj->base.write_domain;
3206
	old_read_domains = obj->base.read_domains;
3207
 
3208
	/* It should now be out of any other write domains, and we can update
3209
	 * the domain values for our changes.
3210
	 */
3211
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3212
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3213
	if (write) {
3214
		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3215
		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3216
		obj->dirty = 1;
3217
	}
3218
 
2351 Serge 3219
	trace_i915_gem_object_change_domain(obj,
3220
					    old_read_domains,
3221
					    old_write_domain);
3222
 
3031 serge 3223
	/* And bump the LRU for this access */
6084 serge 3224
	vma = i915_gem_obj_to_ggtt(obj);
3225
	if (vma && drm_mm_node_allocated(&vma->node) && !obj->active)
3226
		list_move_tail(&vma->mm_list,
3227
			       &to_i915(obj->base.dev)->gtt.base.inactive_list);
3031 serge 3228
 
2332 Serge 3229
	return 0;
3230
}
3231
 
6084 serge 3232
/**
3233
 * Changes the cache-level of an object across all VMA.
3234
 *
3235
 * After this function returns, the object will be in the new cache-level
3236
 * across all GTT and the contents of the backing storage will be coherent,
3237
 * with respect to the new cache-level. In order to keep the backing storage
3238
 * coherent for all users, we only allow a single cache level to be set
3239
 * globally on the object and prevent it from being changed whilst the
3240
 * hardware is reading from the object. That is if the object is currently
3241
 * on the scanout it will be set to uncached (or equivalent display
3242
 * cache coherency) and all non-MOCS GPU access will also be uncached so
3243
 * that all direct access to the scanout remains coherent.
3244
 */
2335 Serge 3245
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3246
				    enum i915_cache_level cache_level)
3247
{
3031 serge 3248
	struct drm_device *dev = obj->base.dev;
5060 serge 3249
	struct i915_vma *vma, *next;
6084 serge 3250
	bool bound = false;
3251
	int ret = 0;
2332 Serge 3252
 
2335 Serge 3253
	if (obj->cache_level == cache_level)
6084 serge 3254
		goto out;
2332 Serge 3255
 
6084 serge 3256
	/* Inspect the list of currently bound VMA and unbind any that would
3257
	 * be invalid given the new cache-level. This is principally to
3258
	 * catch the issue of the CS prefetch crossing page boundaries and
3259
	 * reading an invalid PTE on older architectures.
3260
	 */
3261
	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
3262
		if (!drm_mm_node_allocated(&vma->node))
3263
			continue;
2332 Serge 3264
 
6084 serge 3265
		if (vma->pin_count) {
3266
			DRM_DEBUG("can not change the cache level of pinned objects\n");
3267
			return -EBUSY;
3268
		}
3269
 
5354 serge 3270
		if (!i915_gem_valid_gtt_space(vma, cache_level)) {
4104 Serge 3271
			ret = i915_vma_unbind(vma);
6084 serge 3272
			if (ret)
3273
				return ret;
3274
		} else
3275
			bound = true;
3031 serge 3276
	}
3277
 
6084 serge 3278
	/* We can reuse the existing drm_mm nodes but need to change the
3279
	 * cache-level on the PTE. We could simply unbind them all and
3280
	 * rebind with the correct cache-level on next use. However since
3281
	 * we already have a valid slot, dma mapping, pages etc, we may as well
3282
	 * rewrite the PTE in the belief that doing so tramples upon less
3283
	 * state and so involves less work.
3284
	 */
3285
	if (bound) {
3286
		/* Before we change the PTE, the GPU must not be accessing it.
3287
		 * If we wait upon the object, we know that all the bound
3288
		 * VMA are no longer active.
3289
		 */
3290
		ret = i915_gem_object_wait_rendering(obj, false);
2335 Serge 3291
		if (ret)
3292
			return ret;
2332 Serge 3293
 
6084 serge 3294
		if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
3295
			/* Access to snoopable pages through the GTT is
3296
			 * incoherent and on some machines causes a hard
3297
			 * lockup. Relinquish the CPU mmapping to force
3298
			 * userspace to refault in the pages and we can
3299
			 * then double check if the GTT mapping is still
3300
			 * valid for that pointer access.
3301
			 */
3302
			i915_gem_release_mmap(obj);
2332 Serge 3303
 
6084 serge 3304
			/* As we no longer need a fence for GTT access,
3305
			 * we can relinquish it now (and so prevent having
3306
			 * to steal a fence from someone else on the next
3307
			 * fence request). Note GPU activity would have
3308
			 * dropped the fence as all snoopable access is
3309
			 * supposed to be linear.
3310
			 */
2335 Serge 3311
			ret = i915_gem_object_put_fence(obj);
3312
			if (ret)
3313
				return ret;
6084 serge 3314
		} else {
3315
			/* We either have incoherent backing store and
3316
			 * so no GTT access or the architecture is fully
3317
			 * coherent. In such cases, existing GTT mmaps
3318
			 * ignore the cache bit in the PTE and we can
3319
			 * rewrite it without confusing the GPU or having
3320
			 * to force userspace to fault back in its mmaps.
3321
			 */
3322
		}
2332 Serge 3323
 
6084 serge 3324
		list_for_each_entry(vma, &obj->vma_list, vma_link) {
3325
			if (!drm_mm_node_allocated(&vma->node))
3326
				continue;
3327
 
3328
			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3329
			if (ret)
3330
				return ret;
3331
		}
2335 Serge 3332
	}
2332 Serge 3333
 
4104 Serge 3334
	list_for_each_entry(vma, &obj->vma_list, vma_link)
3335
		vma->node.color = cache_level;
3336
	obj->cache_level = cache_level;
3337
 
6084 serge 3338
out:
3339
	/* Flush the dirty CPU caches to the backing storage so that the
3340
	 * object is now coherent at its new cache level (with respect
3341
	 * to the access domain).
3342
	 */
3343
	if (obj->cache_dirty &&
3344
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
3345
	    cpu_write_needs_clflush(obj)) {
3346
		if (i915_gem_clflush_object(obj, true))
3347
			i915_gem_chipset_flush(obj->base.dev);
3348
	}
2332 Serge 3349
 
2335 Serge 3350
	return 0;
3351
}
2332 Serge 3352
 
3260 Serge 3353
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3354
			       struct drm_file *file)
3355
{
3356
	struct drm_i915_gem_caching *args = data;
3357
	struct drm_i915_gem_object *obj;
3358
 
3359
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
6084 serge 3360
	if (&obj->base == NULL)
3361
		return -ENOENT;
3260 Serge 3362
 
4104 Serge 3363
	switch (obj->cache_level) {
3364
	case I915_CACHE_LLC:
3365
	case I915_CACHE_L3_LLC:
3366
		args->caching = I915_CACHING_CACHED;
3367
		break;
3260 Serge 3368
 
4104 Serge 3369
	case I915_CACHE_WT:
3370
		args->caching = I915_CACHING_DISPLAY;
3371
		break;
3372
 
3373
	default:
3374
		args->caching = I915_CACHING_NONE;
3375
		break;
3376
	}
3377
 
6084 serge 3378
	drm_gem_object_unreference_unlocked(&obj->base);
3379
	return 0;
3260 Serge 3380
}
3381
 
3382
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3383
			       struct drm_file *file)
3384
{
6084 serge 3385
	struct drm_i915_private *dev_priv = dev->dev_private;
3260 Serge 3386
	struct drm_i915_gem_caching *args = data;
3387
	struct drm_i915_gem_object *obj;
3388
	enum i915_cache_level level;
3389
	int ret;
3390
 
3391
	switch (args->caching) {
3392
	case I915_CACHING_NONE:
3393
		level = I915_CACHE_NONE;
3394
		break;
3395
	case I915_CACHING_CACHED:
6084 serge 3396
		/*
3397
		 * Due to a HW issue on BXT A stepping, GPU stores via a
3398
		 * snooped mapping may leave stale data in a corresponding CPU
3399
		 * cacheline, whereas normally such cachelines would get
3400
		 * invalidated.
3401
		 */
3402
		if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)
3403
			return -ENODEV;
3404
 
3260 Serge 3405
		level = I915_CACHE_LLC;
3406
		break;
4104 Serge 3407
	case I915_CACHING_DISPLAY:
3408
		level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
3409
		break;
3260 Serge 3410
	default:
3411
		return -EINVAL;
3412
	}
3413
 
6084 serge 3414
	intel_runtime_pm_get(dev_priv);
3415
 
3260 Serge 3416
	ret = i915_mutex_lock_interruptible(dev);
3417
	if (ret)
6084 serge 3418
		goto rpm_put;
3260 Serge 3419
 
3420
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3421
	if (&obj->base == NULL) {
3422
		ret = -ENOENT;
3423
		goto unlock;
3424
	}
3425
 
3426
	ret = i915_gem_object_set_cache_level(obj, level);
3427
 
3428
	drm_gem_object_unreference(&obj->base);
3429
unlock:
3430
	mutex_unlock(&dev->struct_mutex);
6084 serge 3431
rpm_put:
3432
	intel_runtime_pm_put(dev_priv);
3433
 
3260 Serge 3434
	return ret;
3435
}
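
/*
 * Illustrative sketch (not part of this file): from userspace the two ioctls
 * above are reached through DRM_IOCTL_I915_GEM_GET_CACHING and
 * DRM_IOCTL_I915_GEM_SET_CACHING. Assuming an already-open DRM fd and a valid
 * GEM handle, a caller asking for LLC caching would look roughly like:
 *
 *	struct drm_i915_gem_caching arg = {
 *		.handle  = handle,
 *		.caching = I915_CACHING_CACHED,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
 *
 * drmIoctl() is the libdrm wrapper; error handling is omitted here.
 */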
3436
 
2335 Serge 3437
/*
3438
 * Prepare buffer for display plane (scanout, cursors, etc).
3439
 * Can be called from an uninterruptible phase (modesetting) and allows
3440
 * any flushes to be pipelined (for pageflips).
3441
 */
3442
int
3443
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3444
				     u32 alignment,
6084 serge 3445
				     struct intel_engine_cs *pipelined,
3446
				     struct drm_i915_gem_request **pipelined_request,
3447
				     const struct i915_ggtt_view *view)
2335 Serge 3448
{
3449
	u32 old_read_domains, old_write_domain;
3450
	int ret;
2332 Serge 3451
 
6084 serge 3452
	ret = i915_gem_object_sync(obj, pipelined, pipelined_request);
2335 Serge 3453
	if (ret)
3454
		return ret;
2332 Serge 3455
 
4104 Serge 3456
	/* Mark the pin_display early so that we account for the
3457
	 * display coherency whilst setting up the cache domains.
3458
	 */
6084 serge 3459
	obj->pin_display++;
4104 Serge 3460
 
2335 Serge 3461
	/* The display engine is not coherent with the LLC cache on gen6.  As
3462
	 * a result, we make sure that the pinning that is about to occur is
3463
	 * done with uncached PTEs. This is the lowest common denominator for all
3464
	 * chipsets.
3465
	 *
3466
	 * However for gen6+, we could do better by using the GFDT bit instead
3467
	 * of uncaching, which would allow us to flush all the LLC-cached data
3468
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3469
	 */
4104 Serge 3470
	ret = i915_gem_object_set_cache_level(obj,
3471
					      HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
2360 Serge 3472
	if (ret)
4104 Serge 3473
		goto err_unpin_display;
2332 Serge 3474
 
2335 Serge 3475
	/* As the user may map the buffer once pinned in the display plane
3476
	 * (e.g. libkms for the bootup splash), we have to ensure that we
3477
	 * always use map_and_fenceable for all scanout buffers.
3478
	 */
6084 serge 3479
	ret = i915_gem_object_ggtt_pin(obj, view, alignment,
3480
				       view->type == I915_GGTT_VIEW_NORMAL ?
3481
				       PIN_MAPPABLE : 0);
2335 Serge 3482
	if (ret)
4104 Serge 3483
		goto err_unpin_display;
2332 Serge 3484
 
6084 serge 3485
	i915_gem_object_flush_cpu_write_domain(obj);
2332 Serge 3486
 
2335 Serge 3487
	old_write_domain = obj->base.write_domain;
3488
	old_read_domains = obj->base.read_domains;
2332 Serge 3489
 
2335 Serge 3490
	/* It should now be out of any other write domains, and we can update
3491
	 * the domain values for our changes.
3492
	 */
3031 serge 3493
	obj->base.write_domain = 0;
2335 Serge 3494
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2332 Serge 3495
 
2351 Serge 3496
	trace_i915_gem_object_change_domain(obj,
3497
					    old_read_domains,
3498
					    old_write_domain);
2332 Serge 3499
 
2335 Serge 3500
	return 0;
4104 Serge 3501
 
3502
err_unpin_display:
6084 serge 3503
	obj->pin_display--;
4104 Serge 3504
	return ret;
2335 Serge 3505
}
2332 Serge 3506
 
4104 Serge 3507
void
6084 serge 3508
i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
3509
					 const struct i915_ggtt_view *view)
4104 Serge 3510
{
6084 serge 3511
	if (WARN_ON(obj->pin_display == 0))
3512
		return;
4104 Serge 3513
 
6084 serge 3514
	i915_gem_object_ggtt_unpin_view(obj, view);
2332 Serge 3515
 
6084 serge 3516
	obj->pin_display--;
2344 Serge 3517
}
2332 Serge 3518
 
2344 Serge 3519
/**
3520
 * Moves a single object to the CPU read, and possibly write domain.
3521
 *
3522
 * This function returns when the move is complete, including waiting on
3523
 * flushes to occur.
3524
 */
3031 serge 3525
int
2344 Serge 3526
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3527
{
3528
	uint32_t old_write_domain, old_read_domains;
3529
	int ret;
2332 Serge 3530
 
2344 Serge 3531
	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3532
		return 0;
2332 Serge 3533
 
3031 serge 3534
	ret = i915_gem_object_wait_rendering(obj, !write);
2344 Serge 3535
	if (ret)
3536
		return ret;
2332 Serge 3537
 
2344 Serge 3538
	i915_gem_object_flush_gtt_write_domain(obj);
2332 Serge 3539
 
2344 Serge 3540
	old_write_domain = obj->base.write_domain;
3541
	old_read_domains = obj->base.read_domains;
2332 Serge 3542
 
2344 Serge 3543
	/* Flush the CPU cache if it's still invalid. */
3544
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
4104 Serge 3545
		i915_gem_clflush_object(obj, false);
2332 Serge 3546
 
2344 Serge 3547
		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3548
	}
2332 Serge 3549
 
2344 Serge 3550
	/* It should now be out of any other write domains, and we can update
3551
	 * the domain values for our changes.
3552
	 */
3553
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2332 Serge 3554
 
2344 Serge 3555
	/* If we're writing through the CPU, then the GPU read domains will
3556
	 * need to be invalidated at next use.
3557
	 */
3558
	if (write) {
3559
		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3560
		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3561
	}
2332 Serge 3562
 
2351 Serge 3563
	trace_i915_gem_object_change_domain(obj,
3564
					    old_read_domains,
3565
					    old_write_domain);
2332 Serge 3566
 
2344 Serge 3567
	return 0;
3568
}
2332 Serge 3569
 
3031 serge 3570
/* Throttle our rendering by waiting until the ring has completed our requests
3571
 * emitted over 20 msec ago.
2344 Serge 3572
 *
3031 serge 3573
 * Note that if we were to use the current jiffies each time around the loop,
3574
 * we wouldn't escape the function with any frames outstanding if the time to
3575
 * render a frame was over 20ms.
3576
 *
3577
 * This should get us reasonable parallelism between CPU and GPU but also
3578
 * relatively low latency when blocking on a particular request to finish.
2344 Serge 3579
 */
3031 serge 3580
static int
3581
i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
2344 Serge 3582
{
3031 serge 3583
	struct drm_i915_private *dev_priv = dev->dev_private;
3584
	struct drm_i915_file_private *file_priv = file->driver_priv;
6084 serge 3585
	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
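	/* DRM_I915_THROTTLE_JIFFIES is expected to correspond to the 20ms
	 * window described above (it is defined elsewhere in the driver,
	 * presumably as msecs_to_jiffies(20)), so "recent_enough" marks the
	 * oldest submission we are still willing to wait behind.
	 */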
3586
	struct drm_i915_gem_request *request, *target = NULL;
3480 Serge 3587
	unsigned reset_counter;
3031 serge 3588
	int ret;
2332 Serge 3589
 
3480 Serge 3590
	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
3591
	if (ret)
3592
		return ret;
2332 Serge 3593
 
3480 Serge 3594
	ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
3595
	if (ret)
3596
		return ret;
3597
 
3031 serge 3598
	spin_lock(&file_priv->mm.lock);
3599
	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3600
		if (time_after_eq(request->emitted_jiffies, recent_enough))
3601
			break;
2332 Serge 3602
 
6084 serge 3603
		/*
3604
		 * Note that the request might not have been submitted yet.
3605
		 * In which case emitted_jiffies will be zero.
3606
		 */
3607
		if (!request->emitted_jiffies)
3608
			continue;
3609
 
3610
		target = request;
3031 serge 3611
	}
3480 Serge 3612
	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
6084 serge 3613
	if (target)
3614
		i915_gem_request_reference(target);
3031 serge 3615
	spin_unlock(&file_priv->mm.lock);
2332 Serge 3616
 
6084 serge 3617
	if (target == NULL)
3031 serge 3618
		return 0;
2332 Serge 3619
 
6084 serge 3620
	ret = __i915_wait_request(target, reset_counter, true, NULL, NULL);
3031 serge 3621
	if (ret == 0)
3622
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
2332 Serge 3623
 
6084 serge 3624
	i915_gem_request_unreference__unlocked(target);
3625
 
3031 serge 3626
	return ret;
2352 Serge 3627
}
2332 Serge 3628
 
5060 serge 3629
static bool
3630
i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
3631
{
3632
	struct drm_i915_gem_object *obj = vma->obj;
3633
 
3634
	if (alignment &&
3635
	    vma->node.start & (alignment - 1))
3636
		return true;
3637
 
3638
	if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
3639
		return true;
3640
 
3641
	if (flags & PIN_OFFSET_BIAS &&
3642
	    vma->node.start < (flags & PIN_OFFSET_MASK))
3643
		return true;
3644
 
3645
	return false;
3646
}
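
/*
 * In short, a VMA is considered misplaced when it violates any constraint the
 * caller is about to pin with: its start is not aligned as requested, it is
 * expected to be CPU-mappable through the aperture but is not, or it sits
 * below the minimum offset requested via PIN_OFFSET_BIAS. The pin path below
 * unbinds such a VMA and rebinds it at a suitable location.
 */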
3647
 
6084 serge 3648
void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
2332 Serge 3649
{
6084 serge 3650
	struct drm_i915_gem_object *obj = vma->obj;
3651
	bool mappable, fenceable;
3652
	u32 fence_size, fence_alignment;
3653
 
3654
	fence_size = i915_gem_get_gtt_size(obj->base.dev,
3655
					   obj->base.size,
3656
					   obj->tiling_mode);
3657
	fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
3658
						     obj->base.size,
3659
						     obj->tiling_mode,
3660
						     true);
3661
 
3662
	fenceable = (vma->node.size == fence_size &&
3663
		     (vma->node.start & (fence_alignment - 1)) == 0);
3664
 
3665
	mappable = (vma->node.start + fence_size <=
3666
		    to_i915(obj->base.dev)->gtt.mappable_end);
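
	/* "fenceable" means the node is exactly the size and alignment a
	 * fence register would require for this tiling mode, while
	 * "mappable" means the whole fence-sized region lies inside the
	 * CPU-visible part of the global GTT aperture. Only when both hold
	 * can the object be accessed through a fenced GTT mmap.
	 */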
3667
 
3668
	obj->map_and_fenceable = mappable && fenceable;
3669
}
3670
 
3671
static int
3672
i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
3673
		       struct i915_address_space *vm,
3674
		       const struct i915_ggtt_view *ggtt_view,
3675
		       uint32_t alignment,
3676
		       uint64_t flags)
3677
{
5060 serge 3678
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
4104 Serge 3679
	struct i915_vma *vma;
5354 serge 3680
	unsigned bound;
2332 Serge 3681
	int ret;
3682
 
5060 serge 3683
	if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
3684
		return -ENODEV;
2332 Serge 3685
 
5060 serge 3686
	if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
3687
		return -EINVAL;
4104 Serge 3688
 
5354 serge 3689
	if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
3690
		return -EINVAL;
3691
 
6084 serge 3692
	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
3693
		return -EINVAL;
3694
 
3695
	vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
3696
			  i915_gem_obj_to_vma(obj, vm);
3697
 
3698
	if (IS_ERR(vma))
3699
		return PTR_ERR(vma);
3700
 
5060 serge 3701
	if (vma) {
3702
		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
3703
			return -EBUSY;
4104 Serge 3704
 
5060 serge 3705
		if (i915_vma_misplaced(vma, alignment, flags)) {
3706
			WARN(vma->pin_count,
6084 serge 3707
			     "bo is already pinned in %s with incorrect alignment:"
3708
			     " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
2332 Serge 3709
			     " obj->map_and_fenceable=%d\n",
6084 serge 3710
			     ggtt_view ? "ggtt" : "ppgtt",
3711
			     upper_32_bits(vma->node.start),
3712
			     lower_32_bits(vma->node.start),
3713
			     alignment,
5060 serge 3714
			     !!(flags & PIN_MAPPABLE),
2332 Serge 3715
			     obj->map_and_fenceable);
4104 Serge 3716
			ret = i915_vma_unbind(vma);
2332 Serge 3717
			if (ret)
3718
				return ret;
5060 serge 3719
 
3720
			vma = NULL;
2332 Serge 3721
		}
3722
	}
3723
 
5354 serge 3724
	bound = vma ? vma->bound : 0;
5060 serge 3725
	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
6084 serge 3726
		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
3727
						 flags);
5060 serge 3728
		if (IS_ERR(vma))
3729
			return PTR_ERR(vma);
6084 serge 3730
	} else {
3731
		ret = i915_vma_bind(vma, obj->cache_level, flags);
3732
		if (ret)
3733
			return ret;
2332 Serge 3734
	}
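
	/* If this pin created or refreshed the global (normal view) GTT
	 * binding, the object's position in the aperture may have changed,
	 * so recompute whether it is still mappable and fenceable before
	 * honouring a PIN_MAPPABLE request.
	 */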
3735
 
6084 serge 3736
	if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
3737
	    (bound ^ vma->bound) & GLOBAL_BIND) {
3738
		__i915_vma_set_map_and_fenceable(vma);
3739
		WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
5354 serge 3740
	}
3741
 
5060 serge 3742
	vma->pin_count++;
2332 Serge 3743
	return 0;
3744
}
3745
 
6084 serge 3746
int
3747
i915_gem_object_pin(struct drm_i915_gem_object *obj,
3748
		    struct i915_address_space *vm,
3749
		    uint32_t alignment,
3750
		    uint64_t flags)
2344 Serge 3751
{
6084 serge 3752
	return i915_gem_object_do_pin(obj, vm,
3753
				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
3754
				      alignment, flags);
2344 Serge 3755
}
2332 Serge 3756
 
6084 serge 3757
int
3758
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3759
			 const struct i915_ggtt_view *view,
3760
			 uint32_t alignment,
3761
			 uint64_t flags)
5060 serge 3762
{
6084 serge 3763
	if (WARN_ONCE(!view, "no view specified"))
3764
		return -EINVAL;
5060 serge 3765
 
6084 serge 3766
	return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view,
3767
				      alignment, flags | PIN_GLOBAL);
5060 serge 3768
}
3769
 
3770
void
6084 serge 3771
i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
3772
				const struct i915_ggtt_view *view)
5060 serge 3773
{
6084 serge 3774
	struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
5060 serge 3775
 
6084 serge 3776
	BUG_ON(!vma);
3777
	WARN_ON(vma->pin_count == 0);
3778
	WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
2332 Serge 3779
 
6084 serge 3780
	--vma->pin_count;
3031 serge 3781
}
2332 Serge 3782
 
3031 serge 3783
int
3784
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3785
		    struct drm_file *file)
3786
{
3787
	struct drm_i915_gem_busy *args = data;
3788
	struct drm_i915_gem_object *obj;
3789
	int ret;
2332 Serge 3790
 
3031 serge 3791
	ret = i915_mutex_lock_interruptible(dev);
3792
	if (ret)
3793
		return ret;
2332 Serge 3794
 
5060 serge 3795
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3031 serge 3796
	if (&obj->base == NULL) {
3797
		ret = -ENOENT;
3798
		goto unlock;
3799
	}
2332 Serge 3800
 
3031 serge 3801
	/* Count all active objects as busy, even if they are currently not used
3802
	 * by the gpu. Users of this interface expect objects to eventually
3803
	 * become non-busy without any further actions, therefore emit any
3804
	 * necessary flushes here.
3805
	 */
3806
	ret = i915_gem_object_flush_active(obj);
6084 serge 3807
	if (ret)
3808
		goto unref;
2332 Serge 3809
 
6084 serge 3810
	BUILD_BUG_ON(I915_NUM_RINGS > 16);
3811
	args->busy = obj->active << 16;
3812
	if (obj->last_write_req)
3813
		args->busy |= obj->last_write_req->ring->id;
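	/* The returned value packs the busy state: the upper 16 bits carry
	 * the mask of rings still reading from the object (obj->active),
	 * while the low bits report the id of the ring that last wrote to
	 * it, if any. Userspace can therefore tell "busy for reads" apart
	 * from "busy for writes".
	 */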
2332 Serge 3814
 
6084 serge 3815
unref:
3031 serge 3816
	drm_gem_object_unreference(&obj->base);
3817
unlock:
3818
	mutex_unlock(&dev->struct_mutex);
3819
	return ret;
3820
}
2332 Serge 3821
 
3031 serge 3822
int
3823
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3824
			struct drm_file *file_priv)
3825
{
3826
	return i915_gem_ring_throttle(dev, file_priv);
3827
}
2332 Serge 3828
 
3263 Serge 3829
#if 0
3830
 
3031 serge 3831
int
3832
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3833
		       struct drm_file *file_priv)
3834
{
5354 serge 3835
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 3836
	struct drm_i915_gem_madvise *args = data;
3837
	struct drm_i915_gem_object *obj;
3838
	int ret;
2332 Serge 3839
 
3031 serge 3840
	switch (args->madv) {
3841
	case I915_MADV_DONTNEED:
3842
	case I915_MADV_WILLNEED:
3843
	    break;
3844
	default:
3845
	    return -EINVAL;
3846
	}
2332 Serge 3847
 
3031 serge 3848
	ret = i915_mutex_lock_interruptible(dev);
3849
	if (ret)
3850
		return ret;
2332 Serge 3851
 
3031 serge 3852
	obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
3853
	if (&obj->base == NULL) {
3854
		ret = -ENOENT;
3855
		goto unlock;
3856
	}
2332 Serge 3857
 
5060 serge 3858
	if (i915_gem_obj_is_pinned(obj)) {
3031 serge 3859
		ret = -EINVAL;
3860
		goto out;
3861
	}
2332 Serge 3862
 
5354 serge 3863
	if (obj->pages &&
3864
	    obj->tiling_mode != I915_TILING_NONE &&
3865
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
3866
		if (obj->madv == I915_MADV_WILLNEED)
3867
			i915_gem_object_unpin_pages(obj);
3868
		if (args->madv == I915_MADV_WILLNEED)
3869
			i915_gem_object_pin_pages(obj);
3870
	}
3871
 
3031 serge 3872
	if (obj->madv != __I915_MADV_PURGED)
3873
		obj->madv = args->madv;
2332 Serge 3874
 
3031 serge 3875
	/* if the object is no longer attached, discard its backing storage */
6084 serge 3876
	if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
3031 serge 3877
		i915_gem_object_truncate(obj);
2332 Serge 3878
 
3031 serge 3879
	args->retained = obj->madv != __I915_MADV_PURGED;
2332 Serge 3880
 
3031 serge 3881
out:
3882
	drm_gem_object_unreference(&obj->base);
3883
unlock:
3884
	mutex_unlock(&dev->struct_mutex);
3885
	return ret;
3886
}
3887
#endif
2332 Serge 3888
 
3031 serge 3889
void i915_gem_object_init(struct drm_i915_gem_object *obj,
3890
			  const struct drm_i915_gem_object_ops *ops)
3891
{
6084 serge 3892
	int i;
3893
 
4104 Serge 3894
	INIT_LIST_HEAD(&obj->global_list);
6084 serge 3895
	for (i = 0; i < I915_NUM_RINGS; i++)
3896
		INIT_LIST_HEAD(&obj->ring_list[i]);
4104 Serge 3897
	INIT_LIST_HEAD(&obj->obj_exec_link);
3898
	INIT_LIST_HEAD(&obj->vma_list);
6084 serge 3899
	INIT_LIST_HEAD(&obj->batch_pool_link);
2332 Serge 3900
 
3031 serge 3901
	obj->ops = ops;
3902
 
3903
	obj->fence_reg = I915_FENCE_REG_NONE;
3904
	obj->madv = I915_MADV_WILLNEED;
3905
 
3906
	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
3907
}
3908
 
3909
static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
3910
	.get_pages = i915_gem_object_get_pages_gtt,
3911
	.put_pages = i915_gem_object_put_pages_gtt,
3912
};
3913
 
2332 Serge 3914
struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
3915
						  size_t size)
3916
{
3917
	struct drm_i915_gem_object *obj;
3031 serge 3918
	struct address_space *mapping;
3480 Serge 3919
	gfp_t mask;
2340 Serge 3920
 
3746 Serge 3921
	obj = i915_gem_object_alloc(dev);
2332 Serge 3922
	if (obj == NULL)
3923
		return NULL;
3924
 
3925
	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4104 Serge 3926
		i915_gem_object_free(obj);
2332 Serge 3927
		return NULL;
3928
	}
3929
 
3930
 
3031 serge 3931
	i915_gem_object_init(obj, &i915_gem_object_ops);
2332 Serge 3932
 
3933
	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3934
	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3935
 
3031 serge 3936
	if (HAS_LLC(dev)) {
3937
		/* On some devices, we can have the GPU use the LLC (the CPU
2332 Serge 3938
		 * cache) for about a 10% performance improvement
3939
		 * compared to uncached.  Graphics requests other than
3940
		 * display scanout are coherent with the CPU in
3941
		 * accessing this cache.  This means in this mode we
3942
		 * don't need to clflush on the CPU side, and on the
3943
		 * GPU side we only need to flush internal caches to
3944
		 * get data visible to the CPU.
3945
		 *
3946
		 * However, we maintain the display planes as UC, and so
3947
		 * need to rebind when first used as such.
3948
		 */
3949
		obj->cache_level = I915_CACHE_LLC;
3950
	} else
3951
		obj->cache_level = I915_CACHE_NONE;
3952
 
4560 Serge 3953
	trace_i915_gem_object_create(obj);
3954
 
2332 Serge 3955
	return obj;
3956
}
3957
 
3031 serge 3958
void i915_gem_free_object(struct drm_gem_object *gem_obj)
2344 Serge 3959
{
3031 serge 3960
	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
2344 Serge 3961
	struct drm_device *dev = obj->base.dev;
5060 serge 3962
	struct drm_i915_private *dev_priv = dev->dev_private;
4104 Serge 3963
	struct i915_vma *vma, *next;
2332 Serge 3964
 
4560 Serge 3965
	intel_runtime_pm_get(dev_priv);
3966
 
3031 serge 3967
	trace_i915_gem_object_destroy(obj);
3968
 
5060 serge 3969
	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
3970
		int ret;
3031 serge 3971
 
5060 serge 3972
		vma->pin_count = 0;
3973
		ret = i915_vma_unbind(vma);
4104 Serge 3974
		if (WARN_ON(ret == -ERESTARTSYS)) {
6084 serge 3975
			bool was_interruptible;
3031 serge 3976
 
6084 serge 3977
			was_interruptible = dev_priv->mm.interruptible;
3978
			dev_priv->mm.interruptible = false;
3031 serge 3979
 
4104 Serge 3980
			WARN_ON(i915_vma_unbind(vma));
3031 serge 3981
 
6084 serge 3982
			dev_priv->mm.interruptible = was_interruptible;
3983
		}
2344 Serge 3984
	}
2332 Serge 3985
 
4104 Serge 3986
	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
3987
	 * before progressing. */
3988
	if (obj->stolen)
3989
		i915_gem_object_unpin_pages(obj);
3990
 
5060 serge 3991
	WARN_ON(obj->frontbuffer_bits);
3992
 
5354 serge 3993
	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
3994
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
3995
	    obj->tiling_mode != I915_TILING_NONE)
3996
		i915_gem_object_unpin_pages(obj);
3997
 
4104 Serge 3998
	if (WARN_ON(obj->pages_pin_count))
6084 serge 3999
		obj->pages_pin_count = 0;
3031 serge 4000
	i915_gem_object_put_pages(obj);
4001
//   i915_gem_object_free_mmap_offset(obj);
2332 Serge 4002
 
3243 Serge 4003
	BUG_ON(obj->pages);
2332 Serge 4004
 
3031 serge 4005
 
3290 Serge 4006
	if (obj->base.filp != NULL)
4007
	{
3298 Serge 4008
//		printf("filp %p\n", obj->base.filp);
3290 Serge 4009
		shmem_file_delete(obj->base.filp);
4010
	}
4011
 
2344 Serge 4012
	drm_gem_object_release(&obj->base);
4013
	i915_gem_info_remove_obj(dev_priv, obj->base.size);
2332 Serge 4014
 
2344 Serge 4015
	kfree(obj->bit_17);
4104 Serge 4016
	i915_gem_object_free(obj);
4560 Serge 4017
 
4018
	intel_runtime_pm_put(dev_priv);
2344 Serge 4019
}
2332 Serge 4020
 
4560 Serge 4021
struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
4104 Serge 4022
				     struct i915_address_space *vm)
4023
{
4560 Serge 4024
	struct i915_vma *vma;
6084 serge 4025
	list_for_each_entry(vma, &obj->vma_list, vma_link) {
4026
		if (i915_is_ggtt(vma->vm) &&
4027
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4028
			continue;
4560 Serge 4029
		if (vma->vm == vm)
4030
			return vma;
6084 serge 4031
	}
4032
	return NULL;
4033
}
4560 Serge 4034
 
6084 serge 4035
struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
4036
					   const struct i915_ggtt_view *view)
4037
{
4038
	struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
4039
	struct i915_vma *vma;
4040
 
4041
	if (WARN_ONCE(!view, "no view specified"))
4042
		return ERR_PTR(-EINVAL);
4043
 
4044
	list_for_each_entry(vma, &obj->vma_list, vma_link)
4045
		if (vma->vm == ggtt &&
4046
		    i915_ggtt_view_equal(&vma->ggtt_view, view))
4047
			return vma;
4560 Serge 4048
	return NULL;
4049
}
4050
 
4104 Serge 4051
void i915_gem_vma_destroy(struct i915_vma *vma)
4052
{
5354 serge 4053
	struct i915_address_space *vm = NULL;
4104 Serge 4054
	WARN_ON(vma->node.allocated);
4560 Serge 4055
 
4056
	/* Keep the vma as a placeholder in the execbuffer reservation lists */
4057
	if (!list_empty(&vma->exec_list))
4058
		return;
4059
 
5354 serge 4060
	vm = vma->vm;
4061
 
4062
	if (!i915_is_ggtt(vm))
4063
		i915_ppgtt_put(i915_vm_to_ppgtt(vm));
4064
 
4104 Serge 4065
	list_del(&vma->vma_link);
4560 Serge 4066
 
4104 Serge 4067
	kfree(vma);
4068
}
4069
 
6084 serge 4070
static void
4071
i915_gem_stop_ringbuffers(struct drm_device *dev)
4072
{
4073
	struct drm_i915_private *dev_priv = dev->dev_private;
4074
	struct intel_engine_cs *ring;
4075
	int i;
4076
 
4077
	for_each_ring(ring, dev_priv, i)
4078
		dev_priv->gt.stop_ring(ring);
4079
}
4080
 
3031 serge 4081
#if 0
4082
int
4560 Serge 4083
i915_gem_suspend(struct drm_device *dev)
2344 Serge 4084
{
5060 serge 4085
	struct drm_i915_private *dev_priv = dev->dev_private;
4560 Serge 4086
	int ret = 0;
2332 Serge 4087
 
4560 Serge 4088
	mutex_lock(&dev->struct_mutex);
3031 serge 4089
	ret = i915_gpu_idle(dev);
4560 Serge 4090
	if (ret)
4091
		goto err;
4092
 
3031 serge 4093
	i915_gem_retire_requests(dev);
4094
 
5060 serge 4095
	i915_gem_stop_ringbuffers(dev);
4560 Serge 4096
	mutex_unlock(&dev->struct_mutex);
4097
 
6084 serge 4098
	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
3263 Serge 4099
	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
5060 serge 4100
	flush_delayed_work(&dev_priv->mm.idle_work);
3031 serge 4101
 
6084 serge 4102
	/* Assert that we successfully flushed all the work and
4103
	 * reset the GPU back to its idle, low power state.
4104
	 */
4105
	WARN_ON(dev_priv->mm.busy);
4106
 
3031 serge 4107
	return 0;
4560 Serge 4108
 
4109
err:
4110
	mutex_unlock(&dev->struct_mutex);
4111
	return ret;
2344 Serge 4112
}
3031 serge 4113
#endif
2332 Serge 4114
 
6084 serge 4115
int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
3031 serge 4116
{
6084 serge 4117
	struct intel_engine_cs *ring = req->ring;
4560 Serge 4118
	struct drm_device *dev = ring->dev;
5060 serge 4119
	struct drm_i915_private *dev_priv = dev->dev_private;
4560 Serge 4120
	u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
4121
	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
4122
	int i, ret;
2332 Serge 4123
 
4560 Serge 4124
	if (!HAS_L3_DPF(dev) || !remap_info)
4125
		return 0;
2332 Serge 4126
 
6084 serge 4127
	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
4560 Serge 4128
	if (ret)
4129
		return ret;
2332 Serge 4130
 
4560 Serge 4131
	/*
4132
	 * Note: We do not worry about the concurrent register cacheline hang
4133
	 * here because no other code should access these registers other than
4134
	 * at initialization time.
4135
	 */
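	/* Each remap entry is written with a separate MI_LOAD_REGISTER_IMM,
	 * which costs three dwords on the ring (the LRI header, the register
	 * offset and the value) - hence the GEN7_L3LOG_SIZE / 4 * 3 dwords
	 * reserved above for GEN7_L3LOG_SIZE / 4 registers.
	 */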
3031 serge 4136
	for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
4560 Serge 4137
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
4138
		intel_ring_emit(ring, reg_base + i);
4139
		intel_ring_emit(ring, remap_info[i/4]);
3031 serge 4140
	}
2332 Serge 4141
 
4560 Serge 4142
	intel_ring_advance(ring);
2332 Serge 4143
 
4560 Serge 4144
	return ret;
3031 serge 4145
}
2332 Serge 4146
 
3031 serge 4147
void i915_gem_init_swizzling(struct drm_device *dev)
4148
{
5060 serge 4149
	struct drm_i915_private *dev_priv = dev->dev_private;
2332 Serge 4150
 
3031 serge 4151
	if (INTEL_INFO(dev)->gen < 5 ||
4152
	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4153
		return;
2332 Serge 4154
 
3031 serge 4155
	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4156
				 DISP_TILE_SURFACE_SWIZZLING);
2332 Serge 4157
 
3031 serge 4158
	if (IS_GEN5(dev))
4159
		return;
2344 Serge 4160
 
3031 serge 4161
	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4162
	if (IS_GEN6(dev))
4163
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
3480 Serge 4164
	else if (IS_GEN7(dev))
4165
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4560 Serge 4166
	else if (IS_GEN8(dev))
4167
		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
3031 serge 4168
	else
3480 Serge 4169
		BUG();
3031 serge 4170
}
4171
 
5354 serge 4172
static void init_unused_ring(struct drm_device *dev, u32 base)
2332 Serge 4173
{
3480 Serge 4174
	struct drm_i915_private *dev_priv = dev->dev_private;
5354 serge 4175
 
4176
	I915_WRITE(RING_CTL(base), 0);
4177
	I915_WRITE(RING_HEAD(base), 0);
4178
	I915_WRITE(RING_TAIL(base), 0);
4179
	I915_WRITE(RING_START(base), 0);
4180
}
4181
 
4182
static void init_unused_rings(struct drm_device *dev)
4183
{
4184
	if (IS_I830(dev)) {
4185
		init_unused_ring(dev, PRB1_BASE);
4186
		init_unused_ring(dev, SRB0_BASE);
4187
		init_unused_ring(dev, SRB1_BASE);
4188
		init_unused_ring(dev, SRB2_BASE);
4189
		init_unused_ring(dev, SRB3_BASE);
4190
	} else if (IS_GEN2(dev)) {
4191
		init_unused_ring(dev, SRB0_BASE);
4192
		init_unused_ring(dev, SRB1_BASE);
4193
	} else if (IS_GEN3(dev)) {
4194
		init_unused_ring(dev, PRB1_BASE);
4195
		init_unused_ring(dev, PRB2_BASE);
4196
	}
4197
}
4198
 
4199
int i915_gem_init_rings(struct drm_device *dev)
4200
{
4201
	struct drm_i915_private *dev_priv = dev->dev_private;
2332 Serge 4202
	int ret;
2351 Serge 4203
 
2332 Serge 4204
	ret = intel_init_render_ring_buffer(dev);
4205
	if (ret)
4206
		return ret;
4207
 
6084 serge 4208
	if (HAS_BSD(dev)) {
2332 Serge 4209
		ret = intel_init_bsd_ring_buffer(dev);
4210
		if (ret)
4211
			goto cleanup_render_ring;
4212
	}
4213
 
6084 serge 4214
	if (HAS_BLT(dev)) {
2332 Serge 4215
		ret = intel_init_blt_ring_buffer(dev);
4216
		if (ret)
4217
			goto cleanup_bsd_ring;
4218
	}
4219
 
4104 Serge 4220
	if (HAS_VEBOX(dev)) {
4221
		ret = intel_init_vebox_ring_buffer(dev);
4222
		if (ret)
4223
			goto cleanup_blt_ring;
4224
	}
4225
 
5060 serge 4226
	if (HAS_BSD2(dev)) {
4227
		ret = intel_init_bsd2_ring_buffer(dev);
4228
		if (ret)
4229
			goto cleanup_vebox_ring;
4230
	}
4104 Serge 4231
 
2332 Serge 4232
	return 0;
4233
 
4104 Serge 4234
cleanup_vebox_ring:
4235
	intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
3480 Serge 4236
cleanup_blt_ring:
4237
	intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
2332 Serge 4238
cleanup_bsd_ring:
4239
	intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
4240
cleanup_render_ring:
4241
	intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
3480 Serge 4242
 
2332 Serge 4243
	return ret;
4244
}
4245
 
3480 Serge 4246
int
4247
i915_gem_init_hw(struct drm_device *dev)
3031 serge 4248
{
5060 serge 4249
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 4250
	struct intel_engine_cs *ring;
4251
	int ret, i, j;
3031 serge 4252
 
3480 Serge 4253
	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
4254
		return -EIO;
3031 serge 4255
 
6084 serge 4256
	/* Double layer security blanket, see i915_gem_init() */
4257
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4258
 
4104 Serge 4259
	if (dev_priv->ellc_size)
4260
		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
3480 Serge 4261
 
4560 Serge 4262
	if (IS_HASWELL(dev))
4263
		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
4264
			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
4265
 
3746 Serge 4266
	if (HAS_PCH_NOP(dev)) {
5060 serge 4267
		if (IS_IVYBRIDGE(dev)) {
6084 serge 4268
			u32 temp = I915_READ(GEN7_MSG_CTL);
4269
			temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
4270
			I915_WRITE(GEN7_MSG_CTL, temp);
5060 serge 4271
		} else if (INTEL_INFO(dev)->gen >= 7) {
4272
			u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
4273
			temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
4274
			I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
4275
		}
3746 Serge 4276
	}
4277
 
3480 Serge 4278
	i915_gem_init_swizzling(dev);
4279
 
6084 serge 4280
	/*
4281
	 * At least 830 can leave some of the unused rings
4282
	 * "active" (ie. head != tail) after resume which
4283
	 * will prevent c3 entry. Make sure all unused rings
4284
	 * are totally idle.
4285
	 */
4286
	init_unused_rings(dev);
3480 Serge 4287
 
6084 serge 4288
	BUG_ON(!dev_priv->ring[RCS].default_context);
4560 Serge 4289
 
6084 serge 4290
	ret = i915_ppgtt_init_hw(dev);
4291
	if (ret) {
4292
		DRM_ERROR("PPGTT enable HW failed %d\n", ret);
4293
		goto out;
4294
	}
4295
 
4296
	/* Need to do basic initialisation of all rings first: */
4297
	for_each_ring(ring, dev_priv, i) {
4298
		ret = ring->init_hw(ring);
4299
		if (ret)
4300
			goto out;
4301
	}
4302
 
4303
	/* We can't enable contexts until all firmware is loaded */
4304
	if (HAS_GUC_UCODE(dev)) {
4305
		ret = intel_guc_ucode_load(dev);
4306
		if (ret) {
4307
			/*
4308
			 * If we got an error and GuC submission is enabled, map
4309
			 * the error to -EIO so the GPU will be declared wedged.
4310
			 * OTOH, if we didn't intend to use the GuC anyway, just
4311
			 * discard the error and carry on.
4312
			 */
4313
			DRM_ERROR("Failed to initialize GuC, error %d%s\n", ret,
4314
				  i915.enable_guc_submission ? "" :
4315
				  " (ignored)");
4316
			ret = i915.enable_guc_submission ? -EIO : 0;
4317
			if (ret)
4318
				goto out;
4319
		}
4320
	}
4321
 
3480 Serge 4322
	/*
6084 serge 4323
	 * Increment the next seqno by 0x100 so we have a visible break
4324
	 * on re-initialisation
3480 Serge 4325
	 */
6084 serge 4326
	ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100);
4327
	if (ret)
4328
		goto out;
5354 serge 4329
 
6084 serge 4330
	/* Now it is safe to go back round and do everything else: */
4331
	for_each_ring(ring, dev_priv, i) {
4332
		struct drm_i915_gem_request *req;
4560 Serge 4333
 
6084 serge 4334
		WARN_ON(!ring->default_context);
4335
 
4336
		ret = i915_gem_request_alloc(ring, ring->default_context, &req);
4337
		if (ret) {
4338
			i915_gem_cleanup_ringbuffer(dev);
4339
			goto out;
4340
		}
4341
 
4342
		if (ring->id == RCS) {
4343
			for (j = 0; j < NUM_L3_SLICES(dev); j++)
4344
				i915_gem_l3_remap(req, j);
4345
		}
4346
 
4347
		ret = i915_ppgtt_init_ring(req);
4348
		if (ret && ret != -EIO) {
4349
			DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
4350
			i915_gem_request_cancel(req);
4351
			i915_gem_cleanup_ringbuffer(dev);
4352
			goto out;
4353
		}
4354
 
4355
		ret = i915_gem_context_enable(req);
4356
		if (ret && ret != -EIO) {
4357
			DRM_ERROR("Context enable ring #%d failed %d\n", i, ret);
4358
			i915_gem_request_cancel(req);
4359
			i915_gem_cleanup_ringbuffer(dev);
4360
			goto out;
4361
		}
4362
 
4363
		i915_add_request_no_flush(req);
5354 serge 4364
	}
4365
 
6084 serge 4366
out:
4367
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5060 serge 4368
	return ret;
3031 serge 4369
}
4370
 
4371
int i915_gem_init(struct drm_device *dev)
4372
{
4373
	struct drm_i915_private *dev_priv = dev->dev_private;
4374
	int ret;
4375
 
5354 serge 4376
	i915.enable_execlists = intel_sanitize_enable_execlists(dev,
4377
			i915.enable_execlists);
4378
 
3031 serge 4379
	mutex_lock(&dev->struct_mutex);
3746 Serge 4380
 
4381
	if (IS_VALLEYVIEW(dev)) {
4382
		/* VLVA0 (potential hack), BIOS isn't actually waking us */
5060 serge 4383
		I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ);
4384
		if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) &
4385
			      VLV_GTLC_ALLOWWAKEACK), 10))
3746 Serge 4386
			DRM_DEBUG_DRIVER("allow wake ack timed out\n");
4387
	}
4388
 
5354 serge 4389
	if (!i915.enable_execlists) {
6084 serge 4390
		dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
5354 serge 4391
		dev_priv->gt.init_rings = i915_gem_init_rings;
4392
		dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer;
4393
		dev_priv->gt.stop_ring = intel_stop_ring_buffer;
4394
	} else {
6084 serge 4395
		dev_priv->gt.execbuf_submit = intel_execlists_submission;
5354 serge 4396
		dev_priv->gt.init_rings = intel_logical_rings_init;
4397
		dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup;
4398
		dev_priv->gt.stop_ring = intel_logical_ring_stop;
4399
	}
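
	/* The gt vtable chosen above selects the submission backend for the
	 * rest of the driver: either the legacy ringbuffer path or the
	 * execlists (logical ring context) path introduced for gen8+, with
	 * matching init, cleanup and stop hooks for the engines.
	 */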
4400
 
6084 serge 4401
	/* This is just a security blanket to placate dragons.
4402
	 * On some systems, we very sporadically observe that the first TLBs
4403
	 * used by the CS may be stale, despite us poking the TLB reset. If
4404
	 * we hold the forcewake during initialisation these problems
4405
	 * just magically go away.
4406
	 */
4407
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5354 serge 4408
 
6084 serge 4409
//	ret = i915_gem_init_userptr(dev);
4410
//	if (ret)
4411
//		goto out_unlock;
3746 Serge 4412
 
6084 serge 4413
	i915_gem_init_global_gtt(dev);
4414
 
5060 serge 4415
	ret = i915_gem_context_init(dev);
6084 serge 4416
	if (ret)
4417
		goto out_unlock;
3031 serge 4418
 
6084 serge 4419
	ret = dev_priv->gt.init_rings(dev);
4420
	if (ret)
4421
		goto out_unlock;
4422
 
5060 serge 4423
	ret = i915_gem_init_hw(dev);
4424
	if (ret == -EIO) {
4425
		/* Allow ring initialisation to fail by marking the GPU as
4426
		 * wedged. But we only want to do this where the GPU is angry,
4427
		 * for all other failure, such as an allocation failure, bail.
4428
		 */
4429
		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
6084 serge 4430
		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
5060 serge 4431
		ret = 0;
4432
	}
6084 serge 4433
 
4434
out_unlock:
4435
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5060 serge 4436
	mutex_unlock(&dev->struct_mutex);
3746 Serge 4437
 
6084 serge 4438
	return ret;
3031 serge 4439
}
4440
 
2332 Serge 4441
void
4442
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4443
{
5060 serge 4444
	struct drm_i915_private *dev_priv = dev->dev_private;
4445
	struct intel_engine_cs *ring;
2332 Serge 4446
	int i;
4447
 
3031 serge 4448
	for_each_ring(ring, dev_priv, i)
5354 serge 4449
		dev_priv->gt.cleanup_ring(ring);
2332 Serge 4450
}
4451
 
4452
static void
5060 serge 4453
init_ring_lists(struct intel_engine_cs *ring)
2326 Serge 4454
{
6084 serge 4455
	INIT_LIST_HEAD(&ring->active_list);
4456
	INIT_LIST_HEAD(&ring->request_list);
2326 Serge 4457
}
4458
 
4459
void
4460
i915_gem_load(struct drm_device *dev)
4461
{
5060 serge 4462
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 4463
	int i;
2326 Serge 4464
 
4104 Serge 4465
	INIT_LIST_HEAD(&dev_priv->vm_list);
4560 Serge 4466
	INIT_LIST_HEAD(&dev_priv->context_list);
3031 serge 4467
	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
4468
	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
6084 serge 4469
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4470
	for (i = 0; i < I915_NUM_RINGS; i++)
4471
		init_ring_lists(&dev_priv->ring[i]);
2342 Serge 4472
	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
6084 serge 4473
		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
2360 Serge 4474
	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4475
			  i915_gem_retire_work_handler);
4560 Serge 4476
	INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
4477
			  i915_gem_idle_work_handler);
3480 Serge 4478
	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
2326 Serge 4479
 
6084 serge 4480
	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
2326 Serge 4481
 
3746 Serge 4482
	if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev))
4483
		dev_priv->num_fence_regs = 32;
4484
	else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
6084 serge 4485
		dev_priv->num_fence_regs = 16;
4486
	else
4487
		dev_priv->num_fence_regs = 8;
2326 Serge 4488
 
6084 serge 4489
	if (intel_vgpu_active(dev))
4490
		dev_priv->num_fence_regs =
4491
				I915_READ(vgtif_reg(avail_rs.fence_num));
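	/* When running as a mediated-passthrough (vGPU) guest, the host
	 * partitions the fence registers between guests and advertises our
	 * share through the vgtif capability page, so the platform default
	 * chosen above is overridden with the host-provided count.
	 */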
4492
 
4493
	/*
4494
	 * Set initial sequence number for requests.
4495
	 * Using this number allows the wraparound to happen early,
4496
	 * catching any obvious problems.
4497
	 */
4498
	dev_priv->next_seqno = ((u32)~0 - 0x1100);
4499
	dev_priv->last_seqno = ((u32)~0 - 0x1101);
4500
 
4501
	/* Initialize fence registers to zero */
3746 Serge 4502
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4503
	i915_gem_restore_fences(dev);
2326 Serge 4504
 
6084 serge 4505
	i915_gem_detect_bit_6_swizzle(dev);
2326 Serge 4506
 
6084 serge 4507
	dev_priv->mm.interruptible = true;
2326 Serge 4508
 
5060 serge 4509
	mutex_init(&dev_priv->fb_tracking.lock);
2326 Serge 4510
}
4511
 
6084 serge 4512
void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4513
{
4514
	struct drm_i915_file_private *file_priv = file->driver_priv;
4515
 
4516
	/* Clean up our request list when the client is going away, so that
4517
	 * later retire_requests won't dereference our soon-to-be-gone
4518
	 * file_priv.
4519
	 */
4520
	spin_lock(&file_priv->mm.lock);
4521
	while (!list_empty(&file_priv->mm.request_list)) {
4522
		struct drm_i915_gem_request *request;
4523
 
4524
		request = list_first_entry(&file_priv->mm.request_list,
4525
					   struct drm_i915_gem_request,
4526
					   client_list);
4527
		list_del(&request->client_list);
4528
		request->file_priv = NULL;
4529
	}
4530
	spin_unlock(&file_priv->mm.lock);
4531
 
4532
	if (!list_empty(&file_priv->rps.link)) {
4533
		spin_lock(&to_i915(dev)->rps.client_lock);
4534
		list_del(&file_priv->rps.link);
4535
		spin_unlock(&to_i915(dev)->rps.client_lock);
4536
	}
4537
}
4538
 
5060 serge 4539
int i915_gem_open(struct drm_device *dev, struct drm_file *file)
4104 Serge 4540
{
5060 serge 4541
	struct drm_i915_file_private *file_priv;
4104 Serge 4542
	int ret;
2326 Serge 4543
 
5060 serge 4544
	DRM_DEBUG_DRIVER("\n");
4104 Serge 4545
 
5060 serge 4546
	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
4547
	if (!file_priv)
4104 Serge 4548
		return -ENOMEM;
4549
 
5060 serge 4550
	file->driver_priv = file_priv;
4551
	file_priv->dev_priv = dev->dev_private;
4552
	file_priv->file = file;
6084 serge 4553
	INIT_LIST_HEAD(&file_priv->rps.link);
4104 Serge 4554
 
5060 serge 4555
	spin_lock_init(&file_priv->mm.lock);
4556
	INIT_LIST_HEAD(&file_priv->mm.request_list);
4104 Serge 4557
 
5060 serge 4558
	ret = i915_gem_context_open(dev, file);
4559
	if (ret)
4560
		kfree(file_priv);
4104 Serge 4561
 
4562
	return ret;
4563
}
4564
 
5354 serge 4565
/**
4566
 * i915_gem_track_fb - update frontbuffer tracking
6084 serge 4567
 * @old: current GEM buffer for the frontbuffer slots
4568
 * @new: new GEM buffer for the frontbuffer slots
4569
 * @frontbuffer_bits: bitmask of frontbuffer slots
5354 serge 4570
 *
4571
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
4572
 * from @old and setting them in @new. Both @old and @new can be NULL.
4573
 */
5060 serge 4574
void i915_gem_track_fb(struct drm_i915_gem_object *old,
4575
		       struct drm_i915_gem_object *new,
4576
		       unsigned frontbuffer_bits)
4104 Serge 4577
{
5060 serge 4578
	if (old) {
4579
		WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
4580
		WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
4581
		old->frontbuffer_bits &= ~frontbuffer_bits;
4104 Serge 4582
	}
4583
 
5060 serge 4584
	if (new) {
4585
		WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
4586
		WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
4587
		new->frontbuffer_bits |= frontbuffer_bits;
4104 Serge 4588
	}
4589
}
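
/*
 * Illustrative use (a sketch, not a call site in this file): when a plane
 * update swaps the framebuffer backing object, the display code transfers the
 * plane's frontbuffer bit from the old object to the new one, e.g.
 *
 *	i915_gem_track_fb(old_obj, new_obj, INTEL_FRONTBUFFER_PRIMARY(pipe));
 *
 * so that frontbuffer invalidation and flushing keep tracking the object that
 * is actually being scanned out.
 */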
4590
 
4591
/* All the new VM stuff */
6084 serge 4592
u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
4593
			struct i915_address_space *vm)
4104 Serge 4594
{
4595
	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
4596
	struct i915_vma *vma;
4597
 
5354 serge 4598
	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
4104 Serge 4599
 
4600
	list_for_each_entry(vma, &o->vma_list, vma_link) {
6084 serge 4601
		if (i915_is_ggtt(vma->vm) &&
4602
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4603
			continue;
4104 Serge 4604
		if (vma->vm == vm)
4605
			return vma->node.start;
6084 serge 4606
	}
4104 Serge 4607
 
5060 serge 4608
	WARN(1, "%s vma for this object not found.\n",
4609
	     i915_is_ggtt(vm) ? "global" : "ppgtt");
4610
	return -1;
4104 Serge 4611
}
4612
 
6084 serge 4613
u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
4614
				  const struct i915_ggtt_view *view)
4615
{
4616
	struct i915_address_space *ggtt = i915_obj_to_ggtt(o);
4617
	struct i915_vma *vma;
4618
 
4619
	list_for_each_entry(vma, &o->vma_list, vma_link)
4620
		if (vma->vm == ggtt &&
4621
		    i915_ggtt_view_equal(&vma->ggtt_view, view))
4622
			return vma->node.start;
4623
 
4624
	WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
4625
	return -1;
4626
}
4627
 
4104 Serge 4628
bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
4629
			struct i915_address_space *vm)
4630
{
4631
	struct i915_vma *vma;
4632
 
6084 serge 4633
	list_for_each_entry(vma, &o->vma_list, vma_link) {
4634
		if (i915_is_ggtt(vma->vm) &&
4635
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4636
			continue;
4104 Serge 4637
		if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
4638
			return true;
6084 serge 4639
	}
4104 Serge 4640
 
4641
	return false;
4642
}
4643
 
6084 serge 4644
bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
4645
				  const struct i915_ggtt_view *view)
4646
{
4647
	struct i915_address_space *ggtt = i915_obj_to_ggtt(o);
4648
	struct i915_vma *vma;
4649
 
4650
	list_for_each_entry(vma, &o->vma_list, vma_link)
4651
		if (vma->vm == ggtt &&
4652
		    i915_ggtt_view_equal(&vma->ggtt_view, view) &&
4653
		    drm_mm_node_allocated(&vma->node))
4654
			return true;
4655
 
4656
	return false;
4657
}
4658
 
4104 Serge 4659
bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
4660
{
4560 Serge 4661
	struct i915_vma *vma;
4104 Serge 4662
 
4560 Serge 4663
	list_for_each_entry(vma, &o->vma_list, vma_link)
4664
		if (drm_mm_node_allocated(&vma->node))
4104 Serge 4665
			return true;
4666
 
4667
	return false;
4668
}
4669
 
4670
unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
4671
				struct i915_address_space *vm)
4672
{
4673
	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
4674
	struct i915_vma *vma;
4675
 
5354 serge 4676
	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
4104 Serge 4677
 
4678
	BUG_ON(list_empty(&o->vma_list));
4679
 
6084 serge 4680
	list_for_each_entry(vma, &o->vma_list, vma_link) {
4681
		if (i915_is_ggtt(vma->vm) &&
4682
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4683
			continue;
4104 Serge 4684
		if (vma->vm == vm)
4685
			return vma->node.size;
6084 serge 4686
	}
4104 Serge 4687
	return 0;
4688
}
4560 Serge 4689
 
6084 serge 4690
bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
4691
{
4692
	struct i915_vma *vma;
4693
	list_for_each_entry(vma, &obj->vma_list, vma_link)
4694
		if (vma->pin_count > 0)
4695
			return true;
4560 Serge 4696
 
6084 serge 4697
	return false;
4698
}
5060 serge 4699
 
6084 serge 4700
/* Allocate a new GEM object and fill it with the supplied data */
4701
struct drm_i915_gem_object *
4702
i915_gem_object_create_from_data(struct drm_device *dev,
4703
			         const void *data, size_t size)
4104 Serge 4704
{
6084 serge 4705
	struct drm_i915_gem_object *obj;
4706
	struct sg_table *sg;
4707
	size_t bytes;
4708
	int ret;
4104 Serge 4709
 
6084 serge 4710
	obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE));
4711
	if (IS_ERR_OR_NULL(obj))
4712
		return obj;
4104 Serge 4713
 
6084 serge 4714
	ret = i915_gem_object_set_to_cpu_domain(obj, true);
4715
	if (ret)
4716
		goto fail;
4717
 
4718
	ret = i915_gem_object_get_pages(obj);
4719
	if (ret)
4720
		goto fail;
4721
 
4722
	i915_gem_object_pin_pages(obj);
4723
	sg = obj->pages;
4724
	bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
4725
	i915_gem_object_unpin_pages(obj);
4726
 
4727
	if (WARN_ON(bytes != size)) {
4728
		DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
4729
		ret = -EFAULT;
4730
		goto fail;
4731
	}
4732
 
4733
	return obj;
4734
 
4735
fail:
4736
	drm_gem_object_unreference(&obj->base);
4737
	return ERR_PTR(ret);
4104 Serge 4738
}
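
/*
 * Illustrative use (a sketch, under the assumption that a firmware loader
 * such as the GuC path is built for this platform): a firmware blob can be
 * wrapped into a GEM object with this helper before being handed to the
 * hardware, e.g.
 *
 *	obj = i915_gem_object_create_from_data(dev, fw->data, fw->size);
 *	if (IS_ERR_OR_NULL(obj))
 *		goto fail;
 *
 * The copy is made through the CPU domain, so the caller is responsible for
 * any further domain management before GPU access.
 */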