Subversion Repositories Kolibri OS

Rev

Rev 6937 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2326 Serge 1
/*
6084 serge 2
 * Copyright © 2008-2015 Intel Corporation
2326 Serge 3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 * IN THE SOFTWARE.
22
 *
23
 * Authors:
24
 *    Eric Anholt 
25
 *
26
 */
27
 
3031 serge 28
#include 
4280 Serge 29
#include 
3031 serge 30
#include 
2326 Serge 31
#include "i915_drv.h"
6084 serge 32
#include "i915_vgpu.h"
2351 Serge 33
#include "i915_trace.h"
2326 Serge 34
#include "intel_drv.h"
3260 Serge 35
#include 
2330 Serge 36
#include 
6660 serge 37
#include 
2326 Serge 38
#include 
6660 serge 39
#include 
40
 
6084 serge 41
#define RQ_BUG_ON(expr)
2326 Serge 42
 
2344 Serge 43
extern int x86_clflush_size;
2332 Serge 44
 
3263 Serge 45
#define PROT_READ       0x1             /* page can be read */
46
#define PROT_WRITE      0x2             /* page can be written */
47
#define MAP_SHARED      0x01            /* Share changes */
48
 
3266 Serge 49
struct drm_i915_gem_object *get_fb_obj();
50
 
3263 Serge 51
unsigned long vm_mmap(struct file *file, unsigned long addr,
52
         unsigned long len, unsigned long prot,
53
         unsigned long flag, unsigned long offset);
54
 
2344 Serge 55
 
2332 Serge 56
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
6084 serge 57
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
5060 serge 58
static void
6084 serge 59
i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
60
static void
61
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
2326 Serge 62
 
4104 Serge 63
static bool cpu_cache_is_coherent(struct drm_device *dev,
64
				  enum i915_cache_level level)
65
{
66
	return HAS_LLC(dev) || level != I915_CACHE_NONE;
67
}
68
 
69
static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
70
{
71
	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
72
		return true;
73
 
74
	return obj->pin_display;
75
}
76
 
2332 Serge 77
/* some bookkeeping */
78
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
79
				  size_t size)
80
{
4104 Serge 81
	spin_lock(&dev_priv->mm.object_stat_lock);
2332 Serge 82
	dev_priv->mm.object_count++;
83
	dev_priv->mm.object_memory += size;
4104 Serge 84
	spin_unlock(&dev_priv->mm.object_stat_lock);
2332 Serge 85
}
86
 
87
static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
88
				     size_t size)
89
{
4104 Serge 90
	spin_lock(&dev_priv->mm.object_stat_lock);
2332 Serge 91
	dev_priv->mm.object_count--;
92
	dev_priv->mm.object_memory -= size;
4104 Serge 93
	spin_unlock(&dev_priv->mm.object_stat_lock);
2332 Serge 94
}
95
 
96
static int
3480 Serge 97
i915_gem_wait_for_error(struct i915_gpu_error *error)
2332 Serge 98
{
99
	int ret;
100
 
3480 Serge 101
#define EXIT_COND (!i915_reset_in_progress(error))
102
	if (EXIT_COND)
2332 Serge 103
		return 0;
3255 Serge 104
#if 0
3031 serge 105
	/*
106
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
107
	 * userspace. If it takes that long something really bad is going on and
108
	 * we should simply try to bail out and fail as gracefully as possible.
109
	 */
3480 Serge 110
	ret = wait_event_interruptible_timeout(error->reset_queue,
111
					       EXIT_COND,
112
					       10*HZ);
3031 serge 113
	if (ret == 0) {
114
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
115
		return -EIO;
116
	} else if (ret < 0) {
2332 Serge 117
		return ret;
3031 serge 118
	}
2332 Serge 119
 
3255 Serge 120
#endif
3480 Serge 121
#undef EXIT_COND
3255 Serge 122
 
2332 Serge 123
	return 0;
124
}
125
 
126
int i915_mutex_lock_interruptible(struct drm_device *dev)
127
{
3480 Serge 128
	struct drm_i915_private *dev_priv = dev->dev_private;
2332 Serge 129
	int ret;
130
 
3480 Serge 131
	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
2332 Serge 132
	if (ret)
133
		return ret;
134
 
3480 Serge 135
	ret = mutex_lock_interruptible(&dev->struct_mutex);
136
	if (ret)
137
		return ret;
2332 Serge 138
 
139
	WARN_ON(i915_verify_lists(dev));
140
	return 0;
141
}
142
 
143
int
144
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
145
			    struct drm_file *file)
146
{
147
	struct drm_i915_private *dev_priv = dev->dev_private;
148
	struct drm_i915_gem_get_aperture *args = data;
6084 serge 149
	struct i915_gtt *ggtt = &dev_priv->gtt;
150
	struct i915_vma *vma;
2332 Serge 151
	size_t pinned;
152
 
153
	pinned = 0;
154
	mutex_lock(&dev->struct_mutex);
7144 serge 155
	list_for_each_entry(vma, &ggtt->base.active_list, vm_link)
6084 serge 156
		if (vma->pin_count)
157
			pinned += vma->node.size;
7144 serge 158
	list_for_each_entry(vma, &ggtt->base.inactive_list, vm_link)
6084 serge 159
		if (vma->pin_count)
160
			pinned += vma->node.size;
2332 Serge 161
	mutex_unlock(&dev->struct_mutex);
162
 
4104 Serge 163
	args->aper_size = dev_priv->gtt.base.total;
2342 Serge 164
	args->aper_available_size = args->aper_size - pinned;
2332 Serge 165
 
166
	return 0;
167
}
168
 
6296 serge 169
static int
170
i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
171
{
172
	char *vaddr = obj->phys_handle->vaddr;
173
	struct sg_table *st;
174
	struct scatterlist *sg;
175
	int i;
176
 
177
	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
178
		return -EINVAL;
179
 
180
 
181
	st = kmalloc(sizeof(*st), GFP_KERNEL);
182
	if (st == NULL)
183
		return -ENOMEM;
184
 
185
	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
186
		kfree(st);
187
		return -ENOMEM;
188
	}
189
 
190
	sg = st->sgl;
191
	sg->offset = 0;
192
	sg->length = obj->base.size;
193
 
194
	sg_dma_address(sg) = obj->phys_handle->busaddr;
195
	sg_dma_len(sg) = obj->base.size;
196
 
197
	obj->pages = st;
198
	return 0;
199
}
200
 
201
static void
202
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
203
{
204
	int ret;
205
 
206
	BUG_ON(obj->madv == __I915_MADV_PURGED);
207
 
208
	ret = i915_gem_object_set_to_cpu_domain(obj, true);
209
	if (ret) {
210
		/* In the event of a disaster, abandon all caches and
211
		 * hope for the best.
212
		 */
213
		WARN_ON(ret != -EIO);
214
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
215
	}
216
 
217
	if (obj->madv == I915_MADV_DONTNEED)
218
		obj->dirty = 0;
219
 
220
	if (obj->dirty) {
221
		obj->dirty = 0;
222
	}
223
 
224
	sg_free_table(obj->pages);
225
	kfree(obj->pages);
226
}
227
 
228
static void
229
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
230
{
231
	drm_pci_free(obj->base.dev, obj->phys_handle);
232
}
233
 
234
static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
235
	.get_pages = i915_gem_object_get_pages_phys,
236
	.put_pages = i915_gem_object_put_pages_phys,
237
	.release = i915_gem_object_release_phys,
238
};
239
 
240
static int
241
drop_pages(struct drm_i915_gem_object *obj)
242
{
243
	struct i915_vma *vma, *next;
244
	int ret;
245
 
246
	drm_gem_object_reference(&obj->base);
7144 serge 247
	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link)
6296 serge 248
		if (i915_vma_unbind(vma))
249
			break;
250
 
251
	ret = i915_gem_object_put_pages(obj);
252
	drm_gem_object_unreference(&obj->base);
253
 
254
	return ret;
255
}
256
 
257
int
258
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
259
			    int align)
260
{
261
	drm_dma_handle_t *phys;
262
	int ret;
263
 
264
	if (obj->phys_handle) {
265
		if ((unsigned long)obj->phys_handle->vaddr & (align -1))
266
			return -EBUSY;
267
 
268
		return 0;
269
	}
270
 
271
	if (obj->madv != I915_MADV_WILLNEED)
272
		return -EFAULT;
273
 
274
	if (obj->base.filp == NULL)
275
		return -EINVAL;
276
 
277
	ret = drop_pages(obj);
278
	if (ret)
279
		return ret;
280
 
281
	/* create a new object */
282
	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
283
	if (!phys)
284
		return -ENOMEM;
285
 
286
	obj->phys_handle = phys;
287
	obj->ops = &i915_gem_phys_ops;
288
 
289
	return i915_gem_object_get_pages(obj);
290
}
3480 Serge 291
void *i915_gem_object_alloc(struct drm_device *dev)
292
{
293
	struct drm_i915_private *dev_priv = dev->dev_private;
5367 serge 294
    return kzalloc(sizeof(struct drm_i915_gem_object), 0);
3480 Serge 295
}
296
 
297
void i915_gem_object_free(struct drm_i915_gem_object *obj)
298
{
299
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
300
	kfree(obj);
301
}
302
 
3031 serge 303
static int
304
i915_gem_create(struct drm_file *file,
2332 Serge 305
		struct drm_device *dev,
306
		uint64_t size,
307
		uint32_t *handle_p)
308
{
309
	struct drm_i915_gem_object *obj;
310
	int ret;
311
	u32 handle;
312
 
313
	size = roundup(size, PAGE_SIZE);
2342 Serge 314
	if (size == 0)
315
		return -EINVAL;
2332 Serge 316
 
317
	/* Allocate the new object */
318
	obj = i915_gem_alloc_object(dev, size);
319
	if (obj == NULL)
320
		return -ENOMEM;
321
 
322
	ret = drm_gem_handle_create(file, &obj->base, &handle);
4104 Serge 323
	/* drop reference from allocate - handle holds it now */
324
	drm_gem_object_unreference_unlocked(&obj->base);
325
	if (ret)
2332 Serge 326
		return ret;
327
 
328
	*handle_p = handle;
329
	return 0;
330
}
331
 
332
int
333
i915_gem_dumb_create(struct drm_file *file,
334
		     struct drm_device *dev,
335
		     struct drm_mode_create_dumb *args)
336
{
337
	/* have to work out size/pitch and return them */
4560 Serge 338
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
2332 Serge 339
	args->size = args->pitch * args->height;
340
	return i915_gem_create(file, dev,
341
			       args->size, &args->handle);
342
}
343
 
2326 Serge 344
/**
2332 Serge 345
 * Creates a new mm object and returns a handle to it.
346
 */
347
int
348
i915_gem_create_ioctl(struct drm_device *dev, void *data,
349
		      struct drm_file *file)
350
{
351
	struct drm_i915_gem_create *args = data;
3031 serge 352
 
2332 Serge 353
	return i915_gem_create(file, dev,
354
			       args->size, &args->handle);
355
}
356
 
3031 serge 357
static inline int
358
__copy_to_user_swizzled(char __user *cpu_vaddr,
359
			const char *gpu_vaddr, int gpu_offset,
6084 serge 360
			int length)
2332 Serge 361
{
3031 serge 362
	int ret, cpu_offset = 0;
2332 Serge 363
 
3031 serge 364
	while (length > 0) {
365
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
366
		int this_length = min(cacheline_end - gpu_offset, length);
367
		int swizzled_gpu_offset = gpu_offset ^ 64;
2332 Serge 368
 
3031 serge 369
		ret = __copy_to_user(cpu_vaddr + cpu_offset,
370
				     gpu_vaddr + swizzled_gpu_offset,
371
				     this_length);
372
		if (ret)
373
			return ret + length;
2332 Serge 374
 
3031 serge 375
		cpu_offset += this_length;
376
		gpu_offset += this_length;
377
		length -= this_length;
378
	}
379
 
380
	return 0;
2332 Serge 381
}
382
 
3031 serge 383
static inline int
384
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
385
			  const char __user *cpu_vaddr,
386
			  int length)
2332 Serge 387
{
3031 serge 388
	int ret, cpu_offset = 0;
2332 Serge 389
 
390
	while (length > 0) {
391
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
392
		int this_length = min(cacheline_end - gpu_offset, length);
393
		int swizzled_gpu_offset = gpu_offset ^ 64;
394
 
3031 serge 395
		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
6084 serge 396
				       cpu_vaddr + cpu_offset,
397
				       this_length);
3031 serge 398
		if (ret)
399
			return ret + length;
400
 
2332 Serge 401
		cpu_offset += this_length;
402
		gpu_offset += this_length;
403
		length -= this_length;
404
	}
405
 
3031 serge 406
	return 0;
2332 Serge 407
}
408
 
6131 serge 409
/*
410
 * Pins the specified object's pages and synchronizes the object with
411
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
412
 * flush the object from the CPU cache.
413
 */
414
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
415
				    int *needs_clflush)
416
{
417
	int ret;
418
 
419
	*needs_clflush = 0;
420
 
421
	if (!obj->base.filp)
422
		return -EINVAL;
423
 
424
	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
425
		/* If we're not in the cpu read domain, set ourself into the gtt
426
		 * read domain and manually flush cachelines (if required). This
427
		 * optimizes for the case when the gpu will dirty the data
428
		 * anyway again before the next pread happens. */
429
		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
430
							obj->cache_level);
431
		ret = i915_gem_object_wait_rendering(obj, true);
432
		if (ret)
433
			return ret;
434
	}
435
 
436
	ret = i915_gem_object_get_pages(obj);
437
	if (ret)
438
		return ret;
439
 
440
	i915_gem_object_pin_pages(obj);
441
 
442
	return ret;
443
}
444
 
3031 serge 445
/* Per-page copy function for the shmem pread fastpath.
446
 * Flushes invalid cachelines before reading the target if
447
 * needs_clflush is set. */
2332 Serge 448
static int
3031 serge 449
shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
450
		 char __user *user_data,
451
		 bool page_do_bit17_swizzling, bool needs_clflush)
452
{
6084 serge 453
	char *vaddr;
454
	int ret;
3031 serge 455
 
456
	if (unlikely(page_do_bit17_swizzling))
457
		return -EINVAL;
458
 
6084 serge 459
	vaddr = kmap_atomic(page);
3031 serge 460
	if (needs_clflush)
461
		drm_clflush_virt_range(vaddr + shmem_page_offset,
462
				       page_length);
6084 serge 463
	ret = __copy_to_user_inatomic(user_data,
3031 serge 464
				      vaddr + shmem_page_offset,
6084 serge 465
				      page_length);
466
	kunmap_atomic(vaddr);
3031 serge 467
 
468
	return ret ? -EFAULT : 0;
469
}
470
 
471
static void
472
shmem_clflush_swizzled_range(char *addr, unsigned long length,
473
			     bool swizzled)
474
{
475
	if (unlikely(swizzled)) {
476
		unsigned long start = (unsigned long) addr;
477
		unsigned long end = (unsigned long) addr + length;
478
 
479
		/* For swizzling simply ensure that we always flush both
480
		 * channels. Lame, but simple and it works. Swizzled
481
		 * pwrite/pread is far from a hotpath - current userspace
482
		 * doesn't use it at all. */
483
		start = round_down(start, 128);
484
		end = round_up(end, 128);
485
 
486
		drm_clflush_virt_range((void *)start, end - start);
487
	} else {
488
		drm_clflush_virt_range(addr, length);
489
	}
490
 
491
}
492
 
493
/* Only difference to the fast-path function is that this can handle bit17
494
 * and uses non-atomic copy and kmap functions. */
495
static int
496
shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
497
		 char __user *user_data,
498
		 bool page_do_bit17_swizzling, bool needs_clflush)
499
{
500
	char *vaddr;
501
	int ret;
502
 
503
	vaddr = kmap(page);
504
	if (needs_clflush)
505
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
506
					     page_length,
507
					     page_do_bit17_swizzling);
508
 
509
	if (page_do_bit17_swizzling)
510
		ret = __copy_to_user_swizzled(user_data,
511
					      vaddr, shmem_page_offset,
512
					      page_length);
513
	else
514
		ret = __copy_to_user(user_data,
515
				     vaddr + shmem_page_offset,
516
				     page_length);
517
	kunmap(page);
518
 
519
	return ret ? - EFAULT : 0;
520
}
521
 
522
static int
523
i915_gem_shmem_pread(struct drm_device *dev,
6084 serge 524
		     struct drm_i915_gem_object *obj,
525
		     struct drm_i915_gem_pread *args,
526
		     struct drm_file *file)
2332 Serge 527
{
3031 serge 528
	char __user *user_data;
2332 Serge 529
	ssize_t remain;
530
	loff_t offset;
3031 serge 531
	int shmem_page_offset, page_length, ret = 0;
532
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
533
	int prefaulted = 0;
534
	int needs_clflush = 0;
3746 Serge 535
	struct sg_page_iter sg_iter;
2332 Serge 536
 
3746 Serge 537
	user_data = to_user_ptr(args->data_ptr);
2332 Serge 538
	remain = args->size;
539
 
3031 serge 540
	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
541
 
5060 serge 542
	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
3031 serge 543
	if (ret)
544
		return ret;
545
 
2332 Serge 546
	offset = args->offset;
547
 
3746 Serge 548
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
549
			 offset >> PAGE_SHIFT) {
550
		struct page *page = sg_page_iter_page(&sg_iter);
2332 Serge 551
 
3031 serge 552
		if (remain <= 0)
553
			break;
554
 
2332 Serge 555
		/* Operation in this page
556
		 *
3031 serge 557
		 * shmem_page_offset = offset within page in shmem file
2332 Serge 558
		 * page_length = bytes to copy for this page
559
		 */
3031 serge 560
		shmem_page_offset = offset_in_page(offset);
2332 Serge 561
		page_length = remain;
3031 serge 562
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
563
			page_length = PAGE_SIZE - shmem_page_offset;
2332 Serge 564
 
3031 serge 565
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
566
			(page_to_phys(page) & (1 << 17)) != 0;
2332 Serge 567
 
3031 serge 568
		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
569
				       user_data, page_do_bit17_swizzling,
570
				       needs_clflush);
571
		if (ret == 0)
572
			goto next_page;
2332 Serge 573
 
3031 serge 574
		mutex_unlock(&dev->struct_mutex);
575
 
576
		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
577
				       user_data, page_do_bit17_swizzling,
578
				       needs_clflush);
579
 
580
		mutex_lock(&dev->struct_mutex);
581
 
2332 Serge 582
		if (ret)
3031 serge 583
			goto out;
2332 Serge 584
 
5060 serge 585
next_page:
2332 Serge 586
		remain -= page_length;
587
		user_data += page_length;
588
		offset += page_length;
589
	}
590
 
3031 serge 591
out:
592
	i915_gem_object_unpin_pages(obj);
593
 
594
	return ret;
2332 Serge 595
}
596
 
597
/**
3031 serge 598
 * Reads data from the object referenced by handle.
599
 *
600
 * On error, the contents of *data are undefined.
2332 Serge 601
 */
3031 serge 602
int
603
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
604
		     struct drm_file *file)
605
{
606
	struct drm_i915_gem_pread *args = data;
607
	struct drm_i915_gem_object *obj;
608
	int ret = 0;
609
 
610
	if (args->size == 0)
611
		return 0;
612
 
613
	ret = i915_mutex_lock_interruptible(dev);
614
	if (ret)
615
		return ret;
616
 
617
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
618
	if (&obj->base == NULL) {
619
		ret = -ENOENT;
620
		goto unlock;
621
	}
622
 
623
	/* Bounds check source.  */
624
	if (args->offset > obj->base.size ||
625
	    args->size > obj->base.size - args->offset) {
626
		ret = -EINVAL;
627
		goto out;
628
	}
629
 
630
	/* prime objects have no backing filp to GEM pread/pwrite
631
	 * pages from.
632
	 */
633
	if (!obj->base.filp) {
634
		ret = -EINVAL;
635
		goto out;
636
	}
637
 
638
	trace_i915_gem_object_pread(obj, args->offset, args->size);
639
 
640
	ret = i915_gem_shmem_pread(dev, obj, args, file);
641
 
642
out:
643
	drm_gem_object_unreference(&obj->base);
644
unlock:
645
	mutex_unlock(&dev->struct_mutex);
646
	return ret;
647
}
648
 
649
/* This is the fast write path which cannot handle
650
 * page faults in the source data
651
 */
652
 
7144 serge 653
static inline int
654
fast_user_write(struct io_mapping *mapping,
655
		loff_t page_base, int page_offset,
656
		char __user *user_data,
657
		int length)
658
{
659
	void __iomem *vaddr_atomic;
660
	void *vaddr;
661
	unsigned long unwritten;
3031 serge 662
 
7144 serge 663
	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
664
	/* We can use the cpu mem copy function because this is X86. */
665
	vaddr = (void __force*)vaddr_atomic + page_offset;
666
	unwritten = __copy_from_user_inatomic_nocache(vaddr,
667
						      user_data, length);
668
	io_mapping_unmap_atomic(vaddr_atomic);
669
	return unwritten;
670
}
671
 
3031 serge 672
/**
673
 * This is the fast pwrite path, where we copy the data directly from the
674
 * user into the GTT, uncached.
675
 */
2332 Serge 676
static int
3031 serge 677
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
678
			 struct drm_i915_gem_object *obj,
679
			 struct drm_i915_gem_pwrite *args,
680
			 struct drm_file *file)
2332 Serge 681
{
5060 serge 682
	struct drm_i915_private *dev_priv = dev->dev_private;
2332 Serge 683
	ssize_t remain;
3031 serge 684
	loff_t offset, page_base;
685
	char __user *user_data;
686
	int page_offset, page_length, ret;
2332 Serge 687
 
5060 serge 688
	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
3031 serge 689
	if (ret)
690
		goto out;
691
 
692
	ret = i915_gem_object_set_to_gtt_domain(obj, true);
693
	if (ret)
694
		goto out_unpin;
695
 
696
	ret = i915_gem_object_put_fence(obj);
697
	if (ret)
698
		goto out_unpin;
699
 
4539 Serge 700
	user_data = to_user_ptr(args->data_ptr);
2332 Serge 701
	remain = args->size;
702
 
4104 Serge 703
	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
2332 Serge 704
 
6084 serge 705
	intel_fb_obj_invalidate(obj, ORIGIN_GTT);
706
 
3031 serge 707
	while (remain > 0) {
708
		/* Operation in this page
709
		 *
710
		 * page_base = page offset within aperture
711
		 * page_offset = offset within page
712
		 * page_length = bytes to copy for this page
713
		 */
714
		page_base = offset & PAGE_MASK;
715
		page_offset = offset_in_page(offset);
716
		page_length = remain;
717
		if ((page_offset + remain) > PAGE_SIZE)
718
			page_length = PAGE_SIZE - page_offset;
2332 Serge 719
 
7144 serge 720
		/* If we get a fault while copying data, then (presumably) our
721
		 * source page isn't available.  Return the error and we'll
722
		 * retry in the slow path.
723
		 */
724
		if (fast_user_write(dev_priv->gtt.mappable, page_base,
725
				    page_offset, user_data, page_length)) {
726
			ret = -EFAULT;
727
			goto out_flush;
728
		}
3031 serge 729
 
730
		remain -= page_length;
731
		user_data += page_length;
732
		offset += page_length;
2332 Serge 733
	}
734
 
6084 serge 735
out_flush:
736
	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
3031 serge 737
out_unpin:
5060 serge 738
	i915_gem_object_ggtt_unpin(obj);
3031 serge 739
out:
6084 serge 740
	return ret;
3031 serge 741
}
742
 
743
/* Per-page copy function for the shmem pwrite fastpath.
744
 * Flushes invalid cachelines before writing to the target if
745
 * needs_clflush_before is set and flushes out any written cachelines after
746
 * writing if needs_clflush is set. */
747
static int
748
shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
749
		  char __user *user_data,
750
		  bool page_do_bit17_swizzling,
751
		  bool needs_clflush_before,
752
		  bool needs_clflush_after)
753
{
754
	char *vaddr;
5354 serge 755
	int ret;
3031 serge 756
 
757
	if (unlikely(page_do_bit17_swizzling))
758
		return -EINVAL;
759
 
5354 serge 760
	vaddr = kmap_atomic(page);
3031 serge 761
	if (needs_clflush_before)
762
		drm_clflush_virt_range(vaddr + shmem_page_offset,
763
				       page_length);
7144 serge 764
	ret = __copy_from_user_inatomic(vaddr + shmem_page_offset,
765
					user_data, page_length);
3031 serge 766
	if (needs_clflush_after)
767
		drm_clflush_virt_range(vaddr + shmem_page_offset,
768
				       page_length);
5354 serge 769
	kunmap_atomic(vaddr);
3031 serge 770
 
771
	return ret ? -EFAULT : 0;
772
}
773
 
774
/* Only difference to the fast-path function is that this can handle bit17
775
 * and uses non-atomic copy and kmap functions. */
776
static int
777
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
778
		  char __user *user_data,
779
		  bool page_do_bit17_swizzling,
780
		  bool needs_clflush_before,
781
		  bool needs_clflush_after)
782
{
783
	char *vaddr;
784
	int ret;
785
 
786
	vaddr = kmap(page);
787
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
788
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
789
					     page_length,
790
					     page_do_bit17_swizzling);
791
	if (page_do_bit17_swizzling)
792
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
793
						user_data,
794
						page_length);
795
	else
796
		ret = __copy_from_user(vaddr + shmem_page_offset,
797
				       user_data,
798
				       page_length);
799
	if (needs_clflush_after)
800
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
801
					     page_length,
802
					     page_do_bit17_swizzling);
803
	kunmap(page);
804
 
805
	return ret ? -EFAULT : 0;
806
}
807
 
808
static int
809
i915_gem_shmem_pwrite(struct drm_device *dev,
810
		      struct drm_i915_gem_object *obj,
811
		      struct drm_i915_gem_pwrite *args,
812
		      struct drm_file *file)
813
{
814
	ssize_t remain;
815
	loff_t offset;
816
	char __user *user_data;
817
	int shmem_page_offset, page_length, ret = 0;
818
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
819
	int hit_slowpath = 0;
820
	int needs_clflush_after = 0;
821
	int needs_clflush_before = 0;
3746 Serge 822
	struct sg_page_iter sg_iter;
3031 serge 823
 
3746 Serge 824
	user_data = to_user_ptr(args->data_ptr);
3031 serge 825
	remain = args->size;
826
 
827
	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
828
 
829
	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
830
		/* If we're not in the cpu write domain, set ourself into the gtt
831
		 * write domain and manually flush cachelines (if required). This
832
		 * optimizes for the case when the gpu will use the data
833
		 * right away and we therefore have to clflush anyway. */
4104 Serge 834
		needs_clflush_after = cpu_write_needs_clflush(obj);
4560 Serge 835
		ret = i915_gem_object_wait_rendering(obj, false);
6084 serge 836
		if (ret)
837
			return ret;
838
	}
4104 Serge 839
	/* Same trick applies to invalidate partially written cachelines read
840
	 * before writing. */
841
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
842
		needs_clflush_before =
843
			!cpu_cache_is_coherent(dev, obj->cache_level);
3031 serge 844
 
845
	ret = i915_gem_object_get_pages(obj);
2332 Serge 846
	if (ret)
3031 serge 847
		return ret;
2332 Serge 848
 
6084 serge 849
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
850
 
3031 serge 851
	i915_gem_object_pin_pages(obj);
2332 Serge 852
 
853
	offset = args->offset;
3031 serge 854
	obj->dirty = 1;
2332 Serge 855
 
3746 Serge 856
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
857
			 offset >> PAGE_SHIFT) {
858
		struct page *page = sg_page_iter_page(&sg_iter);
3031 serge 859
		int partial_cacheline_write;
2332 Serge 860
 
3031 serge 861
		if (remain <= 0)
862
			break;
863
 
2332 Serge 864
		/* Operation in this page
865
		 *
866
		 * shmem_page_offset = offset within page in shmem file
867
		 * page_length = bytes to copy for this page
868
		 */
869
		shmem_page_offset = offset_in_page(offset);
870
 
871
		page_length = remain;
872
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
873
			page_length = PAGE_SIZE - shmem_page_offset;
874
 
3031 serge 875
		/* If we don't overwrite a cacheline completely we need to be
876
		 * careful to have up-to-date data by first clflushing. Don't
877
		 * overcomplicate things and flush the entire patch. */
878
		partial_cacheline_write = needs_clflush_before &&
879
			((shmem_page_offset | page_length)
3260 Serge 880
				& (x86_clflush_size - 1));
2332 Serge 881
 
3031 serge 882
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
883
			(page_to_phys(page) & (1 << 17)) != 0;
2332 Serge 884
 
3031 serge 885
		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
886
					user_data, page_do_bit17_swizzling,
887
					partial_cacheline_write,
888
					needs_clflush_after);
889
		if (ret == 0)
890
			goto next_page;
891
 
892
		hit_slowpath = 1;
893
		mutex_unlock(&dev->struct_mutex);
6296 serge 894
		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
895
					user_data, page_do_bit17_swizzling,
896
					partial_cacheline_write,
897
					needs_clflush_after);
3031 serge 898
 
899
		mutex_lock(&dev->struct_mutex);
900
 
901
		if (ret)
902
			goto out;
903
 
5354 serge 904
next_page:
2332 Serge 905
		remain -= page_length;
3031 serge 906
		user_data += page_length;
2332 Serge 907
		offset += page_length;
908
	}
909
 
910
out:
3031 serge 911
	i915_gem_object_unpin_pages(obj);
912
 
913
	if (hit_slowpath) {
3480 Serge 914
		/*
915
		 * Fixup: Flush cpu caches in case we didn't flush the dirty
916
		 * cachelines in-line while writing and the object moved
917
		 * out of the cpu write domain while we've dropped the lock.
918
		 */
919
		if (!needs_clflush_after &&
920
		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
4104 Serge 921
			if (i915_gem_clflush_object(obj, obj->pin_display))
6084 serge 922
				needs_clflush_after = true;
3031 serge 923
		}
2332 Serge 924
	}
925
 
3031 serge 926
	if (needs_clflush_after)
3243 Serge 927
		i915_gem_chipset_flush(dev);
6084 serge 928
	else
929
		obj->cache_dirty = true;
3031 serge 930
 
6084 serge 931
	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
2332 Serge 932
	return ret;
933
}
3031 serge 934
 
935
/**
936
 * Writes data to the object referenced by handle.
937
 *
938
 * On error, the contents of the buffer that were to be modified are undefined.
939
 */
940
int
941
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
942
		      struct drm_file *file)
943
{
6084 serge 944
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 945
	struct drm_i915_gem_pwrite *args = data;
946
	struct drm_i915_gem_object *obj;
947
	int ret;
948
 
4104 Serge 949
	if (args->size == 0)
950
		return 0;
951
 
6084 serge 952
	intel_runtime_pm_get(dev_priv);
3480 Serge 953
 
3031 serge 954
	ret = i915_mutex_lock_interruptible(dev);
955
	if (ret)
6084 serge 956
		goto put_rpm;
3031 serge 957
 
958
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
959
	if (&obj->base == NULL) {
960
		ret = -ENOENT;
961
		goto unlock;
962
	}
963
 
964
	/* Bounds check destination. */
965
	if (args->offset > obj->base.size ||
966
	    args->size > obj->base.size - args->offset) {
967
		ret = -EINVAL;
968
		goto out;
969
	}
970
 
971
	/* prime objects have no backing filp to GEM pread/pwrite
972
	 * pages from.
973
	 */
974
	if (!obj->base.filp) {
975
		ret = -EINVAL;
976
		goto out;
977
	}
978
 
979
	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
980
 
981
	ret = -EFAULT;
982
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
983
	 * it would end up going through the fenced access, and we'll get
984
	 * different detiling behavior between reading and writing.
985
	 * pread/pwrite currently are reading and writing from the CPU
986
	 * perspective, requiring manual detiling by the client.
987
	 */
4104 Serge 988
	if (obj->tiling_mode == I915_TILING_NONE &&
989
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
990
	    cpu_write_needs_clflush(obj)) {
3031 serge 991
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
992
		/* Note that the gtt paths might fail with non-page-backed user
993
		 * pointers (e.g. gtt mappings when moving data between
994
		 * textures). Fallback to the shmem path in that case. */
995
	}
996
 
6296 serge 997
	if (ret == -EFAULT || ret == -ENOSPC) {
6084 serge 998
			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
6296 serge 999
	}
3031 serge 1000
 
1001
out:
1002
	drm_gem_object_unreference(&obj->base);
1003
unlock:
1004
	mutex_unlock(&dev->struct_mutex);
6084 serge 1005
put_rpm:
1006
	intel_runtime_pm_put(dev_priv);
1007
 
3031 serge 1008
	return ret;
1009
}
1010
 
1011
int
3480 Serge 1012
i915_gem_check_wedge(struct i915_gpu_error *error,
3031 serge 1013
		     bool interruptible)
1014
{
3480 Serge 1015
	if (i915_reset_in_progress(error)) {
3031 serge 1016
		/* Non-interruptible callers can't handle -EAGAIN, hence return
1017
		 * -EIO unconditionally for these. */
1018
		if (!interruptible)
1019
			return -EIO;
2332 Serge 1020
 
3480 Serge 1021
		/* Recovery complete, but the reset failed ... */
1022
		if (i915_terminally_wedged(error))
3031 serge 1023
			return -EIO;
2332 Serge 1024
 
6084 serge 1025
		/*
1026
		 * Check if GPU Reset is in progress - we need intel_ring_begin
1027
		 * to work properly to reinit the hw state while the gpu is
1028
		 * still marked as reset-in-progress. Handle this with a flag.
1029
		 */
1030
		if (!error->reload_in_reset)
1031
			return -EAGAIN;
3031 serge 1032
	}
2332 Serge 1033
 
3031 serge 1034
	return 0;
1035
}
2332 Serge 1036
 
4560 Serge 1037
static void fake_irq(unsigned long data)
1038
{
1039
//	wake_up_process((struct task_struct *)data);
1040
}
1041
 
1042
static bool missed_irq(struct drm_i915_private *dev_priv,
5060 serge 1043
		       struct intel_engine_cs *ring)
4560 Serge 1044
{
1045
	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
1046
}
1047
 
6084 serge 1048
static unsigned long local_clock_us(unsigned *cpu)
4560 Serge 1049
{
6084 serge 1050
	unsigned long t;
1051
 
1052
	/* Cheaply and approximately convert from nanoseconds to microseconds.
1053
	 * The result and subsequent calculations are also defined in the same
1054
	 * approximate microseconds units. The principal source of timing
1055
	 * error here is from the simple truncation.
1056
	 *
1057
	 * Note that local_clock() is only defined wrt to the current CPU;
1058
	 * the comparisons are no longer valid if we switch CPUs. Instead of
1059
	 * blocking preemption for the entire busywait, we can detect the CPU
1060
	 * switch and use that as indicator of system load and a reason to
1061
	 * stop busywaiting, see busywait_stop().
1062
	 */
1063
	t = GetClockNs() >> 10;
1064
 
1065
	return t;
1066
}
1067
 
1068
static bool busywait_stop(unsigned long timeout, unsigned cpu)
1069
{
1070
	unsigned this_cpu = 0;
1071
 
1072
	if (time_after(local_clock_us(&this_cpu), timeout))
4560 Serge 1073
		return true;
1074
 
6084 serge 1075
	return this_cpu != cpu;
4560 Serge 1076
}
1077
 
6084 serge 1078
static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
1079
{
1080
	unsigned long timeout;
1081
	unsigned cpu;
1082
 
1083
	/* When waiting for high frequency requests, e.g. during synchronous
1084
	 * rendering split between the CPU and GPU, the finite amount of time
1085
	 * required to set up the irq and wait upon it limits the response
1086
	 * rate. By busywaiting on the request completion for a short while we
1087
	 * can service the high frequency waits as quick as possible. However,
1088
	 * if it is a slow request, we want to sleep as quickly as possible.
1089
	 * The tradeoff between waiting and sleeping is roughly the time it
1090
	 * takes to sleep on a request, on the order of a microsecond.
1091
	 */
1092
 
1093
	if (req->ring->irq_refcount)
1094
		return -EBUSY;
1095
 
1096
	/* Only spin if we know the GPU is processing this request */
1097
	if (!i915_gem_request_started(req, true))
1098
		return -EAGAIN;
1099
 
1100
	timeout = local_clock_us(&cpu) + 5;
1101
	while (1 /*!need_resched()*/) {
1102
		if (i915_gem_request_completed(req, true))
1103
			return 0;
1104
 
1105
		if (busywait_stop(timeout, cpu))
1106
			break;
1107
 
1108
		cpu_relax_lowlatency();
1109
	}
1110
 
1111
	if (i915_gem_request_completed(req, false))
1112
		return 0;
1113
 
1114
	return -EAGAIN;
1115
}
1116
 
3031 serge 1117
/**
6084 serge 1118
 * __i915_wait_request - wait until execution of request has finished
1119
 * @req: duh!
1120
 * @reset_counter: reset sequence associated with the given request
3031 serge 1121
 * @interruptible: do an interruptible wait (normally yes)
1122
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
1123
 *
3480 Serge 1124
 * Note: It is of utmost importance that the passed in seqno and reset_counter
1125
 * values have been read by the caller in an smp safe manner. Where read-side
1126
 * locks are involved, it is sufficient to read the reset_counter before
1127
 * unlocking the lock that protects the seqno. For lockless tricks, the
1128
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
1129
 * inserted.
1130
 *
6084 serge 1131
 * Returns 0 if the request was found within the alloted time. Else returns the
3031 serge 1132
 * errno with remaining time filled in timeout argument.
1133
 */
6084 serge 1134
int __i915_wait_request(struct drm_i915_gem_request *req,
3480 Serge 1135
			unsigned reset_counter,
4560 Serge 1136
			bool interruptible,
5060 serge 1137
			s64 *timeout,
6084 serge 1138
			struct intel_rps_client *rps)
3031 serge 1139
{
6084 serge 1140
	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
5060 serge 1141
	struct drm_device *dev = ring->dev;
1142
	struct drm_i915_private *dev_priv = dev->dev_private;
4560 Serge 1143
	const bool irq_test_in_progress =
1144
		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
6084 serge 1145
	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
6088 serge 1146
	wait_queue_t wait;
5060 serge 1147
	unsigned long timeout_expire;
7144 serge 1148
	s64 before = 0; /* Only to silence a compiler warning. */
3031 serge 1149
	int ret;
2332 Serge 1150
 
5060 serge 1151
	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
4104 Serge 1152
 
6084 serge 1153
	if (list_empty(&req->list))
3031 serge 1154
		return 0;
2332 Serge 1155
 
6084 serge 1156
	if (i915_gem_request_completed(req, true))
1157
		return 0;
2332 Serge 1158
 
6084 serge 1159
	timeout_expire = 0;
1160
	if (timeout) {
1161
		if (WARN_ON(*timeout < 0))
1162
			return -EINVAL;
1163
 
1164
		if (*timeout == 0)
1165
			return -ETIME;
1166
 
1167
		timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
7144 serge 1168
 
1169
		/*
1170
		 * Record current time in case interrupted by signal, or wedged.
1171
		 */
1172
		before = ktime_get_raw_ns();
3031 serge 1173
	}
2332 Serge 1174
 
6084 serge 1175
	if (INTEL_INFO(dev_priv)->gen >= 6)
1176
		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
2332 Serge 1177
 
6084 serge 1178
	trace_i915_gem_request_wait_begin(req);
1179
 
1180
	/* Optimistic spin for the next jiffie before touching IRQs */
1181
	ret = __i915_spin_request(req, state);
1182
	if (ret == 0)
1183
		goto out;
1184
 
1185
	if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
1186
		ret = -ENODEV;
1187
		goto out;
1188
	}
1189
 
6088 serge 1190
	INIT_LIST_HEAD(&wait.task_list);
1191
	wait.evnt = CreateEvent(NULL, MANUAL_DESTROY);
2332 Serge 1192
 
4560 Serge 1193
	for (;;) {
6103 serge 1194
		unsigned long flags;
4560 Serge 1195
 
3480 Serge 1196
		/* We need to check whether any gpu reset happened in between
1197
		 * the caller grabbing the seqno and now ... */
4560 Serge 1198
		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
1199
			/* ... but upgrade the -EAGAIN to an -EIO if the gpu
1200
			 * is truely gone. */
1201
			ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1202
			if (ret == 0)
1203
				ret = -EAGAIN;
1204
			break;
1205
		}
3480 Serge 1206
 
6084 serge 1207
		if (i915_gem_request_completed(req, false)) {
4560 Serge 1208
			ret = 0;
1209
			break;
1210
		}
2332 Serge 1211
 
6088 serge 1212
		if (timeout && time_after_eq(jiffies, timeout_expire)) {
4560 Serge 1213
			ret = -ETIME;
1214
			break;
1215
		}
2332 Serge 1216
 
4560 Serge 1217
        spin_lock_irqsave(&ring->irq_queue.lock, flags);
6088 serge 1218
        if (list_empty(&wait.task_list))
1219
            __add_wait_queue(&ring->irq_queue, &wait);
4560 Serge 1220
        spin_unlock_irqrestore(&ring->irq_queue.lock, flags);
1221
 
6088 serge 1222
            WaitEventTimeout(wait.evnt, 1);
4560 Serge 1223
 
6088 serge 1224
        if (!list_empty(&wait.task_list)) {
4560 Serge 1225
            spin_lock_irqsave(&ring->irq_queue.lock, flags);
6088 serge 1226
            list_del_init(&wait.task_list);
4560 Serge 1227
            spin_unlock_irqrestore(&ring->irq_queue.lock, flags);
1228
        }
1229
 
6088 serge 1230
	};
4560 Serge 1231
 
1232
	if (!irq_test_in_progress)
6084 serge 1233
		ring->irq_put(ring);
2332 Serge 1234
 
6088 serge 1235
    DestroyEvent(wait.evnt);
1236
 
6084 serge 1237
out:
1238
	trace_i915_gem_request_wait_end(req);
1239
 
1240
	if (timeout) {
7144 serge 1241
		s64 tres = *timeout - (ktime_get_raw_ns() - before);
6084 serge 1242
 
1243
		*timeout = tres < 0 ? 0 : tres;
1244
 
1245
		/*
1246
		 * Apparently ktime isn't accurate enough and occasionally has a
1247
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
1248
		 * things up to make the test happy. We allow up to 1 jiffy.
1249
		 *
1250
		 * This is a regrssion from the timespec->ktime conversion.
1251
		 */
1252
		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
1253
			*timeout = 0;
1254
	}
1255
 
4560 Serge 1256
	return ret;
3031 serge 1257
}
2332 Serge 1258
 
6084 serge 1259
int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
1260
				   struct drm_file *file)
1261
{
1262
	struct drm_i915_private *dev_private;
1263
	struct drm_i915_file_private *file_priv;
1264
 
1265
	WARN_ON(!req || !file || req->file_priv);
1266
 
1267
	if (!req || !file)
1268
		return -EINVAL;
1269
 
1270
	if (req->file_priv)
1271
		return -EINVAL;
1272
 
1273
	dev_private = req->ring->dev->dev_private;
1274
	file_priv = file->driver_priv;
1275
 
1276
	spin_lock(&file_priv->mm.lock);
1277
	req->file_priv = file_priv;
1278
	list_add_tail(&req->client_list, &file_priv->mm.request_list);
1279
	spin_unlock(&file_priv->mm.lock);
1280
 
6660 serge 1281
    req->pid = (struct pid*)1;
6084 serge 1282
 
1283
	return 0;
1284
}
1285
 
1286
static inline void
1287
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1288
{
1289
	struct drm_i915_file_private *file_priv = request->file_priv;
1290
 
1291
	if (!file_priv)
1292
		return;
1293
 
1294
	spin_lock(&file_priv->mm.lock);
1295
	list_del(&request->client_list);
1296
	request->file_priv = NULL;
1297
	spin_unlock(&file_priv->mm.lock);
6660 serge 1298
	request->pid = NULL;
6084 serge 1299
}
1300
 
1301
static void i915_gem_request_retire(struct drm_i915_gem_request *request)
1302
{
1303
	trace_i915_gem_request_retire(request);
1304
 
1305
	/* We know the GPU must have read the request to have
1306
	 * sent us the seqno + interrupt, so use the position
1307
	 * of tail of the request to update the last known position
1308
	 * of the GPU head.
1309
	 *
1310
	 * Note this requires that we are always called in request
1311
	 * completion order.
1312
	 */
1313
	request->ringbuf->last_retired_head = request->postfix;
1314
 
1315
	list_del_init(&request->list);
1316
	i915_gem_request_remove_from_client(request);
1317
 
1318
	i915_gem_request_unreference(request);
1319
}
1320
 
1321
static void
1322
__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
1323
{
1324
	struct intel_engine_cs *engine = req->ring;
1325
	struct drm_i915_gem_request *tmp;
1326
 
6660 serge 1327
	lockdep_assert_held(&engine->dev->struct_mutex);
6084 serge 1328
 
1329
	if (list_empty(&req->list))
1330
		return;
1331
 
1332
	do {
1333
		tmp = list_first_entry(&engine->request_list,
1334
				       typeof(*tmp), list);
1335
 
1336
		i915_gem_request_retire(tmp);
1337
	} while (tmp != req);
1338
 
1339
	WARN_ON(i915_verify_lists(engine->dev));
1340
}
1341
 
3031 serge 1342
/**
6084 serge 1343
 * Waits for a request to be signaled, and cleans up the
3031 serge 1344
 * request and object lists appropriately for that event.
1345
 */
1346
int
6084 serge 1347
i915_wait_request(struct drm_i915_gem_request *req)
3031 serge 1348
{
6084 serge 1349
	struct drm_device *dev;
1350
	struct drm_i915_private *dev_priv;
1351
	bool interruptible;
3031 serge 1352
	int ret;
2332 Serge 1353
 
6084 serge 1354
	BUG_ON(req == NULL);
1355
 
1356
	dev = req->ring->dev;
1357
	dev_priv = dev->dev_private;
1358
	interruptible = dev_priv->mm.interruptible;
1359
 
3031 serge 1360
	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
2332 Serge 1361
 
3480 Serge 1362
	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
3031 serge 1363
	if (ret)
1364
		return ret;
2332 Serge 1365
 
6084 serge 1366
	ret = __i915_wait_request(req,
1367
				  atomic_read(&dev_priv->gpu_error.reset_counter),
1368
				  interruptible, NULL, NULL);
3031 serge 1369
	if (ret)
1370
		return ret;
2332 Serge 1371
 
6084 serge 1372
	__i915_gem_request_retire__upto(req);
4104 Serge 1373
	return 0;
1374
}
1375
 
3031 serge 1376
/**
1377
 * Ensures that all rendering to the object has completed and the object is
1378
 * safe to unbind from the GTT or access from the CPU.
1379
 */
6084 serge 1380
int
3031 serge 1381
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1382
			       bool readonly)
1383
{
6084 serge 1384
	int ret, i;
2332 Serge 1385
 
6084 serge 1386
	if (!obj->active)
3031 serge 1387
		return 0;
2332 Serge 1388
 
6084 serge 1389
	if (readonly) {
1390
		if (obj->last_write_req != NULL) {
1391
			ret = i915_wait_request(obj->last_write_req);
1392
			if (ret)
1393
				return ret;
2332 Serge 1394
 
6084 serge 1395
			i = obj->last_write_req->ring->id;
1396
			if (obj->last_read_req[i] == obj->last_write_req)
1397
				i915_gem_object_retire__read(obj, i);
1398
			else
1399
				i915_gem_object_retire__write(obj);
1400
		}
1401
	} else {
1402
		for (i = 0; i < I915_NUM_RINGS; i++) {
1403
			if (obj->last_read_req[i] == NULL)
1404
				continue;
1405
 
1406
			ret = i915_wait_request(obj->last_read_req[i]);
1407
			if (ret)
1408
				return ret;
1409
 
1410
			i915_gem_object_retire__read(obj, i);
1411
		}
1412
		RQ_BUG_ON(obj->active);
1413
	}
1414
 
1415
	return 0;
3031 serge 1416
}
2332 Serge 1417
 
6084 serge 1418
static void
1419
i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
1420
			       struct drm_i915_gem_request *req)
1421
{
1422
	int ring = req->ring->id;
1423
 
1424
	if (obj->last_read_req[ring] == req)
1425
		i915_gem_object_retire__read(obj, ring);
1426
	else if (obj->last_write_req == req)
1427
		i915_gem_object_retire__write(obj);
1428
 
1429
	__i915_gem_request_retire__upto(req);
1430
}
1431
 
3260 Serge 1432
/* A nonblocking variant of the above wait. This is a highly dangerous routine
1433
 * as the object state may change during this call.
1434
 */
1435
static __must_check int
1436
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
6084 serge 1437
					    struct intel_rps_client *rps,
3260 Serge 1438
					    bool readonly)
1439
{
1440
	struct drm_device *dev = obj->base.dev;
1441
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 1442
	struct drm_i915_gem_request *requests[I915_NUM_RINGS];
3480 Serge 1443
	unsigned reset_counter;
6084 serge 1444
	int ret, i, n = 0;
2332 Serge 1445
 
3260 Serge 1446
	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1447
	BUG_ON(!dev_priv->mm.interruptible);
2332 Serge 1448
 
6084 serge 1449
	if (!obj->active)
3260 Serge 1450
		return 0;
2332 Serge 1451
 
3480 Serge 1452
	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
3260 Serge 1453
	if (ret)
1454
		return ret;
2332 Serge 1455
 
6084 serge 1456
	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
2332 Serge 1457
 
6084 serge 1458
	if (readonly) {
1459
		struct drm_i915_gem_request *req;
1460
 
1461
		req = obj->last_write_req;
1462
		if (req == NULL)
1463
			return 0;
1464
 
1465
		requests[n++] = i915_gem_request_reference(req);
1466
	} else {
1467
		for (i = 0; i < I915_NUM_RINGS; i++) {
1468
			struct drm_i915_gem_request *req;
1469
 
1470
			req = obj->last_read_req[i];
1471
			if (req == NULL)
1472
				continue;
1473
 
1474
			requests[n++] = i915_gem_request_reference(req);
1475
		}
1476
	}
1477
 
3260 Serge 1478
	mutex_unlock(&dev->struct_mutex);
6084 serge 1479
	for (i = 0; ret == 0 && i < n; i++)
1480
		ret = __i915_wait_request(requests[i], reset_counter, true,
1481
					  NULL, rps);
3260 Serge 1482
	mutex_lock(&dev->struct_mutex);
2332 Serge 1483
 
6084 serge 1484
	for (i = 0; i < n; i++) {
1485
		if (ret == 0)
1486
			i915_gem_object_retire_request(obj, requests[i]);
1487
		i915_gem_request_unreference(requests[i]);
1488
	}
1489
 
1490
	return ret;
3260 Serge 1491
}
2332 Serge 1492
 
6084 serge 1493
static struct intel_rps_client *to_rps_client(struct drm_file *file)
1494
{
1495
	struct drm_i915_file_private *fpriv = file->driver_priv;
1496
	return &fpriv->rps;
1497
}
1498
 
3260 Serge 1499
/**
1500
 * Called when user space prepares to use an object with the CPU, either
1501
 * through the mmap ioctl's mapping or a GTT mapping.
1502
 */
1503
int
1504
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1505
			  struct drm_file *file)
1506
{
1507
	struct drm_i915_gem_set_domain *args = data;
1508
	struct drm_i915_gem_object *obj;
1509
	uint32_t read_domains = args->read_domains;
1510
	uint32_t write_domain = args->write_domain;
1511
	int ret;
2332 Serge 1512
 
3260 Serge 1513
	/* Only handle setting domains to types used by the CPU. */
1514
	if (write_domain & I915_GEM_GPU_DOMAINS)
1515
		return -EINVAL;
2332 Serge 1516
 
3260 Serge 1517
	if (read_domains & I915_GEM_GPU_DOMAINS)
1518
		return -EINVAL;
2332 Serge 1519
 
3260 Serge 1520
	/* Having something in the write domain implies it's in the read
1521
	 * domain, and only that read domain.  Enforce that in the request.
1522
	 */
1523
	if (write_domain != 0 && read_domains != write_domain)
1524
		return -EINVAL;
2332 Serge 1525
 
3260 Serge 1526
	ret = i915_mutex_lock_interruptible(dev);
1527
	if (ret)
1528
		return ret;
2332 Serge 1529
 
3260 Serge 1530
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1531
	if (&obj->base == NULL) {
1532
		ret = -ENOENT;
1533
		goto unlock;
1534
	}
2332 Serge 1535
 
3260 Serge 1536
	/* Try to flush the object off the GPU without holding the lock.
1537
	 * We will repeat the flush holding the lock in the normal manner
1538
	 * to catch cases where we are gazumped.
1539
	 */
5060 serge 1540
	ret = i915_gem_object_wait_rendering__nonblocking(obj,
6084 serge 1541
							  to_rps_client(file),
5060 serge 1542
							  !write_domain);
3260 Serge 1543
	if (ret)
1544
		goto unref;
2332 Serge 1545
 
6084 serge 1546
	if (read_domains & I915_GEM_DOMAIN_GTT)
3260 Serge 1547
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
6084 serge 1548
	else
3260 Serge 1549
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
2332 Serge 1550
 
6084 serge 1551
	if (write_domain != 0)
1552
		intel_fb_obj_invalidate(obj,
1553
					write_domain == I915_GEM_DOMAIN_GTT ?
1554
					ORIGIN_GTT : ORIGIN_CPU);
1555
 
3260 Serge 1556
unref:
1557
	drm_gem_object_unreference(&obj->base);
1558
unlock:
1559
	mutex_unlock(&dev->struct_mutex);
1560
	return ret;
1561
}
2332 Serge 1562
 
4293 Serge 1563
/**
1564
 * Called when user space has done writes to this buffer
1565
 */
1566
int
1567
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1568
			 struct drm_file *file)
1569
{
1570
	struct drm_i915_gem_sw_finish *args = data;
1571
	struct drm_i915_gem_object *obj;
1572
	int ret = 0;
2332 Serge 1573
 
4293 Serge 1574
	ret = i915_mutex_lock_interruptible(dev);
1575
	if (ret)
1576
		return ret;
2332 Serge 1577
 
4293 Serge 1578
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1579
	if (&obj->base == NULL) {
1580
		ret = -ENOENT;
1581
		goto unlock;
1582
	}
2332 Serge 1583
 
4293 Serge 1584
	/* Pinned buffers may be scanout, so flush the cache */
1585
	if (obj->pin_display)
6084 serge 1586
		i915_gem_object_flush_cpu_write_domain(obj);
2332 Serge 1587
 
4293 Serge 1588
	drm_gem_object_unreference(&obj->base);
1589
unlock:
1590
	mutex_unlock(&dev->struct_mutex);
1591
	return ret;
1592
}
1593
 
3260 Serge 1594
/**
1595
 * Maps the contents of an object, returning the address it is mapped
1596
 * into.
1597
 *
1598
 * While the mapping holds a reference on the contents of the object, it doesn't
1599
 * imply a ref on the object itself.
5354 serge 1600
 *
1601
 * IMPORTANT:
1602
 *
1603
 * DRM driver writers who look a this function as an example for how to do GEM
1604
 * mmap support, please don't implement mmap support like here. The modern way
1605
 * to implement DRM mmap support is with an mmap offset ioctl (like
1606
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1607
 * That way debug tooling like valgrind will understand what's going on, hiding
1608
 * the mmap call in a driver private ioctl will break that. The i915 driver only
1609
 * does cpu mmaps this way because we didn't know better.
3260 Serge 1610
 */
1611
int
1612
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1613
		    struct drm_file *file)
1614
{
1615
	struct drm_i915_gem_mmap *args = data;
1616
	struct drm_gem_object *obj;
4392 Serge 1617
	unsigned long addr;
2332 Serge 1618
 
6084 serge 1619
//	if (args->flags & ~(I915_MMAP_WC))
1620
//		return -EINVAL;
3260 Serge 1621
	obj = drm_gem_object_lookup(dev, file, args->handle);
1622
	if (obj == NULL)
1623
		return -ENOENT;
4104 Serge 1624
 
3260 Serge 1625
	/* prime objects have no backing filp to GEM mmap
1626
	 * pages from.
1627
	 */
1628
	if (!obj->filp) {
1629
		drm_gem_object_unreference_unlocked(obj);
1630
		return -EINVAL;
1631
	}
2332 Serge 1632
 
6084 serge 1633
	addr = vm_mmap(obj->filp, 0, args->size,
1634
		       PROT_READ | PROT_WRITE, MAP_SHARED,
1635
		       args->offset);
3260 Serge 1636
	drm_gem_object_unreference_unlocked(obj);
6084 serge 1637
	if (IS_ERR((void *)addr))
1638
		return addr;
2332 Serge 1639
 
3260 Serge 1640
	args->addr_ptr = (uint64_t) addr;
2332 Serge 1641
 
6084 serge 1642
	return 0;
3260 Serge 1643
}
2332 Serge 1644
 
1645
 
1646
 
1647
 
1648
 
1649
 
1650
 
1651
 
3031 serge 1652
 
1653
 
1654
 
1655
 
1656
 
1657
/**
1658
 * i915_gem_release_mmap - remove physical page mappings
1659
 * @obj: obj in question
1660
 *
1661
 * Preserve the reservation of the mmapping with the DRM core code, but
1662
 * relinquish ownership of the pages back to the system.
1663
 *
1664
 * It is vital that we remove the page mapping if we have mapped a tiled
1665
 * object through the GTT and then lose the fence register due to
1666
 * resource pressure. Similarly if the object has been moved out of the
1667
 * aperture, than pages mapped into userspace must be revoked. Removing the
1668
 * mapping will then trigger a page fault on the next user access, allowing
1669
 * fixup by i915_gem_fault().
1670
 */
1671
void
1672
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1673
{
1674
	if (!obj->fault_mappable)
1675
		return;
1676
 
4104 Serge 1677
//	drm_vma_node_unmap(&obj->base.vma_node, obj->base.dev->dev_mapping);
3031 serge 1678
	obj->fault_mappable = false;
1679
}
1680
 
6084 serge 1681
void
1682
i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
1683
{
1684
	struct drm_i915_gem_object *obj;
1685
 
1686
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
1687
		i915_gem_release_mmap(obj);
1688
}
1689
 
3480 Serge 1690
uint32_t
2332 Serge 1691
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
1692
{
1693
	uint32_t gtt_size;
1694
 
1695
	if (INTEL_INFO(dev)->gen >= 4 ||
1696
	    tiling_mode == I915_TILING_NONE)
1697
		return size;
1698
 
1699
	/* Previous chips need a power-of-two fence region when tiling */
1700
	if (INTEL_INFO(dev)->gen == 3)
1701
		gtt_size = 1024*1024;
1702
	else
1703
		gtt_size = 512*1024;
1704
 
1705
	while (gtt_size < size)
1706
		gtt_size <<= 1;
1707
 
1708
	return gtt_size;
1709
}
1710
 
1711
/**
1712
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1713
 * @obj: object to check
1714
 *
1715
 * Return the required GTT alignment for an object, taking into account
1716
 * potential fence register mapping.
1717
 */
3480 Serge 1718
uint32_t
1719
i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
1720
			   int tiling_mode, bool fenced)
2332 Serge 1721
{
1722
	/*
1723
	 * Minimum alignment is 4k (GTT page size), but might be greater
1724
	 * if a fence register is needed for the object.
1725
	 */
3480 Serge 1726
	if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
2332 Serge 1727
	    tiling_mode == I915_TILING_NONE)
1728
		return 4096;
1729
 
1730
	/*
1731
	 * Previous chips need to be aligned to the size of the smallest
1732
	 * fence register that can contain the object.
1733
	 */
1734
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
1735
}
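/*
 * Worked example of the two helpers above (illustrative only): a 300 KiB
 * X-tiled object on gen2 gets a 512 KiB fence region (the 512 KiB minimum
 * already covers it) and must therefore be bound 512 KiB-aligned; a 1.5 MiB
 * tiled object on gen3 starts from 1 MiB and doubles to 2 MiB; on gen4+ or
 * for untiled objects the size is left unchanged and 4 KiB alignment
 * suffices.
 */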
1736
 
1737
 
1738
 
3480 Serge 1739
int
1740
i915_gem_mmap_gtt(struct drm_file *file,
1741
          struct drm_device *dev,
6084 serge 1742
		  uint32_t handle,
3480 Serge 1743
          uint64_t *offset)
1744
{
1745
    struct drm_i915_private *dev_priv = dev->dev_private;
1746
    struct drm_i915_gem_object *obj;
1747
    unsigned long pfn;
1748
    char *mem, *ptr;
1749
    int ret;
1750
 
1751
    ret = i915_mutex_lock_interruptible(dev);
1752
    if (ret)
1753
        return ret;
1754
 
1755
    obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
1756
    if (&obj->base == NULL) {
1757
        ret = -ENOENT;
1758
        goto unlock;
1759
    }
1760
 
1761
    if (obj->madv != I915_MADV_WILLNEED) {
5060 serge 1762
		DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
1763
		ret = -EFAULT;
3480 Serge 1764
        goto out;
1765
    }
1766
    /* Now bind it into the GTT if needed */
5060 serge 1767
	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
3480 Serge 1768
    if (ret)
1769
        goto out;
1770
 
1771
    ret = i915_gem_object_set_to_gtt_domain(obj, 1);
1772
    if (ret)
1773
        goto unpin;
1774
 
1775
    ret = i915_gem_object_get_fence(obj);
1776
    if (ret)
1777
        goto unpin;
1778
 
1779
    obj->fault_mappable = true;
1780
 
4104 Serge 1781
    pfn = dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj);
3480 Serge 1782
 
1783
    /* Finally, remap it using the new GTT offset */
1784
 
1785
    mem = UserAlloc(obj->base.size);
1786
    if(unlikely(mem == NULL))
1787
    {
1788
        ret = -ENOMEM;
1789
        goto unpin;
1790
    }
1791
 
1792
    for(ptr = mem; ptr < mem + obj->base.size; ptr+= 4096, pfn+= 4096)
1793
        MapPage(ptr, pfn, PG_SHARED|PG_UW);
1794
 
1795
unpin:
5060 serge 1796
    i915_gem_object_unpin_pages(obj);
3480 Serge 1797
 
1798
 
5367 serge 1799
    *offset = (uint32_t)mem;
3480 Serge 1800
 
1801
out:
6088 serge 1802
	drm_gem_object_unreference(&obj->base);
3480 Serge 1803
unlock:
6088 serge 1804
	mutex_unlock(&dev->struct_mutex);
1805
	return ret;
3480 Serge 1806
}
1807
 
1808
/**
1809
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1810
 * @dev: DRM device
1811
 * @data: GTT mapping ioctl data
1812
 * @file: GEM object info
1813
 *
1814
 * Simply returns the fake offset to userspace so it can mmap it.
1815
 * The mmap call will end up in drm_gem_mmap(), which will set things
1816
 * up so we can get faults in the handler above.
1817
 *
1818
 * The fault handler will take care of binding the object into the GTT
1819
 * (since it may have been evicted to make room for something), allocating
1820
 * a fence register, and mapping the appropriate aperture address into
1821
 * userspace.
1822
 */
1823
int
1824
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
6084 serge 1825
			struct drm_file *file)
3480 Serge 1826
{
6084 serge 1827
	struct drm_i915_gem_mmap_gtt *args = data;
3480 Serge 1828
 
6084 serge 1829
	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
3480 Serge 1830
}
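/*
 * Illustrative note, not part of the driver: upstream userspace feeds the
 * returned args->offset back into mmap() on the DRM fd to fault the pages in,
 * whereas i915_gem_mmap_gtt() above already maps the aperture pages through
 * UserAlloc()/MapPage() and returns the user pointer directly, e.g.:
 *
 *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg) == 0)
 *		ptr = (void *)(uintptr_t)arg.offset;
 */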
1831
 
3031 serge 1832
/* Immediately discard the backing storage */
1833
static void
1834
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
1835
{
1836
//	i915_gem_object_free_mmap_offset(obj);
2332 Serge 1837
 
3263 Serge 1838
	if (obj->base.filp == NULL)
1839
		return;
2332 Serge 1840
 
3031 serge 1841
	/* Our goal here is to return as much of the memory as
1842
	 * is possible back to the system as we are called from OOM.
1843
	 * To do this we must instruct the shmfs to drop all of its
1844
	 * backing pages, *now*.
1845
	 */
5060 serge 1846
//	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
3031 serge 1847
	obj->madv = __I915_MADV_PURGED;
1848
}
2332 Serge 1849
 
5060 serge 1850
/* Try to discard unwanted pages */
1851
static void
1852
i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
3031 serge 1853
{
5060 serge 1854
	struct address_space *mapping;
1855
 
1856
	switch (obj->madv) {
1857
	case I915_MADV_DONTNEED:
1858
		i915_gem_object_truncate(obj);
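		/* fall through */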
1859
	case __I915_MADV_PURGED:
1860
		return;
1861
	}
1862
 
1863
	if (obj->base.filp == NULL)
1864
		return;
1865
 
3031 serge 1866
}
2332 Serge 1867
 
3031 serge 1868
static void
1869
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
1870
{
3746 Serge 1871
	struct sg_page_iter sg_iter;
1872
	int ret;
2332 Serge 1873
 
3031 serge 1874
	BUG_ON(obj->madv == __I915_MADV_PURGED);
2332 Serge 1875
 
3031 serge 1876
	ret = i915_gem_object_set_to_cpu_domain(obj, true);
1877
	if (ret) {
1878
		/* In the event of a disaster, abandon all caches and
1879
		 * hope for the best.
1880
		 */
1881
		WARN_ON(ret != -EIO);
4104 Serge 1882
		i915_gem_clflush_object(obj, true);
3031 serge 1883
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
1884
	}
2332 Serge 1885
 
6084 serge 1886
	i915_gem_gtt_finish_object(obj);
6296 serge 1887
 
1888
	if (i915_gem_object_needs_bit17_swizzle(obj))
1889
		i915_gem_object_save_bit_17_swizzle(obj);
1890
 
3031 serge 1891
	if (obj->madv == I915_MADV_DONTNEED)
1892
		obj->dirty = 0;
2332 Serge 1893
 
3746 Serge 1894
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
1895
		struct page *page = sg_page_iter_page(&sg_iter);
2332 Serge 1896
 
6084 serge 1897
		page_cache_release(page);
3243 Serge 1898
	}
6084 serge 1899
	obj->dirty = 0;
3243 Serge 1900
 
1901
	sg_free_table(obj->pages);
1902
	kfree(obj->pages);
3031 serge 1903
}
2332 Serge 1904
 
3480 Serge 1905
int
3031 serge 1906
i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
1907
{
1908
	const struct drm_i915_gem_object_ops *ops = obj->ops;
2332 Serge 1909
 
3243 Serge 1910
	if (obj->pages == NULL)
3031 serge 1911
		return 0;
2332 Serge 1912
 
3031 serge 1913
	if (obj->pages_pin_count)
1914
		return -EBUSY;
1915
 
4104 Serge 1916
	BUG_ON(i915_gem_obj_bound_any(obj));
1917
 
3243 Serge 1918
	/* ->put_pages might need to allocate memory for the bit17 swizzle
1919
	 * array, hence protect them from being reaped by removing them from gtt
1920
	 * lists early. */
4104 Serge 1921
	list_del(&obj->global_list);
3243 Serge 1922
 
3031 serge 1923
	ops->put_pages(obj);
3243 Serge 1924
	obj->pages = NULL;
3031 serge 1925
 
5060 serge 1926
	i915_gem_object_invalidate(obj);
3031 serge 1927
 
1928
	return 0;
1929
}
1930
 
2332 Serge 1931
static int
3031 serge 1932
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2332 Serge 1933
{
3260 Serge 1934
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
6084 serge 1935
	int page_count, i;
1936
	struct address_space *mapping;
1937
	struct sg_table *st;
3243 Serge 1938
	struct scatterlist *sg;
3746 Serge 1939
	struct sg_page_iter sg_iter;
3243 Serge 1940
	struct page *page;
3746 Serge 1941
	unsigned long last_pfn = 0;	/* suppress gcc warning */
6084 serge 1942
	int ret;
3243 Serge 1943
	gfp_t gfp = 0; /* the port never derives a gfp mask from the shmem mapping */
2332 Serge 1944
 
3243 Serge 1945
	/* Assert that the object is not currently in any GPU domain. As it
1946
	 * wasn't in the GTT, there shouldn't be any way it could have been in
1947
	 * a GPU cache
2332 Serge 1948
	 */
3243 Serge 1949
	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
1950
	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
1951
 
1952
	st = kmalloc(sizeof(*st), GFP_KERNEL);
1953
	if (st == NULL)
1954
		return -ENOMEM;
1955
 
2332 Serge 1956
	page_count = obj->base.size / PAGE_SIZE;
3243 Serge 1957
	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
1958
		kfree(st);
2332 Serge 1959
		return -ENOMEM;
3243 Serge 1960
	}
2332 Serge 1961
 
3243 Serge 1962
	/* Get the list of pages out of our struct file.  They'll be pinned
1963
	 * at this point until we release them.
1964
	 *
1965
	 * Fail silently without starting the shrinker
1966
	 */
3746 Serge 1967
	sg = st->sgl;
1968
	st->nents = 0;
1969
	for (i = 0; i < page_count; i++) {
4104 Serge 1970
        page = shmem_read_mapping_page_gfp(obj->base.filp, i, gfp);
3260 Serge 1971
		if (IS_ERR(page)) {
1972
            dbgprintf("%s invalid page %p\n", __FUNCTION__, page);
2332 Serge 1973
			goto err_pages;
3260 Serge 1974
		}
5354 serge 1975
#ifdef CONFIG_SWIOTLB
1976
		if (swiotlb_nr_tbl()) {
1977
			st->nents++;
1978
			sg_set_page(sg, page, PAGE_SIZE, 0);
1979
			sg = sg_next(sg);
1980
			continue;
1981
		}
1982
#endif
3746 Serge 1983
		if (!i || page_to_pfn(page) != last_pfn + 1) {
1984
			if (i)
1985
				sg = sg_next(sg);
1986
			st->nents++;
6084 serge 1987
			sg_set_page(sg, page, PAGE_SIZE, 0);
3746 Serge 1988
		} else {
1989
			sg->length += PAGE_SIZE;
1990
		}
1991
		last_pfn = page_to_pfn(page);
6937 serge 1992
 
1993
		/* Check that the i965g/gm workaround works. */
1994
		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
3243 Serge 1995
	}
5354 serge 1996
#ifdef CONFIG_SWIOTLB
1997
	if (!swiotlb_nr_tbl())
1998
#endif
3746 Serge 1999
		sg_mark_end(sg);
3243 Serge 2000
	obj->pages = st;
3031 serge 2001
 
6084 serge 2002
	ret = i915_gem_gtt_prepare_object(obj);
2003
	if (ret)
2004
		goto err_pages;
5367 serge 2005
 
6296 serge 2006
	if (i915_gem_object_needs_bit17_swizzle(obj))
2007
		i915_gem_object_do_bit_17_swizzle(obj);
2008
 
5367 serge 2009
	if (obj->tiling_mode != I915_TILING_NONE &&
2010
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
2011
		i915_gem_object_pin_pages(obj);
2012
 
2332 Serge 2013
	return 0;
2014
 
2015
err_pages:
3746 Serge 2016
	sg_mark_end(sg);
2017
	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
2018
		page_cache_release(sg_page_iter_page(&sg_iter));
3243 Serge 2019
	sg_free_table(st);
2020
	kfree(st);
6084 serge 2021
 
3243 Serge 2022
	return PTR_ERR(page);
2332 Serge 2023
}
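/*
 * Note on the page-gathering loop above (illustrative): physically contiguous
 * pages are folded into a single scatterlist entry, so shmem pages returned
 * as pfns 0x1000..0x1003 followed by 0x2000 become two sg entries of 16 KiB
 * and 4 KiB rather than five separate 4 KiB entries.
 */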
2024
 
3031 serge 2025
/* Ensure that the associated pages are gathered from the backing storage
2026
 * and pinned into our object. i915_gem_object_get_pages() may be called
2027
 * multiple times before they are released by a single call to
2028
 * i915_gem_object_put_pages() - once the pages are no longer referenced
2029
 * either as a result of memory pressure (reaping pages under the shrinker)
2030
 * or as the object is itself released.
2031
 */
2032
int
2033
i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2332 Serge 2034
{
3031 serge 2035
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2036
	const struct drm_i915_gem_object_ops *ops = obj->ops;
2037
	int ret;
2332 Serge 2038
 
3243 Serge 2039
	if (obj->pages)
3031 serge 2040
		return 0;
2332 Serge 2041
 
4392 Serge 2042
	if (obj->madv != I915_MADV_WILLNEED) {
5060 serge 2043
		DRM_DEBUG("Attempting to obtain a purgeable object\n");
2044
		return -EFAULT;
4392 Serge 2045
	}
2046
 
3031 serge 2047
	BUG_ON(obj->pages_pin_count);
2332 Serge 2048
 
3031 serge 2049
	ret = ops->get_pages(obj);
2050
	if (ret)
2051
		return ret;
2344 Serge 2052
 
4104 Serge 2053
	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
6084 serge 2054
 
2055
	obj->get_page.sg = obj->pages->sgl;
2056
	obj->get_page.last = 0;
2057
 
2058
	return 0;
2332 Serge 2059
}
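/*
 * Illustrative sketch, not part of the driver, of the pairing described in
 * the comment above: callers that need the backing store to stay resident
 * wrap their access in a pin, and i915_gem_object_put_pages() refuses to run
 * while any such pin is outstanding.
 *
 *	ret = i915_gem_object_get_pages(obj);
 *	if (ret == 0) {
 *		i915_gem_object_pin_pages(obj);
 *		... use obj->pages ...
 *		i915_gem_object_unpin_pages(obj);
 *	}
 */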
2060
 
6084 serge 2061
void i915_vma_move_to_active(struct i915_vma *vma,
2062
			     struct drm_i915_gem_request *req)
2332 Serge 2063
{
6084 serge 2064
	struct drm_i915_gem_object *obj = vma->obj;
2065
	struct intel_engine_cs *ring;
2332 Serge 2066
 
6084 serge 2067
	ring = i915_gem_request_get_ring(req);
2332 Serge 2068
 
2069
	/* Add a reference if we're newly entering the active list. */
6084 serge 2070
	if (obj->active == 0)
2344 Serge 2071
		drm_gem_object_reference(&obj->base);
6084 serge 2072
	obj->active |= intel_ring_flag(ring);
2332 Serge 2073
 
6084 serge 2074
	list_move_tail(&obj->ring_list[ring->id], &ring->active_list);
2075
	i915_gem_request_assign(&obj->last_read_req[ring->id], req);
2332 Serge 2076
 
7144 serge 2077
	list_move_tail(&vma->vm_link, &vma->vm->active_list);
2332 Serge 2078
}
2079
 
6084 serge 2080
static void
2081
i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
4560 Serge 2082
{
6084 serge 2083
	RQ_BUG_ON(obj->last_write_req == NULL);
2084
	RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring)));
2085
 
2086
	i915_gem_request_assign(&obj->last_write_req, NULL);
2087
	intel_fb_obj_flush(obj, true, ORIGIN_CS);
4560 Serge 2088
}
2089
 
2344 Serge 2090
static void
6084 serge 2091
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
2344 Serge 2092
{
5060 serge 2093
	struct i915_vma *vma;
2332 Serge 2094
 
6084 serge 2095
	RQ_BUG_ON(obj->last_read_req[ring] == NULL);
2096
	RQ_BUG_ON(!(obj->active & (1 << ring)));
2332 Serge 2097
 
6084 serge 2098
	list_del_init(&obj->ring_list[ring]);
2099
	i915_gem_request_assign(&obj->last_read_req[ring], NULL);
2344 Serge 2100
 
6084 serge 2101
	if (obj->last_write_req && obj->last_write_req->ring->id == ring)
2102
		i915_gem_object_retire__write(obj);
5354 serge 2103
 
6084 serge 2104
	obj->active &= ~(1 << ring);
2105
	if (obj->active)
2106
		return;
2344 Serge 2107
 
6084 serge 2108
	/* Bump our place on the bound list to keep it roughly in LRU order
2109
	 * so that we don't steal from recently used but inactive objects
2110
	 * (unless we are forced to ofc!)
2111
	 */
2112
	list_move_tail(&obj->global_list,
2113
		       &to_i915(obj->base.dev)->mm.bound_list);
3031 serge 2114
 
7144 serge 2115
	list_for_each_entry(vma, &obj->vma_list, obj_link) {
2116
		if (!list_empty(&vma->vm_link))
2117
			list_move_tail(&vma->vm_link, &vma->vm->inactive_list);
6084 serge 2118
	}
2344 Serge 2119
 
6084 serge 2120
	i915_gem_request_assign(&obj->last_fenced_req, NULL);
2352 Serge 2121
	drm_gem_object_unreference(&obj->base);
2122
}
2123
 
3243 Serge 2124
static int
3480 Serge 2125
i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
2344 Serge 2126
{
3243 Serge 2127
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 2128
	struct intel_engine_cs *ring;
3243 Serge 2129
	int ret, i, j;
2344 Serge 2130
 
3480 Serge 2131
	/* Carefully retire all requests without writing to the rings */
3243 Serge 2132
	for_each_ring(ring, dev_priv, i) {
3480 Serge 2133
		ret = intel_ring_idle(ring);
6084 serge 2134
		if (ret)
2135
			return ret;
3480 Serge 2136
	}
2137
	i915_gem_retire_requests(dev);
3243 Serge 2138
 
3480 Serge 2139
	/* Finally reset hw state */
3243 Serge 2140
	for_each_ring(ring, dev_priv, i) {
3480 Serge 2141
		intel_ring_init_seqno(ring, seqno);
2142
 
5060 serge 2143
		for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++)
2144
			ring->semaphore.sync_seqno[j] = 0;
3243 Serge 2145
	}
2146
 
2147
	return 0;
2344 Serge 2148
}
2149
 
3480 Serge 2150
int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
2151
{
2152
	struct drm_i915_private *dev_priv = dev->dev_private;
2153
	int ret;
2154
 
2155
	if (seqno == 0)
2156
		return -EINVAL;
2157
 
2158
	/* HWS page needs to be set less than what we
2159
	 * will inject to ring
2160
	 */
2161
	ret = i915_gem_init_seqno(dev, seqno - 1);
2162
	if (ret)
2163
		return ret;
2164
 
2165
	/* Carefully set the last_seqno value so that wrap
2166
	 * detection still works
2167
	 */
2168
	dev_priv->next_seqno = seqno;
2169
	dev_priv->last_seqno = seqno - 1;
2170
	if (dev_priv->last_seqno == 0)
2171
		dev_priv->last_seqno--;
2172
 
2173
	return 0;
2174
}
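/*
 * Worked example of the bookkeeping above: i915_gem_set_seqno(dev, 0x1000)
 * idles the rings and seeds each of them with 0xfff through
 * intel_ring_init_seqno(), then leaves next_seqno = 0x1000 and
 * last_seqno = 0xfff, so the first request emitted afterwards carries seqno
 * 0x1000 and the wrap-detection comparison against last_seqno still works.
 */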
2175
 
3243 Serge 2176
int
2177
i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
2344 Serge 2178
{
3243 Serge 2179
	struct drm_i915_private *dev_priv = dev->dev_private;
2344 Serge 2180
 
3243 Serge 2181
	/* reserve 0 for non-seqno */
2182
	if (dev_priv->next_seqno == 0) {
3480 Serge 2183
		int ret = i915_gem_init_seqno(dev, 0);
3243 Serge 2184
		if (ret)
2185
			return ret;
2186
 
2187
		dev_priv->next_seqno = 1;
2188
	}
2189
 
3480 Serge 2190
	*seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
3243 Serge 2191
	return 0;
2332 Serge 2192
}
2193
 
6084 serge 2194
/*
2195
 * NB: This function is not allowed to fail. Doing so would mean that the
2196
 * request is not being tracked for completion but the work itself is
2197
 * going to happen on the hardware. This would be a Bad Thing(tm).
2198
 */
2199
void __i915_add_request(struct drm_i915_gem_request *request,
2200
			struct drm_i915_gem_object *obj,
2201
			bool flush_caches)
2352 Serge 2202
{
6084 serge 2203
	struct intel_engine_cs *ring;
2204
	struct drm_i915_private *dev_priv;
5354 serge 2205
	struct intel_ringbuffer *ringbuf;
6084 serge 2206
	u32 request_start;
2352 Serge 2207
	int ret;
2332 Serge 2208
 
5354 serge 2209
	if (WARN_ON(request == NULL))
6084 serge 2210
		return;
5354 serge 2211
 
6084 serge 2212
	ring = request->ring;
2213
	dev_priv = ring->dev->dev_private;
2214
	ringbuf = request->ringbuf;
5354 serge 2215
 
6084 serge 2216
	/*
2217
	 * To ensure that this call will not fail, space for its emissions
2218
	 * should already have been reserved in the ring buffer. Let the ring
2219
	 * know that it is time to use that space up.
2220
	 */
2221
	intel_ring_reserved_space_use(ringbuf);
2222
 
5354 serge 2223
	request_start = intel_ring_get_tail(ringbuf);
3031 serge 2224
	/*
2225
	 * Emit any outstanding flushes - execbuf can fail to emit the flush
2226
	 * after having emitted the batchbuffer command. Hence we need to fix
2227
	 * things up similar to emitting the lazy request. The difference here
2228
	 * is that the flush _must_ happen before the next request, no matter
2229
	 * what.
2230
	 */
6084 serge 2231
	if (flush_caches) {
2232
		if (i915.enable_execlists)
2233
			ret = logical_ring_flush_all_caches(request);
2234
		else
2235
			ret = intel_ring_flush_all_caches(request);
2236
		/* Not allowed to fail! */
2237
		WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
5354 serge 2238
	}
2332 Serge 2239
 
3031 serge 2240
	/* Record the position of the start of the request so that
2241
	 * should we detect the updated seqno part-way through the
6084 serge 2242
	 * GPU processing the request, we never over-estimate the
3031 serge 2243
	 * position of the head.
2244
	 */
6084 serge 2245
	request->postfix = intel_ring_get_tail(ringbuf);
3031 serge 2246
 
6084 serge 2247
	if (i915.enable_execlists)
2248
		ret = ring->emit_request(request);
2249
	else {
2250
		ret = ring->add_request(request);
2251
 
2252
		request->tail = intel_ring_get_tail(ringbuf);
5354 serge 2253
	}
6084 serge 2254
	/* Not allowed to fail! */
2255
	WARN(ret, "emit|add_request failed: %d!\n", ret);
2332 Serge 2256
 
4104 Serge 2257
	request->head = request_start;
2258
 
2259
	/* Whilst this request exists, batch_obj will be on the
2260
	 * active_list, and so will hold the active reference. Only when this
2261
 * request is retired will the batch_obj be moved onto the
2262
	 * inactive_list and lose its active reference. Hence we do not need
2263
	 * to explicitly hold another reference here.
2264
	 */
4560 Serge 2265
	request->batch_obj = obj;
4104 Serge 2266
 
5060 serge 2267
	request->emitted_jiffies = jiffies;
6084 serge 2268
	request->previous_seqno = ring->last_submitted_seqno;
2269
	ring->last_submitted_seqno = request->seqno;
2352 Serge 2270
	list_add_tail(&request->list, &ring->request_list);
2332 Serge 2271
 
6084 serge 2272
	trace_i915_gem_request_add(request);
2332 Serge 2273
 
7144 serge 2274
	i915_queue_hangcheck(ring->dev);
3263 Serge 2275
 
6084 serge 2276
	queue_delayed_work(dev_priv->wq,
2277
			   &dev_priv->mm.retire_work,
2278
			   round_jiffies_up_relative(HZ));
2279
	intel_mark_busy(dev_priv->dev);
2332 Serge 2280
 
6084 serge 2281
	/* Sanity check that the reserved size was large enough. */
2282
	intel_ring_reserved_space_end(ringbuf);
2352 Serge 2283
}
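/*
 * Illustrative sketch, not part of the driver, of the reservation contract
 * the "not allowed to fail" comment above depends on: ring space for the
 * final emission is reserved when the request is allocated and is consumed
 * here (i915_add_request() is assumed to be the usual wrapper around
 * __i915_add_request()).
 *
 *	req = i915_gem_request_alloc(ring, ctx);	// reserves ring space
 *	if (!IS_ERR(req)) {
 *		... emit commands for req ...
 *		i915_add_request(req);			// consumes the reservation
 *	}
 */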
2332 Serge 2284
 
5060 serge 2285
static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
2286
				   const struct intel_context *ctx)
4104 Serge 2287
{
5060 serge 2288
	unsigned long elapsed;
4104 Serge 2289
 
5060 serge 2290
    elapsed = GetTimerTicks()/100 - ctx->hang_stats.guilty_ts;
4104 Serge 2291
 
5060 serge 2292
	if (ctx->hang_stats.banned)
2293
		return true;
4104 Serge 2294
 
6084 serge 2295
	if (ctx->hang_stats.ban_period_seconds &&
2296
	    elapsed <= ctx->hang_stats.ban_period_seconds) {
5060 serge 2297
		if (!i915_gem_context_is_default(ctx)) {
2298
			DRM_DEBUG("context hanging too fast, banning!\n");
4104 Serge 2299
			return true;
5060 serge 2300
		} else if (i915_stop_ring_allow_ban(dev_priv)) {
2301
			if (i915_stop_ring_allow_warn(dev_priv))
6084 serge 2302
				DRM_ERROR("gpu hanging too fast, banning!\n");
4104 Serge 2303
			return true;
6084 serge 2304
		}
4104 Serge 2305
	}
2306
 
2307
	return false;
2308
}
2309
 
5060 serge 2310
static void i915_set_reset_status(struct drm_i915_private *dev_priv,
2311
				  struct intel_context *ctx,
2312
				  const bool guilty)
4560 Serge 2313
{
5060 serge 2314
	struct i915_ctx_hang_stats *hs;
4560 Serge 2315
 
5060 serge 2316
	if (WARN_ON(!ctx))
2317
		return;
4560 Serge 2318
 
5060 serge 2319
	hs = &ctx->hang_stats;
4560 Serge 2320
 
5060 serge 2321
	if (guilty) {
2322
		hs->banned = i915_context_is_banned(dev_priv, ctx);
2323
		hs->batch_active++;
2324
        hs->guilty_ts = GetTimerTicks()/100;
2325
	} else {
2326
		hs->batch_pending++;
4104 Serge 2327
	}
2328
}
2329
 
6084 serge 2330
void i915_gem_request_free(struct kref *req_ref)
4104 Serge 2331
{
6084 serge 2332
	struct drm_i915_gem_request *req = container_of(req_ref,
2333
						 typeof(*req), ref);
2334
	struct intel_context *ctx = req->ctx;
5354 serge 2335
 
6084 serge 2336
	if (req->file_priv)
2337
		i915_gem_request_remove_from_client(req);
4104 Serge 2338
 
5354 serge 2339
	if (ctx) {
7144 serge 2340
		if (i915.enable_execlists && ctx != req->i915->kernel_context)
2341
			intel_lr_context_unpin(ctx, req->ring);
4104 Serge 2342
 
5354 serge 2343
		i915_gem_context_unreference(ctx);
2344
	}
6084 serge 2345
 
2346
	kfree(req);
4104 Serge 2347
}
2348
 
7144 serge 2349
static inline int
2350
__i915_gem_request_alloc(struct intel_engine_cs *ring,
2351
			 struct intel_context *ctx,
2352
			 struct drm_i915_gem_request **req_out)
6084 serge 2353
{
2354
	struct drm_i915_private *dev_priv = to_i915(ring->dev);
2355
	struct drm_i915_gem_request *req;
2356
	int ret;
2357
 
2358
	if (!req_out)
2359
		return -EINVAL;
2360
 
2361
	*req_out = NULL;
2362
 
2363
//	req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
2364
	req = kzalloc(sizeof(*req),0);
2365
	if (req == NULL)
2366
		return -ENOMEM;
2367
 
2368
	ret = i915_gem_get_seqno(ring->dev, &req->seqno);
2369
	if (ret)
2370
		goto err;
2371
 
2372
	kref_init(&req->ref);
2373
	req->i915 = dev_priv;
2374
	req->ring = ring;
2375
	req->ctx  = ctx;
2376
	i915_gem_context_reference(req->ctx);
2377
 
2378
	if (i915.enable_execlists)
2379
		ret = intel_logical_ring_alloc_request_extras(req);
2380
	else
2381
		ret = intel_ring_alloc_request_extras(req);
2382
	if (ret) {
2383
		i915_gem_context_unreference(req->ctx);
2384
		goto err;
2385
	}
2386
 
2387
	/*
2388
	 * Reserve space in the ring buffer for all the commands required to
2389
	 * eventually emit this request. This is to guarantee that the
2390
	 * i915_add_request() call can't fail. Note that the reserve may need
2391
	 * to be redone if the request is not actually submitted straight
2392
	 * away, e.g. because a GPU scheduler has deferred it.
2393
	 */
2394
	if (i915.enable_execlists)
2395
		ret = intel_logical_ring_reserve_space(req);
2396
	else
2397
		ret = intel_ring_reserve_space(req);
2398
	if (ret) {
2399
		/*
2400
		 * At this point, the request is fully allocated even if not
2401
		 * fully prepared. Thus it can be cleaned up using the proper
2402
		 * free code.
2403
		 */
2404
		i915_gem_request_cancel(req);
2405
		return ret;
2406
	}
2407
 
2408
	*req_out = req;
2409
	return 0;
2410
 
2411
err:
2412
	kfree(req);
2413
	return ret;
2414
}
2415
 
7144 serge 2416
/**
2417
 * i915_gem_request_alloc - allocate a request structure
2418
 *
2419
 * @engine: engine that we wish to issue the request on.
2420
 * @ctx: context that the request will be associated with.
2421
 *       This can be NULL if the request is not directly related to
2422
 *       any specific user context, in which case this function will
2423
 *       choose an appropriate context to use.
2424
 *
2425
 * Returns a pointer to the allocated request if successful,
2426
 * or an error code if not.
2427
 */
2428
struct drm_i915_gem_request *
2429
i915_gem_request_alloc(struct intel_engine_cs *engine,
2430
		       struct intel_context *ctx)
2431
{
2432
	struct drm_i915_gem_request *req;
2433
	int err;
2434
 
2435
	if (ctx == NULL)
2436
		ctx = to_i915(engine->dev)->kernel_context;
2437
	err = __i915_gem_request_alloc(engine, ctx, &req);
2438
	return err ? ERR_PTR(err) : req;
2439
}
2440
 
6084 serge 2441
void i915_gem_request_cancel(struct drm_i915_gem_request *req)
2442
{
2443
	intel_ring_reserved_space_cancel(req->ringbuf);
2444
 
2445
	i915_gem_request_unreference(req);
2446
}
2447
 
5060 serge 2448
struct drm_i915_gem_request *
2449
i915_gem_find_active_request(struct intel_engine_cs *ring)
3031 serge 2450
{
4539 Serge 2451
	struct drm_i915_gem_request *request;
4104 Serge 2452
 
4539 Serge 2453
	list_for_each_entry(request, &ring->request_list, list) {
6084 serge 2454
		if (i915_gem_request_completed(request, false))
4539 Serge 2455
			continue;
4104 Serge 2456
 
5060 serge 2457
		return request;
4539 Serge 2458
	}
5060 serge 2459
 
2460
	return NULL;
4539 Serge 2461
}
2462
 
5060 serge 2463
static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
2464
				       struct intel_engine_cs *ring)
2465
{
2466
	struct drm_i915_gem_request *request;
2467
	bool ring_hung;
2468
 
2469
	request = i915_gem_find_active_request(ring);
2470
 
2471
	if (request == NULL)
2472
		return;
2473
 
2474
	ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
2475
 
2476
	i915_set_reset_status(dev_priv, request->ctx, ring_hung);
2477
 
2478
	list_for_each_entry_continue(request, &ring->request_list, list)
2479
		i915_set_reset_status(dev_priv, request->ctx, false);
2480
}
2481
 
4539 Serge 2482
static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
5060 serge 2483
					struct intel_engine_cs *ring)
4539 Serge 2484
{
6937 serge 2485
	struct intel_ringbuffer *buffer;
2486
 
4560 Serge 2487
	while (!list_empty(&ring->active_list)) {
2488
		struct drm_i915_gem_object *obj;
2489
 
2490
		obj = list_first_entry(&ring->active_list,
2491
				       struct drm_i915_gem_object,
6084 serge 2492
				       ring_list[ring->id]);
4560 Serge 2493
 
6084 serge 2494
		i915_gem_object_retire__read(obj, ring->id);
4560 Serge 2495
	}
2496
 
2497
	/*
5354 serge 2498
	 * Clear the execlists queue up before freeing the requests, as those
2499
	 * are the ones that keep the context and ringbuffer backing objects
2500
	 * pinned in place.
2501
	 */
2502
 
6937 serge 2503
	if (i915.enable_execlists) {
2504
		spin_lock_irq(&ring->execlist_lock);
6084 serge 2505
 
6937 serge 2506
		/* list_splice_tail_init checks for empty lists */
2507
		list_splice_tail_init(&ring->execlist_queue,
2508
				      &ring->execlist_retired_req_list);
6084 serge 2509
 
6937 serge 2510
		spin_unlock_irq(&ring->execlist_lock);
2511
		intel_execlists_retire_requests(ring);
5354 serge 2512
	}
2513
 
2514
	/*
4560 Serge 2515
	 * We must free the requests after all the corresponding objects have
2516
	 * been moved off active lists. Which is the same order as the normal
2517
	 * retire_requests function does. This is important if object hold
2518
	 * implicit references on things like e.g. ppgtt address spaces through
2519
	 * the request.
2520
	 */
3031 serge 2521
	while (!list_empty(&ring->request_list)) {
2522
		struct drm_i915_gem_request *request;
2332 Serge 2523
 
3031 serge 2524
		request = list_first_entry(&ring->request_list,
2525
					   struct drm_i915_gem_request,
2526
					   list);
2332 Serge 2527
 
6084 serge 2528
		i915_gem_request_retire(request);
3031 serge 2529
	}
6937 serge 2530
 
2531
	/* Having flushed all requests from all queues, we know that all
2532
	 * ringbuffers must now be empty. However, since we do not reclaim
2533
	 * all space when retiring the request (to prevent HEADs colliding
2534
	 * with rapid ringbuffer wraparound) the amount of available space
2535
	 * upon reset is less than when we start. Do one more pass over
2536
	 * all the ringbuffers to reset last_retired_head.
2537
	 */
2538
	list_for_each_entry(buffer, &ring->buffers, link) {
2539
		buffer->last_retired_head = buffer->tail;
2540
		intel_ring_update_space(buffer);
2541
	}
3031 serge 2542
}
2332 Serge 2543
 
3031 serge 2544
void i915_gem_reset(struct drm_device *dev)
2545
{
2546
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 2547
	struct intel_engine_cs *ring;
3031 serge 2548
	int i;
2360 Serge 2549
 
4539 Serge 2550
	/*
2551
	 * Before we free the objects from the requests, we need to inspect
2552
	 * them for finding the guilty party. As the requests only borrow
2553
	 * their reference to the objects, the inspection must be done first.
2554
	 */
3031 serge 2555
	for_each_ring(ring, dev_priv, i)
4539 Serge 2556
		i915_gem_reset_ring_status(dev_priv, ring);
2360 Serge 2557
 
4539 Serge 2558
	for_each_ring(ring, dev_priv, i)
2559
		i915_gem_reset_ring_cleanup(dev_priv, ring);
2560
 
5060 serge 2561
	i915_gem_context_reset(dev);
4560 Serge 2562
 
3746 Serge 2563
	i915_gem_restore_fences(dev);
6084 serge 2564
 
2565
	WARN_ON(i915_verify_lists(dev));
3031 serge 2566
}
2360 Serge 2567
 
2352 Serge 2568
/**
2569
 * This function clears the request list as sequence numbers are passed.
2570
 */
3031 serge 2571
void
5060 serge 2572
i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
2352 Serge 2573
{
6084 serge 2574
	WARN_ON(i915_verify_lists(ring->dev));
2332 Serge 2575
 
6084 serge 2576
	/* Retire requests first as we use it above for the early return.
2577
	 * If we retire requests last, we may use a later seqno and so clear
2578
	 * the requests lists without clearing the active list, leading to
2579
	 * confusion.
2580
	 */
2581
	while (!list_empty(&ring->request_list)) {
2582
		struct drm_i915_gem_request *request;
2332 Serge 2583
 
6084 serge 2584
		request = list_first_entry(&ring->request_list,
2585
					   struct drm_i915_gem_request,
2586
					   list);
2332 Serge 2587
 
6084 serge 2588
		if (!i915_gem_request_completed(request, true))
2589
			break;
2332 Serge 2590
 
6084 serge 2591
		i915_gem_request_retire(request);
2592
	}
2593
 
5060 serge 2594
	/* Move any buffers on the active list that are no longer referenced
2595
	 * by the ringbuffer to the flushing/inactive lists as appropriate,
2596
	 * before we free the context associated with the requests.
2597
	 */
2598
	while (!list_empty(&ring->active_list)) {
2599
		struct drm_i915_gem_object *obj;
2600
 
2601
		obj = list_first_entry(&ring->active_list,
2602
				      struct drm_i915_gem_object,
6084 serge 2603
				      ring_list[ring->id]);
5060 serge 2604
 
6084 serge 2605
		if (!list_empty(&obj->last_read_req[ring->id]->list))
5060 serge 2606
			break;
2607
 
6084 serge 2608
		i915_gem_object_retire__read(obj, ring->id);
5060 serge 2609
	}
2610
 
6084 serge 2611
	if (unlikely(ring->trace_irq_req &&
2612
		     i915_gem_request_completed(ring->trace_irq_req, true))) {
2352 Serge 2613
		ring->irq_put(ring);
6084 serge 2614
		i915_gem_request_assign(&ring->trace_irq_req, NULL);
2352 Serge 2615
	}
2332 Serge 2616
 
2352 Serge 2617
	WARN_ON(i915_verify_lists(ring->dev));
2618
}
2332 Serge 2619
 
4560 Serge 2620
bool
2352 Serge 2621
i915_gem_retire_requests(struct drm_device *dev)
2622
{
5060 serge 2623
	struct drm_i915_private *dev_priv = dev->dev_private;
2624
	struct intel_engine_cs *ring;
4560 Serge 2625
	bool idle = true;
2352 Serge 2626
	int i;
2332 Serge 2627
 
4560 Serge 2628
	for_each_ring(ring, dev_priv, i) {
3031 serge 2629
		i915_gem_retire_requests_ring(ring);
4560 Serge 2630
		idle &= list_empty(&ring->request_list);
5354 serge 2631
		if (i915.enable_execlists) {
7144 serge 2632
			spin_lock_irq(&ring->execlist_lock);
5354 serge 2633
			idle &= list_empty(&ring->execlist_queue);
7144 serge 2634
			spin_unlock_irq(&ring->execlist_lock);
5354 serge 2635
 
2636
			intel_execlists_retire_requests(ring);
2637
		}
4560 Serge 2638
	}
2639
 
6937 serge 2640
//	if (idle)
2641
//		mod_delayed_work(dev_priv->wq,
2642
//				   &dev_priv->mm.idle_work,
2643
//				   msecs_to_jiffies(100));
4560 Serge 2644
 
2645
	return idle;
2352 Serge 2646
}
2647
 
2360 Serge 2648
static void
2649
i915_gem_retire_work_handler(struct work_struct *work)
2650
{
4560 Serge 2651
	struct drm_i915_private *dev_priv =
2652
		container_of(work, typeof(*dev_priv), mm.retire_work.work);
2653
	struct drm_device *dev = dev_priv->dev;
2360 Serge 2654
	bool idle;
2352 Serge 2655
 
2360 Serge 2656
	/* Come back later if the device is busy... */
4560 Serge 2657
	idle = false;
2658
	if (mutex_trylock(&dev->struct_mutex)) {
2659
		idle = i915_gem_retire_requests(dev);
2660
		mutex_unlock(&dev->struct_mutex);
2661
	}
2662
	if (!idle)
3482 Serge 2663
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
2664
				   round_jiffies_up_relative(HZ));
4560 Serge 2665
}
2352 Serge 2666
 
4560 Serge 2667
static void
2668
i915_gem_idle_work_handler(struct work_struct *work)
2669
{
2670
	struct drm_i915_private *dev_priv =
2671
		container_of(work, typeof(*dev_priv), mm.idle_work.work);
6084 serge 2672
	struct drm_device *dev = dev_priv->dev;
2673
	struct intel_engine_cs *ring;
2674
	int i;
2352 Serge 2675
 
6084 serge 2676
	for_each_ring(ring, dev_priv, i)
2677
		if (!list_empty(&ring->request_list))
2678
			return;
2679
 
6937 serge 2680
	/* we probably should sync with hangcheck here, using cancel_work_sync.
2681
	 * Also locking seems to be fubar here, ring->request_list is protected
2682
	 * by dev->struct_mutex. */
2683
 
6084 serge 2684
	intel_mark_idle(dev);
2685
 
2686
	if (mutex_trylock(&dev->struct_mutex)) {
2687
		struct intel_engine_cs *ring;
2688
		int i;
2689
 
2690
		for_each_ring(ring, dev_priv, i)
2691
			i915_gem_batch_pool_fini(&ring->batch_pool);
2692
 
2693
		mutex_unlock(&dev->struct_mutex);
2694
	}
2360 Serge 2695
}
2696
 
2344 Serge 2697
/**
3031 serge 2698
 * Ensures that an object will eventually get non-busy by flushing any required
2699
 * write domains, emitting any outstanding lazy request and retiring any
2700
 * completed requests.
2352 Serge 2701
 */
3031 serge 2702
static int
2703
i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
2352 Serge 2704
{
6084 serge 2705
	int i;
2352 Serge 2706
 
6084 serge 2707
	if (!obj->active)
2708
		return 0;
2352 Serge 2709
 
6084 serge 2710
	for (i = 0; i < I915_NUM_RINGS; i++) {
2711
		struct drm_i915_gem_request *req;
2712
 
2713
		req = obj->last_read_req[i];
2714
		if (req == NULL)
2715
			continue;
2716
 
2717
		if (list_empty(&req->list))
2718
			goto retire;
2719
 
2720
		if (i915_gem_request_completed(req, true)) {
2721
			__i915_gem_request_retire__upto(req);
2722
retire:
2723
			i915_gem_object_retire__read(obj, i);
2724
		}
3031 serge 2725
	}
2352 Serge 2726
 
3031 serge 2727
	return 0;
2728
}
2352 Serge 2729
 
3243 Serge 2730
/**
2731
 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2732
 * @DRM_IOCTL_ARGS: standard ioctl arguments
2733
 *
2734
 * Returns 0 if successful, else an error is returned with the remaining time in
2735
 * the timeout parameter.
2736
 *  -ETIME: object is still busy after timeout
2737
 *  -ERESTARTSYS: signal interrupted the wait
2738
 *  -ENOENT: object doesn't exist
2739
 * Also possible, but rare:
2740
 *  -EAGAIN: GPU wedged
2741
 *  -ENOMEM: damn
2742
 *  -ENODEV: Internal IRQ fail
2743
 *  -E?: The add request failed
2744
 *
2745
 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2746
 * non-zero timeout parameter the wait ioctl will wait for the given number of
2747
 * nanoseconds on an object becoming unbusy. Since the wait itself does so
2748
 * without holding struct_mutex the object may become re-busied before this
2749
 * function completes. A similar but shorter race condition exists in the busy
2750
 * ioctl.
2751
 */
4246 Serge 2752
int
2753
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2754
{
5060 serge 2755
	struct drm_i915_private *dev_priv = dev->dev_private;
4246 Serge 2756
	struct drm_i915_gem_wait *args = data;
2757
	struct drm_i915_gem_object *obj;
6084 serge 2758
	struct drm_i915_gem_request *req[I915_NUM_RINGS];
4246 Serge 2759
	unsigned reset_counter;
6084 serge 2760
	int i, n = 0;
2761
	int ret;
2352 Serge 2762
 
5354 serge 2763
	if (args->flags != 0)
2764
		return -EINVAL;
2765
 
4246 Serge 2766
	ret = i915_mutex_lock_interruptible(dev);
2767
	if (ret)
2768
		return ret;
2352 Serge 2769
 
4246 Serge 2770
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
2771
	if (&obj->base == NULL) {
2772
		mutex_unlock(&dev->struct_mutex);
2773
		return -ENOENT;
2774
	}
2352 Serge 2775
 
4246 Serge 2776
	/* Need to make sure the object gets inactive eventually. */
2777
	ret = i915_gem_object_flush_active(obj);
2778
	if (ret)
2779
		goto out;
2352 Serge 2780
 
6084 serge 2781
	if (!obj->active)
2782
		goto out;
2352 Serge 2783
 
4246 Serge 2784
	/* Do this after OLR check to make sure we make forward progress polling
6084 serge 2785
	 * on this IOCTL with a timeout == 0 (like busy ioctl)
4246 Serge 2786
	 */
6084 serge 2787
	if (args->timeout_ns == 0) {
4246 Serge 2788
		ret = -ETIME;
2789
		goto out;
2790
	}
2352 Serge 2791
 
4246 Serge 2792
	drm_gem_object_unreference(&obj->base);
2793
	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
6084 serge 2794
 
2795
	for (i = 0; i < I915_NUM_RINGS; i++) {
2796
		if (obj->last_read_req[i] == NULL)
2797
			continue;
2798
 
2799
		req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
2800
	}
2801
 
4246 Serge 2802
	mutex_unlock(&dev->struct_mutex);
2352 Serge 2803
 
6084 serge 2804
	for (i = 0; i < n; i++) {
2805
		if (ret == 0)
2806
			ret = __i915_wait_request(req[i], reset_counter, true,
2807
						  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
6937 serge 2808
						  to_rps_client(file));
6084 serge 2809
		i915_gem_request_unreference__unlocked(req[i]);
2810
	}
2811
	return ret;
3243 Serge 2812
 
4246 Serge 2813
out:
2814
	drm_gem_object_unreference(&obj->base);
2815
	mutex_unlock(&dev->struct_mutex);
2816
	return ret;
2817
}
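/*
 * Illustrative sketch, not part of the driver: a libdrm-style caller waiting
 * up to one second for a buffer to go idle, matching the semantics described
 * above ("fd" and "handle" are placeholders).
 *
 *	struct drm_i915_gem_wait arg = {
 *		.bo_handle = handle,
 *		.timeout_ns = 1000000000ll,
 *	};
 *	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &arg);
 *	// ret == 0: idle; ETIME: still busy, arg.timeout_ns holds the remainder
 */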
3243 Serge 2818
 
6084 serge 2819
static int
2820
__i915_gem_object_sync(struct drm_i915_gem_object *obj,
2821
		       struct intel_engine_cs *to,
2822
		       struct drm_i915_gem_request *from_req,
2823
		       struct drm_i915_gem_request **to_req)
2824
{
2825
	struct intel_engine_cs *from;
2826
	int ret;
2827
 
2828
	from = i915_gem_request_get_ring(from_req);
2829
	if (to == from)
2830
		return 0;
2831
 
2832
	if (i915_gem_request_completed(from_req, true))
2833
		return 0;
2834
 
2835
	if (!i915_semaphore_is_enabled(obj->base.dev)) {
2836
		struct drm_i915_private *i915 = to_i915(obj->base.dev);
2837
		ret = __i915_wait_request(from_req,
2838
					  atomic_read(&i915->gpu_error.reset_counter),
2839
					  i915->mm.interruptible,
2840
					  NULL,
2841
					  &i915->rps.semaphores);
2842
		if (ret)
2843
			return ret;
2844
 
2845
		i915_gem_object_retire_request(obj, from_req);
2846
	} else {
2847
		int idx = intel_ring_sync_index(from, to);
2848
		u32 seqno = i915_gem_request_get_seqno(from_req);
2849
 
2850
		WARN_ON(!to_req);
2851
 
2852
		if (seqno <= from->semaphore.sync_seqno[idx])
2853
			return 0;
2854
 
2855
		if (*to_req == NULL) {
7144 serge 2856
			struct drm_i915_gem_request *req;
2857
 
2858
			req = i915_gem_request_alloc(to, NULL);
2859
			if (IS_ERR(req))
2860
				return PTR_ERR(req);
2861
 
2862
			*to_req = req;
6084 serge 2863
		}
2864
 
2865
		trace_i915_gem_ring_sync_to(*to_req, from, from_req);
2866
		ret = to->semaphore.sync_to(*to_req, from, seqno);
2867
		if (ret)
2868
			return ret;
2869
 
2870
		/* We use last_read_req because sync_to()
2871
		 * might have just caused seqno wrap under
2872
		 * the radar.
2873
		 */
2874
		from->semaphore.sync_seqno[idx] =
2875
			i915_gem_request_get_seqno(obj->last_read_req[from->id]);
2876
	}
2877
 
2878
	return 0;
2879
}
2880
 
2352 Serge 2881
/**
3031 serge 2882
 * i915_gem_object_sync - sync an object to a ring.
2883
 *
2884
 * @obj: object which may be in use on another ring.
2885
 * @to: ring we wish to use the object on. May be NULL.
6084 serge 2886
 * @to_req: request we wish to use the object for. See below.
2887
 *          This will be allocated and returned if a request is
2888
 *          required but not passed in.
3031 serge 2889
 *
2890
 * This code is meant to abstract object synchronization with the GPU.
2891
 * Calling with NULL implies synchronizing the object with the CPU
6084 serge 2892
 * rather than a particular GPU ring. Conceptually we serialise writes
2893
 * between engines inside the GPU. We only allow one engine to write
2894
 * into a buffer at any time, but multiple readers. To ensure each has
2895
 * a coherent view of memory, we must:
3031 serge 2896
 *
6084 serge 2897
 * - If there is an outstanding write request to the object, the new
2898
 *   request must wait for it to complete (either CPU or in hw, requests
2899
 *   on the same ring will be naturally ordered).
2900
 *
2901
 * - If we are a write request (pending_write_domain is set), the new
2902
 *   request must wait for outstanding read requests to complete.
2903
 *
2904
 * For CPU synchronisation (NULL to) no request is required. For syncing with
2905
 * rings to_req must be non-NULL. However, a request does not have to be
2906
 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a
2907
 * request will be allocated automatically and returned through *to_req. Note
2908
 * that it is not guaranteed that commands will be emitted (because the system
2909
 * might already be idle). Hence there is no need to create a request that
2910
 * might never have any work submitted. Note further that if a request is
2911
 * returned in *to_req, it is the responsibility of the caller to submit
2912
 * that request (after potentially adding more work to it).
2913
 *
3031 serge 2914
 * Returns 0 if successful, else propagates up the lower layer error.
2344 Serge 2915
 */
2916
int
3031 serge 2917
i915_gem_object_sync(struct drm_i915_gem_object *obj,
6084 serge 2918
		     struct intel_engine_cs *to,
2919
		     struct drm_i915_gem_request **to_req)
2344 Serge 2920
{
6084 serge 2921
	const bool readonly = obj->base.pending_write_domain == 0;
2922
	struct drm_i915_gem_request *req[I915_NUM_RINGS];
2923
	int ret, i, n;
2332 Serge 2924
 
6084 serge 2925
	if (!obj->active)
3031 serge 2926
		return 0;
2332 Serge 2927
 
6084 serge 2928
	if (to == NULL)
2929
		return i915_gem_object_wait_rendering(obj, readonly);
2332 Serge 2930
 
6084 serge 2931
	n = 0;
2932
	if (readonly) {
2933
		if (obj->last_write_req)
2934
			req[n++] = obj->last_write_req;
2935
	} else {
2936
		for (i = 0; i < I915_NUM_RINGS; i++)
2937
			if (obj->last_read_req[i])
2938
				req[n++] = obj->last_read_req[i];
2939
	}
2940
	for (i = 0; i < n; i++) {
2941
		ret = __i915_gem_object_sync(obj, to, req[i], to_req);
2942
		if (ret)
2943
			return ret;
2944
	}
3031 serge 2945
 
6084 serge 2946
	return 0;
2344 Serge 2947
}
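/*
 * Illustrative sketch, not part of the driver, of the two calling modes the
 * comment above describes ("to_ring" is a placeholder engine):
 *
 *	i915_gem_object_sync(obj, NULL, NULL);		// CPU sync, no request needed
 *
 *	struct drm_i915_gem_request *req = NULL;
 *	ret = i915_gem_object_sync(obj, to_ring, &req);	// may allocate req
 *	if (ret == 0 && req)
 *		i915_add_request_no_flush(req);		// caller must submit it
 */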
2332 Serge 2948
 
2344 Serge 2949
static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2950
{
2951
	u32 old_write_domain, old_read_domains;
2332 Serge 2952
 
2344 Serge 2953
	/* Force a pagefault for domain tracking on next user access */
6084 serge 2954
	i915_gem_release_mmap(obj);
2332 Serge 2955
 
2344 Serge 2956
	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2957
		return;
2332 Serge 2958
 
3480 Serge 2959
	/* Wait for any direct GTT access to complete */
2960
	mb();
2961
 
2344 Serge 2962
	old_read_domains = obj->base.read_domains;
2963
	old_write_domain = obj->base.write_domain;
2351 Serge 2964
 
2344 Serge 2965
	obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2966
	obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
2332 Serge 2967
 
2351 Serge 2968
	trace_i915_gem_object_change_domain(obj,
2969
					    old_read_domains,
2970
					    old_write_domain);
2344 Serge 2971
}
2332 Serge 2972
 
6084 serge 2973
static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
2344 Serge 2974
{
4104 Serge 2975
	struct drm_i915_gem_object *obj = vma->obj;
5060 serge 2976
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3480 Serge 2977
	int ret;
3263 Serge 2978
 
7144 serge 2979
	if (list_empty(&vma->obj_link))
2344 Serge 2980
		return 0;
2332 Serge 2981
 
4560 Serge 2982
	if (!drm_mm_node_allocated(&vma->node)) {
2983
		i915_gem_vma_destroy(vma);
2984
		return 0;
2985
	}
2986
 
5060 serge 2987
	if (vma->pin_count)
3031 serge 2988
		return -EBUSY;
2332 Serge 2989
 
3243 Serge 2990
	BUG_ON(obj->pages == NULL);
3031 serge 2991
 
6084 serge 2992
	if (wait) {
2993
		ret = i915_gem_object_wait_rendering(obj, false);
2994
		if (ret)
2995
			return ret;
2996
	}
2332 Serge 2997
 
7144 serge 2998
	if (vma->is_ggtt && vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
6084 serge 2999
		i915_gem_object_finish_gtt(obj);
5354 serge 3000
 
6084 serge 3001
		/* release the fence reg _after_ flushing */
3002
		ret = i915_gem_object_put_fence(obj);
3003
		if (ret)
3004
			return ret;
5060 serge 3005
	}
2332 Serge 3006
 
4104 Serge 3007
	trace_i915_vma_unbind(vma);
2332 Serge 3008
 
6084 serge 3009
	vma->vm->unbind_vma(vma);
3010
	vma->bound = 0;
2332 Serge 3011
 
7144 serge 3012
	list_del_init(&vma->vm_link);
3013
	if (vma->is_ggtt) {
6084 serge 3014
		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
3015
			obj->map_and_fenceable = false;
3016
		} else if (vma->ggtt_view.pages) {
3017
			sg_free_table(vma->ggtt_view.pages);
3018
			kfree(vma->ggtt_view.pages);
3019
		}
3020
		vma->ggtt_view.pages = NULL;
3021
	}
2332 Serge 3022
 
4104 Serge 3023
	drm_mm_remove_node(&vma->node);
3024
	i915_gem_vma_destroy(vma);
3025
 
3026
	/* Since the unbound list is global, only move to that list if
4560 Serge 3027
	 * no more VMAs exist. */
6084 serge 3028
	if (list_empty(&obj->vma_list))
4104 Serge 3029
		list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
3030
 
4560 Serge 3031
	/* And finally now the object is completely decoupled from this vma,
3032
	 * we can drop its hold on the backing storage and allow it to be
3033
	 * reaped by the shrinker.
3034
	 */
3035
	i915_gem_object_unpin_pages(obj);
3036
 
2344 Serge 3037
	return 0;
3038
}
2332 Serge 3039
 
6084 serge 3040
int i915_vma_unbind(struct i915_vma *vma)
3041
{
3042
	return __i915_vma_unbind(vma, true);
3043
}
3044
 
3045
int __i915_vma_unbind_no_wait(struct i915_vma *vma)
3046
{
3047
	return __i915_vma_unbind(vma, false);
3048
}
3049
 
3031 serge 3050
int i915_gpu_idle(struct drm_device *dev)
2344 Serge 3051
{
5060 serge 3052
	struct drm_i915_private *dev_priv = dev->dev_private;
3053
	struct intel_engine_cs *ring;
2344 Serge 3054
	int ret, i;
2332 Serge 3055
 
2344 Serge 3056
	/* Flush everything onto the inactive list. */
3031 serge 3057
	for_each_ring(ring, dev_priv, i) {
5354 serge 3058
		if (!i915.enable_execlists) {
6084 serge 3059
			struct drm_i915_gem_request *req;
3031 serge 3060
 
7144 serge 3061
			req = i915_gem_request_alloc(ring, NULL);
3062
			if (IS_ERR(req))
3063
				return PTR_ERR(req);
2344 Serge 3064
 
6084 serge 3065
			ret = i915_switch_context(req);
3066
			if (ret) {
3067
				i915_gem_request_cancel(req);
3068
				return ret;
3069
			}
2344 Serge 3070
 
6084 serge 3071
			i915_add_request_no_flush(req);
3072
		}
2332 Serge 3073
 
6084 serge 3074
		ret = intel_ring_idle(ring);
3031 serge 3075
		if (ret)
3076
			return ret;
3077
	}
2332 Serge 3078
 
6084 serge 3079
	WARN_ON(i915_verify_lists(dev));
3031 serge 3080
	return 0;
3081
}
2332 Serge 3082
 
5354 serge 3083
static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
3031 serge 3084
				     unsigned long cache_level)
3085
{
5354 serge 3086
	struct drm_mm_node *gtt_space = &vma->node;
3031 serge 3087
	struct drm_mm_node *other;
2332 Serge 3088
 
5354 serge 3089
	/*
3090
	 * On some machines we have to be careful when putting differing types
3091
	 * of snoopable memory together to avoid the prefetcher crossing memory
3092
	 * domains and dying. During vm initialisation, we decide whether or not
3093
	 * these constraints apply and set the drm_mm.color_adjust
3094
	 * appropriately.
3031 serge 3095
	 */
5354 serge 3096
	if (vma->vm->mm.color_adjust == NULL)
3031 serge 3097
		return true;
2332 Serge 3098
 
4104 Serge 3099
	if (!drm_mm_node_allocated(gtt_space))
3031 serge 3100
		return true;
2332 Serge 3101
 
3031 serge 3102
	if (list_empty(&gtt_space->node_list))
3103
		return true;
2332 Serge 3104
 
3031 serge 3105
	other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3106
	if (other->allocated && !other->hole_follows && other->color != cache_level)
3107
		return false;
2344 Serge 3108
 
3031 serge 3109
	other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3110
	if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3111
		return false;
2344 Serge 3112
 
3031 serge 3113
	return true;
3114
}
2344 Serge 3115
 
2332 Serge 3116
/**
6084 serge 3117
 * Finds free space in the GTT aperture and binds the object or a view of it
3118
 * there.
2332 Serge 3119
 */
5060 serge 3120
static struct i915_vma *
4104 Serge 3121
i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
3122
			   struct i915_address_space *vm,
6084 serge 3123
			   const struct i915_ggtt_view *ggtt_view,
3124
			   unsigned alignment,
5060 serge 3125
			   uint64_t flags)
2332 Serge 3126
{
3127
	struct drm_device *dev = obj->base.dev;
5060 serge 3128
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 3129
	u32 fence_alignment, unfenced_alignment;
3130
	u32 search_flag, alloc_flag;
3131
	u64 start, end;
3132
	u64 size, fence_size;
4104 Serge 3133
	struct i915_vma *vma;
2332 Serge 3134
	int ret;
2326 Serge 3135
 
6084 serge 3136
	if (i915_is_ggtt(vm)) {
3137
		u32 view_size;
2332 Serge 3138
 
6084 serge 3139
		if (WARN_ON(!ggtt_view))
3140
			return ERR_PTR(-EINVAL);
3141
 
3142
		view_size = i915_ggtt_view_size(obj, ggtt_view);
3143
 
3144
		fence_size = i915_gem_get_gtt_size(dev,
3145
						   view_size,
3146
						   obj->tiling_mode);
3147
		fence_alignment = i915_gem_get_gtt_alignment(dev,
3148
							     view_size,
3149
							     obj->tiling_mode,
3150
							     true);
3151
		unfenced_alignment = i915_gem_get_gtt_alignment(dev,
3152
								view_size,
3153
								obj->tiling_mode,
3154
								false);
3155
		size = flags & PIN_MAPPABLE ? fence_size : view_size;
3156
	} else {
3157
		fence_size = i915_gem_get_gtt_size(dev,
3158
						   obj->base.size,
3159
						   obj->tiling_mode);
3160
		fence_alignment = i915_gem_get_gtt_alignment(dev,
3161
							     obj->base.size,
3162
							     obj->tiling_mode,
3163
							     true);
3164
		unfenced_alignment =
3165
			i915_gem_get_gtt_alignment(dev,
3166
						   obj->base.size,
3167
						   obj->tiling_mode,
3168
						   false);
3169
		size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
3170
	}
3171
 
3172
	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
3173
	end = vm->total;
3174
	if (flags & PIN_MAPPABLE)
3175
		end = min_t(u64, end, dev_priv->gtt.mappable_end);
3176
	if (flags & PIN_ZONE_4G)
6937 serge 3177
		end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
6084 serge 3178
 
2332 Serge 3179
	if (alignment == 0)
5060 serge 3180
		alignment = flags & PIN_MAPPABLE ? fence_alignment :
2332 Serge 3181
						unfenced_alignment;
5060 serge 3182
	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
6084 serge 3183
		DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
3184
			  ggtt_view ? ggtt_view->type : 0,
3185
			  alignment);
5060 serge 3186
		return ERR_PTR(-EINVAL);
2332 Serge 3187
	}
3188
 
6084 serge 3189
	/* If binding the object/GGTT view requires more space than the entire
3190
	 * aperture has, reject it early before evicting everything in a vain
3191
	 * attempt to find space.
2332 Serge 3192
	 */
6084 serge 3193
	if (size > end) {
3194
		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
3195
			  ggtt_view ? ggtt_view->type : 0,
3196
			  size,
5060 serge 3197
			  flags & PIN_MAPPABLE ? "mappable" : "total",
3198
			  end);
3199
		return ERR_PTR(-E2BIG);
2332 Serge 3200
	}
3201
 
3031 serge 3202
	ret = i915_gem_object_get_pages(obj);
3203
	if (ret)
5060 serge 3204
		return ERR_PTR(ret);
3031 serge 3205
 
3243 Serge 3206
	i915_gem_object_pin_pages(obj);
3207
 
6084 serge 3208
	vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
3209
			  i915_gem_obj_lookup_or_create_vma(obj, vm);
3210
 
5060 serge 3211
	if (IS_ERR(vma))
4104 Serge 3212
		goto err_unpin;
3243 Serge 3213
 
6937 serge 3214
	if (flags & PIN_OFFSET_FIXED) {
3215
		uint64_t offset = flags & PIN_OFFSET_MASK;
3216
 
3217
		if (offset & (alignment - 1) || offset + size > end) {
3218
			ret = -EINVAL;
3219
			goto err_free_vma;
3220
		}
3221
		vma->node.start = offset;
3222
		vma->node.size = size;
3223
		vma->node.color = obj->cache_level;
3224
		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
3225
		if (ret)
3226
			goto err_free_vma;
3227
	} else {
7144 serge 3228
		if (flags & PIN_HIGH) {
3229
			search_flag = DRM_MM_SEARCH_BELOW;
3230
			alloc_flag = DRM_MM_CREATE_TOP;
3231
		} else {
3232
			search_flag = DRM_MM_SEARCH_DEFAULT;
3233
			alloc_flag = DRM_MM_CREATE_DEFAULT;
3234
		}
6084 serge 3235
 
4104 Serge 3236
search_free:
7144 serge 3237
		ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
3238
							  size, alignment,
3239
							  obj->cache_level,
3240
							  start, end,
3241
							  search_flag,
3242
							  alloc_flag);
3243
		if (ret) {
2332 Serge 3244
 
7144 serge 3245
			goto err_free_vma;
3246
		}
2332 Serge 3247
	}
5354 serge 3248
	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
4104 Serge 3249
		ret = -EINVAL;
3250
		goto err_remove_node;
3031 serge 3251
	}
2332 Serge 3252
 
6084 serge 3253
	trace_i915_vma_bind(vma, flags);
3254
	ret = i915_vma_bind(vma, obj->cache_level, flags);
4104 Serge 3255
	if (ret)
3256
		goto err_remove_node;
2332 Serge 3257
 
4104 Serge 3258
	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
7144 serge 3259
	list_add_tail(&vma->vm_link, &vm->inactive_list);
2332 Serge 3260
 
5060 serge 3261
	return vma;
4104 Serge 3262
 
3263
err_remove_node:
3264
	drm_mm_remove_node(&vma->node);
3265
err_free_vma:
3266
	i915_gem_vma_destroy(vma);
5060 serge 3267
	vma = ERR_PTR(ret);
4104 Serge 3268
err_unpin:
3269
	i915_gem_object_unpin_pages(obj);
5060 serge 3270
	return vma;
2332 Serge 3271
}
3272
 
4104 Serge 3273
bool
3274
i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3275
			bool force)
2332 Serge 3276
{
3277
	/* If we don't have a page list set up, then we're not pinned
3278
	 * to GPU, and we can ignore the cache flush because it'll happen
3279
	 * again at bind time.
3280
	 */
3243 Serge 3281
	if (obj->pages == NULL)
4104 Serge 3282
		return false;
2332 Serge 3283
 
3480 Serge 3284
	/*
3285
	 * Stolen memory is always coherent with the GPU as it is explicitly
3286
	 * marked as wc by the system, or the system is cache-coherent.
3287
	 */
5354 serge 3288
	if (obj->stolen || obj->phys_handle)
4104 Serge 3289
		return false;
3480 Serge 3290
 
2332 Serge 3291
	/* If the GPU is snooping the contents of the CPU cache,
3292
	 * we do not need to manually clear the CPU cache lines.  However,
3293
	 * the caches are only snooped when the render cache is
3294
	 * flushed/invalidated.  As we always have to emit invalidations
3295
	 * and flushes when moving into and out of the RENDER domain, correct
3296
	 * snooping behaviour occurs naturally as the result of our domain
3297
	 * tracking.
3298
	 */
6084 serge 3299
	if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3300
		obj->cache_dirty = true;
4104 Serge 3301
		return false;
6084 serge 3302
	}
2332 Serge 3303
 
4293 Serge 3304
	trace_i915_gem_object_clflush(obj);
3305
	drm_clflush_sg(obj->pages);
6084 serge 3306
	obj->cache_dirty = false;
2344 Serge 3307
 
4104 Serge 3308
	return true;
2332 Serge 3309
}
3310
 
2344 Serge 3311
/** Flushes the GTT write domain for the object if it's dirty. */
3312
static void
3313
i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3314
{
3315
	uint32_t old_write_domain;
2332 Serge 3316
 
2344 Serge 3317
	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3318
		return;
2332 Serge 3319
 
2344 Serge 3320
	/* No actual flushing is required for the GTT write domain.  Writes
3321
	 * to it immediately go to main memory as far as we know, so there's
3322
	 * no chipset flush.  It also doesn't land in render cache.
3323
	 *
3324
	 * However, we do have to enforce the order so that all writes through
3325
	 * the GTT land before any writes to the device, such as updates to
3326
	 * the GATT itself.
3327
	 */
3328
	wmb();
2332 Serge 3329
 
2344 Serge 3330
	old_write_domain = obj->base.write_domain;
3331
	obj->base.write_domain = 0;
2332 Serge 3332
 
6084 serge 3333
	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
5354 serge 3334
 
2351 Serge 3335
	trace_i915_gem_object_change_domain(obj,
3336
					    obj->base.read_domains,
3337
					    old_write_domain);
2344 Serge 3338
}
2332 Serge 3339
 
3340
/** Flushes the CPU write domain for the object if it's dirty. */
2326 Serge 3341
static void
6084 serge 3342
i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
2332 Serge 3343
{
3344
	uint32_t old_write_domain;
3345
 
3346
	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
3347
		return;
3348
 
6084 serge 3349
	if (i915_gem_clflush_object(obj, obj->pin_display))
3350
		i915_gem_chipset_flush(obj->base.dev);
4104 Serge 3351
 
2332 Serge 3352
	old_write_domain = obj->base.write_domain;
3353
	obj->base.write_domain = 0;
3354
 
6084 serge 3355
	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
5354 serge 3356
 
2351 Serge 3357
	trace_i915_gem_object_change_domain(obj,
3358
					    obj->base.read_domains,
3359
					    old_write_domain);
2332 Serge 3360
}
3361
 
3362
/**
3363
 * Moves a single object to the GTT read, and possibly write domain.
3364
 *
3365
 * This function returns when the move is complete, including waiting on
3366
 * flushes to occur.
3367
 */
3368
int
3369
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3370
{
3371
	uint32_t old_write_domain, old_read_domains;
6084 serge 3372
	struct i915_vma *vma;
2332 Serge 3373
	int ret;
3374
 
3375
	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3376
		return 0;
3377
 
3031 serge 3378
	ret = i915_gem_object_wait_rendering(obj, !write);
6084 serge 3379
	if (ret)
3380
		return ret;
2332 Serge 3381
 
6084 serge 3382
	/* Flush and acquire obj->pages so that we are coherent through
3383
	 * direct access in memory with previous cached writes through
3384
	 * shmemfs and that our cache domain tracking remains valid.
3385
	 * For example, if the obj->filp was moved to swap without us
3386
	 * being notified and releasing the pages, we would mistakenly
3387
	 * continue to assume that the obj remained out of the CPU cached
3388
	 * domain.
3389
	 */
3390
	ret = i915_gem_object_get_pages(obj);
3391
	if (ret)
3392
		return ret;
2332 Serge 3393
 
6084 serge 3394
	i915_gem_object_flush_cpu_write_domain(obj);
3395
 
3480 Serge 3396
	/* Serialise direct access to this object with the barriers for
3397
	 * coherent writes from the GPU, by effectively invalidating the
3398
	 * GTT domain upon first access.
3399
	 */
3400
	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3401
		mb();
3402
 
2332 Serge 3403
	old_write_domain = obj->base.write_domain;
3404
	old_read_domains = obj->base.read_domains;
3405
 
3406
	/* It should now be out of any other write domains, and we can update
3407
	 * the domain values for our changes.
3408
	 */
3409
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3410
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3411
	if (write) {
3412
		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3413
		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3414
		obj->dirty = 1;
3415
	}
3416
 
2351 Serge 3417
	trace_i915_gem_object_change_domain(obj,
3418
					    old_read_domains,
3419
					    old_write_domain);
3420
 
3031 serge 3421
	/* And bump the LRU for this access */
6084 serge 3422
	vma = i915_gem_obj_to_ggtt(obj);
3423
	if (vma && drm_mm_node_allocated(&vma->node) && !obj->active)
7144 serge 3424
		list_move_tail(&vma->vm_link,
6084 serge 3425
			       &to_i915(obj->base.dev)->gtt.base.inactive_list);
3031 serge 3426
 
2332 Serge 3427
	return 0;
3428
}
3429
 
6084 serge 3430
/**
3431
 * Changes the cache-level of an object across all VMA.
3432
 *
3433
 * After this function returns, the object will be in the new cache-level
3434
 * across all GTT and the contents of the backing storage will be coherent,
3435
 * with respect to the new cache-level. In order to keep the backing storage
3436
 * coherent for all users, we only allow a single cache level to be set
3437
 * globally on the object and prevent it from being changed whilst the
3438
	 * hardware is reading from the object. That is, if the object is currently
3439
 * on the scanout it will be set to uncached (or equivalent display
3440
 * cache coherency) and all non-MOCS GPU access will also be uncached so
3441
 * that all direct access to the scanout remains coherent.
3442
 */
2335 Serge 3443
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3444
				    enum i915_cache_level cache_level)
3445
{
3031 serge 3446
	struct drm_device *dev = obj->base.dev;
5060 serge 3447
	struct i915_vma *vma, *next;
6084 serge 3448
	bool bound = false;
3449
	int ret = 0;
2332 Serge 3450
 
2335 Serge 3451
	if (obj->cache_level == cache_level)
6084 serge 3452
		goto out;
2332 Serge 3453
 
6084 serge 3454
	/* Inspect the list of currently bound VMA and unbind any that would
3455
	 * be invalid given the new cache-level. This is principally to
3456
	 * catch the issue of the CS prefetch crossing page boundaries and
3457
	 * reading an invalid PTE on older architectures.
3458
	 */
7144 serge 3459
	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
6084 serge 3460
		if (!drm_mm_node_allocated(&vma->node))
3461
			continue;
2332 Serge 3462
 
6084 serge 3463
		if (vma->pin_count) {
3464
			DRM_DEBUG("can not change the cache level of pinned objects\n");
3465
			return -EBUSY;
3466
		}
3467
 
5354 serge 3468
		if (!i915_gem_valid_gtt_space(vma, cache_level)) {
4104 Serge 3469
			ret = i915_vma_unbind(vma);
6084 serge 3470
			if (ret)
3471
				return ret;
3472
		} else
3473
			bound = true;
3031 serge 3474
	}
3475
 
6084 serge 3476
	/* We can reuse the existing drm_mm nodes but need to change the
3477
	 * cache-level on the PTE. We could simply unbind them all and
3478
	 * rebind with the correct cache-level on next use. However since
3479
	 * we already have a valid slot, dma mapping, pages etc, we may as well
3480
	 * rewrite the PTE in the belief that doing so tramples upon less
3481
	 * state and so involves less work.
3482
	 */
3483
	if (bound) {
3484
		/* Before we change the PTE, the GPU must not be accessing it.
3485
		 * If we wait upon the object, we know that all the bound
3486
		 * VMA are no longer active.
3487
		 */
3488
		ret = i915_gem_object_wait_rendering(obj, false);
2335 Serge 3489
		if (ret)
3490
			return ret;
2332 Serge 3491
 
6084 serge 3492
		if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
3493
			/* Access to snoopable pages through the GTT is
3494
			 * incoherent and on some machines causes a hard
3495
	 * lockup. Relinquish the CPU mmapping to force
3496
			 * userspace to refault in the pages and we can
3497
			 * then double check if the GTT mapping is still
3498
			 * valid for that pointer access.
3499
			 */
3500
			i915_gem_release_mmap(obj);
2332 Serge 3501
 
6084 serge 3502
			/* As we no longer need a fence for GTT access,
3503
			 * we can relinquish it now (and so prevent having
3504
			 * to steal a fence from someone else on the next
3505
			 * fence request). Note GPU activity would have
3506
			 * dropped the fence as all snoopable access is
3507
			 * supposed to be linear.
3508
			 */
2335 Serge 3509
			ret = i915_gem_object_put_fence(obj);
3510
			if (ret)
3511
				return ret;
6084 serge 3512
		} else {
3513
			/* We either have incoherent backing store and
3514
			 * so no GTT access or the architecture is fully
3515
			 * coherent. In such cases, existing GTT mmaps
3516
			 * ignore the cache bit in the PTE and we can
3517
			 * rewrite it without confusing the GPU or having
3518
			 * to force userspace to fault back in its mmaps.
3519
			 */
3520
		}
2332 Serge 3521
 
7144 serge 3522
		list_for_each_entry(vma, &obj->vma_list, obj_link) {
6084 serge 3523
			if (!drm_mm_node_allocated(&vma->node))
3524
				continue;
3525
 
3526
			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3527
			if (ret)
3528
				return ret;
3529
		}
2335 Serge 3530
	}
2332 Serge 3531
 
7144 serge 3532
	list_for_each_entry(vma, &obj->vma_list, obj_link)
4104 Serge 3533
		vma->node.color = cache_level;
3534
	obj->cache_level = cache_level;
3535
 
6084 serge 3536
out:
3537
	/* Flush the dirty CPU caches to the backing storage so that the
3538
	 * object is now coherent at its new cache level (with respect
3539
	 * to the access domain).
3540
	 */
3541
	if (obj->cache_dirty &&
3542
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
3543
	    cpu_write_needs_clflush(obj)) {
3544
		if (i915_gem_clflush_object(obj, true))
3545
			i915_gem_chipset_flush(obj->base.dev);
3546
	}
2332 Serge 3547
 
2335 Serge 3548
	return 0;
3549
}
2332 Serge 3550
 
3260 Serge 3551
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3552
			       struct drm_file *file)
3553
{
3554
	struct drm_i915_gem_caching *args = data;
3555
	struct drm_i915_gem_object *obj;
3556
 
3557
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
6084 serge 3558
	if (&obj->base == NULL)
3559
		return -ENOENT;
3260 Serge 3560
 
4104 Serge 3561
	switch (obj->cache_level) {
3562
	case I915_CACHE_LLC:
3563
	case I915_CACHE_L3_LLC:
3564
		args->caching = I915_CACHING_CACHED;
3565
		break;
3260 Serge 3566
 
4104 Serge 3567
	case I915_CACHE_WT:
3568
		args->caching = I915_CACHING_DISPLAY;
3569
		break;
3570
 
3571
	default:
3572
		args->caching = I915_CACHING_NONE;
3573
		break;
3574
	}
3575
 
6084 serge 3576
	drm_gem_object_unreference_unlocked(&obj->base);
3577
	return 0;
3260 Serge 3578
}
3579
 
3580
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3581
			       struct drm_file *file)
3582
{
6084 serge 3583
	struct drm_i915_private *dev_priv = dev->dev_private;
3260 Serge 3584
	struct drm_i915_gem_caching *args = data;
3585
	struct drm_i915_gem_object *obj;
3586
	enum i915_cache_level level;
3587
	int ret;
3588
 
3589
	switch (args->caching) {
3590
	case I915_CACHING_NONE:
3591
		level = I915_CACHE_NONE;
3592
		break;
3593
	case I915_CACHING_CACHED:
6084 serge 3594
		/*
3595
		 * Due to a HW issue on BXT A stepping, GPU stores via a
3596
		 * snooped mapping may leave stale data in a corresponding CPU
3597
		 * cacheline, whereas normally such cachelines would get
3598
		 * invalidated.
3599
		 */
6937 serge 3600
		if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
6084 serge 3601
			return -ENODEV;
3602
 
3260 Serge 3603
		level = I915_CACHE_LLC;
3604
		break;
4104 Serge 3605
	case I915_CACHING_DISPLAY:
3606
		level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
3607
		break;
3260 Serge 3608
	default:
3609
		return -EINVAL;
3610
	}
3611
 
6084 serge 3612
	intel_runtime_pm_get(dev_priv);
3613
 
3260 Serge 3614
	ret = i915_mutex_lock_interruptible(dev);
3615
	if (ret)
6084 serge 3616
		goto rpm_put;
3260 Serge 3617
 
3618
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3619
	if (&obj->base == NULL) {
3620
		ret = -ENOENT;
3621
		goto unlock;
3622
	}
3623
 
3624
	ret = i915_gem_object_set_cache_level(obj, level);
3625
 
3626
	drm_gem_object_unreference(&obj->base);
3627
unlock:
3628
	mutex_unlock(&dev->struct_mutex);
6084 serge 3629
rpm_put:
3630
	intel_runtime_pm_put(dev_priv);
3631
 
3260 Serge 3632
	return ret;
3633
}
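
#if 0
/* Hedged userspace-side sketch (not part of this driver): how the two
 * caching ioctls above are exercised through libdrm.  The wrapper name and
 * fd handling are illustrative assumptions; only struct drm_i915_gem_caching,
 * I915_CACHING_CACHED and DRM_IOCTL_I915_GEM_SET_CACHING come from i915_drm.h.
 */
#include <stdint.h>
#include <string.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int example_set_cached(int fd, uint32_t handle)
{
	struct drm_i915_gem_caching arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.caching = I915_CACHING_CACHED;

	/* 0 on success, -1 with errno set otherwise. */
	return drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
}
#endif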
3634
 
2335 Serge 3635
/*
3636
 * Prepare buffer for display plane (scanout, cursors, etc).
3637
 * Can be called from an uninterruptible phase (modesetting) and allows
3638
 * any flushes to be pipelined (for pageflips).
3639
 */
3640
int
3641
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3642
				     u32 alignment,
6084 serge 3643
				     const struct i915_ggtt_view *view)
2335 Serge 3644
{
3645
	u32 old_read_domains, old_write_domain;
3646
	int ret;
2332 Serge 3647
 
4104 Serge 3648
	/* Mark the pin_display early so that we account for the
3649
	 * display coherency whilst setting up the cache domains.
3650
	 */
6084 serge 3651
	obj->pin_display++;
4104 Serge 3652
 
2335 Serge 3653
	/* The display engine is not coherent with the LLC cache on gen6.  As
3654
	 * a result, we make sure that the pinning that is about to occur is
3655
	 * done with uncached PTEs. This is lowest common denominator for all
3656
	 * chipsets.
3657
	 *
3658
	 * However for gen6+, we could do better by using the GFDT bit instead
3659
	 * of uncaching, which would allow us to flush all the LLC-cached data
3660
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3661
	 */
4104 Serge 3662
	ret = i915_gem_object_set_cache_level(obj,
3663
					      HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
2360 Serge 3664
	if (ret)
4104 Serge 3665
		goto err_unpin_display;
2332 Serge 3666
 
2335 Serge 3667
	/* As the user may map the buffer once pinned in the display plane
3668
	 * (e.g. libkms for the bootup splash), we have to ensure that we
3669
	 * always use map_and_fenceable for all scanout buffers.
3670
	 */
6084 serge 3671
	ret = i915_gem_object_ggtt_pin(obj, view, alignment,
3672
				       view->type == I915_GGTT_VIEW_NORMAL ?
3673
				       PIN_MAPPABLE : 0);
2335 Serge 3674
	if (ret)
4104 Serge 3675
		goto err_unpin_display;
2332 Serge 3676
 
6084 serge 3677
	i915_gem_object_flush_cpu_write_domain(obj);
2332 Serge 3678
 
2335 Serge 3679
	old_write_domain = obj->base.write_domain;
3680
	old_read_domains = obj->base.read_domains;
2332 Serge 3681
 
2335 Serge 3682
	/* It should now be out of any other write domains, and we can update
3683
	 * the domain values for our changes.
3684
	 */
3031 serge 3685
	obj->base.write_domain = 0;
2335 Serge 3686
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2332 Serge 3687
 
2351 Serge 3688
	trace_i915_gem_object_change_domain(obj,
3689
					    old_read_domains,
3690
					    old_write_domain);
2332 Serge 3691
 
2335 Serge 3692
	return 0;
4104 Serge 3693
 
3694
err_unpin_display:
6084 serge 3695
	obj->pin_display--;
4104 Serge 3696
	return ret;
2335 Serge 3697
}
2332 Serge 3698
 
4104 Serge 3699
void
6084 serge 3700
i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
3701
					 const struct i915_ggtt_view *view)
4104 Serge 3702
{
6084 serge 3703
	if (WARN_ON(obj->pin_display == 0))
3704
		return;
4104 Serge 3705
 
6084 serge 3706
	i915_gem_object_ggtt_unpin_view(obj, view);
2332 Serge 3707
 
6084 serge 3708
	obj->pin_display--;
2344 Serge 3709
}
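
#if 0
/* Hedged sketch (not from the original source): the usual pairing of the two
 * helpers above when a buffer becomes a scanout surface.  The helper name,
 * alignment argument and error handling are illustrative assumptions;
 * struct_mutex must already be held, as for any pin/unpin in this file.
 */
static int example_pin_scanout(struct drm_i915_gem_object *obj, u32 alignment)
{
	int ret;

	ret = i915_gem_object_pin_to_display_plane(obj, alignment,
						   &i915_ggtt_view_normal);
	if (ret)
		return ret;

	/* ... program the plane registers with the object's GGTT offset ... */

	i915_gem_object_unpin_from_display_plane(obj, &i915_ggtt_view_normal);
	return 0;
}
#endif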
2332 Serge 3710
 
2344 Serge 3711
/**
3712
 * Moves a single object to the CPU read, and possibly write domain.
3713
 *
3714
 * This function returns when the move is complete, including waiting on
3715
 * flushes to occur.
3716
 */
3031 serge 3717
int
2344 Serge 3718
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3719
{
3720
	uint32_t old_write_domain, old_read_domains;
3721
	int ret;
2332 Serge 3722
 
2344 Serge 3723
	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3724
		return 0;
2332 Serge 3725
 
3031 serge 3726
	ret = i915_gem_object_wait_rendering(obj, !write);
2344 Serge 3727
	if (ret)
3728
		return ret;
2332 Serge 3729
 
2344 Serge 3730
	i915_gem_object_flush_gtt_write_domain(obj);
2332 Serge 3731
 
2344 Serge 3732
	old_write_domain = obj->base.write_domain;
3733
	old_read_domains = obj->base.read_domains;
2332 Serge 3734
 
2344 Serge 3735
	/* Flush the CPU cache if it's still invalid. */
3736
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
4104 Serge 3737
		i915_gem_clflush_object(obj, false);
2332 Serge 3738
 
2344 Serge 3739
		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3740
	}
2332 Serge 3741
 
2344 Serge 3742
	/* It should now be out of any other write domains, and we can update
3743
	 * the domain values for our changes.
3744
	 */
3745
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2332 Serge 3746
 
2344 Serge 3747
	/* If we're writing through the CPU, then the GPU read domains will
3748
	 * need to be invalidated at next use.
3749
	 */
3750
	if (write) {
3751
		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3752
		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3753
	}
2332 Serge 3754
 
2351 Serge 3755
	trace_i915_gem_object_change_domain(obj,
3756
					    old_read_domains,
3757
					    old_write_domain);
2332 Serge 3758
 
2344 Serge 3759
	return 0;
3760
}
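
#if 0
/* Hedged sketch: reading the first dword of an object back through the CPU
 * domain set up above.  The helper is illustrative; it assumes struct_mutex
 * is held and uses only calls already present in this file plus
 * kmap()/kunmap().
 */
static int example_read_first_dword(struct drm_i915_gem_object *obj, u32 *out)
{
	struct page *page;
	void *vaddr;
	int ret;

	ret = i915_gem_object_set_to_cpu_domain(obj, false);
	if (ret)
		return ret;

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);
	page = i915_gem_object_get_page(obj, 0);
	vaddr = kmap(page);
	*out = *(u32 *)vaddr;
	kunmap(page);
	i915_gem_object_unpin_pages(obj);

	return 0;
}
#endif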
2332 Serge 3761
 
3031 serge 3762
/* Throttle our rendering by waiting until the ring has completed our requests
3763
 * emitted over 20 msec ago.
2344 Serge 3764
 *
3031 serge 3765
 * Note that if we were to use the current jiffies each time around the loop,
3766
 * we wouldn't escape the function with any frames outstanding if the time to
3767
 * render a frame was over 20ms.
3768
 *
3769
 * This should get us reasonable parallelism between CPU and GPU but also
3770
 * relatively low latency when blocking on a particular request to finish.
2344 Serge 3771
 */
3031 serge 3772
static int
3773
i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
2344 Serge 3774
{
3031 serge 3775
	struct drm_i915_private *dev_priv = dev->dev_private;
3776
	struct drm_i915_file_private *file_priv = file->driver_priv;
6084 serge 3777
	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
3778
	struct drm_i915_gem_request *request, *target = NULL;
3480 Serge 3779
	unsigned reset_counter;
3031 serge 3780
	int ret;
2332 Serge 3781
 
3480 Serge 3782
	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
3783
	if (ret)
3784
		return ret;
2332 Serge 3785
 
3480 Serge 3786
	ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
3787
	if (ret)
3788
		return ret;
3789
 
3031 serge 3790
	spin_lock(&file_priv->mm.lock);
3791
	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3792
		if (time_after_eq(request->emitted_jiffies, recent_enough))
3793
			break;
2332 Serge 3794
 
6084 serge 3795
		/*
3796
		 * Note that the request might not have been submitted yet,
3797
		 * in which case emitted_jiffies will be zero.
3798
		 */
3799
		if (!request->emitted_jiffies)
3800
			continue;
3801
 
3802
		target = request;
3031 serge 3803
	}
3480 Serge 3804
	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
6084 serge 3805
	if (target)
3806
		i915_gem_request_reference(target);
3031 serge 3807
	spin_unlock(&file_priv->mm.lock);
2332 Serge 3808
 
6084 serge 3809
	if (target == NULL)
3031 serge 3810
		return 0;
2332 Serge 3811
 
6084 serge 3812
	ret = __i915_wait_request(target, reset_counter, true, NULL, NULL);
3031 serge 3813
	if (ret == 0)
3814
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
2332 Serge 3815
 
6084 serge 3816
	i915_gem_request_unreference__unlocked(target);
3817
 
3031 serge 3818
	return ret;
2352 Serge 3819
}
2332 Serge 3820
 
5060 serge 3821
static bool
3822
i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
3823
{
3824
	struct drm_i915_gem_object *obj = vma->obj;
3825
 
3826
	if (alignment &&
3827
	    vma->node.start & (alignment - 1))
3828
		return true;
3829
 
3830
	if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
3831
		return true;
3832
 
3833
	if (flags & PIN_OFFSET_BIAS &&
3834
	    vma->node.start < (flags & PIN_OFFSET_MASK))
3835
		return true;
3836
 
6937 serge 3837
	if (flags & PIN_OFFSET_FIXED &&
3838
	    vma->node.start != (flags & PIN_OFFSET_MASK))
3839
		return true;
3840
 
5060 serge 3841
	return false;
3842
}
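
#if 0
/* Hedged sketch: how the PIN_OFFSET_FIXED test above is satisfied by a
 * caller.  Carrying the (page-aligned) offset in the upper bits of the
 * flags word and recovering it via PIN_OFFSET_MASK is inferred from the
 * check above and from i915_gem_object_bind_to_vm(); the helper itself is
 * illustrative.
 */
static int example_pin_at_offset(struct drm_i915_gem_object *obj, u64 offset)
{
	return i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal, 0,
					PIN_OFFSET_FIXED | offset);
}
#endif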
3843
 
6084 serge 3844
void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
2332 Serge 3845
{
6084 serge 3846
	struct drm_i915_gem_object *obj = vma->obj;
3847
	bool mappable, fenceable;
3848
	u32 fence_size, fence_alignment;
3849
 
3850
	fence_size = i915_gem_get_gtt_size(obj->base.dev,
3851
					   obj->base.size,
3852
					   obj->tiling_mode);
3853
	fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
3854
						     obj->base.size,
3855
						     obj->tiling_mode,
3856
						     true);
3857
 
3858
	fenceable = (vma->node.size == fence_size &&
3859
		     (vma->node.start & (fence_alignment - 1)) == 0);
3860
 
3861
	mappable = (vma->node.start + fence_size <=
3862
		    to_i915(obj->base.dev)->gtt.mappable_end);
3863
 
3864
	obj->map_and_fenceable = mappable && fenceable;
3865
}
3866
 
3867
static int
3868
i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
3869
		       struct i915_address_space *vm,
3870
		       const struct i915_ggtt_view *ggtt_view,
3871
		       uint32_t alignment,
3872
		       uint64_t flags)
3873
{
5060 serge 3874
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
4104 Serge 3875
	struct i915_vma *vma;
5354 serge 3876
	unsigned bound;
2332 Serge 3877
	int ret;
3878
 
5060 serge 3879
	if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
3880
		return -ENODEV;
2332 Serge 3881
 
5060 serge 3882
	if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
3883
		return -EINVAL;
4104 Serge 3884
 
5354 serge 3885
	if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
3886
		return -EINVAL;
3887
 
6084 serge 3888
	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
3889
		return -EINVAL;
3890
 
3891
	vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
3892
			  i915_gem_obj_to_vma(obj, vm);
3893
 
3894
	if (IS_ERR(vma))
3895
		return PTR_ERR(vma);
3896
 
5060 serge 3897
	if (vma) {
3898
		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
3899
			return -EBUSY;
4104 Serge 3900
 
5060 serge 3901
		if (i915_vma_misplaced(vma, alignment, flags)) {
3902
			WARN(vma->pin_count,
6084 serge 3903
			     "bo is already pinned in %s with incorrect alignment:"
3904
			     " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
2332 Serge 3905
			     " obj->map_and_fenceable=%d\n",
6084 serge 3906
			     ggtt_view ? "ggtt" : "ppgtt",
3907
			     upper_32_bits(vma->node.start),
3908
			     lower_32_bits(vma->node.start),
3909
			     alignment,
5060 serge 3910
			     !!(flags & PIN_MAPPABLE),
2332 Serge 3911
			     obj->map_and_fenceable);
4104 Serge 3912
			ret = i915_vma_unbind(vma);
2332 Serge 3913
			if (ret)
3914
				return ret;
5060 serge 3915
 
3916
			vma = NULL;
2332 Serge 3917
		}
3918
	}
3919
 
5354 serge 3920
	bound = vma ? vma->bound : 0;
5060 serge 3921
	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
6084 serge 3922
		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
3923
						 flags);
5060 serge 3924
		if (IS_ERR(vma))
3925
			return PTR_ERR(vma);
6084 serge 3926
	} else {
3927
		ret = i915_vma_bind(vma, obj->cache_level, flags);
3928
		if (ret)
3929
			return ret;
2332 Serge 3930
	}
3931
 
6084 serge 3932
	if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
3933
	    (bound ^ vma->bound) & GLOBAL_BIND) {
3934
		__i915_vma_set_map_and_fenceable(vma);
3935
		WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
5354 serge 3936
	}
3937
 
5060 serge 3938
	vma->pin_count++;
2332 Serge 3939
	return 0;
3940
}
3941
 
6084 serge 3942
int
3943
i915_gem_object_pin(struct drm_i915_gem_object *obj,
3944
		    struct i915_address_space *vm,
3945
		    uint32_t alignment,
3946
		    uint64_t flags)
2344 Serge 3947
{
6084 serge 3948
	return i915_gem_object_do_pin(obj, vm,
3949
				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
3950
				      alignment, flags);
2344 Serge 3951
}
2332 Serge 3952
 
6084 serge 3953
int
3954
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3955
			 const struct i915_ggtt_view *view,
3956
			 uint32_t alignment,
3957
			 uint64_t flags)
5060 serge 3958
{
6084 serge 3959
	if (WARN_ONCE(!view, "no view specified"))
3960
		return -EINVAL;
5060 serge 3961
 
6084 serge 3962
	return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view,
3963
				      alignment, flags | PIN_GLOBAL);
5060 serge 3964
}
3965
 
3966
void
6084 serge 3967
i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
3968
				const struct i915_ggtt_view *view)
5060 serge 3969
{
6084 serge 3970
	struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
5060 serge 3971
 
6084 serge 3972
	BUG_ON(!vma);
3973
	WARN_ON(vma->pin_count == 0);
3974
	WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
2332 Serge 3975
 
6084 serge 3976
	--vma->pin_count;
3031 serge 3977
}
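
#if 0
/* Hedged sketch: pins and unpins must stay balanced per GGTT view.  This
 * illustrative helper pins the normal view, reports its offset via
 * i915_gem_obj_ggtt_offset_view() (defined later in this file) and drops
 * the pin again; struct_mutex is assumed to be held.
 */
static int example_report_ggtt_offset(struct drm_i915_gem_object *obj)
{
	int ret;

	ret = i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal, 0, 0);
	if (ret)
		return ret;

	DRM_DEBUG_DRIVER("object at GGTT offset 0x%llx\n",
			 (unsigned long long)i915_gem_obj_ggtt_offset_view(obj,
						&i915_ggtt_view_normal));

	i915_gem_object_ggtt_unpin_view(obj, &i915_ggtt_view_normal);
	return 0;
}
#endif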
2332 Serge 3978
 
3031 serge 3979
int
3980
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3981
		    struct drm_file *file)
3982
{
3983
	struct drm_i915_gem_busy *args = data;
3984
	struct drm_i915_gem_object *obj;
3985
	int ret;
2332 Serge 3986
 
3031 serge 3987
	ret = i915_mutex_lock_interruptible(dev);
3988
	if (ret)
3989
		return ret;
2332 Serge 3990
 
5060 serge 3991
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3031 serge 3992
	if (&obj->base == NULL) {
3993
		ret = -ENOENT;
3994
		goto unlock;
3995
	}
2332 Serge 3996
 
3031 serge 3997
	/* Count all active objects as busy, even if they are currently not used
3998
	 * by the gpu. Users of this interface expect objects to eventually
3999
	 * become non-busy without any further actions, therefore emit any
4000
	 * necessary flushes here.
4001
	 */
4002
	ret = i915_gem_object_flush_active(obj);
6084 serge 4003
	if (ret)
4004
		goto unref;
2332 Serge 4005
 
7144 serge 4006
	args->busy = 0;
4007
	if (obj->active) {
4008
		int i;
2332 Serge 4009
 
7144 serge 4010
		for (i = 0; i < I915_NUM_RINGS; i++) {
4011
			struct drm_i915_gem_request *req;
4012
 
4013
			req = obj->last_read_req[i];
4014
			if (req)
4015
				args->busy |= 1 << (16 + req->ring->exec_id);
4016
		}
4017
		if (obj->last_write_req)
4018
			args->busy |= obj->last_write_req->ring->exec_id;
4019
	}
4020
 
6084 serge 4021
unref:
3031 serge 4022
	drm_gem_object_unreference(&obj->base);
4023
unlock:
4024
	mutex_unlock(&dev->struct_mutex);
4025
	return ret;
4026
}
2332 Serge 4027
 
3031 serge 4028
int
4029
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4030
			struct drm_file *file_priv)
4031
{
4032
	return i915_gem_ring_throttle(dev, file_priv);
4033
}
2332 Serge 4034
 
3263 Serge 4035
#if 0
4036
 
3031 serge 4037
int
4038
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4039
		       struct drm_file *file_priv)
4040
{
5354 serge 4041
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 4042
	struct drm_i915_gem_madvise *args = data;
4043
	struct drm_i915_gem_object *obj;
4044
	int ret;
2332 Serge 4045
 
3031 serge 4046
	switch (args->madv) {
4047
	case I915_MADV_DONTNEED:
4048
	case I915_MADV_WILLNEED:
4049
	    break;
4050
	default:
4051
	    return -EINVAL;
4052
	}
2332 Serge 4053
 
3031 serge 4054
	ret = i915_mutex_lock_interruptible(dev);
4055
	if (ret)
4056
		return ret;
2332 Serge 4057
 
3031 serge 4058
	obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
4059
	if (&obj->base == NULL) {
4060
		ret = -ENOENT;
4061
		goto unlock;
4062
	}
2332 Serge 4063
 
5060 serge 4064
	if (i915_gem_obj_is_pinned(obj)) {
3031 serge 4065
		ret = -EINVAL;
4066
		goto out;
4067
	}
2332 Serge 4068
 
5354 serge 4069
	if (obj->pages &&
4070
	    obj->tiling_mode != I915_TILING_NONE &&
4071
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
4072
		if (obj->madv == I915_MADV_WILLNEED)
4073
			i915_gem_object_unpin_pages(obj);
4074
		if (args->madv == I915_MADV_WILLNEED)
4075
			i915_gem_object_pin_pages(obj);
4076
	}
4077
 
3031 serge 4078
	if (obj->madv != __I915_MADV_PURGED)
4079
		obj->madv = args->madv;
2332 Serge 4080
 
3031 serge 4081
	/* if the object is no longer attached, discard its backing storage */
6084 serge 4082
	if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
3031 serge 4083
		i915_gem_object_truncate(obj);
2332 Serge 4084
 
3031 serge 4085
	args->retained = obj->madv != __I915_MADV_PURGED;
2332 Serge 4086
 
3031 serge 4087
out:
4088
	drm_gem_object_unreference(&obj->base);
4089
unlock:
4090
	mutex_unlock(&dev->struct_mutex);
4091
	return ret;
4092
}
4093
#endif
2332 Serge 4094
 
3031 serge 4095
void i915_gem_object_init(struct drm_i915_gem_object *obj,
4096
			  const struct drm_i915_gem_object_ops *ops)
4097
{
6084 serge 4098
	int i;
4099
 
4104 Serge 4100
	INIT_LIST_HEAD(&obj->global_list);
6084 serge 4101
	for (i = 0; i < I915_NUM_RINGS; i++)
4102
		INIT_LIST_HEAD(&obj->ring_list[i]);
4104 Serge 4103
	INIT_LIST_HEAD(&obj->obj_exec_link);
4104
	INIT_LIST_HEAD(&obj->vma_list);
6084 serge 4105
	INIT_LIST_HEAD(&obj->batch_pool_link);
2332 Serge 4106
 
3031 serge 4107
	obj->ops = ops;
4108
 
4109
	obj->fence_reg = I915_FENCE_REG_NONE;
4110
	obj->madv = I915_MADV_WILLNEED;
4111
 
4112
	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
4113
}
4114
 
4115
static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
6937 serge 4116
	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
3031 serge 4117
	.get_pages = i915_gem_object_get_pages_gtt,
4118
	.put_pages = i915_gem_object_put_pages_gtt,
4119
};
4120
 
2332 Serge 4121
struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
4122
						  size_t size)
4123
{
4124
	struct drm_i915_gem_object *obj;
3031 serge 4125
	struct address_space *mapping;
3480 Serge 4126
	gfp_t mask;
2340 Serge 4127
 
3746 Serge 4128
	obj = i915_gem_object_alloc(dev);
2332 Serge 4129
	if (obj == NULL)
4130
		return NULL;
4131
 
4132
	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4104 Serge 4133
		i915_gem_object_free(obj);
2332 Serge 4134
		return NULL;
4135
	}
4136
 
4137
 
3031 serge 4138
	i915_gem_object_init(obj, &i915_gem_object_ops);
2332 Serge 4139
 
4140
	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4141
	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4142
 
3031 serge 4143
	if (HAS_LLC(dev)) {
4144
		/* On some devices, we can have the GPU use the LLC (the CPU
2332 Serge 4145
		 * cache) for about a 10% performance improvement
4146
		 * compared to uncached.  Graphics requests other than
4147
		 * display scanout are coherent with the CPU in
4148
		 * accessing this cache.  This means in this mode we
4149
		 * don't need to clflush on the CPU side, and on the
4150
		 * GPU side we only need to flush internal caches to
4151
		 * get data visible to the CPU.
4152
		 *
4153
		 * However, we maintain the display planes as UC, and so
4154
		 * need to rebind when first used as such.
4155
		 */
4156
		obj->cache_level = I915_CACHE_LLC;
4157
	} else
4158
		obj->cache_level = I915_CACHE_NONE;
4159
 
4560 Serge 4160
	trace_i915_gem_object_create(obj);
4161
 
2332 Serge 4162
	return obj;
4163
}
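
#if 0
/* Hedged sketch: minimal lifecycle of an object created with the helper
 * above.  The locking and the final unreference follow the pattern used by
 * the ioctls in this file; the helper itself is illustrative.
 */
static int example_alloc_and_release(struct drm_device *dev)
{
	struct drm_i915_gem_object *obj;
	int ret;

	obj = i915_gem_alloc_object(dev, 4096);
	if (obj == NULL)
		return -ENOMEM;

	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	drm_gem_object_unreference(&obj->base);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}
#endif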
4164
 
6283 serge 4165
static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4166
{
4167
	/* If we are the last user of the backing storage (be it shmemfs
4168
	 * pages or stolen etc), we know that the pages are going to be
4169
	 * immediately released. In this case, we can then skip copying
4170
	 * back the contents from the GPU.
4171
	 */
4172
 
4173
	if (obj->madv != I915_MADV_WILLNEED)
4174
		return false;
4175
 
4176
	if (obj->base.filp == NULL)
4177
		return true;
4178
 
4179
//        printf("filp %p\n", obj->base.filp);
4180
	shmem_file_delete(obj->base.filp);
4181
	return true;
4182
}
4183
 
3031 serge 4184
void i915_gem_free_object(struct drm_gem_object *gem_obj)
2344 Serge 4185
{
3031 serge 4186
	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
2344 Serge 4187
	struct drm_device *dev = obj->base.dev;
5060 serge 4188
	struct drm_i915_private *dev_priv = dev->dev_private;
4104 Serge 4189
	struct i915_vma *vma, *next;
2332 Serge 4190
 
4560 Serge 4191
	intel_runtime_pm_get(dev_priv);
4192
 
3031 serge 4193
	trace_i915_gem_object_destroy(obj);
4194
 
7144 serge 4195
	list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) {
5060 serge 4196
		int ret;
3031 serge 4197
 
5060 serge 4198
		vma->pin_count = 0;
4199
		ret = i915_vma_unbind(vma);
4104 Serge 4200
		if (WARN_ON(ret == -ERESTARTSYS)) {
6084 serge 4201
			bool was_interruptible;
3031 serge 4202
 
6084 serge 4203
			was_interruptible = dev_priv->mm.interruptible;
4204
			dev_priv->mm.interruptible = false;
3031 serge 4205
 
4104 Serge 4206
			WARN_ON(i915_vma_unbind(vma));
3031 serge 4207
 
6084 serge 4208
			dev_priv->mm.interruptible = was_interruptible;
4209
		}
2344 Serge 4210
	}
2332 Serge 4211
 
4104 Serge 4212
	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
4213
	 * before progressing. */
4214
	if (obj->stolen)
4215
		i915_gem_object_unpin_pages(obj);
4216
 
5060 serge 4217
	WARN_ON(obj->frontbuffer_bits);
4218
 
5354 serge 4219
	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
4220
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
4221
	    obj->tiling_mode != I915_TILING_NONE)
4222
		i915_gem_object_unpin_pages(obj);
4223
 
4104 Serge 4224
	if (WARN_ON(obj->pages_pin_count))
6084 serge 4225
		obj->pages_pin_count = 0;
6283 serge 4226
	if (discard_backing_storage(obj))
4227
		obj->madv = I915_MADV_DONTNEED;
3031 serge 4228
	i915_gem_object_put_pages(obj);
4229
//   i915_gem_object_free_mmap_offset(obj);
2332 Serge 4230
 
3243 Serge 4231
	BUG_ON(obj->pages);
2332 Serge 4232
 
6283 serge 4233
	if (obj->ops->release)
4234
		obj->ops->release(obj);
3031 serge 4235
 
2344 Serge 4236
	drm_gem_object_release(&obj->base);
4237
	i915_gem_info_remove_obj(dev_priv, obj->base.size);
2332 Serge 4238
 
2344 Serge 4239
	kfree(obj->bit_17);
4104 Serge 4240
	i915_gem_object_free(obj);
4560 Serge 4241
 
4242
	intel_runtime_pm_put(dev_priv);
2344 Serge 4243
}
2332 Serge 4244
 
4560 Serge 4245
struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
4104 Serge 4246
				     struct i915_address_space *vm)
4247
{
4560 Serge 4248
	struct i915_vma *vma;
7144 serge 4249
	list_for_each_entry(vma, &obj->vma_list, obj_link) {
6937 serge 4250
		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
4251
		    vma->vm == vm)
4560 Serge 4252
			return vma;
6084 serge 4253
	}
4254
	return NULL;
4255
}
4560 Serge 4256
 
6084 serge 4257
struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
4258
					   const struct i915_ggtt_view *view)
4259
{
4260
	struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
4261
	struct i915_vma *vma;
4262
 
4263
	if (WARN_ONCE(!view, "no view specified"))
4264
		return ERR_PTR(-EINVAL);
4265
 
7144 serge 4266
	list_for_each_entry(vma, &obj->vma_list, obj_link)
6084 serge 4267
		if (vma->vm == ggtt &&
4268
		    i915_ggtt_view_equal(&vma->ggtt_view, view))
4269
			return vma;
4560 Serge 4270
	return NULL;
4271
}
4272
 
4104 Serge 4273
void i915_gem_vma_destroy(struct i915_vma *vma)
4274
{
4275
	WARN_ON(vma->node.allocated);
4560 Serge 4276
 
4277
	/* Keep the vma as a placeholder in the execbuffer reservation lists */
4278
	if (!list_empty(&vma->exec_list))
4279
		return;
4280
 
7144 serge 4281
	if (!vma->is_ggtt)
4282
		i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm));
5354 serge 4283
 
7144 serge 4284
	list_del(&vma->obj_link);
5354 serge 4285
 
4104 Serge 4286
	kfree(vma);
4287
}
4288
 
6084 serge 4289
static void
4290
i915_gem_stop_ringbuffers(struct drm_device *dev)
4291
{
4292
	struct drm_i915_private *dev_priv = dev->dev_private;
4293
	struct intel_engine_cs *ring;
4294
	int i;
4295
 
4296
	for_each_ring(ring, dev_priv, i)
4297
		dev_priv->gt.stop_ring(ring);
4298
}
4299
 
3031 serge 4300
#if 0
4301
int
4560 Serge 4302
i915_gem_suspend(struct drm_device *dev)
2344 Serge 4303
{
5060 serge 4304
	struct drm_i915_private *dev_priv = dev->dev_private;
4560 Serge 4305
	int ret = 0;
2332 Serge 4306
 
4560 Serge 4307
	mutex_lock(&dev->struct_mutex);
3031 serge 4308
	ret = i915_gpu_idle(dev);
4560 Serge 4309
	if (ret)
4310
		goto err;
4311
 
3031 serge 4312
	i915_gem_retire_requests(dev);
4313
 
5060 serge 4314
	i915_gem_stop_ringbuffers(dev);
4560 Serge 4315
	mutex_unlock(&dev->struct_mutex);
4316
 
6084 serge 4317
	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
3263 Serge 4318
	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
5060 serge 4319
	flush_delayed_work(&dev_priv->mm.idle_work);
3031 serge 4320
 
6084 serge 4321
	/* Assert that we successfully flushed all the work and
4322
	 * reset the GPU back to its idle, low power state.
4323
	 */
4324
	WARN_ON(dev_priv->mm.busy);
4325
 
3031 serge 4326
	return 0;
4560 Serge 4327
 
4328
err:
4329
	mutex_unlock(&dev->struct_mutex);
4330
	return ret;
2344 Serge 4331
}
3031 serge 4332
#endif
2332 Serge 4333
 
6084 serge 4334
int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
3031 serge 4335
{
6084 serge 4336
	struct intel_engine_cs *ring = req->ring;
4560 Serge 4337
	struct drm_device *dev = ring->dev;
5060 serge 4338
	struct drm_i915_private *dev_priv = dev->dev_private;
4560 Serge 4339
	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
4340
	int i, ret;
2332 Serge 4341
 
4560 Serge 4342
	if (!HAS_L3_DPF(dev) || !remap_info)
4343
		return 0;
2332 Serge 4344
 
6084 serge 4345
	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
4560 Serge 4346
	if (ret)
4347
		return ret;
2332 Serge 4348
 
4560 Serge 4349
	/*
4350
	 * Note: We do not worry about the concurrent register cacheline hang
4351
	 * here because no other code should access these registers other than
4352
	 * at initialization time.
4353
	 */
6937 serge 4354
	for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) {
4560 Serge 4355
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
6937 serge 4356
		intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i));
4357
		intel_ring_emit(ring, remap_info[i]);
3031 serge 4358
	}
2332 Serge 4359
 
4560 Serge 4360
	intel_ring_advance(ring);
2332 Serge 4361
 
4560 Serge 4362
	return ret;
3031 serge 4363
}
2332 Serge 4364
 
3031 serge 4365
void i915_gem_init_swizzling(struct drm_device *dev)
4366
{
5060 serge 4367
	struct drm_i915_private *dev_priv = dev->dev_private;
2332 Serge 4368
 
3031 serge 4369
	if (INTEL_INFO(dev)->gen < 5 ||
4370
	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4371
		return;
2332 Serge 4372
 
3031 serge 4373
	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4374
				 DISP_TILE_SURFACE_SWIZZLING);
2332 Serge 4375
 
3031 serge 4376
	if (IS_GEN5(dev))
4377
		return;
2344 Serge 4378
 
3031 serge 4379
	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4380
	if (IS_GEN6(dev))
4381
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
3480 Serge 4382
	else if (IS_GEN7(dev))
4383
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4560 Serge 4384
	else if (IS_GEN8(dev))
4385
		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
3031 serge 4386
	else
3480 Serge 4387
		BUG();
3031 serge 4388
}
4389
 
5354 serge 4390
static void init_unused_ring(struct drm_device *dev, u32 base)
2332 Serge 4391
{
3480 Serge 4392
	struct drm_i915_private *dev_priv = dev->dev_private;
5354 serge 4393
 
4394
	I915_WRITE(RING_CTL(base), 0);
4395
	I915_WRITE(RING_HEAD(base), 0);
4396
	I915_WRITE(RING_TAIL(base), 0);
4397
	I915_WRITE(RING_START(base), 0);
4398
}
4399
 
4400
static void init_unused_rings(struct drm_device *dev)
4401
{
4402
	if (IS_I830(dev)) {
4403
		init_unused_ring(dev, PRB1_BASE);
4404
		init_unused_ring(dev, SRB0_BASE);
4405
		init_unused_ring(dev, SRB1_BASE);
4406
		init_unused_ring(dev, SRB2_BASE);
4407
		init_unused_ring(dev, SRB3_BASE);
4408
	} else if (IS_GEN2(dev)) {
4409
		init_unused_ring(dev, SRB0_BASE);
4410
		init_unused_ring(dev, SRB1_BASE);
4411
	} else if (IS_GEN3(dev)) {
4412
		init_unused_ring(dev, PRB1_BASE);
4413
		init_unused_ring(dev, PRB2_BASE);
4414
	}
4415
}
4416
 
4417
int i915_gem_init_rings(struct drm_device *dev)
4418
{
4419
	struct drm_i915_private *dev_priv = dev->dev_private;
2332 Serge 4420
	int ret;
2351 Serge 4421
 
2332 Serge 4422
	ret = intel_init_render_ring_buffer(dev);
4423
	if (ret)
4424
		return ret;
4425
 
6084 serge 4426
	if (HAS_BSD(dev)) {
2332 Serge 4427
		ret = intel_init_bsd_ring_buffer(dev);
4428
		if (ret)
4429
			goto cleanup_render_ring;
4430
	}
4431
 
6084 serge 4432
	if (HAS_BLT(dev)) {
2332 Serge 4433
		ret = intel_init_blt_ring_buffer(dev);
4434
		if (ret)
4435
			goto cleanup_bsd_ring;
4436
	}
4437
 
4104 Serge 4438
	if (HAS_VEBOX(dev)) {
4439
		ret = intel_init_vebox_ring_buffer(dev);
4440
		if (ret)
4441
			goto cleanup_blt_ring;
4442
	}
4443
 
5060 serge 4444
	if (HAS_BSD2(dev)) {
4445
		ret = intel_init_bsd2_ring_buffer(dev);
4446
		if (ret)
4447
			goto cleanup_vebox_ring;
4448
	}
4104 Serge 4449
 
2332 Serge 4450
	return 0;
4451
 
4104 Serge 4452
cleanup_vebox_ring:
4453
	intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
3480 Serge 4454
cleanup_blt_ring:
4455
	intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
2332 Serge 4456
cleanup_bsd_ring:
4457
	intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
4458
cleanup_render_ring:
4459
	intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
3480 Serge 4460
 
2332 Serge 4461
	return ret;
4462
}
4463
 
3480 Serge 4464
int
4465
i915_gem_init_hw(struct drm_device *dev)
3031 serge 4466
{
5060 serge 4467
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 4468
	struct intel_engine_cs *ring;
4469
	int ret, i, j;
3031 serge 4470
 
3480 Serge 4471
	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
4472
		return -EIO;
3031 serge 4473
 
6084 serge 4474
	/* Double layer security blanket, see i915_gem_init() */
4475
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4476
 
4104 Serge 4477
	if (dev_priv->ellc_size)
4478
		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
3480 Serge 4479
 
4560 Serge 4480
	if (IS_HASWELL(dev))
4481
		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
4482
			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
4483
 
3746 Serge 4484
	if (HAS_PCH_NOP(dev)) {
5060 serge 4485
		if (IS_IVYBRIDGE(dev)) {
6084 serge 4486
			u32 temp = I915_READ(GEN7_MSG_CTL);
4487
			temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
4488
			I915_WRITE(GEN7_MSG_CTL, temp);
5060 serge 4489
		} else if (INTEL_INFO(dev)->gen >= 7) {
4490
			u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
4491
			temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
4492
			I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
4493
		}
3746 Serge 4494
	}
4495
 
3480 Serge 4496
	i915_gem_init_swizzling(dev);
4497
 
6084 serge 4498
	/*
4499
	 * At least 830 can leave some of the unused rings
4500
	 * "active" (ie. head != tail) after resume which
4501
	 * will prevent c3 entry. Makes sure all unused rings
4502
	 * are totally idle.
4503
	 */
4504
	init_unused_rings(dev);
3480 Serge 4505
 
7144 serge 4506
	BUG_ON(!dev_priv->kernel_context);
4560 Serge 4507
 
6084 serge 4508
	ret = i915_ppgtt_init_hw(dev);
4509
	if (ret) {
4510
		DRM_ERROR("PPGTT enable HW failed %d\n", ret);
4511
		goto out;
4512
	}
4513
 
4514
	/* Need to do basic initialisation of all rings first: */
4515
	for_each_ring(ring, dev_priv, i) {
4516
		ret = ring->init_hw(ring);
4517
		if (ret)
4518
			goto out;
4519
	}
4520
 
4521
	/* We can't enable contexts until all firmware is loaded */
4522
	if (HAS_GUC_UCODE(dev)) {
4523
		ret = intel_guc_ucode_load(dev);
4524
		if (ret) {
6937 serge 4525
			DRM_ERROR("Failed to initialize GuC, error %d\n", ret);
4526
			ret = -EIO;
7144 serge 4527
			goto out;
6084 serge 4528
		}
4529
	}
4530
 
3480 Serge 4531
	/*
6084 serge 4532
	 * Increment the next seqno by 0x100 so we have a visible break
4533
	 * on re-initialisation
3480 Serge 4534
	 */
6084 serge 4535
	ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100);
4536
	if (ret)
4537
		goto out;
5354 serge 4538
 
6084 serge 4539
	/* Now it is safe to go back round and do everything else: */
4540
	for_each_ring(ring, dev_priv, i) {
4541
		struct drm_i915_gem_request *req;
4560 Serge 4542
 
7144 serge 4543
		req = i915_gem_request_alloc(ring, NULL);
4544
		if (IS_ERR(req)) {
4545
			ret = PTR_ERR(req);
6084 serge 4546
			i915_gem_cleanup_ringbuffer(dev);
4547
			goto out;
4548
		}
4549
 
4550
		if (ring->id == RCS) {
4551
			for (j = 0; j < NUM_L3_SLICES(dev); j++)
4552
				i915_gem_l3_remap(req, j);
4553
		}
4554
 
4555
		ret = i915_ppgtt_init_ring(req);
4556
		if (ret && ret != -EIO) {
4557
			DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
4558
			i915_gem_request_cancel(req);
4559
			i915_gem_cleanup_ringbuffer(dev);
4560
			goto out;
4561
		}
4562
 
4563
		ret = i915_gem_context_enable(req);
4564
		if (ret && ret != -EIO) {
4565
			DRM_ERROR("Context enable ring #%d failed %d\n", i, ret);
4566
			i915_gem_request_cancel(req);
4567
			i915_gem_cleanup_ringbuffer(dev);
4568
			goto out;
4569
		}
4570
 
4571
		i915_add_request_no_flush(req);
5354 serge 4572
	}
4573
 
6084 serge 4574
out:
4575
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5060 serge 4576
	return ret;
3031 serge 4577
}
4578
 
4579
int i915_gem_init(struct drm_device *dev)
4580
{
4581
	struct drm_i915_private *dev_priv = dev->dev_private;
4582
	int ret;
4583
 
5354 serge 4584
	i915.enable_execlists = intel_sanitize_enable_execlists(dev,
4585
			i915.enable_execlists);
4586
 
3031 serge 4587
	mutex_lock(&dev->struct_mutex);
3746 Serge 4588
 
5354 serge 4589
	if (!i915.enable_execlists) {
6084 serge 4590
		dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
5354 serge 4591
		dev_priv->gt.init_rings = i915_gem_init_rings;
4592
		dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer;
4593
		dev_priv->gt.stop_ring = intel_stop_ring_buffer;
4594
	} else {
6084 serge 4595
		dev_priv->gt.execbuf_submit = intel_execlists_submission;
5354 serge 4596
		dev_priv->gt.init_rings = intel_logical_rings_init;
4597
		dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup;
4598
		dev_priv->gt.stop_ring = intel_logical_ring_stop;
4599
	}
4600
 
6084 serge 4601
	/* This is just a security blanket to placate dragons.
4602
	 * On some systems, we very sporadically observe that the first TLBs
4603
	 * used by the CS may be stale, despite us poking the TLB reset. If
4604
	 * we hold the forcewake during initialisation these problems
4605
	 * just magically go away.
4606
	 */
4607
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5354 serge 4608
 
6084 serge 4609
//	ret = i915_gem_init_userptr(dev);
4610
//	if (ret)
4611
//		goto out_unlock;
3746 Serge 4612
 
6084 serge 4613
	i915_gem_init_global_gtt(dev);
4614
 
5060 serge 4615
	ret = i915_gem_context_init(dev);
6084 serge 4616
	if (ret)
4617
		goto out_unlock;
3031 serge 4618
 
6084 serge 4619
	ret = dev_priv->gt.init_rings(dev);
4620
	if (ret)
4621
		goto out_unlock;
4622
 
5060 serge 4623
	ret = i915_gem_init_hw(dev);
4624
	if (ret == -EIO) {
4625
		/* Allow ring initialisation to fail by marking the GPU as
4626
		 * wedged. But we only want to do this where the GPU is angry,
4627
		 * for all other failure, such as an allocation failure, bail.
4628
		 */
4629
		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
6084 serge 4630
		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
5060 serge 4631
		ret = 0;
4632
	}
6084 serge 4633
 
4634
out_unlock:
4635
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5060 serge 4636
	mutex_unlock(&dev->struct_mutex);
3746 Serge 4637
 
6084 serge 4638
	return ret;
3031 serge 4639
}
4640
 
2332 Serge 4641
void
4642
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4643
{
5060 serge 4644
	struct drm_i915_private *dev_priv = dev->dev_private;
4645
	struct intel_engine_cs *ring;
2332 Serge 4646
	int i;
4647
 
3031 serge 4648
	for_each_ring(ring, dev_priv, i)
5354 serge 4649
		dev_priv->gt.cleanup_ring(ring);
7144 serge 4650
 
4651
	if (i915.enable_execlists)
4652
		/*
4653
		 * Neither the BIOS, ourselves or any other kernel
4654
		 * expects the system to be in execlists mode on startup,
4655
		 * so we need to reset the GPU back to legacy mode.
4656
		 */
4657
		intel_gpu_reset(dev);
2332 Serge 4658
}
4659
 
4660
static void
5060 serge 4661
init_ring_lists(struct intel_engine_cs *ring)
2326 Serge 4662
{
6084 serge 4663
	INIT_LIST_HEAD(&ring->active_list);
4664
	INIT_LIST_HEAD(&ring->request_list);
2326 Serge 4665
}
4666
 
4667
void
7144 serge 4668
i915_gem_load_init(struct drm_device *dev)
2326 Serge 4669
{
5060 serge 4670
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 4671
	int i;
2326 Serge 4672
 
4104 Serge 4673
	INIT_LIST_HEAD(&dev_priv->vm_list);
4560 Serge 4674
	INIT_LIST_HEAD(&dev_priv->context_list);
3031 serge 4675
	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
4676
	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
6084 serge 4677
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4678
	for (i = 0; i < I915_NUM_RINGS; i++)
4679
		init_ring_lists(&dev_priv->ring[i]);
2342 Serge 4680
	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
6084 serge 4681
		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
2360 Serge 4682
	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4683
			  i915_gem_retire_work_handler);
4560 Serge 4684
	INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
4685
			  i915_gem_idle_work_handler);
3480 Serge 4686
	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
2326 Serge 4687
 
6084 serge 4688
	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
2326 Serge 4689
 
6937 serge 4690
	if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev))
3746 Serge 4691
		dev_priv->num_fence_regs = 32;
4692
	else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
6084 serge 4693
		dev_priv->num_fence_regs = 16;
4694
	else
4695
		dev_priv->num_fence_regs = 8;
2326 Serge 4696
 
6084 serge 4697
	if (intel_vgpu_active(dev))
4698
		dev_priv->num_fence_regs =
4699
				I915_READ(vgtif_reg(avail_rs.fence_num));
4700
 
4701
	/*
4702
	 * Set initial sequence number for requests.
4703
	 * Using this number allows the wraparound to happen early,
4704
	 * catching any obvious problems.
4705
	 */
4706
	dev_priv->next_seqno = ((u32)~0 - 0x1100);
4707
	dev_priv->last_seqno = ((u32)~0 - 0x1101);
4708
 
4709
	/* Initialize fence registers to zero */
3746 Serge 4710
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4711
	i915_gem_restore_fences(dev);
2326 Serge 4712
 
6084 serge 4713
	i915_gem_detect_bit_6_swizzle(dev);
7144 serge 4714
	init_waitqueue_head(&dev_priv->pending_flip_queue);
2326 Serge 4715
 
6084 serge 4716
	dev_priv->mm.interruptible = true;
2326 Serge 4717
 
5060 serge 4718
	mutex_init(&dev_priv->fb_tracking.lock);
2326 Serge 4719
}
4720
 
6084 serge 4721
void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4722
{
4723
	struct drm_i915_file_private *file_priv = file->driver_priv;
4724
 
4725
	/* Clean up our request list when the client is going away, so that
4726
	 * later retire_requests won't dereference our soon-to-be-gone
4727
	 * file_priv.
4728
	 */
4729
	spin_lock(&file_priv->mm.lock);
4730
	while (!list_empty(&file_priv->mm.request_list)) {
4731
		struct drm_i915_gem_request *request;
4732
 
4733
		request = list_first_entry(&file_priv->mm.request_list,
4734
					   struct drm_i915_gem_request,
4735
					   client_list);
4736
		list_del(&request->client_list);
4737
		request->file_priv = NULL;
4738
	}
4739
	spin_unlock(&file_priv->mm.lock);
4740
 
4741
	if (!list_empty(&file_priv->rps.link)) {
4742
		spin_lock(&to_i915(dev)->rps.client_lock);
4743
		list_del(&file_priv->rps.link);
4744
		spin_unlock(&to_i915(dev)->rps.client_lock);
4745
	}
4746
}
4747
 
5060 serge 4748
int i915_gem_open(struct drm_device *dev, struct drm_file *file)
4104 Serge 4749
{
5060 serge 4750
	struct drm_i915_file_private *file_priv;
4104 Serge 4751
	int ret;
2326 Serge 4752
 
5060 serge 4753
	DRM_DEBUG_DRIVER("\n");
4104 Serge 4754
 
5060 serge 4755
	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
4756
	if (!file_priv)
4104 Serge 4757
		return -ENOMEM;
4758
 
5060 serge 4759
	file->driver_priv = file_priv;
4760
	file_priv->dev_priv = dev->dev_private;
4761
	file_priv->file = file;
6084 serge 4762
	INIT_LIST_HEAD(&file_priv->rps.link);
4104 Serge 4763
 
5060 serge 4764
	spin_lock_init(&file_priv->mm.lock);
4765
	INIT_LIST_HEAD(&file_priv->mm.request_list);
4104 Serge 4766
 
7144 serge 4767
	file_priv->bsd_ring = -1;
4768
 
5060 serge 4769
	ret = i915_gem_context_open(dev, file);
4770
	if (ret)
4771
		kfree(file_priv);
4104 Serge 4772
 
4773
	return ret;
4774
}
4775
 
5354 serge 4776
/**
4777
 * i915_gem_track_fb - update frontbuffer tracking
6084 serge 4778
 * @old: current GEM buffer for the frontbuffer slots
4779
 * @new: new GEM buffer for the frontbuffer slots
4780
 * @frontbuffer_bits: bitmask of frontbuffer slots
5354 serge 4781
 *
4782
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
4783
 * from @old and setting them in @new. Both @old and @new can be NULL.
4784
 */
5060 serge 4785
void i915_gem_track_fb(struct drm_i915_gem_object *old,
4786
		       struct drm_i915_gem_object *new,
4787
		       unsigned frontbuffer_bits)
4104 Serge 4788
{
5060 serge 4789
	if (old) {
4790
		WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
4791
		WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
4792
		old->frontbuffer_bits &= ~frontbuffer_bits;
4104 Serge 4793
	}
4794
 
5060 serge 4795
	if (new) {
4796
		WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
4797
		WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
4798
		new->frontbuffer_bits |= frontbuffer_bits;
4104 Serge 4799
	}
4800
}
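
#if 0
/* Hedged sketch: how a page-flip path hands the frontbuffer bits from the
 * outgoing to the incoming framebuffer object.  INTEL_FRONTBUFFER_PRIMARY()
 * is assumed to be the pipe-to-bits macro from i915_drv.h; the helper
 * itself is illustrative and struct_mutex is assumed to be held.
 */
static void example_flip_tracking(struct drm_i915_gem_object *old_obj,
				  struct drm_i915_gem_object *new_obj,
				  enum pipe pipe)
{
	i915_gem_track_fb(old_obj, new_obj, INTEL_FRONTBUFFER_PRIMARY(pipe));
}
#endif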
4801
 
4802
/* All the new VM stuff */
6084 serge 4803
u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
4804
			struct i915_address_space *vm)
4104 Serge 4805
{
4806
	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
4807
	struct i915_vma *vma;
4808
 
5354 serge 4809
	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
4104 Serge 4810
 
7144 serge 4811
	list_for_each_entry(vma, &o->vma_list, obj_link) {
4812
		if (vma->is_ggtt &&
6084 serge 4813
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4814
			continue;
4104 Serge 4815
		if (vma->vm == vm)
4816
			return vma->node.start;
6084 serge 4817
	}
4104 Serge 4818
 
5060 serge 4819
	WARN(1, "%s vma for this object not found.\n",
4820
	     i915_is_ggtt(vm) ? "global" : "ppgtt");
4821
	return -1;
4104 Serge 4822
}
4823
 
6084 serge 4824
u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
4825
				  const struct i915_ggtt_view *view)
4826
{
4827
	struct i915_address_space *ggtt = i915_obj_to_ggtt(o);
4828
	struct i915_vma *vma;
4829
 
7144 serge 4830
	list_for_each_entry(vma, &o->vma_list, obj_link)
6084 serge 4831
		if (vma->vm == ggtt &&
4832
		    i915_ggtt_view_equal(&vma->ggtt_view, view))
4833
			return vma->node.start;
4834
 
4835
	WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
4836
	return -1;
4837
}
4838
 
4104 Serge 4839
bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
4840
			struct i915_address_space *vm)
4841
{
4842
	struct i915_vma *vma;
4843
 
7144 serge 4844
	list_for_each_entry(vma, &o->vma_list, obj_link) {
4845
		if (vma->is_ggtt &&
6084 serge 4846
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4847
			continue;
4104 Serge 4848
		if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
4849
			return true;
6084 serge 4850
	}
4104 Serge 4851
 
4852
	return false;
4853
}
4854
 
6084 serge 4855
bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
4856
				  const struct i915_ggtt_view *view)
4857
{
4858
	struct i915_address_space *ggtt = i915_obj_to_ggtt(o);
4859
	struct i915_vma *vma;
4860
 
7144 serge 4861
	list_for_each_entry(vma, &o->vma_list, obj_link)
6084 serge 4862
		if (vma->vm == ggtt &&
4863
		    i915_ggtt_view_equal(&vma->ggtt_view, view) &&
4864
		    drm_mm_node_allocated(&vma->node))
4865
			return true;
4866
 
4867
	return false;
4868
}
4869
 
4104 Serge 4870
bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
4871
{
4560 Serge 4872
	struct i915_vma *vma;
4104 Serge 4873
 
7144 serge 4874
	list_for_each_entry(vma, &o->vma_list, obj_link)
4560 Serge 4875
		if (drm_mm_node_allocated(&vma->node))
4104 Serge 4876
			return true;
4877
 
4878
	return false;
4879
}
4880
 
4881
unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
4882
				struct i915_address_space *vm)
4883
{
4884
	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
4885
	struct i915_vma *vma;
4886
 
5354 serge 4887
	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
4104 Serge 4888
 
4889
	BUG_ON(list_empty(&o->vma_list));
4890
 
7144 serge 4891
	list_for_each_entry(vma, &o->vma_list, obj_link) {
4892
		if (vma->is_ggtt &&
6084 serge 4893
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4894
			continue;
4104 Serge 4895
		if (vma->vm == vm)
4896
			return vma->node.size;
6084 serge 4897
	}
4104 Serge 4898
	return 0;
4899
}
4560 Serge 4900
 
6084 serge 4901
bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
4902
{
4903
	struct i915_vma *vma;
7144 serge 4904
	list_for_each_entry(vma, &obj->vma_list, obj_link)
6084 serge 4905
		if (vma->pin_count > 0)
4906
			return true;
4560 Serge 4907
 
6084 serge 4908
	return false;
4909
}
5060 serge 4910
 
6937 serge 4911
/* Like i915_gem_object_get_page(), but mark the returned page dirty */
4912
struct page *
4913
i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
4914
{
4915
	struct page *page;
4916
 
4917
	/* Only default objects have per-page dirty tracking */
4918
	if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0))
4919
		return NULL;
4920
 
4921
	page = i915_gem_object_get_page(obj, n);
4922
	set_page_dirty(page);
4923
	return page;
4924
}
4925
 
6084 serge 4926
/* Allocate a new GEM object and fill it with the supplied data */
4927
struct drm_i915_gem_object *
4928
i915_gem_object_create_from_data(struct drm_device *dev,
4929
			         const void *data, size_t size)
4104 Serge 4930
{
6084 serge 4931
	struct drm_i915_gem_object *obj;
4932
	struct sg_table *sg;
4933
	size_t bytes;
4934
	int ret;
4104 Serge 4935
 
6084 serge 4936
	obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE));
4937
	if (IS_ERR_OR_NULL(obj))
4938
		return obj;
4104 Serge 4939
 
6084 serge 4940
	ret = i915_gem_object_set_to_cpu_domain(obj, true);
4941
	if (ret)
4942
		goto fail;
4943
 
4944
	ret = i915_gem_object_get_pages(obj);
4945
	if (ret)
4946
		goto fail;
4947
 
4948
	i915_gem_object_pin_pages(obj);
4949
	sg = obj->pages;
4950
	bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
6937 serge 4951
	obj->dirty = 1;		/* Backing store is now out of date */
6084 serge 4952
	i915_gem_object_unpin_pages(obj);
4953
 
4954
	if (WARN_ON(bytes != size)) {
4955
		DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
4956
		ret = -EFAULT;
4957
		goto fail;
4958
	}
4959
 
4960
	return obj;
4961
 
4962
fail:
4963
	drm_gem_object_unreference(&obj->base);
4964
	return ERR_PTR(ret);
4104 Serge 4965
}
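
#if 0
/* Hedged sketch: wrapping the helper above to turn a small constant blob
 * into a GEM object, much as the GuC firmware loader does.  The blob and
 * the wrapper are illustrative; struct_mutex must be held because of the
 * domain and page calls made internally.
 */
static struct drm_i915_gem_object *example_blob_object(struct drm_device *dev)
{
	static const u32 blob[] = { 0xdeadbeef, 0xcafebabe };

	/* Returns the new object, or an ERR_PTR()/NULL value on failure. */
	return i915_gem_object_create_from_data(dev, blob, sizeof(blob));
}
#endif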