Rev Author Line No. Line
2326 Serge 1
/*
6084 serge 2
 * Copyright © 2008-2015 Intel Corporation
2326 Serge 3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 * IN THE SOFTWARE.
22
 *
23
 * Authors:
24
 *    Eric Anholt 
25
 *
26
 */
27
 
3031 serge 28
#include 
4280 Serge 29
#include 
3031 serge 30
#include 
2326 Serge 31
#include "i915_drv.h"
6084 serge 32
#include "i915_vgpu.h"
2351 Serge 33
#include "i915_trace.h"
2326 Serge 34
#include "intel_drv.h"
3260 Serge 35
#include 
2330 Serge 36
#include 
6660 serge 37
#include 
2326 Serge 38
#include 
6660 serge 39
#include 
40
 
6084 serge 41
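/* RQ_BUG_ON() is defined to nothing here, so the request sanity checks used
 * further down in this file compile to no-ops in this port. */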
#define RQ_BUG_ON(expr)
2326 Serge 42
 
2344 Serge 43
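/* KolibriOS compatibility shims: declarations and macros that the Linux
 * headers would normally provide for this file. */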
extern int x86_clflush_size;
6131 serge 44
#define __copy_to_user_inatomic __copy_to_user
2332 Serge 45
 
3263 Serge 46
#define PROT_READ       0x1             /* page can be read */
47
#define PROT_WRITE      0x2             /* page can be written */
48
#define MAP_SHARED      0x01            /* Share changes */
49
 
2344 Serge 50
 
5060 serge 51
 
3266 Serge 52
struct drm_i915_gem_object *get_fb_obj();
53
 
3263 Serge 54
unsigned long vm_mmap(struct file *file, unsigned long addr,
55
         unsigned long len, unsigned long prot,
56
         unsigned long flag, unsigned long offset);
57
 
2344 Serge 58
 
2332 Serge 59
#define MAX_ERRNO       4095
60
 
61
#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)
6131 serge 62
#define offset_in_page(p)       ((unsigned long)(p) & ~PAGE_MASK)
2332 Serge 63
 
64
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
6084 serge 65
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
5060 serge 66
static void
6084 serge 67
i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
68
static void
69
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
2326 Serge 70
 
4104 Serge 71
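/* The CPU cache is treated as coherent with the GPU whenever the platform has
 * a shared last-level cache (HAS_LLC) or the object uses a snooped cache
 * level; only I915_CACHE_NONE objects on non-LLC parts need clflushing, plus
 * anything pinned for scanout (see cpu_write_needs_clflush() below). */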
static bool cpu_cache_is_coherent(struct drm_device *dev,
72
				  enum i915_cache_level level)
73
{
74
	return HAS_LLC(dev) || level != I915_CACHE_NONE;
75
}
76
 
77
static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
78
{
79
	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
80
		return true;
81
 
82
	return obj->pin_display;
83
}
84
 
2332 Serge 85
/* some bookkeeping */
86
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
87
				  size_t size)
88
{
4104 Serge 89
	spin_lock(&dev_priv->mm.object_stat_lock);
2332 Serge 90
	dev_priv->mm.object_count++;
91
	dev_priv->mm.object_memory += size;
4104 Serge 92
	spin_unlock(&dev_priv->mm.object_stat_lock);
2332 Serge 93
}
94
 
95
static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
96
				     size_t size)
97
{
4104 Serge 98
	spin_lock(&dev_priv->mm.object_stat_lock);
2332 Serge 99
	dev_priv->mm.object_count--;
100
	dev_priv->mm.object_memory -= size;
4104 Serge 101
	spin_unlock(&dev_priv->mm.object_stat_lock);
2332 Serge 102
}
103
 
104
static int
3480 Serge 105
i915_gem_wait_for_error(struct i915_gpu_error *error)
2332 Serge 106
{
107
	int ret;
108
 
3480 Serge 109
#define EXIT_COND (!i915_reset_in_progress(error))
110
	if (EXIT_COND)
2332 Serge 111
		return 0;
3255 Serge 112
#if 0
3031 serge 113
	/*
114
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
115
	 * userspace. If it takes that long something really bad is going on and
116
	 * we should simply try to bail out and fail as gracefully as possible.
117
	 */
3480 Serge 118
	ret = wait_event_interruptible_timeout(error->reset_queue,
119
					       EXIT_COND,
120
					       10*HZ);
3031 serge 121
	if (ret == 0) {
122
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
123
		return -EIO;
124
	} else if (ret < 0) {
2332 Serge 125
		return ret;
3031 serge 126
	}
2332 Serge 127
 
3255 Serge 128
#endif
3480 Serge 129
#undef EXIT_COND
3255 Serge 130
 
2332 Serge 131
	return 0;
132
}
133
 
134
int i915_mutex_lock_interruptible(struct drm_device *dev)
135
{
3480 Serge 136
	struct drm_i915_private *dev_priv = dev->dev_private;
2332 Serge 137
	int ret;
138
 
3480 Serge 139
	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
2332 Serge 140
	if (ret)
141
		return ret;
142
 
3480 Serge 143
	ret = mutex_lock_interruptible(&dev->struct_mutex);
144
	if (ret)
145
		return ret;
2332 Serge 146
 
147
	WARN_ON(i915_verify_lists(dev));
148
	return 0;
149
}
150
 
151
int
152
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
153
			    struct drm_file *file)
154
{
155
	struct drm_i915_private *dev_priv = dev->dev_private;
156
	struct drm_i915_gem_get_aperture *args = data;
6084 serge 157
	struct i915_gtt *ggtt = &dev_priv->gtt;
158
	struct i915_vma *vma;
2332 Serge 159
	size_t pinned;
160
 
161
	pinned = 0;
162
	mutex_lock(&dev->struct_mutex);
6084 serge 163
	list_for_each_entry(vma, &ggtt->base.active_list, mm_list)
164
		if (vma->pin_count)
165
			pinned += vma->node.size;
166
	list_for_each_entry(vma, &ggtt->base.inactive_list, mm_list)
167
		if (vma->pin_count)
168
			pinned += vma->node.size;
2332 Serge 169
	mutex_unlock(&dev->struct_mutex);
170
 
4104 Serge 171
	args->aper_size = dev_priv->gtt.base.total;
2342 Serge 172
	args->aper_available_size = args->aper_size - pinned;
2332 Serge 173
 
174
	return 0;
175
}
176
 
6296 serge 177
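/* "phys" objects keep their backing store in a single contiguous DMA
 * allocation (obj->phys_handle); get_pages below simply wraps that allocation
 * in a one-entry sg_table so the rest of the GEM code can treat it like any
 * other page-backed object. */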
static int
178
i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
179
{
180
	char *vaddr = obj->phys_handle->vaddr;
181
	struct sg_table *st;
182
	struct scatterlist *sg;
183
	int i;
184
 
185
	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
186
		return -EINVAL;
187
 
188
 
189
	st = kmalloc(sizeof(*st), GFP_KERNEL);
190
	if (st == NULL)
191
		return -ENOMEM;
192
 
193
	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
194
		kfree(st);
195
		return -ENOMEM;
196
	}
197
 
198
	sg = st->sgl;
199
	sg->offset = 0;
200
	sg->length = obj->base.size;
201
 
202
	sg_dma_address(sg) = obj->phys_handle->busaddr;
203
	sg_dma_len(sg) = obj->base.size;
204
 
205
	obj->pages = st;
206
	return 0;
207
}
208
 
209
static void
210
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
211
{
212
	int ret;
213
 
214
	BUG_ON(obj->madv == __I915_MADV_PURGED);
215
 
216
	ret = i915_gem_object_set_to_cpu_domain(obj, true);
217
	if (ret) {
218
		/* In the event of a disaster, abandon all caches and
219
		 * hope for the best.
220
		 */
221
		WARN_ON(ret != -EIO);
222
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
223
	}
224
 
225
	if (obj->madv == I915_MADV_DONTNEED)
226
		obj->dirty = 0;
227
 
228
	if (obj->dirty) {
229
		obj->dirty = 0;
230
	}
231
 
232
	sg_free_table(obj->pages);
233
	kfree(obj->pages);
234
}
235
 
236
static void
237
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
238
{
239
	drm_pci_free(obj->base.dev, obj->phys_handle);
240
}
241
 
242
static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
243
	.get_pages = i915_gem_object_get_pages_phys,
244
	.put_pages = i915_gem_object_put_pages_phys,
245
	.release = i915_gem_object_release_phys,
246
};
247
 
248
static int
249
drop_pages(struct drm_i915_gem_object *obj)
250
{
251
	struct i915_vma *vma, *next;
252
	int ret;
253
 
254
	drm_gem_object_reference(&obj->base);
255
	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
256
		if (i915_vma_unbind(vma))
257
			break;
258
 
259
	ret = i915_gem_object_put_pages(obj);
260
	drm_gem_object_unreference(&obj->base);
261
 
262
	return ret;
263
}
264
 
265
int
266
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
267
			    int align)
268
{
269
	drm_dma_handle_t *phys;
270
	int ret;
271
 
272
	if (obj->phys_handle) {
273
		if ((unsigned long)obj->phys_handle->vaddr & (align - 1))
274
			return -EBUSY;
275
 
276
		return 0;
277
	}
278
 
279
	if (obj->madv != I915_MADV_WILLNEED)
280
		return -EFAULT;
281
 
282
	if (obj->base.filp == NULL)
283
		return -EINVAL;
284
 
285
	ret = drop_pages(obj);
286
	if (ret)
287
		return ret;
288
 
289
	/* create a new object */
290
	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
291
	if (!phys)
292
		return -ENOMEM;
293
 
294
	obj->phys_handle = phys;
295
	obj->ops = &i915_gem_phys_ops;
296
 
297
	return i915_gem_object_get_pages(obj);
298
}
3480 Serge 299
void *i915_gem_object_alloc(struct drm_device *dev)
300
{
301
	struct drm_i915_private *dev_priv = dev->dev_private;
5367 serge 302
    return kzalloc(sizeof(struct drm_i915_gem_object), 0);
3480 Serge 303
}
304
 
305
void i915_gem_object_free(struct drm_i915_gem_object *obj)
306
{
307
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
308
	kfree(obj);
309
}
310
 
3031 serge 311
static int
312
i915_gem_create(struct drm_file *file,
2332 Serge 313
		struct drm_device *dev,
314
		uint64_t size,
315
		uint32_t *handle_p)
316
{
317
	struct drm_i915_gem_object *obj;
318
	int ret;
319
	u32 handle;
320
 
321
	size = roundup(size, PAGE_SIZE);
2342 Serge 322
	if (size == 0)
323
		return -EINVAL;
2332 Serge 324
 
325
	/* Allocate the new object */
326
	obj = i915_gem_alloc_object(dev, size);
327
	if (obj == NULL)
328
		return -ENOMEM;
329
 
330
	ret = drm_gem_handle_create(file, &obj->base, &handle);
4104 Serge 331
	/* drop reference from allocate - handle holds it now */
332
	drm_gem_object_unreference_unlocked(&obj->base);
333
	if (ret)
2332 Serge 334
		return ret;
335
 
336
	*handle_p = handle;
337
	return 0;
338
}
339
 
340
int
341
i915_gem_dumb_create(struct drm_file *file,
342
		     struct drm_device *dev,
343
		     struct drm_mode_create_dumb *args)
344
{
345
	/* have to work out size/pitch and return them */
4560 Serge 346
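	/* Worked example: a 1024x768, 32bpp dumb buffer gets
	 * pitch = ALIGN(1024 * DIV_ROUND_UP(32, 8), 64) = 4096 bytes and
	 * size  = 4096 * 768 = 3145728 bytes. */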
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
2332 Serge 347
	args->size = args->pitch * args->height;
348
	return i915_gem_create(file, dev,
349
			       args->size, &args->handle);
350
}
351
 
2326 Serge 352
/**
2332 Serge 353
 * Creates a new mm object and returns a handle to it.
354
 */
355
int
356
i915_gem_create_ioctl(struct drm_device *dev, void *data,
357
		      struct drm_file *file)
358
{
359
	struct drm_i915_gem_create *args = data;
3031 serge 360
 
2332 Serge 361
	return i915_gem_create(file, dev,
362
			       args->size, &args->handle);
363
}
364
 
3031 serge 365
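/* The swizzled copy helpers below walk the buffer one 64-byte cacheline at a
 * time; XORing the GPU offset with 64 swaps the two halves of every 128-byte
 * block, which undoes (or applies) the bit-17 swizzling some older memory
 * controllers perform on object pages. */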
static inline int
366
__copy_to_user_swizzled(char __user *cpu_vaddr,
367
			const char *gpu_vaddr, int gpu_offset,
6084 serge 368
			int length)
2332 Serge 369
{
3031 serge 370
	int ret, cpu_offset = 0;
2332 Serge 371
 
3031 serge 372
	while (length > 0) {
373
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
374
		int this_length = min(cacheline_end - gpu_offset, length);
375
		int swizzled_gpu_offset = gpu_offset ^ 64;
2332 Serge 376
 
3031 serge 377
		ret = __copy_to_user(cpu_vaddr + cpu_offset,
378
				     gpu_vaddr + swizzled_gpu_offset,
379
				     this_length);
380
		if (ret)
381
			return ret + length;
2332 Serge 382
 
3031 serge 383
		cpu_offset += this_length;
384
		gpu_offset += this_length;
385
		length -= this_length;
386
	}
387
 
388
	return 0;
2332 Serge 389
}
390
 
3031 serge 391
static inline int
392
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
393
			  const char __user *cpu_vaddr,
394
			  int length)
2332 Serge 395
{
3031 serge 396
	int ret, cpu_offset = 0;
2332 Serge 397
 
398
	while (length > 0) {
399
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
400
		int this_length = min(cacheline_end - gpu_offset, length);
401
		int swizzled_gpu_offset = gpu_offset ^ 64;
402
 
3031 serge 403
		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
6084 serge 404
				       cpu_vaddr + cpu_offset,
405
				       this_length);
3031 serge 406
		if (ret)
407
			return ret + length;
408
 
2332 Serge 409
		cpu_offset += this_length;
410
		gpu_offset += this_length;
411
		length -= this_length;
412
	}
413
 
3031 serge 414
	return 0;
2332 Serge 415
}
416
 
6131 serge 417
/*
418
 * Pins the specified object's pages and synchronizes the object with
419
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
420
 * flush the object from the CPU cache.
421
 */
422
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
423
				    int *needs_clflush)
424
{
425
	int ret;
426
 
427
	*needs_clflush = 0;
428
 
429
	if (!obj->base.filp)
430
		return -EINVAL;
431
 
432
	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
433
		/* If we're not in the cpu read domain, set ourself into the gtt
434
		 * read domain and manually flush cachelines (if required). This
435
		 * optimizes for the case when the gpu will dirty the data
436
		 * anyway again before the next pread happens. */
437
		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
438
							obj->cache_level);
439
		ret = i915_gem_object_wait_rendering(obj, true);
440
		if (ret)
441
			return ret;
442
	}
443
 
444
	ret = i915_gem_object_get_pages(obj);
445
	if (ret)
446
		return ret;
447
 
448
	i915_gem_object_pin_pages(obj);
449
 
450
	return ret;
451
}
452
 
3031 serge 453
/* Per-page copy function for the shmem pread fastpath.
454
 * Flushes invalid cachelines before reading the target if
455
 * needs_clflush is set. */
2332 Serge 456
static int
3031 serge 457
shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
458
		 char __user *user_data,
459
		 bool page_do_bit17_swizzling, bool needs_clflush)
460
{
6084 serge 461
	char *vaddr;
462
	int ret;
3031 serge 463
 
464
	if (unlikely(page_do_bit17_swizzling))
465
		return -EINVAL;
466
 
6084 serge 467
	vaddr = kmap_atomic(page);
3031 serge 468
	if (needs_clflush)
469
		drm_clflush_virt_range(vaddr + shmem_page_offset,
470
				       page_length);
6084 serge 471
	ret = __copy_to_user_inatomic(user_data,
3031 serge 472
				      vaddr + shmem_page_offset,
6084 serge 473
				      page_length);
474
	kunmap_atomic(vaddr);
3031 serge 475
 
476
	return ret ? -EFAULT : 0;
477
}
478
 
479
static void
480
shmem_clflush_swizzled_range(char *addr, unsigned long length,
481
			     bool swizzled)
482
{
483
	if (unlikely(swizzled)) {
484
		unsigned long start = (unsigned long) addr;
485
		unsigned long end = (unsigned long) addr + length;
486
 
487
		/* For swizzling simply ensure that we always flush both
488
		 * channels. Lame, but simple and it works. Swizzled
489
		 * pwrite/pread is far from a hotpath - current userspace
490
		 * doesn't use it at all. */
491
		start = round_down(start, 128);
492
		end = round_up(end, 128);
493
 
494
		drm_clflush_virt_range((void *)start, end - start);
495
	} else {
496
		drm_clflush_virt_range(addr, length);
497
	}
498
 
499
}
500
 
501
/* Only difference to the fast-path function is that this can handle bit17
502
 * and uses non-atomic copy and kmap functions. */
503
static int
504
shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
505
		 char __user *user_data,
506
		 bool page_do_bit17_swizzling, bool needs_clflush)
507
{
508
	char *vaddr;
509
	int ret;
510
 
511
	vaddr = kmap(page);
512
	if (needs_clflush)
513
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
514
					     page_length,
515
					     page_do_bit17_swizzling);
516
 
517
	if (page_do_bit17_swizzling)
518
		ret = __copy_to_user_swizzled(user_data,
519
					      vaddr, shmem_page_offset,
520
					      page_length);
521
	else
522
		ret = __copy_to_user(user_data,
523
				     vaddr + shmem_page_offset,
524
				     page_length);
525
	kunmap(page);
526
 
527
	return ret ? - EFAULT : 0;
528
}
529
 
530
static int
531
i915_gem_shmem_pread(struct drm_device *dev,
6084 serge 532
		     struct drm_i915_gem_object *obj,
533
		     struct drm_i915_gem_pread *args,
534
		     struct drm_file *file)
2332 Serge 535
{
3031 serge 536
	char __user *user_data;
2332 Serge 537
	ssize_t remain;
538
	loff_t offset;
3031 serge 539
	int shmem_page_offset, page_length, ret = 0;
540
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
541
	int prefaulted = 0;
542
	int needs_clflush = 0;
3746 Serge 543
	struct sg_page_iter sg_iter;
2332 Serge 544
 
3746 Serge 545
	user_data = to_user_ptr(args->data_ptr);
2332 Serge 546
	remain = args->size;
547
 
3031 serge 548
	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
549
 
5060 serge 550
	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
3031 serge 551
	if (ret)
552
		return ret;
553
 
2332 Serge 554
	offset = args->offset;
555
 
3746 Serge 556
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
557
			 offset >> PAGE_SHIFT) {
558
		struct page *page = sg_page_iter_page(&sg_iter);
2332 Serge 559
 
3031 serge 560
		if (remain <= 0)
561
			break;
562
 
2332 Serge 563
		/* Operation in this page
564
		 *
3031 serge 565
		 * shmem_page_offset = offset within page in shmem file
2332 Serge 566
		 * page_length = bytes to copy for this page
567
		 */
3031 serge 568
		shmem_page_offset = offset_in_page(offset);
2332 Serge 569
		page_length = remain;
3031 serge 570
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
571
			page_length = PAGE_SIZE - shmem_page_offset;
2332 Serge 572
 
3031 serge 573
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
574
			(page_to_phys(page) & (1 << 17)) != 0;
2332 Serge 575
 
3031 serge 576
		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
577
				       user_data, page_do_bit17_swizzling,
578
				       needs_clflush);
579
		if (ret == 0)
580
			goto next_page;
2332 Serge 581
 
3031 serge 582
		mutex_unlock(&dev->struct_mutex);
583
 
584
		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
585
				       user_data, page_do_bit17_swizzling,
586
				       needs_clflush);
587
 
588
		mutex_lock(&dev->struct_mutex);
589
 
2332 Serge 590
		if (ret)
3031 serge 591
			goto out;
2332 Serge 592
 
5060 serge 593
next_page:
2332 Serge 594
		remain -= page_length;
595
		user_data += page_length;
596
		offset += page_length;
597
	}
598
 
3031 serge 599
out:
600
	i915_gem_object_unpin_pages(obj);
601
 
602
	return ret;
2332 Serge 603
}
604
 
605
/**
3031 serge 606
 * Reads data from the object referenced by handle.
607
 *
608
 * On error, the contents of *data are undefined.
2332 Serge 609
 */
3031 serge 610
int
611
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
612
		     struct drm_file *file)
613
{
614
	struct drm_i915_gem_pread *args = data;
615
	struct drm_i915_gem_object *obj;
616
	int ret = 0;
617
 
618
	if (args->size == 0)
619
		return 0;
620
 
621
	ret = i915_mutex_lock_interruptible(dev);
622
	if (ret)
623
		return ret;
624
 
625
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
626
	if (&obj->base == NULL) {
627
		ret = -ENOENT;
628
		goto unlock;
629
	}
630
 
631
	/* Bounds check source.  */
632
	if (args->offset > obj->base.size ||
633
	    args->size > obj->base.size - args->offset) {
634
		ret = -EINVAL;
635
		goto out;
636
	}
637
 
638
	/* prime objects have no backing filp to GEM pread/pwrite
639
	 * pages from.
640
	 */
641
	if (!obj->base.filp) {
642
		ret = -EINVAL;
643
		goto out;
644
	}
645
 
646
	trace_i915_gem_object_pread(obj, args->offset, args->size);
647
 
648
	ret = i915_gem_shmem_pread(dev, obj, args, file);
649
 
650
out:
651
	drm_gem_object_unreference(&obj->base);
652
unlock:
653
	mutex_unlock(&dev->struct_mutex);
654
	return ret;
655
}
656
 
657
/* This is the fast write path which cannot handle
658
 * page faults in the source data
659
 */
660
 
661
 
662
/**
663
 * This is the fast pwrite path, where we copy the data directly from the
664
 * user into the GTT, uncached.
665
 */
2332 Serge 666
static int
3031 serge 667
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
668
			 struct drm_i915_gem_object *obj,
669
			 struct drm_i915_gem_pwrite *args,
670
			 struct drm_file *file)
2332 Serge 671
{
5060 serge 672
	struct drm_i915_private *dev_priv = dev->dev_private;
2332 Serge 673
	ssize_t remain;
3031 serge 674
	loff_t offset, page_base;
675
	char __user *user_data;
676
	int page_offset, page_length, ret;
2332 Serge 677
 
5060 serge 678
	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
3031 serge 679
	if (ret)
680
		goto out;
681
 
682
	ret = i915_gem_object_set_to_gtt_domain(obj, true);
683
	if (ret)
684
		goto out_unpin;
685
 
686
	ret = i915_gem_object_put_fence(obj);
687
	if (ret)
688
		goto out_unpin;
689
 
4539 Serge 690
	user_data = to_user_ptr(args->data_ptr);
2332 Serge 691
	remain = args->size;
692
 
4104 Serge 693
	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
2332 Serge 694
 
6084 serge 695
	intel_fb_obj_invalidate(obj, ORIGIN_GTT);
696
 
3031 serge 697
	while (remain > 0) {
698
		/* Operation in this page
699
		 *
700
		 * page_base = page offset within aperture
701
		 * page_offset = offset within page
702
		 * page_length = bytes to copy for this page
703
		 */
704
		page_base = offset & PAGE_MASK;
705
		page_offset = offset_in_page(offset);
706
		page_length = remain;
707
		if ((page_offset + remain) > PAGE_SIZE)
708
			page_length = PAGE_SIZE - page_offset;
2332 Serge 709
 
6131 serge 710
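		/* KolibriOS-specific fast path: MapPage() appears to remap the
		 * aperture page at mappable_base + page_base into the window
		 * at dev_priv->gtt.mappable as write-combining (PG_WRITEC), so
		 * the memcpy() below writes straight into the GTT aperture. */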
		MapPage(dev_priv->gtt.mappable,
711
				dev_priv->gtt.mappable_base+page_base, PG_WRITEC|PG_SW);
3031 serge 712
 
6131 serge 713
		memcpy((char*)dev_priv->gtt.mappable+page_offset, user_data, page_length);
3260 Serge 714
 
3031 serge 715
		remain -= page_length;
716
		user_data += page_length;
717
		offset += page_length;
2332 Serge 718
	}
719
 
6084 serge 720
out_flush:
721
	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
3031 serge 722
out_unpin:
5060 serge 723
	i915_gem_object_ggtt_unpin(obj);
3031 serge 724
out:
6084 serge 725
	return ret;
3031 serge 726
}
727
 
728
/* Per-page copy function for the shmem pwrite fastpath.
729
 * Flushes invalid cachelines before writing to the target if
730
 * needs_clflush_before is set and flushes out any written cachelines after
731
 * writing if needs_clflush is set. */
732
static int
733
shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
734
		  char __user *user_data,
735
		  bool page_do_bit17_swizzling,
736
		  bool needs_clflush_before,
737
		  bool needs_clflush_after)
738
{
739
	char *vaddr;
5354 serge 740
	int ret = 0;	/* the plain memcpy() below cannot fault, so ret stays 0 */
3031 serge 741
 
742
	if (unlikely(page_do_bit17_swizzling))
743
		return -EINVAL;
744
 
5354 serge 745
	vaddr = kmap_atomic(page);
3031 serge 746
	if (needs_clflush_before)
747
		drm_clflush_virt_range(vaddr + shmem_page_offset,
748
				       page_length);
3260 Serge 749
	memcpy(vaddr + shmem_page_offset,
3031 serge 750
						user_data,
751
						page_length);
752
	if (needs_clflush_after)
753
		drm_clflush_virt_range(vaddr + shmem_page_offset,
754
				       page_length);
5354 serge 755
	kunmap_atomic(vaddr);
3031 serge 756
 
757
	return ret ? -EFAULT : 0;
758
}
759
 
760
/* Only difference to the fast-path function is that this can handle bit17
761
 * and uses non-atomic copy and kmap functions. */
762
static int
763
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
764
		  char __user *user_data,
765
		  bool page_do_bit17_swizzling,
766
		  bool needs_clflush_before,
767
		  bool needs_clflush_after)
768
{
769
	char *vaddr;
770
	int ret;
771
 
772
	vaddr = kmap(page);
773
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
774
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
775
					     page_length,
776
					     page_do_bit17_swizzling);
777
	if (page_do_bit17_swizzling)
778
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
779
						user_data,
780
						page_length);
781
	else
782
		ret = __copy_from_user(vaddr + shmem_page_offset,
783
				       user_data,
784
				       page_length);
785
	if (needs_clflush_after)
786
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
787
					     page_length,
788
					     page_do_bit17_swizzling);
789
	kunmap(page);
790
 
791
	return ret ? -EFAULT : 0;
792
}
793
 
794
static int
795
i915_gem_shmem_pwrite(struct drm_device *dev,
796
		      struct drm_i915_gem_object *obj,
797
		      struct drm_i915_gem_pwrite *args,
798
		      struct drm_file *file)
799
{
800
	ssize_t remain;
801
	loff_t offset;
802
	char __user *user_data;
803
	int shmem_page_offset, page_length, ret = 0;
804
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
805
	int hit_slowpath = 0;
806
	int needs_clflush_after = 0;
807
	int needs_clflush_before = 0;
3746 Serge 808
	struct sg_page_iter sg_iter;
3031 serge 809
 
3746 Serge 810
	user_data = to_user_ptr(args->data_ptr);
3031 serge 811
	remain = args->size;
812
 
813
	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
814
 
815
	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
816
		/* If we're not in the cpu write domain, set ourself into the gtt
817
		 * write domain and manually flush cachelines (if required). This
818
		 * optimizes for the case when the gpu will use the data
819
		 * right away and we therefore have to clflush anyway. */
4104 Serge 820
		needs_clflush_after = cpu_write_needs_clflush(obj);
4560 Serge 821
		ret = i915_gem_object_wait_rendering(obj, false);
6084 serge 822
		if (ret)
823
			return ret;
824
	}
4104 Serge 825
	/* Same trick applies to invalidate partially written cachelines read
826
	 * before writing. */
827
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
828
		needs_clflush_before =
829
			!cpu_cache_is_coherent(dev, obj->cache_level);
3031 serge 830
 
831
	ret = i915_gem_object_get_pages(obj);
2332 Serge 832
	if (ret)
3031 serge 833
		return ret;
2332 Serge 834
 
6084 serge 835
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
836
 
3031 serge 837
	i915_gem_object_pin_pages(obj);
2332 Serge 838
 
839
	offset = args->offset;
3031 serge 840
	obj->dirty = 1;
2332 Serge 841
 
3746 Serge 842
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
843
			 offset >> PAGE_SHIFT) {
844
		struct page *page = sg_page_iter_page(&sg_iter);
3031 serge 845
		int partial_cacheline_write;
2332 Serge 846
 
3031 serge 847
		if (remain <= 0)
848
			break;
849
 
2332 Serge 850
		/* Operation in this page
851
		 *
852
		 * shmem_page_offset = offset within page in shmem file
853
		 * page_length = bytes to copy for this page
854
		 */
855
		shmem_page_offset = offset_in_page(offset);
856
 
857
		page_length = remain;
858
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
859
			page_length = PAGE_SIZE - shmem_page_offset;
860
 
3031 serge 861
		/* If we don't overwrite a cacheline completely we need to be
862
		 * careful to have up-to-date data by first clflushing. Don't
863
		 * overcomplicate things and flush the entire patch. */
864
		partial_cacheline_write = needs_clflush_before &&
865
			((shmem_page_offset | page_length)
3260 Serge 866
				& (x86_clflush_size - 1));
2332 Serge 867
 
3031 serge 868
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
869
			(page_to_phys(page) & (1 << 17)) != 0;
2332 Serge 870
 
3031 serge 871
		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
872
					user_data, page_do_bit17_swizzling,
873
					partial_cacheline_write,
874
					needs_clflush_after);
875
		if (ret == 0)
876
			goto next_page;
877
 
878
		hit_slowpath = 1;
879
		mutex_unlock(&dev->struct_mutex);
6296 serge 880
		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
881
					user_data, page_do_bit17_swizzling,
882
					partial_cacheline_write,
883
					needs_clflush_after);
3031 serge 884
 
885
		mutex_lock(&dev->struct_mutex);
886
 
887
		if (ret)
888
			goto out;
889
 
5354 serge 890
next_page:
2332 Serge 891
		remain -= page_length;
3031 serge 892
		user_data += page_length;
2332 Serge 893
		offset += page_length;
894
	}
895
 
896
out:
3031 serge 897
	i915_gem_object_unpin_pages(obj);
898
 
899
	if (hit_slowpath) {
3480 Serge 900
		/*
901
		 * Fixup: Flush cpu caches in case we didn't flush the dirty
902
		 * cachelines in-line while writing and the object moved
903
		 * out of the cpu write domain while we've dropped the lock.
904
		 */
905
		if (!needs_clflush_after &&
906
		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
4104 Serge 907
			if (i915_gem_clflush_object(obj, obj->pin_display))
6084 serge 908
				needs_clflush_after = true;
3031 serge 909
		}
2332 Serge 910
	}
911
 
3031 serge 912
	if (needs_clflush_after)
3243 Serge 913
		i915_gem_chipset_flush(dev);
6084 serge 914
	else
915
		obj->cache_dirty = true;
3031 serge 916
 
6084 serge 917
	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
2332 Serge 918
	return ret;
919
}
3031 serge 920
 
921
/**
922
 * Writes data to the object referenced by handle.
923
 *
924
 * On error, the contents of the buffer that were to be modified are undefined.
925
 */
926
int
927
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
928
		      struct drm_file *file)
929
{
6084 serge 930
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 931
	struct drm_i915_gem_pwrite *args = data;
932
	struct drm_i915_gem_object *obj;
933
	int ret;
934
 
4104 Serge 935
	if (args->size == 0)
936
		return 0;
937
 
6084 serge 938
	intel_runtime_pm_get(dev_priv);
3480 Serge 939
 
3031 serge 940
	ret = i915_mutex_lock_interruptible(dev);
941
	if (ret)
6084 serge 942
		goto put_rpm;
3031 serge 943
 
944
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
945
	if (&obj->base == NULL) {
946
		ret = -ENOENT;
947
		goto unlock;
948
	}
949
 
950
	/* Bounds check destination. */
951
	if (args->offset > obj->base.size ||
952
	    args->size > obj->base.size - args->offset) {
953
		ret = -EINVAL;
954
		goto out;
955
	}
956
 
957
	/* prime objects have no backing filp to GEM pread/pwrite
958
	 * pages from.
959
	 */
960
	if (!obj->base.filp) {
961
		ret = -EINVAL;
962
		goto out;
963
	}
964
 
965
	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
966
 
967
	ret = -EFAULT;
968
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
969
	 * it would end up going through the fenced access, and we'll get
970
	 * different detiling behavior between reading and writing.
971
	 * pread/pwrite currently are reading and writing from the CPU
972
	 * perspective, requiring manual detiling by the client.
973
	 */
4104 Serge 974
	if (obj->tiling_mode == I915_TILING_NONE &&
975
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
976
	    cpu_write_needs_clflush(obj)) {
3031 serge 977
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
978
		/* Note that the gtt paths might fail with non-page-backed user
979
		 * pointers (e.g. gtt mappings when moving data between
980
		 * textures). Fallback to the shmem path in that case. */
981
	}
982
 
6296 serge 983
	if (ret == -EFAULT || ret == -ENOSPC) {
6084 serge 984
		ret = i915_gem_shmem_pwrite(dev, obj, args, file);
6296 serge 985
	}
3031 serge 986
 
987
out:
988
	drm_gem_object_unreference(&obj->base);
989
unlock:
990
	mutex_unlock(&dev->struct_mutex);
6084 serge 991
put_rpm:
992
	intel_runtime_pm_put(dev_priv);
993
 
3031 serge 994
	return ret;
995
}
996
 
997
int
3480 Serge 998
i915_gem_check_wedge(struct i915_gpu_error *error,
3031 serge 999
		     bool interruptible)
1000
{
3480 Serge 1001
	if (i915_reset_in_progress(error)) {
3031 serge 1002
		/* Non-interruptible callers can't handle -EAGAIN, hence return
1003
		 * -EIO unconditionally for these. */
1004
		if (!interruptible)
1005
			return -EIO;
2332 Serge 1006
 
3480 Serge 1007
		/* Recovery complete, but the reset failed ... */
1008
		if (i915_terminally_wedged(error))
3031 serge 1009
			return -EIO;
2332 Serge 1010
 
6084 serge 1011
		/*
1012
		 * Check if GPU Reset is in progress - we need intel_ring_begin
1013
		 * to work properly to reinit the hw state while the gpu is
1014
		 * still marked as reset-in-progress. Handle this with a flag.
1015
		 */
1016
		if (!error->reload_in_reset)
1017
			return -EAGAIN;
3031 serge 1018
	}
2332 Serge 1019
 
3031 serge 1020
	return 0;
1021
}
2332 Serge 1022
 
4560 Serge 1023
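/* The deferred-wakeup timer callback is stubbed out in this port; the wait
 * loop in __i915_wait_request() polls with WaitEventTimeout() instead. */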
static void fake_irq(unsigned long data)
1024
{
1025
//	wake_up_process((struct task_struct *)data);
1026
}
1027
 
1028
static bool missed_irq(struct drm_i915_private *dev_priv,
5060 serge 1029
		       struct intel_engine_cs *ring)
4560 Serge 1030
{
1031
	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
1032
}
1033
 
6084 serge 1034
static unsigned long local_clock_us(unsigned *cpu)
4560 Serge 1035
{
6084 serge 1036
	unsigned long t;
1037
 
1038
	/* Cheaply and approximately convert from nanoseconds to microseconds.
1039
	 * The result and subsequent calculations are also defined in the same
1040
	 * approximate microseconds units. The principal source of timing
1041
	 * error here is from the simple truncation.
1042
	 *
1043
	 * Note that local_clock() is only defined wrt to the current CPU;
1044
	 * the comparisons are no longer valid if we switch CPUs. Instead of
1045
	 * blocking preemption for the entire busywait, we can detect the CPU
1046
	 * switch and use that as indicator of system load and a reason to
1047
	 * stop busywaiting, see busywait_stop().
1048
	 */
1049
	t = GetClockNs() >> 10;
1050
 
1051
	return t;
1052
}
1053
 
1054
static bool busywait_stop(unsigned long timeout, unsigned cpu)
1055
{
1056
	unsigned this_cpu = 0;
1057
 
1058
	if (time_after(local_clock_us(&this_cpu), timeout))
4560 Serge 1059
		return true;
1060
 
6084 serge 1061
	return this_cpu != cpu;
4560 Serge 1062
}
1063
 
6084 serge 1064
static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
1065
{
1066
	unsigned long timeout;
1067
	unsigned cpu;
1068
 
1069
	/* When waiting for high frequency requests, e.g. during synchronous
1070
	 * rendering split between the CPU and GPU, the finite amount of time
1071
	 * required to set up the irq and wait upon it limits the response
1072
	 * rate. By busywaiting on the request completion for a short while we
1073
	 * can service the high frequency waits as quick as possible. However,
1074
	 * if it is a slow request, we want to sleep as quickly as possible.
1075
	 * The tradeoff between waiting and sleeping is roughly the time it
1076
	 * takes to sleep on a request, on the order of a microsecond.
1077
	 */
1078
 
1079
	if (req->ring->irq_refcount)
1080
		return -EBUSY;
1081
 
1082
	/* Only spin if we know the GPU is processing this request */
1083
	if (!i915_gem_request_started(req, true))
1084
		return -EAGAIN;
1085
 
1086
	timeout = local_clock_us(&cpu) + 5;
1087
	while (1 /*!need_resched()*/) {
1088
		if (i915_gem_request_completed(req, true))
1089
			return 0;
1090
 
1091
		if (busywait_stop(timeout, cpu))
1092
			break;
1093
 
1094
		cpu_relax_lowlatency();
1095
	}
1096
 
1097
	if (i915_gem_request_completed(req, false))
1098
		return 0;
1099
 
1100
	return -EAGAIN;
1101
}
1102
 
3031 serge 1103
/**
6084 serge 1104
 * __i915_wait_request - wait until execution of request has finished
1105
 * @req: duh!
1106
 * @reset_counter: reset sequence associated with the given request
3031 serge 1107
 * @interruptible: do an interruptible wait (normally yes)
1108
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
1109
 *
3480 Serge 1110
 * Note: It is of utmost importance that the passed in seqno and reset_counter
1111
 * values have been read by the caller in an smp safe manner. Where read-side
1112
 * locks are involved, it is sufficient to read the reset_counter before
1113
 * unlocking the lock that protects the seqno. For lockless tricks, the
1114
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
1115
 * inserted.
1116
 *
6084 serge 1117
 * Returns 0 if the request was found within the allotted time. Else returns the
3031 serge 1118
 * errno with remaining time filled in timeout argument.
1119
 */
6084 serge 1120
int __i915_wait_request(struct drm_i915_gem_request *req,
3480 Serge 1121
			unsigned reset_counter,
4560 Serge 1122
			bool interruptible,
5060 serge 1123
			s64 *timeout,
6084 serge 1124
			struct intel_rps_client *rps)
3031 serge 1125
{
6084 serge 1126
	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
5060 serge 1127
	struct drm_device *dev = ring->dev;
1128
	struct drm_i915_private *dev_priv = dev->dev_private;
4560 Serge 1129
	const bool irq_test_in_progress =
1130
		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
6084 serge 1131
	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
6088 serge 1132
	wait_queue_t wait;
5060 serge 1133
	unsigned long timeout_expire;
1134
	s64 before, now;
3031 serge 1135
	int ret;
2332 Serge 1136
 
5060 serge 1137
	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
4104 Serge 1138
 
6084 serge 1139
	if (list_empty(&req->list))
3031 serge 1140
		return 0;
2332 Serge 1141
 
6084 serge 1142
	if (i915_gem_request_completed(req, true))
1143
		return 0;
2332 Serge 1144
 
6084 serge 1145
	timeout_expire = 0;
1146
	if (timeout) {
1147
		if (WARN_ON(*timeout < 0))
1148
			return -EINVAL;
1149
 
1150
		if (*timeout == 0)
1151
			return -ETIME;
1152
 
1153
		timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
3031 serge 1154
	}
2332 Serge 1155
 
6084 serge 1156
	if (INTEL_INFO(dev_priv)->gen >= 6)
1157
		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
2332 Serge 1158
 
6084 serge 1159
	/* Record current time in case interrupted by signal, or wedged */
1160
	trace_i915_gem_request_wait_begin(req);
1161
	before = ktime_get_raw_ns();
1162
 
1163
	/* Optimistic spin for the next jiffie before touching IRQs */
1164
	ret = __i915_spin_request(req, state);
1165
	if (ret == 0)
1166
		goto out;
1167
 
1168
	if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
1169
		ret = -ENODEV;
1170
		goto out;
1171
	}
1172
 
6088 serge 1173
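	/* KolibriOS stand-in for the upstream prepare_to_wait() loop: a bare
	 * wait_queue_t is hooked onto ring->irq_queue by hand and paired with
	 * a kernel event (CreateEvent/WaitEventTimeout) that the interrupt
	 * handler presumably signals when the ring advances. */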
	INIT_LIST_HEAD(&wait.task_list);
1174
	wait.evnt = CreateEvent(NULL, MANUAL_DESTROY);
2332 Serge 1175
 
4560 Serge 1176
	for (;;) {
6103 serge 1177
		unsigned long flags;
4560 Serge 1178
 
3480 Serge 1179
		/* We need to check whether any gpu reset happened in between
1180
		 * the caller grabbing the seqno and now ... */
4560 Serge 1181
		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
1182
			/* ... but upgrade the -EAGAIN to an -EIO if the gpu
1183
			 * is truly gone. */
1184
			ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1185
			if (ret == 0)
1186
				ret = -EAGAIN;
1187
			break;
1188
		}
3480 Serge 1189
 
6084 serge 1190
		if (i915_gem_request_completed(req, false)) {
4560 Serge 1191
			ret = 0;
1192
			break;
1193
		}
2332 Serge 1194
 
6088 serge 1195
		if (timeout && time_after_eq(jiffies, timeout_expire)) {
4560 Serge 1196
			ret = -ETIME;
1197
			break;
1198
		}
2332 Serge 1199
 
4560 Serge 1200
        spin_lock_irqsave(&ring->irq_queue.lock, flags);
6088 serge 1201
        if (list_empty(&wait.task_list))
1202
            __add_wait_queue(&ring->irq_queue, &wait);
4560 Serge 1203
        spin_unlock_irqrestore(&ring->irq_queue.lock, flags);
1204
 
6088 serge 1205
        WaitEventTimeout(wait.evnt, 1);
4560 Serge 1206
 
6088 serge 1207
        if (!list_empty(&wait.task_list)) {
4560 Serge 1208
            spin_lock_irqsave(&ring->irq_queue.lock, flags);
6088 serge 1209
            list_del_init(&wait.task_list);
4560 Serge 1210
            spin_unlock_irqrestore(&ring->irq_queue.lock, flags);
1211
        }
1212
 
6088 serge 1213
	}
4560 Serge 1214
 
1215
	if (!irq_test_in_progress)
6084 serge 1216
		ring->irq_put(ring);
2332 Serge 1217
 
6088 serge 1218
    DestroyEvent(wait.evnt);
1219
 
6084 serge 1220
out:
1221
	now = ktime_get_raw_ns();
1222
	trace_i915_gem_request_wait_end(req);
1223
 
1224
	if (timeout) {
1225
		s64 tres = *timeout - (now - before);
1226
 
1227
		*timeout = tres < 0 ? 0 : tres;
1228
 
1229
		/*
1230
		 * Apparently ktime isn't accurate enough and occasionally has a
1231
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
1232
		 * things up to make the test happy. We allow up to 1 jiffy.
1233
		 *
1234
		 * This is a regression from the timespec->ktime conversion.
1235
		 */
1236
		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
1237
			*timeout = 0;
1238
	}
1239
 
4560 Serge 1240
	return ret;
3031 serge 1241
}
2332 Serge 1242
 
6084 serge 1243
int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
1244
				   struct drm_file *file)
1245
{
1246
	struct drm_i915_private *dev_private;
1247
	struct drm_i915_file_private *file_priv;
1248
 
1249
	WARN_ON(!req || !file || req->file_priv);
1250
 
1251
	if (!req || !file)
1252
		return -EINVAL;
1253
 
1254
	if (req->file_priv)
1255
		return -EINVAL;
1256
 
1257
	dev_private = req->ring->dev->dev_private;
1258
	file_priv = file->driver_priv;
1259
 
1260
	spin_lock(&file_priv->mm.lock);
1261
	req->file_priv = file_priv;
1262
	list_add_tail(&req->client_list, &file_priv->mm.request_list);
1263
	spin_unlock(&file_priv->mm.lock);
1264
 
6660 serge 1265
    req->pid = (struct pid*)1;
6084 serge 1266
 
1267
	return 0;
1268
}
1269
 
1270
static inline void
1271
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1272
{
1273
	struct drm_i915_file_private *file_priv = request->file_priv;
1274
 
1275
	if (!file_priv)
1276
		return;
1277
 
1278
	spin_lock(&file_priv->mm.lock);
1279
	list_del(&request->client_list);
1280
	request->file_priv = NULL;
1281
	spin_unlock(&file_priv->mm.lock);
6660 serge 1282
	request->pid = NULL;
6084 serge 1283
}
1284
 
1285
static void i915_gem_request_retire(struct drm_i915_gem_request *request)
1286
{
1287
	trace_i915_gem_request_retire(request);
1288
 
1289
	/* We know the GPU must have read the request to have
1290
	 * sent us the seqno + interrupt, so use the position
1291
	 * of tail of the request to update the last known position
1292
	 * of the GPU head.
1293
	 *
1294
	 * Note this requires that we are always called in request
1295
	 * completion order.
1296
	 */
1297
	request->ringbuf->last_retired_head = request->postfix;
1298
 
1299
	list_del_init(&request->list);
1300
	i915_gem_request_remove_from_client(request);
1301
 
1302
	i915_gem_request_unreference(request);
1303
}
1304
 
1305
static void
1306
__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
1307
{
1308
	struct intel_engine_cs *engine = req->ring;
1309
	struct drm_i915_gem_request *tmp;
1310
 
6660 serge 1311
	lockdep_assert_held(&engine->dev->struct_mutex);
6084 serge 1312
 
1313
	if (list_empty(&req->list))
1314
		return;
1315
 
1316
	do {
1317
		tmp = list_first_entry(&engine->request_list,
1318
				       typeof(*tmp), list);
1319
 
1320
		i915_gem_request_retire(tmp);
1321
	} while (tmp != req);
1322
 
1323
	WARN_ON(i915_verify_lists(engine->dev));
1324
}
1325
 
3031 serge 1326
/**
6084 serge 1327
 * Waits for a request to be signaled, and cleans up the
3031 serge 1328
 * request and object lists appropriately for that event.
1329
 */
1330
int
6084 serge 1331
i915_wait_request(struct drm_i915_gem_request *req)
3031 serge 1332
{
6084 serge 1333
	struct drm_device *dev;
1334
	struct drm_i915_private *dev_priv;
1335
	bool interruptible;
3031 serge 1336
	int ret;
2332 Serge 1337
 
6084 serge 1338
	BUG_ON(req == NULL);
1339
 
1340
	dev = req->ring->dev;
1341
	dev_priv = dev->dev_private;
1342
	interruptible = dev_priv->mm.interruptible;
1343
 
3031 serge 1344
	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
2332 Serge 1345
 
3480 Serge 1346
	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
3031 serge 1347
	if (ret)
1348
		return ret;
2332 Serge 1349
 
6084 serge 1350
	ret = __i915_wait_request(req,
1351
				  atomic_read(&dev_priv->gpu_error.reset_counter),
1352
				  interruptible, NULL, NULL);
3031 serge 1353
	if (ret)
1354
		return ret;
2332 Serge 1355
 
6084 serge 1356
	__i915_gem_request_retire__upto(req);
4104 Serge 1357
	return 0;
1358
}
1359
 
3031 serge 1360
/**
1361
 * Ensures that all rendering to the object has completed and the object is
1362
 * safe to unbind from the GTT or access from the CPU.
1363
 */
6084 serge 1364
int
3031 serge 1365
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1366
			       bool readonly)
1367
{
6084 serge 1368
	int ret, i;
2332 Serge 1369
 
6084 serge 1370
	if (!obj->active)
3031 serge 1371
		return 0;
2332 Serge 1372
 
6084 serge 1373
	if (readonly) {
1374
		if (obj->last_write_req != NULL) {
1375
			ret = i915_wait_request(obj->last_write_req);
1376
			if (ret)
1377
				return ret;
2332 Serge 1378
 
6084 serge 1379
			i = obj->last_write_req->ring->id;
1380
			if (obj->last_read_req[i] == obj->last_write_req)
1381
				i915_gem_object_retire__read(obj, i);
1382
			else
1383
				i915_gem_object_retire__write(obj);
1384
		}
1385
	} else {
1386
		for (i = 0; i < I915_NUM_RINGS; i++) {
1387
			if (obj->last_read_req[i] == NULL)
1388
				continue;
1389
 
1390
			ret = i915_wait_request(obj->last_read_req[i]);
1391
			if (ret)
1392
				return ret;
1393
 
1394
			i915_gem_object_retire__read(obj, i);
1395
		}
1396
		RQ_BUG_ON(obj->active);
1397
	}
1398
 
1399
	return 0;
3031 serge 1400
}
2332 Serge 1401
 
6084 serge 1402
static void
1403
i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
1404
			       struct drm_i915_gem_request *req)
1405
{
1406
	int ring = req->ring->id;
1407
 
1408
	if (obj->last_read_req[ring] == req)
1409
		i915_gem_object_retire__read(obj, ring);
1410
	else if (obj->last_write_req == req)
1411
		i915_gem_object_retire__write(obj);
1412
 
1413
	__i915_gem_request_retire__upto(req);
1414
}
1415
 
3260 Serge 1416
/* A nonblocking variant of the above wait. This is a highly dangerous routine
1417
 * as the object state may change during this call.
1418
 */
1419
static __must_check int
1420
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
6084 serge 1421
					    struct intel_rps_client *rps,
3260 Serge 1422
					    bool readonly)
1423
{
1424
	struct drm_device *dev = obj->base.dev;
1425
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 1426
	struct drm_i915_gem_request *requests[I915_NUM_RINGS];
3480 Serge 1427
	unsigned reset_counter;
6084 serge 1428
	int ret, i, n = 0;
2332 Serge 1429
 
3260 Serge 1430
	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1431
	BUG_ON(!dev_priv->mm.interruptible);
2332 Serge 1432
 
6084 serge 1433
	if (!obj->active)
3260 Serge 1434
		return 0;
2332 Serge 1435
 
3480 Serge 1436
	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
3260 Serge 1437
	if (ret)
1438
		return ret;
2332 Serge 1439
 
6084 serge 1440
	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
2332 Serge 1441
 
6084 serge 1442
	if (readonly) {
1443
		struct drm_i915_gem_request *req;
1444
 
1445
		req = obj->last_write_req;
1446
		if (req == NULL)
1447
			return 0;
1448
 
1449
		requests[n++] = i915_gem_request_reference(req);
1450
	} else {
1451
		for (i = 0; i < I915_NUM_RINGS; i++) {
1452
			struct drm_i915_gem_request *req;
1453
 
1454
			req = obj->last_read_req[i];
1455
			if (req == NULL)
1456
				continue;
1457
 
1458
			requests[n++] = i915_gem_request_reference(req);
1459
		}
1460
	}
1461
 
3260 Serge 1462
	mutex_unlock(&dev->struct_mutex);
6084 serge 1463
	for (i = 0; ret == 0 && i < n; i++)
1464
		ret = __i915_wait_request(requests[i], reset_counter, true,
1465
					  NULL, rps);
3260 Serge 1466
	mutex_lock(&dev->struct_mutex);
2332 Serge 1467
 
6084 serge 1468
	for (i = 0; i < n; i++) {
1469
		if (ret == 0)
1470
			i915_gem_object_retire_request(obj, requests[i]);
1471
		i915_gem_request_unreference(requests[i]);
1472
	}
1473
 
1474
	return ret;
3260 Serge 1475
}
2332 Serge 1476
 
6084 serge 1477
static struct intel_rps_client *to_rps_client(struct drm_file *file)
1478
{
1479
	struct drm_i915_file_private *fpriv = file->driver_priv;
1480
	return &fpriv->rps;
1481
}
1482
 
3260 Serge 1483
/**
1484
 * Called when user space prepares to use an object with the CPU, either
1485
 * through the mmap ioctl's mapping or a GTT mapping.
1486
 */
1487
int
1488
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1489
			  struct drm_file *file)
1490
{
1491
	struct drm_i915_gem_set_domain *args = data;
1492
	struct drm_i915_gem_object *obj;
1493
	uint32_t read_domains = args->read_domains;
1494
	uint32_t write_domain = args->write_domain;
1495
	int ret;
2332 Serge 1496
 
3260 Serge 1497
	/* Only handle setting domains to types used by the CPU. */
1498
	if (write_domain & I915_GEM_GPU_DOMAINS)
1499
		return -EINVAL;
2332 Serge 1500
 
3260 Serge 1501
	if (read_domains & I915_GEM_GPU_DOMAINS)
1502
		return -EINVAL;
2332 Serge 1503
 
3260 Serge 1504
	/* Having something in the write domain implies it's in the read
1505
	 * domain, and only that read domain.  Enforce that in the request.
1506
	 */
1507
	if (write_domain != 0 && read_domains != write_domain)
1508
		return -EINVAL;
2332 Serge 1509
 
3260 Serge 1510
	ret = i915_mutex_lock_interruptible(dev);
1511
	if (ret)
1512
		return ret;
2332 Serge 1513
 
3260 Serge 1514
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1515
	if (&obj->base == NULL) {
1516
		ret = -ENOENT;
1517
		goto unlock;
1518
	}
2332 Serge 1519
 
3260 Serge 1520
	/* Try to flush the object off the GPU without holding the lock.
1521
	 * We will repeat the flush holding the lock in the normal manner
1522
	 * to catch cases where we are gazumped.
1523
	 */
5060 serge 1524
	ret = i915_gem_object_wait_rendering__nonblocking(obj,
6084 serge 1525
							  to_rps_client(file),
5060 serge 1526
							  !write_domain);
3260 Serge 1527
	if (ret)
1528
		goto unref;
2332 Serge 1529
 
6084 serge 1530
	if (read_domains & I915_GEM_DOMAIN_GTT)
3260 Serge 1531
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
6084 serge 1532
	else
3260 Serge 1533
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
2332 Serge 1534
 
6084 serge 1535
	if (write_domain != 0)
1536
		intel_fb_obj_invalidate(obj,
1537
					write_domain == I915_GEM_DOMAIN_GTT ?
1538
					ORIGIN_GTT : ORIGIN_CPU);
1539
 
3260 Serge 1540
unref:
1541
	drm_gem_object_unreference(&obj->base);
1542
unlock:
1543
	mutex_unlock(&dev->struct_mutex);
1544
	return ret;
1545
}
2332 Serge 1546
 
4293 Serge 1547
/**
1548
 * Called when user space has done writes to this buffer
1549
 */
1550
int
1551
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1552
			 struct drm_file *file)
1553
{
1554
	struct drm_i915_gem_sw_finish *args = data;
1555
	struct drm_i915_gem_object *obj;
1556
	int ret = 0;
2332 Serge 1557
 
4293 Serge 1558
	ret = i915_mutex_lock_interruptible(dev);
1559
	if (ret)
1560
		return ret;
2332 Serge 1561
 
4293 Serge 1562
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1563
	if (&obj->base == NULL) {
1564
		ret = -ENOENT;
1565
		goto unlock;
1566
	}
2332 Serge 1567
 
4293 Serge 1568
	/* Pinned buffers may be scanout, so flush the cache */
1569
	if (obj->pin_display)
6084 serge 1570
		i915_gem_object_flush_cpu_write_domain(obj);
2332 Serge 1571
 
4293 Serge 1572
	drm_gem_object_unreference(&obj->base);
1573
unlock:
1574
	mutex_unlock(&dev->struct_mutex);
1575
	return ret;
1576
}
1577
 
3260 Serge 1578
/**
1579
 * Maps the contents of an object, returning the address it is mapped
1580
 * into.
1581
 *
1582
 * While the mapping holds a reference on the contents of the object, it doesn't
1583
 * imply a ref on the object itself.
5354 serge 1584
 *
1585
 * IMPORTANT:
1586
 *
1587
 * DRM driver writers who look at this function as an example for how to do GEM
1588
 * mmap support, please don't implement mmap support like here. The modern way
1589
 * to implement DRM mmap support is with an mmap offset ioctl (like
1590
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1591
 * That way debug tooling like valgrind will understand what's going on, hiding
1592
 * the mmap call in a driver private ioctl will break that. The i915 driver only
1593
 * does cpu mmaps this way because we didn't know better.
3260 Serge 1594
 */
1595
int
1596
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1597
		    struct drm_file *file)
1598
{
1599
	struct drm_i915_gem_mmap *args = data;
1600
	struct drm_gem_object *obj;
4392 Serge 1601
	unsigned long addr;
2332 Serge 1602
 
6084 serge 1603
//	if (args->flags & ~(I915_MMAP_WC))
1604
//		return -EINVAL;
3260 Serge 1605
	obj = drm_gem_object_lookup(dev, file, args->handle);
1606
	if (obj == NULL)
1607
		return -ENOENT;
4104 Serge 1608
 
3260 Serge 1609
	/* prime objects have no backing filp to GEM mmap
1610
	 * pages from.
1611
	 */
1612
	if (!obj->filp) {
1613
		drm_gem_object_unreference_unlocked(obj);
1614
		return -EINVAL;
1615
	}
2332 Serge 1616
 
6084 serge 1617
	addr = vm_mmap(obj->filp, 0, args->size,
1618
		       PROT_READ | PROT_WRITE, MAP_SHARED,
1619
		       args->offset);
3260 Serge 1620
	drm_gem_object_unreference_unlocked(obj);
6084 serge 1621
	if (IS_ERR((void *)addr))
1622
		return addr;
2332 Serge 1623
 
3260 Serge 1624
	args->addr_ptr = (uint64_t) addr;
2332 Serge 1625
 
6084 serge 1626
	return 0;
3260 Serge 1627
}
2332 Serge 1628
 
1629
 
1630
 
1631
 
1632
 
1633
 
1634
 
1635
 
3031 serge 1636
 
1637
 
1638
 
1639
 
1640
 
1641
/**
1642
 * i915_gem_release_mmap - remove physical page mappings
1643
 * @obj: obj in question
1644
 *
1645
 * Preserve the reservation of the mmapping with the DRM core code, but
1646
 * relinquish ownership of the pages back to the system.
1647
 *
1648
 * It is vital that we remove the page mapping if we have mapped a tiled
1649
 * object through the GTT and then lose the fence register due to
1650
 * resource pressure. Similarly if the object has been moved out of the
1651
 * aperture, then pages mapped into userspace must be revoked. Removing the
1652
 * mapping will then trigger a page fault on the next user access, allowing
1653
 * fixup by i915_gem_fault().
1654
 */
1655
void
1656
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1657
{
1658
	if (!obj->fault_mappable)
1659
		return;
1660
 
4104 Serge 1661
//	drm_vma_node_unmap(&obj->base.vma_node, obj->base.dev->dev_mapping);
3031 serge 1662
	obj->fault_mappable = false;
1663
}
1664
 
6084 serge 1665
void
1666
i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
1667
{
1668
	struct drm_i915_gem_object *obj;
1669
 
1670
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
1671
		i915_gem_release_mmap(obj);
1672
}
1673
 
3480 Serge 1674
uint32_t
2332 Serge 1675
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
1676
{
1677
	uint32_t gtt_size;
1678
 
1679
	if (INTEL_INFO(dev)->gen >= 4 ||
1680
	    tiling_mode == I915_TILING_NONE)
1681
		return size;
1682
 
1683
	/* Previous chips need a power-of-two fence region when tiling */
1684
	if (INTEL_INFO(dev)->gen == 3)
1685
		gtt_size = 1024*1024;
1686
	else
1687
		gtt_size = 512*1024;
1688
 
1689
	while (gtt_size < size)
1690
		gtt_size <<= 1;
1691
 
1692
	return gtt_size;
1693
}
1694
 
1695
/**
1696
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1697
 * @obj: object to check
1698
 *
1699
 * Return the required GTT alignment for an object, taking into account
1700
 * potential fence register mapping.
1701
 */
3480 Serge 1702
uint32_t
1703
i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
1704
			   int tiling_mode, bool fenced)
2332 Serge 1705
{
1706
	/*
1707
	 * Minimum alignment is 4k (GTT page size), but might be greater
1708
	 * if a fence register is needed for the object.
1709
	 */
3480 Serge 1710
	if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
2332 Serge 1711
	    tiling_mode == I915_TILING_NONE)
1712
		return 4096;
1713
 
1714
	/*
1715
	 * Previous chips need to be aligned to the size of the smallest
1716
	 * fence register that can contain the object.
1717
	 */
1718
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
1719
}
1720
 
1721
 
1722
 
3480 Serge 1723
int
1724
i915_gem_mmap_gtt(struct drm_file *file,
1725
          struct drm_device *dev,
6084 serge 1726
		  uint32_t handle,
3480 Serge 1727
          uint64_t *offset)
1728
{
1729
    struct drm_i915_private *dev_priv = dev->dev_private;
1730
    struct drm_i915_gem_object *obj;
1731
    unsigned long pfn;
1732
    char *mem, *ptr;
1733
    int ret;
1734
 
1735
    ret = i915_mutex_lock_interruptible(dev);
1736
    if (ret)
1737
        return ret;
1738
 
1739
    obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
1740
    if (&obj->base == NULL) {
1741
        ret = -ENOENT;
1742
        goto unlock;
1743
    }
1744
 
1745
    if (obj->madv != I915_MADV_WILLNEED) {
5060 serge 1746
		DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
1747
		ret = -EFAULT;
3480 Serge 1748
        goto out;
1749
    }
1750
    /* Now bind it into the GTT if needed */
5060 serge 1751
	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
3480 Serge 1752
    if (ret)
1753
        goto out;
1754
 
1755
    ret = i915_gem_object_set_to_gtt_domain(obj, 1);
1756
    if (ret)
1757
        goto unpin;
1758
 
1759
    ret = i915_gem_object_get_fence(obj);
1760
    if (ret)
1761
        goto unpin;
1762
 
1763
    obj->fault_mappable = true;
1764
 
4104 Serge 1765
    pfn = dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj);
3480 Serge 1766
 
1767
    /* Finally, remap it using the new GTT offset */
1768
 
1769
    mem = UserAlloc(obj->base.size);
1770
    if(unlikely(mem == NULL))
1771
    {
1772
        ret = -ENOMEM;
1773
        goto unpin;
1774
    }
1775
 
1776
    for(ptr = mem; ptr < mem + obj->base.size; ptr+= 4096, pfn+= 4096)
1777
        MapPage(ptr, pfn, PG_SHARED|PG_UW);
1778
 
1779
unpin:
5060 serge 1780
    i915_gem_object_unpin_pages(obj);
3480 Serge 1781
 
1782
 
5367 serge 1783
    *offset = (uint32_t)mem;
3480 Serge 1784
 
1785
out:
6088 serge 1786
	drm_gem_object_unreference(&obj->base);
3480 Serge 1787
unlock:
6088 serge 1788
	mutex_unlock(&dev->struct_mutex);
1789
	return ret;
3480 Serge 1790
}
1791
 
1792
/**
1793
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1794
 * @dev: DRM device
1795
 * @data: GTT mapping ioctl data
1796
 * @file: GEM object info
1797
 *
1798
 * Simply returns the fake offset to userspace so it can mmap it.
1799
 * The mmap call will end up in drm_gem_mmap(), which will set things
1800
 * up so we can get faults in the handler above.
1801
 *
1802
 * The fault handler will take care of binding the object into the GTT
1803
 * (since it may have been evicted to make room for something), allocating
1804
 * a fence register, and mapping the appropriate aperture address into
1805
 * userspace.
1806
 */
1807
int
1808
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
6084 serge 1809
			struct drm_file *file)
3480 Serge 1810
{
6084 serge 1811
	struct drm_i915_gem_mmap_gtt *args = data;
3480 Serge 1812
 
6084 serge 1813
	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
3480 Serge 1814
}
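
/* Editor's note: illustrative userspace sketch, not part of the driver.  On
 * Linux the fake offset returned above is fed back into mmap() on the DRM
 * fd; on this port i915_gem_mmap_gtt() has already built the user mapping
 * with UserAlloc()/MapPage() and returns that pointer in args->offset, so
 * the second step is unnecessary.  The example_* name and the libdrm
 * plumbing are assumptions about the caller.
 */
#if 0	/* userspace-side example, kept out of the kernel build */
#include <stdint.h>
#include <sys/mman.h>
#include <xf86drm.h>
#include <i915_drm.h>

static void *example_gem_mmap_gtt(int fd, uint32_t handle, size_t size)
{
	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg))
		return NULL;

	/* Fault-on-access mapping through the GTT aperture; MAP_FAILED on error. */
	return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, arg.offset);
}
#endif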
1815
 
3031 serge 1816
/* Immediately discard the backing storage */
1817
static void
1818
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
1819
{
1820
//	i915_gem_object_free_mmap_offset(obj);
2332 Serge 1821
 
3263 Serge 1822
	if (obj->base.filp == NULL)
1823
		return;
2332 Serge 1824
 
3031 serge 1825
	/* Our goal here is to return as much of the memory as
1826
	 * is possible back to the system as we are called from OOM.
1827
	 * To do this we must instruct the shmfs to drop all of its
1828
	 * backing pages, *now*.
1829
	 */
5060 serge 1830
//	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
3031 serge 1831
	obj->madv = __I915_MADV_PURGED;
1832
}
2332 Serge 1833
 
5060 serge 1834
/* Try to discard unwanted pages */
1835
static void
1836
i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
3031 serge 1837
{
5060 serge 1838
	struct address_space *mapping;
1839
 
1840
	switch (obj->madv) {
1841
	case I915_MADV_DONTNEED:
1842
		i915_gem_object_truncate(obj);
1843
	case __I915_MADV_PURGED:
1844
		return;
1845
	}
1846
 
1847
	if (obj->base.filp == NULL)
1848
		return;
1849
 
3031 serge 1850
}
2332 Serge 1851
 
3031 serge 1852
static void
1853
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
1854
{
3746 Serge 1855
	struct sg_page_iter sg_iter;
1856
	int ret;
2332 Serge 1857
 
3031 serge 1858
	BUG_ON(obj->madv == __I915_MADV_PURGED);
2332 Serge 1859
 
3031 serge 1860
	ret = i915_gem_object_set_to_cpu_domain(obj, true);
1861
	if (ret) {
1862
		/* In the event of a disaster, abandon all caches and
1863
		 * hope for the best.
1864
		 */
1865
		WARN_ON(ret != -EIO);
4104 Serge 1866
		i915_gem_clflush_object(obj, true);
3031 serge 1867
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
1868
	}
2332 Serge 1869
 
6084 serge 1870
	i915_gem_gtt_finish_object(obj);
6296 serge 1871
 
1872
	if (i915_gem_object_needs_bit17_swizzle(obj))
1873
		i915_gem_object_save_bit_17_swizzle(obj);
1874
 
3031 serge 1875
	if (obj->madv == I915_MADV_DONTNEED)
1876
		obj->dirty = 0;
2332 Serge 1877
 
3746 Serge 1878
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
1879
		struct page *page = sg_page_iter_page(&sg_iter);
2332 Serge 1880
 
6084 serge 1881
		page_cache_release(page);
3243 Serge 1882
	}
6084 serge 1883
	obj->dirty = 0;
3243 Serge 1884
 
1885
	sg_free_table(obj->pages);
1886
	kfree(obj->pages);
3031 serge 1887
}
2332 Serge 1888
 
3480 Serge 1889
int
3031 serge 1890
i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
1891
{
1892
	const struct drm_i915_gem_object_ops *ops = obj->ops;
2332 Serge 1893
 
3243 Serge 1894
	if (obj->pages == NULL)
3031 serge 1895
		return 0;
2332 Serge 1896
 
3031 serge 1897
	if (obj->pages_pin_count)
1898
		return -EBUSY;
1899
 
4104 Serge 1900
	BUG_ON(i915_gem_obj_bound_any(obj));
1901
 
3243 Serge 1902
	/* ->put_pages might need to allocate memory for the bit17 swizzle
1903
	 * array, hence protect them from being reaped by removing them from gtt
1904
	 * lists early. */
4104 Serge 1905
	list_del(&obj->global_list);
3243 Serge 1906
 
3031 serge 1907
	ops->put_pages(obj);
3243 Serge 1908
	obj->pages = NULL;
3031 serge 1909
 
5060 serge 1910
	i915_gem_object_invalidate(obj);
3031 serge 1911
 
1912
	return 0;
1913
}
1914
 
2332 Serge 1915
static int
3031 serge 1916
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2332 Serge 1917
{
3260 Serge 1918
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
6084 serge 1919
	int page_count, i;
1920
	struct address_space *mapping;
1921
	struct sg_table *st;
3243 Serge 1922
	struct scatterlist *sg;
3746 Serge 1923
	struct sg_page_iter sg_iter;
3243 Serge 1924
	struct page *page;
3746 Serge 1925
	unsigned long last_pfn = 0;	/* suppress gcc warning */
6084 serge 1926
	int ret;
3243 Serge 1927
	gfp_t gfp = GFP_KERNEL;	/* passed straight to the shmem helper below */
2332 Serge 1928
 
3243 Serge 1929
	/* Assert that the object is not currently in any GPU domain. As it
1930
	 * wasn't in the GTT, there shouldn't be any way it could have been in
1931
	 * a GPU cache
2332 Serge 1932
	 */
3243 Serge 1933
	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
1934
	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
1935
 
1936
	st = kmalloc(sizeof(*st), GFP_KERNEL);
1937
	if (st == NULL)
1938
		return -ENOMEM;
1939
 
2332 Serge 1940
	page_count = obj->base.size / PAGE_SIZE;
3243 Serge 1941
	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
1942
		kfree(st);
2332 Serge 1943
		return -ENOMEM;
3243 Serge 1944
	}
2332 Serge 1945
 
3243 Serge 1946
	/* Get the list of pages out of our struct file.  They'll be pinned
1947
	 * at this point until we release them.
1948
	 *
1949
	 * Fail silently without starting the shrinker
1950
	 */
3746 Serge 1951
	sg = st->sgl;
1952
	st->nents = 0;
1953
	for (i = 0; i < page_count; i++) {
4104 Serge 1954
        page = shmem_read_mapping_page_gfp(obj->base.filp, i, gfp);
3260 Serge 1955
		if (IS_ERR(page)) {
1956
            dbgprintf("%s invalid page %p\n", __FUNCTION__, page);
2332 Serge 1957
			goto err_pages;
3260 Serge 1958
		}
5354 serge 1959
#ifdef CONFIG_SWIOTLB
1960
		if (swiotlb_nr_tbl()) {
1961
			st->nents++;
1962
			sg_set_page(sg, page, PAGE_SIZE, 0);
1963
			sg = sg_next(sg);
1964
			continue;
1965
		}
1966
#endif
3746 Serge 1967
		if (!i || page_to_pfn(page) != last_pfn + 1) {
1968
			if (i)
1969
				sg = sg_next(sg);
1970
			st->nents++;
6084 serge 1971
			sg_set_page(sg, page, PAGE_SIZE, 0);
3746 Serge 1972
		} else {
1973
			sg->length += PAGE_SIZE;
1974
		}
1975
		last_pfn = page_to_pfn(page);
3243 Serge 1976
	}
5354 serge 1977
#ifdef CONFIG_SWIOTLB
1978
	if (!swiotlb_nr_tbl())
1979
#endif
3746 Serge 1980
		sg_mark_end(sg);
3243 Serge 1981
	obj->pages = st;
3031 serge 1982
 
6084 serge 1983
	ret = i915_gem_gtt_prepare_object(obj);
1984
	if (ret)
1985
		goto err_pages;
5367 serge 1986
 
6296 serge 1987
	if (i915_gem_object_needs_bit17_swizzle(obj))
1988
		i915_gem_object_do_bit_17_swizzle(obj);
1989
 
5367 serge 1990
	if (obj->tiling_mode != I915_TILING_NONE &&
1991
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
1992
		i915_gem_object_pin_pages(obj);
1993
 
2332 Serge 1994
	return 0;
1995
 
1996
err_pages:
3746 Serge 1997
	sg_mark_end(sg);
1998
	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
1999
		page_cache_release(sg_page_iter_page(&sg_iter));
3243 Serge 2000
	sg_free_table(st);
2001
	kfree(st);
6084 serge 2002
 
3243 Serge 2003
	return PTR_ERR(page);
2332 Serge 2004
}
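
/* Editor's note: a minimal sketch (example_* is not driver code) of the
 * coalescing performed by the loop above: pages with consecutive pfns are
 * folded into one scatterlist entry, so pfns 100, 101, 102, 200 become two
 * entries, (100, 3 pages) and (200, 1 page), which keeps st->nents small
 * when the allocator returns contiguous memory.
 */
static unsigned int example_count_sg_runs(const unsigned long *pfn, int page_count)
{
	unsigned int runs = 0;
	int i;

	for (i = 0; i < page_count; i++)
		if (i == 0 || pfn[i] != pfn[i - 1] + 1)
			runs++;		/* a new entry starts wherever contiguity breaks */

	return runs;
}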
2005
 
3031 serge 2006
/* Ensure that the associated pages are gathered from the backing storage
2007
 * and pinned into our object. i915_gem_object_get_pages() may be called
2008
 * multiple times before they are released by a single call to
2009
 * i915_gem_object_put_pages() - once the pages are no longer referenced
2010
 * either as a result of memory pressure (reaping pages under the shrinker)
2011
 * or as the object is itself released.
2012
 */
2013
int
2014
i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2332 Serge 2015
{
3031 serge 2016
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2017
	const struct drm_i915_gem_object_ops *ops = obj->ops;
2018
	int ret;
2332 Serge 2019
 
3243 Serge 2020
	if (obj->pages)
3031 serge 2021
		return 0;
2332 Serge 2022
 
4392 Serge 2023
	if (obj->madv != I915_MADV_WILLNEED) {
5060 serge 2024
		DRM_DEBUG("Attempting to obtain a purgeable object\n");
2025
		return -EFAULT;
4392 Serge 2026
	}
2027
 
3031 serge 2028
	BUG_ON(obj->pages_pin_count);
2332 Serge 2029
 
3031 serge 2030
	ret = ops->get_pages(obj);
2031
	if (ret)
2032
		return ret;
2344 Serge 2033
 
4104 Serge 2034
	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
6084 serge 2035
 
2036
	obj->get_page.sg = obj->pages->sgl;
2037
	obj->get_page.last = 0;
2038
 
2039
	return 0;
2332 Serge 2040
}
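
/* Editor's note: a hypothetical caller sketch (example_* is not driver code)
 * of the pairing described above: populate obj->pages, then pin them so a
 * concurrent i915_gem_object_put_pages() cannot release them while they are
 * in use.  i915_gem_object_bind_to_vm() further below follows this pattern.
 */
static int example_hold_backing_pages(struct drm_i915_gem_object *obj)
{
	int ret;

	ret = i915_gem_object_get_pages(obj);	/* gathers obj->pages if absent */
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);		/* pages_pin_count blocks put_pages() */

	/* ... walk obj->pages->sgl or map the pages here ... */

	i915_gem_object_unpin_pages(obj);	/* drop the hold once finished */
	return 0;
}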
2041
 
6084 serge 2042
void i915_vma_move_to_active(struct i915_vma *vma,
2043
			     struct drm_i915_gem_request *req)
2332 Serge 2044
{
6084 serge 2045
	struct drm_i915_gem_object *obj = vma->obj;
2046
	struct intel_engine_cs *ring;
2332 Serge 2047
 
6084 serge 2048
	ring = i915_gem_request_get_ring(req);
2332 Serge 2049
 
2050
	/* Add a reference if we're newly entering the active list. */
6084 serge 2051
	if (obj->active == 0)
2344 Serge 2052
		drm_gem_object_reference(&obj->base);
6084 serge 2053
	obj->active |= intel_ring_flag(ring);
2332 Serge 2054
 
6084 serge 2055
	list_move_tail(&obj->ring_list[ring->id], &ring->active_list);
2056
	i915_gem_request_assign(&obj->last_read_req[ring->id], req);
2332 Serge 2057
 
6084 serge 2058
	list_move_tail(&vma->mm_list, &vma->vm->active_list);
2332 Serge 2059
}
2060
 
6084 serge 2061
static void
2062
i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
4560 Serge 2063
{
6084 serge 2064
	RQ_BUG_ON(obj->last_write_req == NULL);
2065
	RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring)));
2066
 
2067
	i915_gem_request_assign(&obj->last_write_req, NULL);
2068
	intel_fb_obj_flush(obj, true, ORIGIN_CS);
4560 Serge 2069
}
2070
 
2344 Serge 2071
static void
6084 serge 2072
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
2344 Serge 2073
{
5060 serge 2074
	struct i915_vma *vma;
2332 Serge 2075
 
6084 serge 2076
	RQ_BUG_ON(obj->last_read_req[ring] == NULL);
2077
	RQ_BUG_ON(!(obj->active & (1 << ring)));
2332 Serge 2078
 
6084 serge 2079
	list_del_init(&obj->ring_list[ring]);
2080
	i915_gem_request_assign(&obj->last_read_req[ring], NULL);
2344 Serge 2081
 
6084 serge 2082
	if (obj->last_write_req && obj->last_write_req->ring->id == ring)
2083
		i915_gem_object_retire__write(obj);
5354 serge 2084
 
6084 serge 2085
	obj->active &= ~(1 << ring);
2086
	if (obj->active)
2087
		return;
2344 Serge 2088
 
6084 serge 2089
	/* Bump our place on the bound list to keep it roughly in LRU order
2090
	 * so that we don't steal from recently used but inactive objects
2091
	 * (unless we are forced to ofc!)
2092
	 */
2093
	list_move_tail(&obj->global_list,
2094
		       &to_i915(obj->base.dev)->mm.bound_list);
3031 serge 2095
 
6084 serge 2096
	list_for_each_entry(vma, &obj->vma_list, vma_link) {
2097
		if (!list_empty(&vma->mm_list))
2098
			list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
2099
	}
2344 Serge 2100
 
6084 serge 2101
	i915_gem_request_assign(&obj->last_fenced_req, NULL);
2352 Serge 2102
	drm_gem_object_unreference(&obj->base);
2103
}
2104
 
3243 Serge 2105
static int
3480 Serge 2106
i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
2344 Serge 2107
{
3243 Serge 2108
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 2109
	struct intel_engine_cs *ring;
3243 Serge 2110
	int ret, i, j;
2344 Serge 2111
 
3480 Serge 2112
	/* Carefully retire all requests without writing to the rings */
3243 Serge 2113
	for_each_ring(ring, dev_priv, i) {
3480 Serge 2114
		ret = intel_ring_idle(ring);
6084 serge 2115
		if (ret)
2116
			return ret;
3480 Serge 2117
	}
2118
	i915_gem_retire_requests(dev);
3243 Serge 2119
 
3480 Serge 2120
	/* Finally reset hw state */
3243 Serge 2121
	for_each_ring(ring, dev_priv, i) {
3480 Serge 2122
		intel_ring_init_seqno(ring, seqno);
2123
 
5060 serge 2124
		for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++)
2125
			ring->semaphore.sync_seqno[j] = 0;
3243 Serge 2126
	}
2127
 
2128
	return 0;
2344 Serge 2129
}
2130
 
3480 Serge 2131
int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
2132
{
2133
	struct drm_i915_private *dev_priv = dev->dev_private;
2134
	int ret;
2135
 
2136
	if (seqno == 0)
2137
		return -EINVAL;
2138
 
2139
	/* HWS page needs to be set less than what we
2140
	 * will inject to ring
2141
	 */
2142
	ret = i915_gem_init_seqno(dev, seqno - 1);
2143
	if (ret)
2144
		return ret;
2145
 
2146
	/* Carefully set the last_seqno value so that wrap
2147
	 * detection still works
2148
	 */
2149
	dev_priv->next_seqno = seqno;
2150
	dev_priv->last_seqno = seqno - 1;
2151
	if (dev_priv->last_seqno == 0)
2152
		dev_priv->last_seqno--;
2153
 
2154
	return 0;
2155
}
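
/* Editor's note: illustrative sketch (example_* is not driver code) of why
 * last_seqno is seeded to seqno - 1 above.  Completion checks compare
 * sequence numbers via a signed 32-bit difference (the driver's
 * i915_seqno_passed() helper takes this approach), so ordering survives a
 * wrap: with seq1 = 2 and seq2 = 0xfffffffe the difference is +4 and seq1
 * is still seen as the later value.
 */
static bool example_seqno_passed(uint32_t seq1, uint32_t seq2)
{
	return (int32_t)(seq1 - seq2) >= 0;
}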
2156
 
3243 Serge 2157
int
2158
i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
2344 Serge 2159
{
3243 Serge 2160
	struct drm_i915_private *dev_priv = dev->dev_private;
2344 Serge 2161
 
3243 Serge 2162
	/* reserve 0 for non-seqno */
2163
	if (dev_priv->next_seqno == 0) {
3480 Serge 2164
		int ret = i915_gem_init_seqno(dev, 0);
3243 Serge 2165
		if (ret)
2166
			return ret;
2167
 
2168
		dev_priv->next_seqno = 1;
2169
	}
2170
 
3480 Serge 2171
	*seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
3243 Serge 2172
	return 0;
2332 Serge 2173
}
2174
 
6084 serge 2175
/*
2176
 * NB: This function is not allowed to fail. Doing so would mean the
2177
 * request is not being tracked for completion but the work itself is
2178
 * going to happen on the hardware. This would be a Bad Thing(tm).
2179
 */
2180
void __i915_add_request(struct drm_i915_gem_request *request,
2181
			struct drm_i915_gem_object *obj,
2182
			bool flush_caches)
2352 Serge 2183
{
6084 serge 2184
	struct intel_engine_cs *ring;
2185
	struct drm_i915_private *dev_priv;
5354 serge 2186
	struct intel_ringbuffer *ringbuf;
6084 serge 2187
	u32 request_start;
2352 Serge 2188
	int ret;
2332 Serge 2189
 
5354 serge 2190
	if (WARN_ON(request == NULL))
6084 serge 2191
		return;
5354 serge 2192
 
6084 serge 2193
	ring = request->ring;
2194
	dev_priv = ring->dev->dev_private;
2195
	ringbuf = request->ringbuf;
5354 serge 2196
 
6084 serge 2197
	/*
2198
	 * To ensure that this call will not fail, space for its emissions
2199
	 * should already have been reserved in the ring buffer. Let the ring
2200
	 * know that it is time to use that space up.
2201
	 */
2202
	intel_ring_reserved_space_use(ringbuf);
2203
 
5354 serge 2204
	request_start = intel_ring_get_tail(ringbuf);
3031 serge 2205
	/*
2206
	 * Emit any outstanding flushes - execbuf can fail to emit the flush
2207
	 * after having emitted the batchbuffer command. Hence we need to fix
2208
	 * things up similar to emitting the lazy request. The difference here
2209
	 * is that the flush _must_ happen before the next request, no matter
2210
	 * what.
2211
	 */
6084 serge 2212
	if (flush_caches) {
2213
		if (i915.enable_execlists)
2214
			ret = logical_ring_flush_all_caches(request);
2215
		else
2216
			ret = intel_ring_flush_all_caches(request);
2217
		/* Not allowed to fail! */
2218
		WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
5354 serge 2219
	}
2332 Serge 2220
 
3031 serge 2221
	/* Record the position of the start of the request so that
2222
	 * should we detect the updated seqno part-way through the
6084 serge 2223
	 * GPU processing the request, we never over-estimate the
3031 serge 2224
	 * position of the head.
2225
	 */
6084 serge 2226
	request->postfix = intel_ring_get_tail(ringbuf);
3031 serge 2227
 
6084 serge 2228
	if (i915.enable_execlists)
2229
		ret = ring->emit_request(request);
2230
	else {
2231
		ret = ring->add_request(request);
2232
 
2233
		request->tail = intel_ring_get_tail(ringbuf);
5354 serge 2234
	}
6084 serge 2235
	/* Not allowed to fail! */
2236
	WARN(ret, "emit|add_request failed: %d!\n", ret);
2332 Serge 2237
 
4104 Serge 2238
	request->head = request_start;
2239
 
2240
	/* Whilst this request exists, batch_obj will be on the
2241
	 * active_list, and so will hold the active reference. Only when this
2242
	 * request is retired will the the batch_obj be moved onto the
2243
	 * inactive_list and lose its active reference. Hence we do not need
2244
	 * to explicitly hold another reference here.
2245
	 */
4560 Serge 2246
	request->batch_obj = obj;
4104 Serge 2247
 
5060 serge 2248
	request->emitted_jiffies = jiffies;
6084 serge 2249
	request->previous_seqno = ring->last_submitted_seqno;
2250
	ring->last_submitted_seqno = request->seqno;
2352 Serge 2251
	list_add_tail(&request->list, &ring->request_list);
2332 Serge 2252
 
6084 serge 2253
	trace_i915_gem_request_add(request);
2332 Serge 2254
 
6084 serge 2255
//	i915_queue_hangcheck(ring->dev);
3263 Serge 2256
 
6084 serge 2257
	queue_delayed_work(dev_priv->wq,
2258
			   &dev_priv->mm.retire_work,
2259
			   round_jiffies_up_relative(HZ));
2260
	intel_mark_busy(dev_priv->dev);
2332 Serge 2261
 
6084 serge 2262
	/* Sanity check that the reserved size was large enough. */
2263
	intel_ring_reserved_space_end(ringbuf);
2352 Serge 2264
}
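
/* Editor's note: hypothetical caller sketch (example_* is not driver code)
 * of the lifecycle __i915_add_request() expects: allocate the request first
 * (which reserves ring space), emit the commands, then add the request
 * unconditionally.  i915_gpu_idle() further below follows the same pattern;
 * i915_add_request() is assumed to be the usual wrapper passing
 * flush_caches = true.
 */
static int example_submit_request(struct intel_engine_cs *ring)
{
	struct drm_i915_gem_request *req;
	int ret;

	ret = i915_gem_request_alloc(ring, ring->default_context, &req);
	if (ret)
		return ret;

	/* ... emit commands for this request here; on an emission error the
	 *     caller uses i915_gem_request_cancel(req) instead of adding it ... */

	i915_add_request(req);	/* not allowed to fail; consumes the reserved space */
	return 0;
}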
2332 Serge 2265
 
5060 serge 2266
static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
2267
				   const struct intel_context *ctx)
4104 Serge 2268
{
5060 serge 2269
	unsigned long elapsed;
4104 Serge 2270
 
5060 serge 2271
    elapsed = GetTimerTicks()/100 - ctx->hang_stats.guilty_ts;
4104 Serge 2272
 
5060 serge 2273
	if (ctx->hang_stats.banned)
2274
		return true;
4104 Serge 2275
 
6084 serge 2276
	if (ctx->hang_stats.ban_period_seconds &&
2277
	    elapsed <= ctx->hang_stats.ban_period_seconds) {
5060 serge 2278
		if (!i915_gem_context_is_default(ctx)) {
2279
			DRM_DEBUG("context hanging too fast, banning!\n");
4104 Serge 2280
			return true;
5060 serge 2281
		} else if (i915_stop_ring_allow_ban(dev_priv)) {
2282
			if (i915_stop_ring_allow_warn(dev_priv))
6084 serge 2283
				DRM_ERROR("gpu hanging too fast, banning!\n");
4104 Serge 2284
			return true;
6084 serge 2285
		}
4104 Serge 2286
	}
2287
 
2288
	return false;
2289
}
2290
 
5060 serge 2291
static void i915_set_reset_status(struct drm_i915_private *dev_priv,
2292
				  struct intel_context *ctx,
2293
				  const bool guilty)
4560 Serge 2294
{
5060 serge 2295
	struct i915_ctx_hang_stats *hs;
4560 Serge 2296
 
5060 serge 2297
	if (WARN_ON(!ctx))
2298
		return;
4560 Serge 2299
 
5060 serge 2300
	hs = &ctx->hang_stats;
4560 Serge 2301
 
5060 serge 2302
	if (guilty) {
2303
		hs->banned = i915_context_is_banned(dev_priv, ctx);
2304
		hs->batch_active++;
2305
        hs->guilty_ts = GetTimerTicks()/100;
2306
	} else {
2307
		hs->batch_pending++;
4104 Serge 2308
	}
2309
}
2310
 
6084 serge 2311
void i915_gem_request_free(struct kref *req_ref)
4104 Serge 2312
{
6084 serge 2313
	struct drm_i915_gem_request *req = container_of(req_ref,
2314
						 typeof(*req), ref);
2315
	struct intel_context *ctx = req->ctx;
5354 serge 2316
 
6084 serge 2317
	if (req->file_priv)
2318
		i915_gem_request_remove_from_client(req);
4104 Serge 2319
 
5354 serge 2320
	if (ctx) {
2321
		if (i915.enable_execlists) {
6084 serge 2322
			if (ctx != req->ring->default_context)
2323
				intel_lr_context_unpin(req);
2324
		}
4104 Serge 2325
 
5354 serge 2326
		i915_gem_context_unreference(ctx);
2327
	}
6084 serge 2328
 
2329
	kfree(req);
4104 Serge 2330
}
2331
 
6084 serge 2332
int i915_gem_request_alloc(struct intel_engine_cs *ring,
2333
			   struct intel_context *ctx,
2334
			   struct drm_i915_gem_request **req_out)
2335
{
2336
	struct drm_i915_private *dev_priv = to_i915(ring->dev);
2337
	struct drm_i915_gem_request *req;
2338
	int ret;
2339
 
2340
	if (!req_out)
2341
		return -EINVAL;
2342
 
2343
	*req_out = NULL;
2344
 
2345
//	req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
2346
	req = kzalloc(sizeof(*req),0);
2347
	if (req == NULL)
2348
		return -ENOMEM;
2349
 
2350
	ret = i915_gem_get_seqno(ring->dev, &req->seqno);
2351
	if (ret)
2352
		goto err;
2353
 
2354
	kref_init(&req->ref);
2355
	req->i915 = dev_priv;
2356
	req->ring = ring;
2357
	req->ctx  = ctx;
2358
	i915_gem_context_reference(req->ctx);
2359
 
2360
	if (i915.enable_execlists)
2361
		ret = intel_logical_ring_alloc_request_extras(req);
2362
	else
2363
		ret = intel_ring_alloc_request_extras(req);
2364
	if (ret) {
2365
		i915_gem_context_unreference(req->ctx);
2366
		goto err;
2367
	}
2368
 
2369
	/*
2370
	 * Reserve space in the ring buffer for all the commands required to
2371
	 * eventually emit this request. This is to guarantee that the
2372
	 * i915_add_request() call can't fail. Note that the reserve may need
2373
	 * to be redone if the request is not actually submitted straight
2374
	 * away, e.g. because a GPU scheduler has deferred it.
2375
	 */
2376
	if (i915.enable_execlists)
2377
		ret = intel_logical_ring_reserve_space(req);
2378
	else
2379
		ret = intel_ring_reserve_space(req);
2380
	if (ret) {
2381
		/*
2382
		 * At this point, the request is fully allocated even if not
2383
		 * fully prepared. Thus it can be cleaned up using the proper
2384
		 * free code.
2385
		 */
2386
		i915_gem_request_cancel(req);
2387
		return ret;
2388
	}
2389
 
2390
	*req_out = req;
2391
	return 0;
2392
 
2393
err:
2394
	kfree(req);
2395
	return ret;
2396
}
2397
 
2398
void i915_gem_request_cancel(struct drm_i915_gem_request *req)
2399
{
2400
	intel_ring_reserved_space_cancel(req->ringbuf);
2401
 
2402
	i915_gem_request_unreference(req);
2403
}
2404
 
5060 serge 2405
struct drm_i915_gem_request *
2406
i915_gem_find_active_request(struct intel_engine_cs *ring)
3031 serge 2407
{
4539 Serge 2408
	struct drm_i915_gem_request *request;
4104 Serge 2409
 
4539 Serge 2410
	list_for_each_entry(request, &ring->request_list, list) {
6084 serge 2411
		if (i915_gem_request_completed(request, false))
4539 Serge 2412
			continue;
4104 Serge 2413
 
5060 serge 2414
		return request;
4539 Serge 2415
	}
5060 serge 2416
 
2417
	return NULL;
4539 Serge 2418
}
2419
 
5060 serge 2420
static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
2421
				       struct intel_engine_cs *ring)
2422
{
2423
	struct drm_i915_gem_request *request;
2424
	bool ring_hung;
2425
 
2426
	request = i915_gem_find_active_request(ring);
2427
 
2428
	if (request == NULL)
2429
		return;
2430
 
2431
	ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
2432
 
2433
	i915_set_reset_status(dev_priv, request->ctx, ring_hung);
2434
 
2435
	list_for_each_entry_continue(request, &ring->request_list, list)
2436
		i915_set_reset_status(dev_priv, request->ctx, false);
2437
}
2438
 
4539 Serge 2439
static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
5060 serge 2440
					struct intel_engine_cs *ring)
4539 Serge 2441
{
4560 Serge 2442
	while (!list_empty(&ring->active_list)) {
2443
		struct drm_i915_gem_object *obj;
2444
 
2445
		obj = list_first_entry(&ring->active_list,
2446
				       struct drm_i915_gem_object,
6084 serge 2447
				       ring_list[ring->id]);
4560 Serge 2448
 
6084 serge 2449
		i915_gem_object_retire__read(obj, ring->id);
4560 Serge 2450
	}
2451
 
2452
	/*
5354 serge 2453
	 * Clear the execlists queue up before freeing the requests, as those
2454
	 * are the ones that keep the context and ringbuffer backing objects
2455
	 * pinned in place.
2456
	 */
2457
	while (!list_empty(&ring->execlist_queue)) {
6084 serge 2458
		struct drm_i915_gem_request *submit_req;
5354 serge 2459
 
2460
		submit_req = list_first_entry(&ring->execlist_queue,
6084 serge 2461
				struct drm_i915_gem_request,
5354 serge 2462
				execlist_link);
2463
		list_del(&submit_req->execlist_link);
6084 serge 2464
 
2465
		if (submit_req->ctx != ring->default_context)
2466
			intel_lr_context_unpin(submit_req);
2467
 
2468
		i915_gem_request_unreference(submit_req);
5354 serge 2469
	}
2470
 
2471
	/*
4560 Serge 2472
	 * We must free the requests after all the corresponding objects have
2473
	 * been moved off active lists. Which is the same order as the normal
2474
	 * retire_requests function does. This is important if object hold
2475
	 * implicit references on things like e.g. ppgtt address spaces through
2476
	 * the request.
2477
	 */
3031 serge 2478
	while (!list_empty(&ring->request_list)) {
2479
		struct drm_i915_gem_request *request;
2332 Serge 2480
 
3031 serge 2481
		request = list_first_entry(&ring->request_list,
2482
					   struct drm_i915_gem_request,
2483
					   list);
2332 Serge 2484
 
6084 serge 2485
		i915_gem_request_retire(request);
3031 serge 2486
	}
2487
}
2332 Serge 2488
 
3031 serge 2489
void i915_gem_reset(struct drm_device *dev)
2490
{
2491
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 2492
	struct intel_engine_cs *ring;
3031 serge 2493
	int i;
2360 Serge 2494
 
4539 Serge 2495
	/*
2496
	 * Before we free the objects from the requests, we need to inspect
2497
	 * them for finding the guilty party. As the requests only borrow
2498
	 * their reference to the objects, the inspection must be done first.
2499
	 */
3031 serge 2500
	for_each_ring(ring, dev_priv, i)
4539 Serge 2501
		i915_gem_reset_ring_status(dev_priv, ring);
2360 Serge 2502
 
4539 Serge 2503
	for_each_ring(ring, dev_priv, i)
2504
		i915_gem_reset_ring_cleanup(dev_priv, ring);
2505
 
5060 serge 2506
	i915_gem_context_reset(dev);
4560 Serge 2507
 
3746 Serge 2508
	i915_gem_restore_fences(dev);
6084 serge 2509
 
2510
	WARN_ON(i915_verify_lists(dev));
3031 serge 2511
}
2360 Serge 2512
 
2352 Serge 2513
/**
2514
 * This function clears the request list as sequence numbers are passed.
2515
 */
3031 serge 2516
void
5060 serge 2517
i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
2352 Serge 2518
{
6084 serge 2519
	WARN_ON(i915_verify_lists(ring->dev));
2332 Serge 2520
 
6084 serge 2521
	/* Retire requests first as we use it above for the early return.
2522
	 * If we retire requests last, we may use a later seqno and so clear
2523
	 * the requests lists without clearing the active list, leading to
2524
	 * confusion.
2525
	 */
2526
	while (!list_empty(&ring->request_list)) {
2527
		struct drm_i915_gem_request *request;
2332 Serge 2528
 
6084 serge 2529
		request = list_first_entry(&ring->request_list,
2530
					   struct drm_i915_gem_request,
2531
					   list);
2332 Serge 2532
 
6084 serge 2533
		if (!i915_gem_request_completed(request, true))
2534
			break;
2332 Serge 2535
 
6084 serge 2536
		i915_gem_request_retire(request);
2537
	}
2538
 
5060 serge 2539
	/* Move any buffers on the active list that are no longer referenced
2540
	 * by the ringbuffer to the flushing/inactive lists as appropriate,
2541
	 * before we free the context associated with the requests.
2542
	 */
2543
	while (!list_empty(&ring->active_list)) {
2544
		struct drm_i915_gem_object *obj;
2545
 
2546
		obj = list_first_entry(&ring->active_list,
2547
				      struct drm_i915_gem_object,
6084 serge 2548
				      ring_list[ring->id]);
5060 serge 2549
 
6084 serge 2550
		if (!list_empty(&obj->last_read_req[ring->id]->list))
5060 serge 2551
			break;
2552
 
6084 serge 2553
		i915_gem_object_retire__read(obj, ring->id);
5060 serge 2554
	}
2555
 
6084 serge 2556
	if (unlikely(ring->trace_irq_req &&
2557
		     i915_gem_request_completed(ring->trace_irq_req, true))) {
2352 Serge 2558
		ring->irq_put(ring);
6084 serge 2559
		i915_gem_request_assign(&ring->trace_irq_req, NULL);
2352 Serge 2560
	}
2332 Serge 2561
 
2352 Serge 2562
	WARN_ON(i915_verify_lists(ring->dev));
2563
}
2332 Serge 2564
 
4560 Serge 2565
bool
2352 Serge 2566
i915_gem_retire_requests(struct drm_device *dev)
2567
{
5060 serge 2568
	struct drm_i915_private *dev_priv = dev->dev_private;
2569
	struct intel_engine_cs *ring;
4560 Serge 2570
	bool idle = true;
2352 Serge 2571
	int i;
2332 Serge 2572
 
4560 Serge 2573
	for_each_ring(ring, dev_priv, i) {
3031 serge 2574
		i915_gem_retire_requests_ring(ring);
4560 Serge 2575
		idle &= list_empty(&ring->request_list);
5354 serge 2576
		if (i915.enable_execlists) {
2577
			unsigned long flags;
2578
 
2579
			spin_lock_irqsave(&ring->execlist_lock, flags);
2580
			idle &= list_empty(&ring->execlist_queue);
2581
			spin_unlock_irqrestore(&ring->execlist_lock, flags);
2582
 
2583
			intel_execlists_retire_requests(ring);
2584
		}
4560 Serge 2585
	}
2586
 
2587
	if (idle)
2588
		mod_delayed_work(dev_priv->wq,
2589
				   &dev_priv->mm.idle_work,
2590
				   msecs_to_jiffies(100));
2591
 
2592
	return idle;
2352 Serge 2593
}
2594
 
2360 Serge 2595
static void
2596
i915_gem_retire_work_handler(struct work_struct *work)
2597
{
4560 Serge 2598
	struct drm_i915_private *dev_priv =
2599
		container_of(work, typeof(*dev_priv), mm.retire_work.work);
2600
	struct drm_device *dev = dev_priv->dev;
2360 Serge 2601
	bool idle;
2352 Serge 2602
 
2360 Serge 2603
	/* Come back later if the device is busy... */
4560 Serge 2604
	idle = false;
2605
	if (mutex_trylock(&dev->struct_mutex)) {
2606
		idle = i915_gem_retire_requests(dev);
2607
		mutex_unlock(&dev->struct_mutex);
2608
	}
2609
	if (!idle)
3482 Serge 2610
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
2611
				   round_jiffies_up_relative(HZ));
4560 Serge 2612
}
2352 Serge 2613
 
4560 Serge 2614
static void
2615
i915_gem_idle_work_handler(struct work_struct *work)
2616
{
2617
	struct drm_i915_private *dev_priv =
2618
		container_of(work, typeof(*dev_priv), mm.idle_work.work);
6084 serge 2619
	struct drm_device *dev = dev_priv->dev;
2620
	struct intel_engine_cs *ring;
2621
	int i;
2352 Serge 2622
 
6084 serge 2623
	for_each_ring(ring, dev_priv, i)
2624
		if (!list_empty(&ring->request_list))
2625
			return;
2626
 
2627
	intel_mark_idle(dev);
2628
 
2629
	if (mutex_trylock(&dev->struct_mutex)) {
2630
		struct intel_engine_cs *ring;
2631
		int i;
2632
 
2633
		for_each_ring(ring, dev_priv, i)
2634
			i915_gem_batch_pool_fini(&ring->batch_pool);
2635
 
2636
		mutex_unlock(&dev->struct_mutex);
2637
	}
2360 Serge 2638
}
2639
 
2344 Serge 2640
/**
3031 serge 2641
 * Ensures that an object will eventually get non-busy by flushing any required
2642
 * write domains, emitting any outstanding lazy request and retiring and
2643
 * completed requests.
2352 Serge 2644
 */
3031 serge 2645
static int
2646
i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
2352 Serge 2647
{
6084 serge 2648
	int i;
2352 Serge 2649
 
6084 serge 2650
	if (!obj->active)
2651
		return 0;
2352 Serge 2652
 
6084 serge 2653
	for (i = 0; i < I915_NUM_RINGS; i++) {
2654
		struct drm_i915_gem_request *req;
2655
 
2656
		req = obj->last_read_req[i];
2657
		if (req == NULL)
2658
			continue;
2659
 
2660
		if (list_empty(&req->list))
2661
			goto retire;
2662
 
2663
		if (i915_gem_request_completed(req, true)) {
2664
			__i915_gem_request_retire__upto(req);
2665
retire:
2666
			i915_gem_object_retire__read(obj, i);
2667
		}
3031 serge 2668
	}
2352 Serge 2669
 
3031 serge 2670
	return 0;
2671
}
2352 Serge 2672
 
3243 Serge 2673
/**
2674
 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2675
 * @DRM_IOCTL_ARGS: standard ioctl arguments
2676
 *
2677
 * Returns 0 if successful, else an error is returned with the remaining time in
2678
 * the timeout parameter.
2679
 *  -ETIME: object is still busy after timeout
2680
 *  -ERESTARTSYS: signal interrupted the wait
2681
 *  -ENOENT: object doesn't exist
2682
 * Also possible, but rare:
2683
 *  -EAGAIN: GPU wedged
2684
 *  -ENOMEM: damn
2685
 *  -ENODEV: Internal IRQ fail
2686
 *  -E?: The add request failed
2687
 *
2688
 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2689
 * non-zero timeout parameter the wait ioctl will wait for the given number of
2690
 * nanoseconds on an object becoming unbusy. Since the wait itself does so
2691
 * without holding struct_mutex the object may become re-busied before this
2692
 * function completes. A similar but shorter race condition exists in the busy
2693
 * ioctl
2694
 */
4246 Serge 2695
int
2696
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2697
{
5060 serge 2698
	struct drm_i915_private *dev_priv = dev->dev_private;
4246 Serge 2699
	struct drm_i915_gem_wait *args = data;
2700
	struct drm_i915_gem_object *obj;
6084 serge 2701
	struct drm_i915_gem_request *req[I915_NUM_RINGS];
4246 Serge 2702
	unsigned reset_counter;
6084 serge 2703
	int i, n = 0;
2704
	int ret;
2352 Serge 2705
 
5354 serge 2706
	if (args->flags != 0)
2707
		return -EINVAL;
2708
 
4246 Serge 2709
	ret = i915_mutex_lock_interruptible(dev);
2710
	if (ret)
2711
		return ret;
2352 Serge 2712
 
4246 Serge 2713
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
2714
	if (&obj->base == NULL) {
2715
		mutex_unlock(&dev->struct_mutex);
2716
		return -ENOENT;
2717
	}
2352 Serge 2718
 
4246 Serge 2719
	/* Need to make sure the object gets inactive eventually. */
2720
	ret = i915_gem_object_flush_active(obj);
2721
	if (ret)
2722
		goto out;
2352 Serge 2723
 
6084 serge 2724
	if (!obj->active)
2725
		goto out;
2352 Serge 2726
 
4246 Serge 2727
	/* Do this after OLR check to make sure we make forward progress polling
6084 serge 2728
	 * on this IOCTL with a timeout == 0 (like busy ioctl)
4246 Serge 2729
	 */
6084 serge 2730
	if (args->timeout_ns == 0) {
4246 Serge 2731
		ret = -ETIME;
2732
		goto out;
2733
	}
2352 Serge 2734
 
4246 Serge 2735
	drm_gem_object_unreference(&obj->base);
2736
	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
6084 serge 2737
 
2738
	for (i = 0; i < I915_NUM_RINGS; i++) {
2739
		if (obj->last_read_req[i] == NULL)
2740
			continue;
2741
 
2742
		req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
2743
	}
2744
 
4246 Serge 2745
	mutex_unlock(&dev->struct_mutex);
2352 Serge 2746
 
6084 serge 2747
	for (i = 0; i < n; i++) {
2748
		if (ret == 0)
2749
			ret = __i915_wait_request(req[i], reset_counter, true,
2750
						  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
2751
						  file->driver_priv);
2752
		i915_gem_request_unreference__unlocked(req[i]);
2753
	}
2754
	return ret;
3243 Serge 2755
 
4246 Serge 2756
out:
2757
	drm_gem_object_unreference(&obj->base);
2758
	mutex_unlock(&dev->struct_mutex);
2759
	return ret;
2760
}
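
/* Editor's note: illustrative userspace sketch, not part of the driver.  It
 * exercises the wait ioctl above; a timeout_ns of 0 turns the call into a
 * pure busy check, as described in the comment block.  The example_* name
 * and the libdrm plumbing are assumptions about the caller.
 */
#if 0	/* userspace-side example, kept out of the kernel build */
#include <stdint.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int example_bo_wait(int fd, uint32_t handle, int64_t timeout_ns)
{
	struct drm_i915_gem_wait wait = {
		.bo_handle  = handle,
		.flags      = 0,
		.timeout_ns = timeout_ns,	/* 0 => busy-ioctl semantics */
	};

	return drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
}
#endif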
3243 Serge 2761
 
6084 serge 2762
static int
2763
__i915_gem_object_sync(struct drm_i915_gem_object *obj,
2764
		       struct intel_engine_cs *to,
2765
		       struct drm_i915_gem_request *from_req,
2766
		       struct drm_i915_gem_request **to_req)
2767
{
2768
	struct intel_engine_cs *from;
2769
	int ret;
2770
 
2771
	from = i915_gem_request_get_ring(from_req);
2772
	if (to == from)
2773
		return 0;
2774
 
2775
	if (i915_gem_request_completed(from_req, true))
2776
		return 0;
2777
 
2778
	if (!i915_semaphore_is_enabled(obj->base.dev)) {
2779
		struct drm_i915_private *i915 = to_i915(obj->base.dev);
2780
		ret = __i915_wait_request(from_req,
2781
					  atomic_read(&i915->gpu_error.reset_counter),
2782
					  i915->mm.interruptible,
2783
					  NULL,
2784
					  &i915->rps.semaphores);
2785
		if (ret)
2786
			return ret;
2787
 
2788
		i915_gem_object_retire_request(obj, from_req);
2789
	} else {
2790
		int idx = intel_ring_sync_index(from, to);
2791
		u32 seqno = i915_gem_request_get_seqno(from_req);
2792
 
2793
		WARN_ON(!to_req);
2794
 
2795
		if (seqno <= from->semaphore.sync_seqno[idx])
2796
			return 0;
2797
 
2798
		if (*to_req == NULL) {
2799
			ret = i915_gem_request_alloc(to, to->default_context, to_req);
2800
			if (ret)
2801
				return ret;
2802
		}
2803
 
2804
		trace_i915_gem_ring_sync_to(*to_req, from, from_req);
2805
		ret = to->semaphore.sync_to(*to_req, from, seqno);
2806
		if (ret)
2807
			return ret;
2808
 
2809
		/* We use last_read_req because sync_to()
2810
		 * might have just caused seqno wrap under
2811
		 * the radar.
2812
		 */
2813
		from->semaphore.sync_seqno[idx] =
2814
			i915_gem_request_get_seqno(obj->last_read_req[from->id]);
2815
	}
2816
 
2817
	return 0;
2818
}
2819
 
2352 Serge 2820
/**
3031 serge 2821
 * i915_gem_object_sync - sync an object to a ring.
2822
 *
2823
 * @obj: object which may be in use on another ring.
2824
 * @to: ring we wish to use the object on. May be NULL.
6084 serge 2825
 * @to_req: request we wish to use the object for. See below.
2826
 *          This will be allocated and returned if a request is
2827
 *          required but not passed in.
3031 serge 2828
 *
2829
 * This code is meant to abstract object synchronization with the GPU.
2830
 * Calling with NULL implies synchronizing the object with the CPU
6084 serge 2831
 * rather than a particular GPU ring. Conceptually we serialise writes
2832
 * between engines inside the GPU. We only allow one engine to write
2833
 * into a buffer at any time, but multiple readers. To ensure each has
2834
 * a coherent view of memory, we must:
3031 serge 2835
 *
6084 serge 2836
 * - If there is an outstanding write request to the object, the new
2837
 *   request must wait for it to complete (either CPU or in hw, requests
2838
 *   on the same ring will be naturally ordered).
2839
 *
2840
 * - If we are a write request (pending_write_domain is set), the new
2841
 *   request must wait for outstanding read requests to complete.
2842
 *
2843
 * For CPU synchronisation (NULL to) no request is required. For syncing with
2844
 * rings to_req must be non-NULL. However, a request does not have to be
2845
 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a
2846
 * request will be allocated automatically and returned through *to_req. Note
2847
 * that it is not guaranteed that commands will be emitted (because the system
2848
 * might already be idle). Hence there is no need to create a request that
2849
 * might never have any work submitted. Note further that if a request is
2850
 * returned in *to_req, it is the responsibility of the caller to submit
2851
 * that request (after potentially adding more work to it).
2852
 *
3031 serge 2853
 * Returns 0 if successful, else propagates up the lower layer error.
2344 Serge 2854
 */
2855
int
3031 serge 2856
i915_gem_object_sync(struct drm_i915_gem_object *obj,
6084 serge 2857
		     struct intel_engine_cs *to,
2858
		     struct drm_i915_gem_request **to_req)
2344 Serge 2859
{
6084 serge 2860
	const bool readonly = obj->base.pending_write_domain == 0;
2861
	struct drm_i915_gem_request *req[I915_NUM_RINGS];
2862
	int ret, i, n;
2332 Serge 2863
 
6084 serge 2864
	if (!obj->active)
3031 serge 2865
		return 0;
2332 Serge 2866
 
6084 serge 2867
	if (to == NULL)
2868
		return i915_gem_object_wait_rendering(obj, readonly);
2332 Serge 2869
 
6084 serge 2870
	n = 0;
2871
	if (readonly) {
2872
		if (obj->last_write_req)
2873
			req[n++] = obj->last_write_req;
2874
	} else {
2875
		for (i = 0; i < I915_NUM_RINGS; i++)
2876
			if (obj->last_read_req[i])
2877
				req[n++] = obj->last_read_req[i];
2878
	}
2879
	for (i = 0; i < n; i++) {
2880
		ret = __i915_gem_object_sync(obj, to, req[i], to_req);
2881
		if (ret)
2882
			return ret;
2883
	}
3031 serge 2884
 
6084 serge 2885
	return 0;
2344 Serge 2886
}
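
/* Editor's note: hypothetical caller sketch (example_* is not driver code)
 * of the contract spelled out above: when syncing to a ring, *to_req may
 * come back freshly allocated, and the caller then owns its submission.
 */
static int example_sync_before_use(struct drm_i915_gem_object *obj,
				   struct intel_engine_cs *to,
				   struct drm_i915_gem_request **to_req)
{
	int ret;

	ret = i915_gem_object_sync(obj, to, to_req);
	if (ret)
		return ret;

	/* If a request was allocated into *to_req, it must still be submitted
	 * later (e.g. via i915_add_request()), even if no further work is
	 * added to it. */
	return 0;
}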
2332 Serge 2887
 
2344 Serge 2888
static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2889
{
2890
	u32 old_write_domain, old_read_domains;
2332 Serge 2891
 
2344 Serge 2892
	/* Force a pagefault for domain tracking on next user access */
6084 serge 2893
	i915_gem_release_mmap(obj);
2332 Serge 2894
 
2344 Serge 2895
	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2896
		return;
2332 Serge 2897
 
3480 Serge 2898
	/* Wait for any direct GTT access to complete */
2899
	mb();
2900
 
2344 Serge 2901
	old_read_domains = obj->base.read_domains;
2902
	old_write_domain = obj->base.write_domain;
2351 Serge 2903
 
2344 Serge 2904
	obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2905
	obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
2332 Serge 2906
 
2351 Serge 2907
	trace_i915_gem_object_change_domain(obj,
2908
					    old_read_domains,
2909
					    old_write_domain);
2344 Serge 2910
}
2332 Serge 2911
 
6084 serge 2912
static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
2344 Serge 2913
{
4104 Serge 2914
	struct drm_i915_gem_object *obj = vma->obj;
5060 serge 2915
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3480 Serge 2916
	int ret;
3263 Serge 2917
 
4104 Serge 2918
	if (list_empty(&vma->vma_link))
2344 Serge 2919
		return 0;
2332 Serge 2920
 
4560 Serge 2921
	if (!drm_mm_node_allocated(&vma->node)) {
2922
		i915_gem_vma_destroy(vma);
2923
		return 0;
2924
	}
2925
 
5060 serge 2926
	if (vma->pin_count)
3031 serge 2927
		return -EBUSY;
2332 Serge 2928
 
3243 Serge 2929
	BUG_ON(obj->pages == NULL);
3031 serge 2930
 
6084 serge 2931
	if (wait) {
2932
		ret = i915_gem_object_wait_rendering(obj, false);
2933
		if (ret)
2934
			return ret;
2935
	}
2332 Serge 2936
 
6084 serge 2937
	if (i915_is_ggtt(vma->vm) &&
2938
	    vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
2939
		i915_gem_object_finish_gtt(obj);
5354 serge 2940
 
6084 serge 2941
		/* release the fence reg _after_ flushing */
2942
		ret = i915_gem_object_put_fence(obj);
2943
		if (ret)
2944
			return ret;
5060 serge 2945
	}
2332 Serge 2946
 
4104 Serge 2947
	trace_i915_vma_unbind(vma);
2332 Serge 2948
 
6084 serge 2949
	vma->vm->unbind_vma(vma);
2950
	vma->bound = 0;
2332 Serge 2951
 
5060 serge 2952
	list_del_init(&vma->mm_list);
6084 serge 2953
	if (i915_is_ggtt(vma->vm)) {
2954
		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
2955
			obj->map_and_fenceable = false;
2956
		} else if (vma->ggtt_view.pages) {
2957
			sg_free_table(vma->ggtt_view.pages);
2958
			kfree(vma->ggtt_view.pages);
2959
		}
2960
		vma->ggtt_view.pages = NULL;
2961
	}
2332 Serge 2962
 
4104 Serge 2963
	drm_mm_remove_node(&vma->node);
2964
	i915_gem_vma_destroy(vma);
2965
 
2966
	/* Since the unbound list is global, only move to that list if
4560 Serge 2967
	 * no more VMAs exist. */
6084 serge 2968
	if (list_empty(&obj->vma_list))
4104 Serge 2969
		list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
2970
 
4560 Serge 2971
	/* And finally now the object is completely decoupled from this vma,
2972
	 * we can drop its hold on the backing storage and allow it to be
2973
	 * reaped by the shrinker.
2974
	 */
2975
	i915_gem_object_unpin_pages(obj);
2976
 
2344 Serge 2977
	return 0;
2978
}
2332 Serge 2979
 
6084 serge 2980
int i915_vma_unbind(struct i915_vma *vma)
2981
{
2982
	return __i915_vma_unbind(vma, true);
2983
}
2984
 
2985
int __i915_vma_unbind_no_wait(struct i915_vma *vma)
2986
{
2987
	return __i915_vma_unbind(vma, false);
2988
}
2989
 
3031 serge 2990
int i915_gpu_idle(struct drm_device *dev)
2344 Serge 2991
{
5060 serge 2992
	struct drm_i915_private *dev_priv = dev->dev_private;
2993
	struct intel_engine_cs *ring;
2344 Serge 2994
	int ret, i;
2332 Serge 2995
 
2344 Serge 2996
	/* Flush everything onto the inactive list. */
3031 serge 2997
	for_each_ring(ring, dev_priv, i) {
5354 serge 2998
		if (!i915.enable_execlists) {
6084 serge 2999
			struct drm_i915_gem_request *req;
3031 serge 3000
 
6084 serge 3001
			ret = i915_gem_request_alloc(ring, ring->default_context, &req);
2352 Serge 3002
			if (ret)
3003
				return ret;
2344 Serge 3004
 
6084 serge 3005
			ret = i915_switch_context(req);
3006
			if (ret) {
3007
				i915_gem_request_cancel(req);
3008
				return ret;
3009
			}
2344 Serge 3010
 
6084 serge 3011
			i915_add_request_no_flush(req);
3012
		}
2332 Serge 3013
 
6084 serge 3014
		ret = intel_ring_idle(ring);
3031 serge 3015
		if (ret)
3016
			return ret;
3017
	}
2332 Serge 3018
 
6084 serge 3019
	WARN_ON(i915_verify_lists(dev));
3031 serge 3020
	return 0;
3021
}
2332 Serge 3022
 
5354 serge 3023
static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
3031 serge 3024
				     unsigned long cache_level)
3025
{
5354 serge 3026
	struct drm_mm_node *gtt_space = &vma->node;
3031 serge 3027
	struct drm_mm_node *other;
2332 Serge 3028
 
5354 serge 3029
	/*
3030
	 * On some machines we have to be careful when putting differing types
3031
	 * of snoopable memory together to avoid the prefetcher crossing memory
3032
	 * domains and dying. During vm initialisation, we decide whether or not
3033
	 * these constraints apply and set the drm_mm.color_adjust
3034
	 * appropriately.
3031 serge 3035
	 */
5354 serge 3036
	if (vma->vm->mm.color_adjust == NULL)
3031 serge 3037
		return true;
2332 Serge 3038
 
4104 Serge 3039
	if (!drm_mm_node_allocated(gtt_space))
3031 serge 3040
		return true;
2332 Serge 3041
 
3031 serge 3042
	if (list_empty(&gtt_space->node_list))
3043
		return true;
2332 Serge 3044
 
3031 serge 3045
	other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3046
	if (other->allocated && !other->hole_follows && other->color != cache_level)
3047
		return false;
2344 Serge 3048
 
3031 serge 3049
	other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3050
	if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3051
		return false;
2344 Serge 3052
 
3031 serge 3053
	return true;
3054
}
2344 Serge 3055
 
2332 Serge 3056
/**
6084 serge 3057
 * Finds free space in the GTT aperture and binds the object or a view of it
3058
 * there.
2332 Serge 3059
 */
5060 serge 3060
static struct i915_vma *
4104 Serge 3061
i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
3062
			   struct i915_address_space *vm,
6084 serge 3063
			   const struct i915_ggtt_view *ggtt_view,
3064
			   unsigned alignment,
5060 serge 3065
			   uint64_t flags)
2332 Serge 3066
{
3067
	struct drm_device *dev = obj->base.dev;
5060 serge 3068
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 3069
	u32 fence_alignment, unfenced_alignment;
3070
	u32 search_flag, alloc_flag;
3071
	u64 start, end;
3072
	u64 size, fence_size;
4104 Serge 3073
	struct i915_vma *vma;
2332 Serge 3074
	int ret;
2326 Serge 3075
 
6084 serge 3076
	if (i915_is_ggtt(vm)) {
3077
		u32 view_size;
2332 Serge 3078
 
6084 serge 3079
		if (WARN_ON(!ggtt_view))
3080
			return ERR_PTR(-EINVAL);
3081
 
3082
		view_size = i915_ggtt_view_size(obj, ggtt_view);
3083
 
3084
		fence_size = i915_gem_get_gtt_size(dev,
3085
						   view_size,
3086
						   obj->tiling_mode);
3087
		fence_alignment = i915_gem_get_gtt_alignment(dev,
3088
							     view_size,
3089
							     obj->tiling_mode,
3090
							     true);
3091
		unfenced_alignment = i915_gem_get_gtt_alignment(dev,
3092
								view_size,
3093
								obj->tiling_mode,
3094
								false);
3095
		size = flags & PIN_MAPPABLE ? fence_size : view_size;
3096
	} else {
3097
		fence_size = i915_gem_get_gtt_size(dev,
3098
						   obj->base.size,
3099
						   obj->tiling_mode);
3100
		fence_alignment = i915_gem_get_gtt_alignment(dev,
3101
							     obj->base.size,
3102
							     obj->tiling_mode,
3103
							     true);
3104
		unfenced_alignment =
3105
			i915_gem_get_gtt_alignment(dev,
3106
						   obj->base.size,
3107
						   obj->tiling_mode,
3108
						   false);
3109
		size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
3110
	}
3111
 
3112
	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
3113
	end = vm->total;
3114
	if (flags & PIN_MAPPABLE)
3115
		end = min_t(u64, end, dev_priv->gtt.mappable_end);
3116
	if (flags & PIN_ZONE_4G)
3117
		end = min_t(u64, end, (1ULL << 32));
3118
 
2332 Serge 3119
	if (alignment == 0)
5060 serge 3120
		alignment = flags & PIN_MAPPABLE ? fence_alignment :
2332 Serge 3121
						unfenced_alignment;
5060 serge 3122
	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
6084 serge 3123
		DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
3124
			  ggtt_view ? ggtt_view->type : 0,
3125
			  alignment);
5060 serge 3126
		return ERR_PTR(-EINVAL);
2332 Serge 3127
	}
3128
 
6084 serge 3129
	/* If binding the object/GGTT view requires more space than the entire
3130
	 * aperture has, reject it early before evicting everything in a vain
3131
	 * attempt to find space.
2332 Serge 3132
	 */
6084 serge 3133
	if (size > end) {
3134
		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
3135
			  ggtt_view ? ggtt_view->type : 0,
3136
			  size,
5060 serge 3137
			  flags & PIN_MAPPABLE ? "mappable" : "total",
3138
			  end);
3139
		return ERR_PTR(-E2BIG);
2332 Serge 3140
	}
3141
 
3031 serge 3142
	ret = i915_gem_object_get_pages(obj);
3143
	if (ret)
5060 serge 3144
		return ERR_PTR(ret);
3031 serge 3145
 
3243 Serge 3146
	i915_gem_object_pin_pages(obj);
3147
 
6084 serge 3148
	vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
3149
			  i915_gem_obj_lookup_or_create_vma(obj, vm);
3150
 
5060 serge 3151
	if (IS_ERR(vma))
4104 Serge 3152
		goto err_unpin;
3243 Serge 3153
 
6084 serge 3154
	if (flags & PIN_HIGH) {
3155
		search_flag = DRM_MM_SEARCH_BELOW;
3156
		alloc_flag = DRM_MM_CREATE_TOP;
3157
	} else {
3158
		search_flag = DRM_MM_SEARCH_DEFAULT;
3159
		alloc_flag = DRM_MM_CREATE_DEFAULT;
3160
	}
3161
 
4104 Serge 3162
search_free:
3163
	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
3164
						  size, alignment,
5060 serge 3165
						  obj->cache_level,
3166
						  start, end,
6084 serge 3167
						  search_flag,
3168
						  alloc_flag);
3243 Serge 3169
	if (ret) {
2332 Serge 3170
 
4104 Serge 3171
		goto err_free_vma;
2332 Serge 3172
	}
5354 serge 3173
	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
4104 Serge 3174
		ret = -EINVAL;
3175
		goto err_remove_node;
3031 serge 3176
	}
2332 Serge 3177
 
6084 serge 3178
	trace_i915_vma_bind(vma, flags);
3179
	ret = i915_vma_bind(vma, obj->cache_level, flags);
4104 Serge 3180
	if (ret)
3181
		goto err_remove_node;
2332 Serge 3182
 
4104 Serge 3183
	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
3184
	list_add_tail(&vma->mm_list, &vm->inactive_list);
2332 Serge 3185
 
5060 serge 3186
	return vma;
4104 Serge 3187
 
3188
err_remove_node:
3189
	drm_mm_remove_node(&vma->node);
3190
err_free_vma:
3191
	i915_gem_vma_destroy(vma);
5060 serge 3192
	vma = ERR_PTR(ret);
4104 Serge 3193
err_unpin:
3194
	i915_gem_object_unpin_pages(obj);
5060 serge 3195
	return vma;
2332 Serge 3196
}
3197
 
4104 Serge 3198
bool
3199
i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3200
			bool force)
2332 Serge 3201
{
3202
	/* If we don't have a page list set up, then we're not pinned
3203
	 * to GPU, and we can ignore the cache flush because it'll happen
3204
	 * again at bind time.
3205
	 */
3243 Serge 3206
	if (obj->pages == NULL)
4104 Serge 3207
		return false;
2332 Serge 3208
 
3480 Serge 3209
	/*
3210
	 * Stolen memory is always coherent with the GPU as it is explicitly
3211
	 * marked as wc by the system, or the system is cache-coherent.
3212
	 */
5354 serge 3213
	if (obj->stolen || obj->phys_handle)
4104 Serge 3214
		return false;
3480 Serge 3215
 
2332 Serge 3216
	/* If the GPU is snooping the contents of the CPU cache,
3217
	 * we do not need to manually clear the CPU cache lines.  However,
3218
	 * the caches are only snooped when the render cache is
3219
	 * flushed/invalidated.  As we always have to emit invalidations
3220
	 * and flushes when moving into and out of the RENDER domain, correct
3221
	 * snooping behaviour occurs naturally as the result of our domain
3222
	 * tracking.
3223
	 */
6084 serge 3224
	if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3225
		obj->cache_dirty = true;
4104 Serge 3226
		return false;
6084 serge 3227
	}
2332 Serge 3228
 
4293 Serge 3229
	trace_i915_gem_object_clflush(obj);
3230
	drm_clflush_sg(obj->pages);
6084 serge 3231
	obj->cache_dirty = false;
2344 Serge 3232
 
4104 Serge 3233
	return true;
2332 Serge 3234
}
3235
 
2344 Serge 3236
/** Flushes the GTT write domain for the object if it's dirty. */
3237
static void
3238
i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3239
{
3240
	uint32_t old_write_domain;
2332 Serge 3241
 
2344 Serge 3242
	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3243
		return;
2332 Serge 3244
 
2344 Serge 3245
	/* No actual flushing is required for the GTT write domain.  Writes
3246
	 * to it immediately go to main memory as far as we know, so there's
3247
	 * no chipset flush.  It also doesn't land in render cache.
3248
	 *
3249
	 * However, we do have to enforce the order so that all writes through
3250
	 * the GTT land before any writes to the device, such as updates to
3251
	 * the GATT itself.
3252
	 */
3253
	wmb();
2332 Serge 3254
 
2344 Serge 3255
	old_write_domain = obj->base.write_domain;
3256
	obj->base.write_domain = 0;
2332 Serge 3257
 
6084 serge 3258
	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
5354 serge 3259
 
2351 Serge 3260
	trace_i915_gem_object_change_domain(obj,
3261
					    obj->base.read_domains,
3262
					    old_write_domain);
2344 Serge 3263
}
2332 Serge 3264
 
3265
/** Flushes the CPU write domain for the object if it's dirty. */
2326 Serge 3266
static void
6084 serge 3267
i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
2332 Serge 3268
{
3269
	uint32_t old_write_domain;
3270
 
3271
	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
3272
		return;
3273
 
6084 serge 3274
	if (i915_gem_clflush_object(obj, obj->pin_display))
3275
		i915_gem_chipset_flush(obj->base.dev);
4104 Serge 3276
 
2332 Serge 3277
	old_write_domain = obj->base.write_domain;
3278
	obj->base.write_domain = 0;
3279
 
6084 serge 3280
	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
5354 serge 3281
 
2351 Serge 3282
	trace_i915_gem_object_change_domain(obj,
3283
					    obj->base.read_domains,
3284
					    old_write_domain);
2332 Serge 3285
}
3286
 
3287
/**
3288
 * Moves a single object to the GTT read, and possibly write domain.
3289
 *
3290
 * This function returns when the move is complete, including waiting on
3291
 * flushes to occur.
3292
 */
3293
int
3294
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3295
{
3296
	uint32_t old_write_domain, old_read_domains;
6084 serge 3297
	struct i915_vma *vma;
2332 Serge 3298
	int ret;
3299
 
3300
	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3301
		return 0;
3302
 
3031 serge 3303
	ret = i915_gem_object_wait_rendering(obj, !write);
6084 serge 3304
	if (ret)
3305
		return ret;
2332 Serge 3306
 
6084 serge 3307
	/* Flush and acquire obj->pages so that we are coherent through
3308
	 * direct access in memory with previous cached writes through
3309
	 * shmemfs and that our cache domain tracking remains valid.
3310
	 * For example, if the obj->filp was moved to swap without us
3311
	 * being notified and releasing the pages, we would mistakenly
3312
	 * continue to assume that the obj remained out of the CPU cached
3313
	 * domain.
3314
	 */
3315
	ret = i915_gem_object_get_pages(obj);
3316
	if (ret)
3317
		return ret;
2332 Serge 3318
 
6084 serge 3319
	i915_gem_object_flush_cpu_write_domain(obj);
3320
 
3480 Serge 3321
	/* Serialise direct access to this object with the barriers for
3322
	 * coherent writes from the GPU, by effectively invalidating the
3323
	 * GTT domain upon first access.
3324
	 */
3325
	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3326
		mb();
3327
 
2332 Serge 3328
	old_write_domain = obj->base.write_domain;
3329
	old_read_domains = obj->base.read_domains;
3330
 
3331
	/* It should now be out of any other write domains, and we can update
3332
	 * the domain values for our changes.
3333
	 */
3334
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3335
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3336
	if (write) {
3337
		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3338
		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3339
		obj->dirty = 1;
3340
	}
3341
 
2351 Serge 3342
	trace_i915_gem_object_change_domain(obj,
3343
					    old_read_domains,
3344
					    old_write_domain);
3345
 
3031 serge 3346
	/* And bump the LRU for this access */
6084 serge 3347
	vma = i915_gem_obj_to_ggtt(obj);
3348
	if (vma && drm_mm_node_allocated(&vma->node) && !obj->active)
3349
		list_move_tail(&vma->mm_list,
3350
			       &to_i915(obj->base.dev)->gtt.base.inactive_list);
3031 serge 3351
 
2332 Serge 3352
	return 0;
3353
}
3354
 
6084 serge 3355
/**
3356
 * Changes the cache-level of an object across all VMA.
3357
 *
3358
 * After this function returns, the object will be in the new cache-level
3359
 * across all GTT and the contents of the backing storage will be coherent,
3360
 * with respect to the new cache-level. In order to keep the backing storage
3361
 * coherent for all users, we only allow a single cache level to be set
3362
 * globally on the object and prevent it from being changed whilst the
3363
 * hardware is reading from the object. That is, if the object is currently
3364
 * on the scanout it will be set to uncached (or equivalent display
3365
 * cache coherency) and all non-MOCS GPU access will also be uncached so
3366
 * that all direct access to the scanout remains coherent.
3367
 */
2335 Serge 3368
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3369
				    enum i915_cache_level cache_level)
3370
{
3031 serge 3371
	struct drm_device *dev = obj->base.dev;
5060 serge 3372
	struct i915_vma *vma, *next;
6084 serge 3373
	bool bound = false;
3374
	int ret = 0;
2332 Serge 3375
 
2335 Serge 3376
	if (obj->cache_level == cache_level)
6084 serge 3377
		goto out;
2332 Serge 3378
 
6084 serge 3379
	/* Inspect the list of currently bound VMA and unbind any that would
3380
	 * be invalid given the new cache-level. This is principally to
3381
	 * catch the issue of the CS prefetch crossing page boundaries and
3382
	 * reading an invalid PTE on older architectures.
3383
	 */
3384
	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
3385
		if (!drm_mm_node_allocated(&vma->node))
3386
			continue;
2332 Serge 3387
 
6084 serge 3388
		if (vma->pin_count) {
3389
			DRM_DEBUG("can not change the cache level of pinned objects\n");
3390
			return -EBUSY;
3391
		}
3392
 
5354 serge 3393
		if (!i915_gem_valid_gtt_space(vma, cache_level)) {
4104 Serge 3394
			ret = i915_vma_unbind(vma);
6084 serge 3395
			if (ret)
3396
				return ret;
3397
		} else
3398
			bound = true;
3031 serge 3399
	}
3400
 
6084 serge 3401
	/* We can reuse the existing drm_mm nodes but need to change the
3402
	 * cache-level on the PTE. We could simply unbind them all and
3403
	 * rebind with the correct cache-level on next use. However since
3404
	 * we already have a valid slot, dma mapping, pages etc, we may as well
3405
	 * rewrite the PTE in the belief that doing so tramples upon less
3406
	 * state and so involves less work.
3407
	 */
3408
	if (bound) {
3409
		/* Before we change the PTE, the GPU must not be accessing it.
3410
		 * If we wait upon the object, we know that all the bound
3411
		 * VMA are no longer active.
3412
		 */
3413
		ret = i915_gem_object_wait_rendering(obj, false);
2335 Serge 3414
		if (ret)
3415
			return ret;
2332 Serge 3416
 
6084 serge 3417
		if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
3418
			/* Access to snoopable pages through the GTT is
3419
			 * incoherent and on some machines causes a hard
3420
			 * lockup. Relinquish the CPU mmapping to force
3421
			 * userspace to refault in the pages and we can
3422
			 * then double check if the GTT mapping is still
3423
			 * valid for that pointer access.
3424
			 */
3425
			i915_gem_release_mmap(obj);
2332 Serge 3426
 
6084 serge 3427
			/* As we no longer need a fence for GTT access,
3428
			 * we can relinquish it now (and so prevent having
3429
			 * to steal a fence from someone else on the next
3430
			 * fence request). Note GPU activity would have
3431
			 * dropped the fence as all snoopable access is
3432
			 * supposed to be linear.
3433
			 */
2335 Serge 3434
			ret = i915_gem_object_put_fence(obj);
3435
			if (ret)
3436
				return ret;
6084 serge 3437
		} else {
3438
			/* We either have incoherent backing store and
3439
			 * so no GTT access or the architecture is fully
3440
			 * coherent. In such cases, existing GTT mmaps
3441
			 * ignore the cache bit in the PTE and we can
3442
			 * rewrite it without confusing the GPU or having
3443
			 * to force userspace to fault back in its mmaps.
3444
			 */
3445
		}
2332 Serge 3446
 
6084 serge 3447
		list_for_each_entry(vma, &obj->vma_list, vma_link) {
3448
			if (!drm_mm_node_allocated(&vma->node))
3449
				continue;
3450
 
3451
			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3452
			if (ret)
3453
				return ret;
3454
		}
2335 Serge 3455
	}
2332 Serge 3456
 
4104 Serge 3457
	list_for_each_entry(vma, &obj->vma_list, vma_link)
3458
		vma->node.color = cache_level;
3459
	obj->cache_level = cache_level;
3460
 
6084 serge 3461
out:
3462
	/* Flush the dirty CPU caches to the backing storage so that the
3463
	 * object is now coherent at its new cache level (with respect
3464
	 * to the access domain).
3465
	 */
3466
	if (obj->cache_dirty &&
3467
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
3468
	    cpu_write_needs_clflush(obj)) {
3469
		if (i915_gem_clflush_object(obj, true))
3470
			i915_gem_chipset_flush(obj->base.dev);
3471
	}
2332 Serge 3472
 
2335 Serge 3473
	return 0;
3474
}
2332 Serge 3475
 
3260 Serge 3476
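/* Report the caching mode of a GEM object back to userspace:
 * LLC and L3_LLC map to CACHED, WT to DISPLAY, anything else to NONE.
 */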
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3477
			       struct drm_file *file)
3478
{
3479
	struct drm_i915_gem_caching *args = data;
3480
	struct drm_i915_gem_object *obj;
3481
 
3482
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
6084 serge 3483
	if (&obj->base == NULL)
3484
		return -ENOENT;
3260 Serge 3485
 
4104 Serge 3486
	switch (obj->cache_level) {
3487
	case I915_CACHE_LLC:
3488
	case I915_CACHE_L3_LLC:
3489
		args->caching = I915_CACHING_CACHED;
3490
		break;
3260 Serge 3491
 
4104 Serge 3492
	case I915_CACHE_WT:
3493
		args->caching = I915_CACHING_DISPLAY;
3494
		break;
3495
 
3496
	default:
3497
		args->caching = I915_CACHING_NONE;
3498
		break;
3499
	}
3500
 
6084 serge 3501
	drm_gem_object_unreference_unlocked(&obj->base);
3502
	return 0;
3260 Serge 3503
}
3504
 
3505
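/* Change the caching mode of a GEM object on behalf of userspace:
 * translate the uAPI value into an i915_cache_level and apply it via
 * i915_gem_object_set_cache_level() under struct_mutex.
 */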
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3506
			       struct drm_file *file)
3507
{
6084 serge 3508
	struct drm_i915_private *dev_priv = dev->dev_private;
3260 Serge 3509
	struct drm_i915_gem_caching *args = data;
3510
	struct drm_i915_gem_object *obj;
3511
	enum i915_cache_level level;
3512
	int ret;
3513
 
3514
	switch (args->caching) {
3515
	case I915_CACHING_NONE:
3516
		level = I915_CACHE_NONE;
3517
		break;
3518
	case I915_CACHING_CACHED:
6084 serge 3519
		/*
3520
		 * Due to a HW issue on BXT A stepping, GPU stores via a
3521
		 * snooped mapping may leave stale data in a corresponding CPU
3522
		 * cacheline, whereas normally such cachelines would get
3523
		 * invalidated.
3524
		 */
3525
		if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)
3526
			return -ENODEV;
3527
 
3260 Serge 3528
		level = I915_CACHE_LLC;
3529
		break;
4104 Serge 3530
	case I915_CACHING_DISPLAY:
3531
		level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
3532
		break;
3260 Serge 3533
	default:
3534
		return -EINVAL;
3535
	}
3536
 
6084 serge 3537
	intel_runtime_pm_get(dev_priv);
3538
 
3260 Serge 3539
	ret = i915_mutex_lock_interruptible(dev);
3540
	if (ret)
6084 serge 3541
		goto rpm_put;
3260 Serge 3542
 
3543
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3544
	if (&obj->base == NULL) {
3545
		ret = -ENOENT;
3546
		goto unlock;
3547
	}
3548
 
3549
	ret = i915_gem_object_set_cache_level(obj, level);
3550
 
3551
	drm_gem_object_unreference(&obj->base);
3552
unlock:
3553
	mutex_unlock(&dev->struct_mutex);
6084 serge 3554
rpm_put:
3555
	intel_runtime_pm_put(dev_priv);
3556
 
3260 Serge 3557
	return ret;
3558
}
3559
 
2335 Serge 3560
/*
3561
 * Prepare buffer for display plane (scanout, cursors, etc).
3562
 * Can be called from an uninterruptible phase (modesetting) and allows
3563
 * any flushes to be pipelined (for pageflips).
3564
 */
3565
int
3566
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3567
				     u32 alignment,
6084 serge 3568
				     struct intel_engine_cs *pipelined,
3569
				     struct drm_i915_gem_request **pipelined_request,
3570
				     const struct i915_ggtt_view *view)
2335 Serge 3571
{
3572
	u32 old_read_domains, old_write_domain;
3573
	int ret;
2332 Serge 3574
 
6084 serge 3575
	ret = i915_gem_object_sync(obj, pipelined, pipelined_request);
2335 Serge 3576
	if (ret)
3577
		return ret;
2332 Serge 3578
 
4104 Serge 3579
	/* Mark the pin_display early so that we account for the
3580
	 * display coherency whilst setting up the cache domains.
3581
	 */
6084 serge 3582
	obj->pin_display++;
4104 Serge 3583
 
2335 Serge 3584
	/* The display engine is not coherent with the LLC cache on gen6.  As
3585
	 * a result, we make sure that the pinning that is about to occur is
3586
	 * done with uncached PTEs. This is lowest common denominator for all
3587
	 * chipsets.
3588
	 *
3589
	 * However for gen6+, we could do better by using the GFDT bit instead
3590
	 * of uncaching, which would allow us to flush all the LLC-cached data
3591
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3592
	 */
4104 Serge 3593
	ret = i915_gem_object_set_cache_level(obj,
3594
					      HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
2360 Serge 3595
	if (ret)
4104 Serge 3596
		goto err_unpin_display;
2332 Serge 3597
 
2335 Serge 3598
	/* As the user may map the buffer once pinned in the display plane
3599
	 * (e.g. libkms for the bootup splash), we have to ensure that we
3600
	 * always use map_and_fenceable for all scanout buffers.
3601
	 */
6084 serge 3602
	ret = i915_gem_object_ggtt_pin(obj, view, alignment,
3603
				       view->type == I915_GGTT_VIEW_NORMAL ?
3604
				       PIN_MAPPABLE : 0);
2335 Serge 3605
	if (ret)
4104 Serge 3606
		goto err_unpin_display;
2332 Serge 3607
 
6084 serge 3608
	i915_gem_object_flush_cpu_write_domain(obj);
2332 Serge 3609
 
2335 Serge 3610
	old_write_domain = obj->base.write_domain;
3611
	old_read_domains = obj->base.read_domains;
2332 Serge 3612
 
2335 Serge 3613
	/* It should now be out of any other write domains, and we can update
3614
	 * the domain values for our changes.
3615
	 */
3031 serge 3616
	obj->base.write_domain = 0;
2335 Serge 3617
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2332 Serge 3618
 
2351 Serge 3619
	trace_i915_gem_object_change_domain(obj,
3620
					    old_read_domains,
3621
					    old_write_domain);
2332 Serge 3622
 
2335 Serge 3623
	return 0;
4104 Serge 3624
 
3625
err_unpin_display:
6084 serge 3626
	obj->pin_display--;
4104 Serge 3627
	return ret;
2335 Serge 3628
}
2332 Serge 3629
 
4104 Serge 3630
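/* Drop the scanout pin taken by i915_gem_object_pin_to_display_plane()
 * for the given GGTT view.
 */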
void
6084 serge 3631
i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
3632
					 const struct i915_ggtt_view *view)
4104 Serge 3633
{
6084 serge 3634
	if (WARN_ON(obj->pin_display == 0))
3635
		return;
4104 Serge 3636
 
6084 serge 3637
	i915_gem_object_ggtt_unpin_view(obj, view);
2332 Serge 3638
 
6084 serge 3639
	obj->pin_display--;
2344 Serge 3640
}
2332 Serge 3641
 
2344 Serge 3642
/**
3643
 * Moves a single object to the CPU read, and possibly write domain.
3644
 *
3645
 * This function returns when the move is complete, including waiting on
3646
 * flushes to occur.
3647
 */
3031 serge 3648
int
2344 Serge 3649
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3650
{
3651
	uint32_t old_write_domain, old_read_domains;
3652
	int ret;
2332 Serge 3653
 
2344 Serge 3654
	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3655
		return 0;
2332 Serge 3656
 
3031 serge 3657
	ret = i915_gem_object_wait_rendering(obj, !write);
2344 Serge 3658
	if (ret)
3659
		return ret;
2332 Serge 3660
 
2344 Serge 3661
	i915_gem_object_flush_gtt_write_domain(obj);
2332 Serge 3662
 
2344 Serge 3663
	old_write_domain = obj->base.write_domain;
3664
	old_read_domains = obj->base.read_domains;
2332 Serge 3665
 
2344 Serge 3666
	/* Flush the CPU cache if it's still invalid. */
3667
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
4104 Serge 3668
		i915_gem_clflush_object(obj, false);
2332 Serge 3669
 
2344 Serge 3670
		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3671
	}
2332 Serge 3672
 
2344 Serge 3673
	/* It should now be out of any other write domains, and we can update
3674
	 * the domain values for our changes.
3675
	 */
3676
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2332 Serge 3677
 
2344 Serge 3678
	/* If we're writing through the CPU, then the GPU read domains will
3679
	 * need to be invalidated at next use.
3680
	 */
3681
	if (write) {
3682
		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3683
		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3684
	}
2332 Serge 3685
 
2351 Serge 3686
	trace_i915_gem_object_change_domain(obj,
3687
					    old_read_domains,
3688
					    old_write_domain);
2332 Serge 3689
 
2344 Serge 3690
	return 0;
3691
}
2332 Serge 3692
 
3031 serge 3693
/* Throttle our rendering by waiting until the ring has completed our requests
3694
 * emitted over 20 msec ago.
2344 Serge 3695
 *
3031 serge 3696
 * Note that if we were to use the current jiffies each time around the loop,
3697
 * we wouldn't escape the function with any frames outstanding if the time to
3698
 * render a frame was over 20ms.
3699
 *
3700
 * This should get us reasonable parallelism between CPU and GPU but also
3701
 * relatively low latency when blocking on a particular request to finish.
2344 Serge 3702
 */
3031 serge 3703
static int
3704
i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
2344 Serge 3705
{
3031 serge 3706
	struct drm_i915_private *dev_priv = dev->dev_private;
3707
	struct drm_i915_file_private *file_priv = file->driver_priv;
6084 serge 3708
	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
3709
	struct drm_i915_gem_request *request, *target = NULL;
3480 Serge 3710
	unsigned reset_counter;
3031 serge 3711
	int ret;
2332 Serge 3712
 
3480 Serge 3713
	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
3714
	if (ret)
3715
		return ret;
2332 Serge 3716
 
3480 Serge 3717
	ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
3718
	if (ret)
3719
		return ret;
3720
 
3031 serge 3721
	spin_lock(&file_priv->mm.lock);
3722
	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3723
		if (time_after_eq(request->emitted_jiffies, recent_enough))
3724
			break;
2332 Serge 3725
 
6084 serge 3726
		/*
3727
		 * Note that the request might not have been submitted yet,
3728
		 * in which case emitted_jiffies will be zero.
3729
		 */
3730
		if (!request->emitted_jiffies)
3731
			continue;
3732
 
3733
		target = request;
3031 serge 3734
	}
3480 Serge 3735
	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
6084 serge 3736
	if (target)
3737
		i915_gem_request_reference(target);
3031 serge 3738
	spin_unlock(&file_priv->mm.lock);
2332 Serge 3739
 
6084 serge 3740
	if (target == NULL)
3031 serge 3741
		return 0;
2332 Serge 3742
 
6084 serge 3743
	ret = __i915_wait_request(target, reset_counter, true, NULL, NULL);
3031 serge 3744
	if (ret == 0)
3745
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
2332 Serge 3746
 
6084 serge 3747
	i915_gem_request_unreference__unlocked(target);
3748
 
3031 serge 3749
	return ret;
2352 Serge 3750
}
2332 Serge 3751
 
5060 serge 3752
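/* Check whether an already-bound VMA violates the requested alignment,
 * mappability or offset-bias constraints and therefore needs rebinding.
 */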
static bool
3753
i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
3754
{
3755
	struct drm_i915_gem_object *obj = vma->obj;
3756
 
3757
	if (alignment &&
3758
	    vma->node.start & (alignment - 1))
3759
		return true;
3760
 
3761
	if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
3762
		return true;
3763
 
3764
	if (flags & PIN_OFFSET_BIAS &&
3765
	    vma->node.start < (flags & PIN_OFFSET_MASK))
3766
		return true;
3767
 
3768
	return false;
3769
}
3770
 
6084 serge 3771
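/* Recompute obj->map_and_fenceable: the node must be fence-sized and
 * fence-aligned, and must lie entirely within the mappable aperture.
 */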
void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
2332 Serge 3772
{
6084 serge 3773
	struct drm_i915_gem_object *obj = vma->obj;
3774
	bool mappable, fenceable;
3775
	u32 fence_size, fence_alignment;
3776
 
3777
	fence_size = i915_gem_get_gtt_size(obj->base.dev,
3778
					   obj->base.size,
3779
					   obj->tiling_mode);
3780
	fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
3781
						     obj->base.size,
3782
						     obj->tiling_mode,
3783
						     true);
3784
 
3785
	fenceable = (vma->node.size == fence_size &&
3786
		     (vma->node.start & (fence_alignment - 1)) == 0);
3787
 
3788
	mappable = (vma->node.start + fence_size <=
3789
		    to_i915(obj->base.dev)->gtt.mappable_end);
3790
 
3791
	obj->map_and_fenceable = mappable && fenceable;
3792
}
3793
 
3794
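/* Common pin path: look up the VMA for the given address space (or GGTT
 * view), bind it into the GTT if necessary and bump its pin count.
 */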
static int
3795
i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
3796
		       struct i915_address_space *vm,
3797
		       const struct i915_ggtt_view *ggtt_view,
3798
		       uint32_t alignment,
3799
		       uint64_t flags)
3800
{
5060 serge 3801
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
4104 Serge 3802
	struct i915_vma *vma;
5354 serge 3803
	unsigned bound;
2332 Serge 3804
	int ret;
3805
 
5060 serge 3806
	if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
3807
		return -ENODEV;
2332 Serge 3808
 
5060 serge 3809
	if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
3810
		return -EINVAL;
4104 Serge 3811
 
5354 serge 3812
	if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
3813
		return -EINVAL;
3814
 
6084 serge 3815
	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
3816
		return -EINVAL;
3817
 
3818
	vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
3819
			  i915_gem_obj_to_vma(obj, vm);
3820
 
3821
	if (IS_ERR(vma))
3822
		return PTR_ERR(vma);
3823
 
5060 serge 3824
	if (vma) {
3825
		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
3826
			return -EBUSY;
4104 Serge 3827
 
5060 serge 3828
		if (i915_vma_misplaced(vma, alignment, flags)) {
3829
			WARN(vma->pin_count,
6084 serge 3830
			     "bo is already pinned in %s with incorrect alignment:"
3831
			     " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
2332 Serge 3832
			     " obj->map_and_fenceable=%d\n",
6084 serge 3833
			     ggtt_view ? "ggtt" : "ppgtt",
3834
			     upper_32_bits(vma->node.start),
3835
			     lower_32_bits(vma->node.start),
3836
			     alignment,
5060 serge 3837
			     !!(flags & PIN_MAPPABLE),
2332 Serge 3838
			     obj->map_and_fenceable);
4104 Serge 3839
			ret = i915_vma_unbind(vma);
2332 Serge 3840
			if (ret)
3841
				return ret;
5060 serge 3842
 
3843
			vma = NULL;
2332 Serge 3844
		}
3845
	}
3846
 
5354 serge 3847
	bound = vma ? vma->bound : 0;
5060 serge 3848
	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
6084 serge 3849
		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
3850
						 flags);
5060 serge 3851
		if (IS_ERR(vma))
3852
			return PTR_ERR(vma);
6084 serge 3853
	} else {
3854
		ret = i915_vma_bind(vma, obj->cache_level, flags);
3855
		if (ret)
3856
			return ret;
2332 Serge 3857
	}
3858
 
6084 serge 3859
	if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
3860
	    (bound ^ vma->bound) & GLOBAL_BIND) {
3861
		__i915_vma_set_map_and_fenceable(vma);
3862
		WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
5354 serge 3863
	}
3864
 
5060 serge 3865
	vma->pin_count++;
2332 Serge 3866
	return 0;
3867
}
3868
 
6084 serge 3869
int
3870
i915_gem_object_pin(struct drm_i915_gem_object *obj,
3871
		    struct i915_address_space *vm,
3872
		    uint32_t alignment,
3873
		    uint64_t flags)
2344 Serge 3874
{
6084 serge 3875
	return i915_gem_object_do_pin(obj, vm,
3876
				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
3877
				      alignment, flags);
2344 Serge 3878
}
2332 Serge 3879
 
6084 serge 3880
int
3881
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3882
			 const struct i915_ggtt_view *view,
3883
			 uint32_t alignment,
3884
			 uint64_t flags)
5060 serge 3885
{
6084 serge 3886
	if (WARN_ONCE(!view, "no view specified"))
3887
		return -EINVAL;
5060 serge 3888
 
6084 serge 3889
	return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view,
3890
				      alignment, flags | PIN_GLOBAL);
5060 serge 3891
}
3892
 
3893
void
6084 serge 3894
i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
3895
				const struct i915_ggtt_view *view)
5060 serge 3896
{
6084 serge 3897
	struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
5060 serge 3898
 
6084 serge 3899
	BUG_ON(!vma);
3900
	WARN_ON(vma->pin_count == 0);
3901
	WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
2332 Serge 3902
 
6084 serge 3903
	--vma->pin_count;
3031 serge 3904
}
2332 Serge 3905
 
3031 serge 3906
int
3907
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3908
		    struct drm_file *file)
3909
{
3910
	struct drm_i915_gem_busy *args = data;
3911
	struct drm_i915_gem_object *obj;
3912
	int ret;
2332 Serge 3913
 
3031 serge 3914
	ret = i915_mutex_lock_interruptible(dev);
3915
	if (ret)
3916
		return ret;
2332 Serge 3917
 
5060 serge 3918
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3031 serge 3919
	if (&obj->base == NULL) {
3920
		ret = -ENOENT;
3921
		goto unlock;
3922
	}
2332 Serge 3923
 
3031 serge 3924
	/* Count all active objects as busy, even if they are currently not used
3925
	 * by the gpu. Users of this interface expect objects to eventually
3926
	 * become non-busy without any further actions, therefore emit any
3927
	 * necessary flushes here.
3928
	 */
3929
	ret = i915_gem_object_flush_active(obj);
6084 serge 3930
	if (ret)
3931
		goto unref;
2332 Serge 3932
 
6084 serge 3933
	BUILD_BUG_ON(I915_NUM_RINGS > 16);
3934
	args->busy = obj->active << 16;
3935
	if (obj->last_write_req)
3936
		args->busy |= obj->last_write_req->ring->id;
2332 Serge 3937
 
6084 serge 3938
unref:
3031 serge 3939
	drm_gem_object_unreference(&obj->base);
3940
unlock:
3941
	mutex_unlock(&dev->struct_mutex);
3942
	return ret;
3943
}
2332 Serge 3944
 
3031 serge 3945
int
3946
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3947
			struct drm_file *file_priv)
3948
{
3949
	return i915_gem_ring_throttle(dev, file_priv);
3950
}
2332 Serge 3951
 
3263 Serge 3952
#if 0
3953
 
3031 serge 3954
int
3955
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3956
		       struct drm_file *file_priv)
3957
{
5354 serge 3958
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 3959
	struct drm_i915_gem_madvise *args = data;
3960
	struct drm_i915_gem_object *obj;
3961
	int ret;
2332 Serge 3962
 
3031 serge 3963
	switch (args->madv) {
3964
	case I915_MADV_DONTNEED:
3965
	case I915_MADV_WILLNEED:
3966
	    break;
3967
	default:
3968
	    return -EINVAL;
3969
	}
2332 Serge 3970
 
3031 serge 3971
	ret = i915_mutex_lock_interruptible(dev);
3972
	if (ret)
3973
		return ret;
2332 Serge 3974
 
3031 serge 3975
	obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
3976
	if (&obj->base == NULL) {
3977
		ret = -ENOENT;
3978
		goto unlock;
3979
	}
2332 Serge 3980
 
5060 serge 3981
	if (i915_gem_obj_is_pinned(obj)) {
3031 serge 3982
		ret = -EINVAL;
3983
		goto out;
3984
	}
2332 Serge 3985
 
5354 serge 3986
	if (obj->pages &&
3987
	    obj->tiling_mode != I915_TILING_NONE &&
3988
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
3989
		if (obj->madv == I915_MADV_WILLNEED)
3990
			i915_gem_object_unpin_pages(obj);
3991
		if (args->madv == I915_MADV_WILLNEED)
3992
			i915_gem_object_pin_pages(obj);
3993
	}
3994
 
3031 serge 3995
	if (obj->madv != __I915_MADV_PURGED)
3996
		obj->madv = args->madv;
2332 Serge 3997
 
3031 serge 3998
	/* if the object is no longer attached, discard its backing storage */
6084 serge 3999
	if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
3031 serge 4000
		i915_gem_object_truncate(obj);
2332 Serge 4001
 
3031 serge 4002
	args->retained = obj->madv != __I915_MADV_PURGED;
2332 Serge 4003
 
3031 serge 4004
out:
4005
	drm_gem_object_unreference(&obj->base);
4006
unlock:
4007
	mutex_unlock(&dev->struct_mutex);
4008
	return ret;
4009
}
4010
#endif
2332 Serge 4011
 
3031 serge 4012
void i915_gem_object_init(struct drm_i915_gem_object *obj,
4013
			  const struct drm_i915_gem_object_ops *ops)
4014
{
6084 serge 4015
	int i;
4016
 
4104 Serge 4017
	INIT_LIST_HEAD(&obj->global_list);
6084 serge 4018
	for (i = 0; i < I915_NUM_RINGS; i++)
4019
		INIT_LIST_HEAD(&obj->ring_list[i]);
4104 Serge 4020
	INIT_LIST_HEAD(&obj->obj_exec_link);
4021
	INIT_LIST_HEAD(&obj->vma_list);
6084 serge 4022
	INIT_LIST_HEAD(&obj->batch_pool_link);
2332 Serge 4023
 
3031 serge 4024
	obj->ops = ops;
4025
 
4026
	obj->fence_reg = I915_FENCE_REG_NONE;
4027
	obj->madv = I915_MADV_WILLNEED;
4028
 
4029
	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
4030
}
4031
 
4032
static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
4033
	.get_pages = i915_gem_object_get_pages_gtt,
4034
	.put_pages = i915_gem_object_put_pages_gtt,
4035
};
4036
 
2332 Serge 4037
struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
4038
						  size_t size)
4039
{
4040
	struct drm_i915_gem_object *obj;
3031 serge 4041
	struct address_space *mapping;
3480 Serge 4042
	gfp_t mask;
2340 Serge 4043
 
3746 Serge 4044
	obj = i915_gem_object_alloc(dev);
2332 Serge 4045
	if (obj == NULL)
4046
		return NULL;
4047
 
4048
	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4104 Serge 4049
		i915_gem_object_free(obj);
2332 Serge 4050
		return NULL;
4051
	}
4052
 
4053
 
3031 serge 4054
	i915_gem_object_init(obj, &i915_gem_object_ops);
2332 Serge 4055
 
4056
	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4057
	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4058
 
3031 serge 4059
	if (HAS_LLC(dev)) {
4060
		/* On some devices, we can have the GPU use the LLC (the CPU
2332 Serge 4061
		 * cache) for about a 10% performance improvement
4062
		 * compared to uncached.  Graphics requests other than
4063
		 * display scanout are coherent with the CPU in
4064
		 * accessing this cache.  This means in this mode we
4065
		 * don't need to clflush on the CPU side, and on the
4066
		 * GPU side we only need to flush internal caches to
4067
		 * get data visible to the CPU.
4068
		 *
4069
		 * However, we maintain the display planes as UC, and so
4070
		 * need to rebind when first used as such.
4071
		 */
4072
		obj->cache_level = I915_CACHE_LLC;
4073
	} else
4074
		obj->cache_level = I915_CACHE_NONE;
4075
 
4560 Serge 4076
	trace_i915_gem_object_create(obj);
4077
 
2332 Serge 4078
	return obj;
4079
}
4080
 
6283 serge 4081
static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4082
{
4083
	/* If we are the last user of the backing storage (be it shmemfs
4084
	 * pages or stolen etc), we know that the pages are going to be
4085
	 * immediately released. In this case, we can then skip copying
4086
	 * back the contents from the GPU.
4087
	 */
4088
 
4089
	if (obj->madv != I915_MADV_WILLNEED)
4090
		return false;
4091
 
4092
	if (obj->base.filp == NULL)
4093
		return true;
4094
 
4095
//        printf("filp %p\n", obj->base.filp);
4096
	shmem_file_delete(obj->base.filp);
4097
	return true;
4098
}
4099
 
3031 serge 4100
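/* Final destruction of a GEM object: unbind every VMA, release the pages
 * and backing storage, then free the object itself.
 */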
void i915_gem_free_object(struct drm_gem_object *gem_obj)
2344 Serge 4101
{
3031 serge 4102
	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
2344 Serge 4103
	struct drm_device *dev = obj->base.dev;
5060 serge 4104
	struct drm_i915_private *dev_priv = dev->dev_private;
4104 Serge 4105
	struct i915_vma *vma, *next;
2332 Serge 4106
 
4560 Serge 4107
	intel_runtime_pm_get(dev_priv);
4108
 
3031 serge 4109
	trace_i915_gem_object_destroy(obj);
4110
 
5060 serge 4111
	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
4112
		int ret;
3031 serge 4113
 
5060 serge 4114
		vma->pin_count = 0;
4115
		ret = i915_vma_unbind(vma);
4104 Serge 4116
		if (WARN_ON(ret == -ERESTARTSYS)) {
6084 serge 4117
			bool was_interruptible;
3031 serge 4118
 
6084 serge 4119
			was_interruptible = dev_priv->mm.interruptible;
4120
			dev_priv->mm.interruptible = false;
3031 serge 4121
 
4104 Serge 4122
			WARN_ON(i915_vma_unbind(vma));
3031 serge 4123
 
6084 serge 4124
			dev_priv->mm.interruptible = was_interruptible;
4125
		}
2344 Serge 4126
	}
2332 Serge 4127
 
4104 Serge 4128
	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
4129
	 * before progressing. */
4130
	if (obj->stolen)
4131
		i915_gem_object_unpin_pages(obj);
4132
 
5060 serge 4133
	WARN_ON(obj->frontbuffer_bits);
4134
 
5354 serge 4135
	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
4136
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
4137
	    obj->tiling_mode != I915_TILING_NONE)
4138
		i915_gem_object_unpin_pages(obj);
4139
 
4104 Serge 4140
	if (WARN_ON(obj->pages_pin_count))
6084 serge 4141
		obj->pages_pin_count = 0;
6283 serge 4142
	if (discard_backing_storage(obj))
4143
		obj->madv = I915_MADV_DONTNEED;
3031 serge 4144
	i915_gem_object_put_pages(obj);
4145
//   i915_gem_object_free_mmap_offset(obj);
2332 Serge 4146
 
3243 Serge 4147
	BUG_ON(obj->pages);
2332 Serge 4148
 
6283 serge 4149
	if (obj->ops->release)
4150
		obj->ops->release(obj);
3031 serge 4151
 
2344 Serge 4152
	drm_gem_object_release(&obj->base);
4153
	i915_gem_info_remove_obj(dev_priv, obj->base.size);
2332 Serge 4154
 
2344 Serge 4155
	kfree(obj->bit_17);
4104 Serge 4156
	i915_gem_object_free(obj);
4560 Serge 4157
 
4158
	intel_runtime_pm_put(dev_priv);
2344 Serge 4159
}
2332 Serge 4160
 
4560 Serge 4161
struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
4104 Serge 4162
				     struct i915_address_space *vm)
4163
{
4560 Serge 4164
	struct i915_vma *vma;
6084 serge 4165
	list_for_each_entry(vma, &obj->vma_list, vma_link) {
4166
		if (i915_is_ggtt(vma->vm) &&
4167
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4168
			continue;
4560 Serge 4169
		if (vma->vm == vm)
4170
			return vma;
6084 serge 4171
	}
4172
	return NULL;
4173
}
4560 Serge 4174
 
6084 serge 4175
struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
4176
					   const struct i915_ggtt_view *view)
4177
{
4178
	struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
4179
	struct i915_vma *vma;
4180
 
4181
	if (WARN_ONCE(!view, "no view specified"))
4182
		return ERR_PTR(-EINVAL);
4183
 
4184
	list_for_each_entry(vma, &obj->vma_list, vma_link)
4185
		if (vma->vm == ggtt &&
4186
		    i915_ggtt_view_equal(&vma->ggtt_view, view))
4187
			return vma;
4560 Serge 4188
	return NULL;
4189
}
4190
 
4104 Serge 4191
void i915_gem_vma_destroy(struct i915_vma *vma)
4192
{
5354 serge 4193
	struct i915_address_space *vm = NULL;
4104 Serge 4194
	WARN_ON(vma->node.allocated);
4560 Serge 4195
 
4196
	/* Keep the vma as a placeholder in the execbuffer reservation lists */
4197
	if (!list_empty(&vma->exec_list))
4198
		return;
4199
 
5354 serge 4200
	vm = vma->vm;
4201
 
4202
	if (!i915_is_ggtt(vm))
4203
		i915_ppgtt_put(i915_vm_to_ppgtt(vm));
4204
 
4104 Serge 4205
	list_del(&vma->vma_link);
4560 Serge 4206
 
4104 Serge 4207
	kfree(vma);
4208
}
4209
 
6084 serge 4210
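/* Stop every engine via the legacy or execlists stop hook installed in
 * i915_gem_init().
 */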
static void
4211
i915_gem_stop_ringbuffers(struct drm_device *dev)
4212
{
4213
	struct drm_i915_private *dev_priv = dev->dev_private;
4214
	struct intel_engine_cs *ring;
4215
	int i;
4216
 
4217
	for_each_ring(ring, dev_priv, i)
4218
		dev_priv->gt.stop_ring(ring);
4219
}
4220
 
3031 serge 4221
#if 0
4222
int
4560 Serge 4223
i915_gem_suspend(struct drm_device *dev)
2344 Serge 4224
{
5060 serge 4225
	struct drm_i915_private *dev_priv = dev->dev_private;
4560 Serge 4226
	int ret = 0;
2332 Serge 4227
 
4560 Serge 4228
	mutex_lock(&dev->struct_mutex);
3031 serge 4229
	ret = i915_gpu_idle(dev);
4560 Serge 4230
	if (ret)
4231
		goto err;
4232
 
3031 serge 4233
	i915_gem_retire_requests(dev);
4234
 
5060 serge 4235
	i915_gem_stop_ringbuffers(dev);
4560 Serge 4236
	mutex_unlock(&dev->struct_mutex);
4237
 
6084 serge 4238
	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
3263 Serge 4239
	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
5060 serge 4240
	flush_delayed_work(&dev_priv->mm.idle_work);
3031 serge 4241
 
6084 serge 4242
	/* Assert that we successfully flushed all the work and
4243
	 * reset the GPU back to its idle, low power state.
4244
	 */
4245
	WARN_ON(dev_priv->mm.busy);
4246
 
3031 serge 4247
	return 0;
4560 Serge 4248
 
4249
err:
4250
	mutex_unlock(&dev->struct_mutex);
4251
	return ret;
2344 Serge 4252
}
3031 serge 4253
#endif
2332 Serge 4254
 
6084 serge 4255
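/* Replay the cached L3 remapping registers for one slice using
 * MI_LOAD_REGISTER_IMM commands on the given request's ring.
 */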
int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
3031 serge 4256
{
6084 serge 4257
	struct intel_engine_cs *ring = req->ring;
4560 Serge 4258
	struct drm_device *dev = ring->dev;
5060 serge 4259
	struct drm_i915_private *dev_priv = dev->dev_private;
4560 Serge 4260
	u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
4261
	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
4262
	int i, ret;
2332 Serge 4263
 
4560 Serge 4264
	if (!HAS_L3_DPF(dev) || !remap_info)
4265
		return 0;
2332 Serge 4266
 
6084 serge 4267
	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
4560 Serge 4268
	if (ret)
4269
		return ret;
2332 Serge 4270
 
4560 Serge 4271
	/*
4272
	 * Note: We do not worry about the concurrent register cacheline hang
4273
	 * here because no other code should access these registers other than
4274
	 * at initialization time.
4275
	 */
3031 serge 4276
	for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
4560 Serge 4277
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
4278
		intel_ring_emit(ring, reg_base + i);
4279
		intel_ring_emit(ring, remap_info[i/4]);
3031 serge 4280
	}
2332 Serge 4281
 
4560 Serge 4282
	intel_ring_advance(ring);
2332 Serge 4283
 
4560 Serge 4284
	return ret;
3031 serge 4285
}
2332 Serge 4286
 
3031 serge 4287
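/* Enable hardware surface swizzling on gen5+ when bit-6 swizzling has
 * been detected.
 */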
void i915_gem_init_swizzling(struct drm_device *dev)
4288
{
5060 serge 4289
	struct drm_i915_private *dev_priv = dev->dev_private;
2332 Serge 4290
 
3031 serge 4291
	if (INTEL_INFO(dev)->gen < 5 ||
4292
	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4293
		return;
2332 Serge 4294
 
3031 serge 4295
	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4296
				 DISP_TILE_SURFACE_SWIZZLING);
2332 Serge 4297
 
3031 serge 4298
	if (IS_GEN5(dev))
4299
		return;
2344 Serge 4300
 
3031 serge 4301
	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4302
	if (IS_GEN6(dev))
4303
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
3480 Serge 4304
	else if (IS_GEN7(dev))
4305
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4560 Serge 4306
	else if (IS_GEN8(dev))
4307
		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
3031 serge 4308
	else
3480 Serge 4309
		BUG();
3031 serge 4310
}
4311
 
5354 serge 4312
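/* Zero the control registers of a ring that this platform exposes but the
 * driver never uses (see the c3 note in i915_gem_init_hw()).
 */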
static void init_unused_ring(struct drm_device *dev, u32 base)
2332 Serge 4313
{
3480 Serge 4314
	struct drm_i915_private *dev_priv = dev->dev_private;
5354 serge 4315
 
4316
	I915_WRITE(RING_CTL(base), 0);
4317
	I915_WRITE(RING_HEAD(base), 0);
4318
	I915_WRITE(RING_TAIL(base), 0);
4319
	I915_WRITE(RING_START(base), 0);
4320
}
4321
 
4322
static void init_unused_rings(struct drm_device *dev)
4323
{
4324
	if (IS_I830(dev)) {
4325
		init_unused_ring(dev, PRB1_BASE);
4326
		init_unused_ring(dev, SRB0_BASE);
4327
		init_unused_ring(dev, SRB1_BASE);
4328
		init_unused_ring(dev, SRB2_BASE);
4329
		init_unused_ring(dev, SRB3_BASE);
4330
	} else if (IS_GEN2(dev)) {
4331
		init_unused_ring(dev, SRB0_BASE);
4332
		init_unused_ring(dev, SRB1_BASE);
4333
	} else if (IS_GEN3(dev)) {
4334
		init_unused_ring(dev, PRB1_BASE);
4335
		init_unused_ring(dev, PRB2_BASE);
4336
	}
4337
}
4338
 
4339
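/* Create the legacy ringbuffer for every engine present on this GPU,
 * unwinding the ones already initialised on failure.
 */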
int i915_gem_init_rings(struct drm_device *dev)
4340
{
4341
	struct drm_i915_private *dev_priv = dev->dev_private;
2332 Serge 4342
	int ret;
2351 Serge 4343
 
2332 Serge 4344
	ret = intel_init_render_ring_buffer(dev);
4345
	if (ret)
4346
		return ret;
4347
 
6084 serge 4348
	if (HAS_BSD(dev)) {
2332 Serge 4349
		ret = intel_init_bsd_ring_buffer(dev);
4350
		if (ret)
4351
			goto cleanup_render_ring;
4352
	}
4353
 
6084 serge 4354
	if (HAS_BLT(dev)) {
2332 Serge 4355
		ret = intel_init_blt_ring_buffer(dev);
4356
		if (ret)
4357
			goto cleanup_bsd_ring;
4358
	}
4359
 
4104 Serge 4360
	if (HAS_VEBOX(dev)) {
4361
		ret = intel_init_vebox_ring_buffer(dev);
4362
		if (ret)
4363
			goto cleanup_blt_ring;
4364
	}
4365
 
5060 serge 4366
	if (HAS_BSD2(dev)) {
4367
		ret = intel_init_bsd2_ring_buffer(dev);
4368
		if (ret)
4369
			goto cleanup_vebox_ring;
4370
	}
4104 Serge 4371
 
2332 Serge 4372
	return 0;
4373
 
4104 Serge 4374
cleanup_vebox_ring:
4375
	intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
3480 Serge 4376
cleanup_blt_ring:
4377
	intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
2332 Serge 4378
cleanup_bsd_ring:
4379
	intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
4380
cleanup_render_ring:
4381
	intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
3480 Serge 4382
 
2332 Serge 4383
	return ret;
4384
}
4385
 
3480 Serge 4386
int
4387
i915_gem_init_hw(struct drm_device *dev)
3031 serge 4388
{
5060 serge 4389
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 4390
	struct intel_engine_cs *ring;
4391
	int ret, i, j;
3031 serge 4392
 
3480 Serge 4393
	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
4394
		return -EIO;
3031 serge 4395
 
6084 serge 4396
	/* Double layer security blanket, see i915_gem_init() */
4397
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4398
 
4104 Serge 4399
	if (dev_priv->ellc_size)
4400
		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
3480 Serge 4401
 
4560 Serge 4402
	if (IS_HASWELL(dev))
4403
		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
4404
			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
4405
 
3746 Serge 4406
	if (HAS_PCH_NOP(dev)) {
5060 serge 4407
		if (IS_IVYBRIDGE(dev)) {
6084 serge 4408
			u32 temp = I915_READ(GEN7_MSG_CTL);
4409
			temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
4410
			I915_WRITE(GEN7_MSG_CTL, temp);
5060 serge 4411
		} else if (INTEL_INFO(dev)->gen >= 7) {
4412
			u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
4413
			temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
4414
			I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
4415
		}
3746 Serge 4416
	}
4417
 
3480 Serge 4418
	i915_gem_init_swizzling(dev);
4419
 
6084 serge 4420
	/*
4421
	 * At least 830 can leave some of the unused rings
4422
	 * "active" (ie. head != tail) after resume which
4423
	 * will prevent c3 entry. Makes sure all unused rings
4424
	 * are totally idle.
4425
	 */
4426
	init_unused_rings(dev);
3480 Serge 4427
 
6084 serge 4428
	BUG_ON(!dev_priv->ring[RCS].default_context);
4560 Serge 4429
 
6084 serge 4430
	ret = i915_ppgtt_init_hw(dev);
4431
	if (ret) {
4432
		DRM_ERROR("PPGTT enable HW failed %d\n", ret);
4433
		goto out;
4434
	}
4435
 
4436
	/* Need to do basic initialisation of all rings first: */
4437
	for_each_ring(ring, dev_priv, i) {
4438
		ret = ring->init_hw(ring);
4439
		if (ret)
4440
			goto out;
4441
	}
4442
 
4443
	/* We can't enable contexts until all firmware is loaded */
4444
	if (HAS_GUC_UCODE(dev)) {
4445
		ret = intel_guc_ucode_load(dev);
4446
		if (ret) {
4447
			/*
4448
			 * If we got an error and GuC submission is enabled, map
4449
			 * the error to -EIO so the GPU will be declared wedged.
4450
			 * OTOH, if we didn't intend to use the GuC anyway, just
4451
			 * discard the error and carry on.
4452
			 */
4453
			DRM_ERROR("Failed to initialize GuC, error %d%s\n", ret,
4454
				  i915.enable_guc_submission ? "" :
4455
				  " (ignored)");
4456
			ret = i915.enable_guc_submission ? -EIO : 0;
4457
			if (ret)
4458
				goto out;
4459
		}
4460
	}
4461
 
3480 Serge 4462
	/*
6084 serge 4463
	 * Increment the next seqno by 0x100 so we have a visible break
4464
	 * on re-initialisation
3480 Serge 4465
	 */
6084 serge 4466
	ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100);
4467
	if (ret)
4468
		goto out;
5354 serge 4469
 
6084 serge 4470
	/* Now it is safe to go back round and do everything else: */
4471
	for_each_ring(ring, dev_priv, i) {
4472
		struct drm_i915_gem_request *req;
4560 Serge 4473
 
6084 serge 4474
		WARN_ON(!ring->default_context);
4475
 
4476
		ret = i915_gem_request_alloc(ring, ring->default_context, &req);
4477
		if (ret) {
4478
			i915_gem_cleanup_ringbuffer(dev);
4479
			goto out;
4480
		}
4481
 
4482
		if (ring->id == RCS) {
4483
			for (j = 0; j < NUM_L3_SLICES(dev); j++)
4484
				i915_gem_l3_remap(req, j);
4485
		}
4486
 
4487
		ret = i915_ppgtt_init_ring(req);
4488
		if (ret && ret != -EIO) {
4489
			DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
4490
			i915_gem_request_cancel(req);
4491
			i915_gem_cleanup_ringbuffer(dev);
4492
			goto out;
4493
		}
4494
 
4495
		ret = i915_gem_context_enable(req);
4496
		if (ret && ret != -EIO) {
4497
			DRM_ERROR("Context enable ring #%d failed %d\n", i, ret);
4498
			i915_gem_request_cancel(req);
4499
			i915_gem_cleanup_ringbuffer(dev);
4500
			goto out;
4501
		}
4502
 
4503
		i915_add_request_no_flush(req);
5354 serge 4504
	}
4505
 
6084 serge 4506
out:
4507
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5060 serge 4508
	return ret;
3031 serge 4509
}
4510
 
4511
int i915_gem_init(struct drm_device *dev)
4512
{
4513
	struct drm_i915_private *dev_priv = dev->dev_private;
4514
	int ret;
4515
 
5354 serge 4516
	i915.enable_execlists = intel_sanitize_enable_execlists(dev,
4517
			i915.enable_execlists);
4518
 
3031 serge 4519
	mutex_lock(&dev->struct_mutex);
3746 Serge 4520
 
4521
	if (IS_VALLEYVIEW(dev)) {
4522
		/* VLVA0 (potential hack), BIOS isn't actually waking us */
5060 serge 4523
		I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ);
4524
		if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) &
4525
			      VLV_GTLC_ALLOWWAKEACK), 10))
3746 Serge 4526
			DRM_DEBUG_DRIVER("allow wake ack timed out\n");
4527
	}
4528
 
5354 serge 4529
	if (!i915.enable_execlists) {
6084 serge 4530
		dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
5354 serge 4531
		dev_priv->gt.init_rings = i915_gem_init_rings;
4532
		dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer;
4533
		dev_priv->gt.stop_ring = intel_stop_ring_buffer;
4534
	} else {
6084 serge 4535
		dev_priv->gt.execbuf_submit = intel_execlists_submission;
5354 serge 4536
		dev_priv->gt.init_rings = intel_logical_rings_init;
4537
		dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup;
4538
		dev_priv->gt.stop_ring = intel_logical_ring_stop;
4539
	}
4540
 
6084 serge 4541
	/* This is just a security blanket to placate dragons.
4542
	 * On some systems, we very sporadically observe that the first TLBs
4543
	 * used by the CS may be stale, despite us poking the TLB reset. If
4544
	 * we hold the forcewake during initialisation these problems
4545
	 * just magically go away.
4546
	 */
4547
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5354 serge 4548
 
6084 serge 4549
//	ret = i915_gem_init_userptr(dev);
4550
//	if (ret)
4551
//		goto out_unlock;
3746 Serge 4552
 
6084 serge 4553
	i915_gem_init_global_gtt(dev);
4554
 
5060 serge 4555
	ret = i915_gem_context_init(dev);
6084 serge 4556
	if (ret)
4557
		goto out_unlock;
3031 serge 4558
 
6084 serge 4559
	ret = dev_priv->gt.init_rings(dev);
4560
	if (ret)
4561
		goto out_unlock;
4562
 
5060 serge 4563
	ret = i915_gem_init_hw(dev);
4564
	if (ret == -EIO) {
4565
		/* Allow ring initialisation to fail by marking the GPU as
4566
		 * wedged. But we only want to do this where the GPU is angry,
4567
		 * for all other failures, such as an allocation failure, bail.
4568
		 */
4569
		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
6084 serge 4570
		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
5060 serge 4571
		ret = 0;
4572
	}
6084 serge 4573
 
4574
out_unlock:
4575
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5060 serge 4576
	mutex_unlock(&dev->struct_mutex);
3746 Serge 4577
 
6084 serge 4578
	return ret;
3031 serge 4579
}
4580
 
2332 Serge 4581
void
4582
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4583
{
5060 serge 4584
	struct drm_i915_private *dev_priv = dev->dev_private;
4585
	struct intel_engine_cs *ring;
2332 Serge 4586
	int i;
4587
 
3031 serge 4588
	for_each_ring(ring, dev_priv, i)
5354 serge 4589
		dev_priv->gt.cleanup_ring(ring);
2332 Serge 4590
}
4591
 
4592
static void
5060 serge 4593
init_ring_lists(struct intel_engine_cs *ring)
2326 Serge 4594
{
6084 serge 4595
	INIT_LIST_HEAD(&ring->active_list);
4596
	INIT_LIST_HEAD(&ring->request_list);
2326 Serge 4597
}
4598
 
4599
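/* One-time GEM setup at driver load: initialise the object and request
 * lists, the retire/idle work handlers, fence-register bookkeeping and
 * the initial seqno.
 */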
void
4600
i915_gem_load(struct drm_device *dev)
4601
{
5060 serge 4602
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 4603
	int i;
2326 Serge 4604
 
4104 Serge 4605
	INIT_LIST_HEAD(&dev_priv->vm_list);
4560 Serge 4606
	INIT_LIST_HEAD(&dev_priv->context_list);
3031 serge 4607
	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
4608
	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
6084 serge 4609
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4610
	for (i = 0; i < I915_NUM_RINGS; i++)
4611
		init_ring_lists(&dev_priv->ring[i]);
2342 Serge 4612
	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
6084 serge 4613
		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
2360 Serge 4614
	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4615
			  i915_gem_retire_work_handler);
4560 Serge 4616
	INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
4617
			  i915_gem_idle_work_handler);
3480 Serge 4618
	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
2326 Serge 4619
 
6084 serge 4620
	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
2326 Serge 4621
 
3746 Serge 4622
	if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev))
4623
		dev_priv->num_fence_regs = 32;
4624
	else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
6084 serge 4625
		dev_priv->num_fence_regs = 16;
4626
	else
4627
		dev_priv->num_fence_regs = 8;
2326 Serge 4628
 
6084 serge 4629
	if (intel_vgpu_active(dev))
4630
		dev_priv->num_fence_regs =
4631
				I915_READ(vgtif_reg(avail_rs.fence_num));
4632
 
4633
	/*
4634
	 * Set initial sequence number for requests.
4635
	 * Using this number allows the wraparound to happen early,
4636
	 * catching any obvious problems.
4637
	 */
4638
	dev_priv->next_seqno = ((u32)~0 - 0x1100);
4639
	dev_priv->last_seqno = ((u32)~0 - 0x1101);
4640
 
4641
	/* Initialize fence registers to zero */
3746 Serge 4642
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4643
	i915_gem_restore_fences(dev);
2326 Serge 4644
 
6084 serge 4645
	i915_gem_detect_bit_6_swizzle(dev);
2326 Serge 4646
 
6084 serge 4647
	dev_priv->mm.interruptible = true;
2326 Serge 4648
 
5060 serge 4649
	mutex_init(&dev_priv->fb_tracking.lock);
2326 Serge 4650
}
4651
 
6084 serge 4652
void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4653
{
4654
	struct drm_i915_file_private *file_priv = file->driver_priv;
4655
 
4656
	/* Clean up our request list when the client is going away, so that
4657
	 * later retire_requests won't dereference our soon-to-be-gone
4658
	 * file_priv.
4659
	 */
4660
	spin_lock(&file_priv->mm.lock);
4661
	while (!list_empty(&file_priv->mm.request_list)) {
4662
		struct drm_i915_gem_request *request;
4663
 
4664
		request = list_first_entry(&file_priv->mm.request_list,
4665
					   struct drm_i915_gem_request,
4666
					   client_list);
4667
		list_del(&request->client_list);
4668
		request->file_priv = NULL;
4669
	}
4670
	spin_unlock(&file_priv->mm.lock);
4671
 
4672
	if (!list_empty(&file_priv->rps.link)) {
4673
		spin_lock(&to_i915(dev)->rps.client_lock);
4674
		list_del(&file_priv->rps.link);
4675
		spin_unlock(&to_i915(dev)->rps.client_lock);
4676
	}
4677
}
4678
 
5060 serge 4679
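/* Allocate the per-client GEM state (request list, RPS link, context)
 * when a new file handle is opened on the device.
 */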
int i915_gem_open(struct drm_device *dev, struct drm_file *file)
4104 Serge 4680
{
5060 serge 4681
	struct drm_i915_file_private *file_priv;
4104 Serge 4682
	int ret;
2326 Serge 4683
 
5060 serge 4684
	DRM_DEBUG_DRIVER("\n");
4104 Serge 4685
 
5060 serge 4686
	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
4687
	if (!file_priv)
4104 Serge 4688
		return -ENOMEM;
4689
 
5060 serge 4690
	file->driver_priv = file_priv;
4691
	file_priv->dev_priv = dev->dev_private;
4692
	file_priv->file = file;
6084 serge 4693
	INIT_LIST_HEAD(&file_priv->rps.link);
4104 Serge 4694
 
5060 serge 4695
	spin_lock_init(&file_priv->mm.lock);
4696
	INIT_LIST_HEAD(&file_priv->mm.request_list);
4104 Serge 4697
 
5060 serge 4698
	ret = i915_gem_context_open(dev, file);
4699
	if (ret)
4700
		kfree(file_priv);
4104 Serge 4701
 
4702
	return ret;
4703
}
4704
 
5354 serge 4705
/**
4706
 * i915_gem_track_fb - update frontbuffer tracking
6084 serge 4707
 * @old: current GEM buffer for the frontbuffer slots
4708
 * @new: new GEM buffer for the frontbuffer slots
4709
 * @frontbuffer_bits: bitmask of frontbuffer slots
5354 serge 4710
 *
4711
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
4712
 * from @old and setting them in @new. Both @old and @new can be NULL.
4713
 */
5060 serge 4714
void i915_gem_track_fb(struct drm_i915_gem_object *old,
4715
		       struct drm_i915_gem_object *new,
4716
		       unsigned frontbuffer_bits)
4104 Serge 4717
{
5060 serge 4718
	if (old) {
4719
		WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
4720
		WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
4721
		old->frontbuffer_bits &= ~frontbuffer_bits;
4104 Serge 4722
	}
4723
 
5060 serge 4724
	if (new) {
4725
		WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
4726
		WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
4727
		new->frontbuffer_bits |= frontbuffer_bits;
4104 Serge 4728
	}
4729
}
4730
 
4731
/* All the new VM stuff */
6084 serge 4732
u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
4733
			struct i915_address_space *vm)
4104 Serge 4734
{
4735
	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
4736
	struct i915_vma *vma;
4737
 
5354 serge 4738
	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
4104 Serge 4739
 
4740
	list_for_each_entry(vma, &o->vma_list, vma_link) {
6084 serge 4741
		if (i915_is_ggtt(vma->vm) &&
4742
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4743
			continue;
4104 Serge 4744
		if (vma->vm == vm)
4745
			return vma->node.start;
6084 serge 4746
	}
4104 Serge 4747
 
5060 serge 4748
	WARN(1, "%s vma for this object not found.\n",
4749
	     i915_is_ggtt(vm) ? "global" : "ppgtt");
4750
	return -1;
4104 Serge 4751
}
4752
 
6084 serge 4753
u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
4754
				  const struct i915_ggtt_view *view)
4755
{
4756
	struct i915_address_space *ggtt = i915_obj_to_ggtt(o);
4757
	struct i915_vma *vma;
4758
 
4759
	list_for_each_entry(vma, &o->vma_list, vma_link)
4760
		if (vma->vm == ggtt &&
4761
		    i915_ggtt_view_equal(&vma->ggtt_view, view))
4762
			return vma->node.start;
4763
 
4764
	WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
4765
	return -1;
4766
}
4767
 
4104 Serge 4768
bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
4769
			struct i915_address_space *vm)
4770
{
4771
	struct i915_vma *vma;
4772
 
6084 serge 4773
	list_for_each_entry(vma, &o->vma_list, vma_link) {
4774
		if (i915_is_ggtt(vma->vm) &&
4775
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4776
			continue;
4104 Serge 4777
		if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
4778
			return true;
6084 serge 4779
	}
4104 Serge 4780
 
4781
	return false;
4782
}
4783
 
6084 serge 4784
bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
4785
				  const struct i915_ggtt_view *view)
4786
{
4787
	struct i915_address_space *ggtt = i915_obj_to_ggtt(o);
4788
	struct i915_vma *vma;
4789
 
4790
	list_for_each_entry(vma, &o->vma_list, vma_link)
4791
		if (vma->vm == ggtt &&
4792
		    i915_ggtt_view_equal(&vma->ggtt_view, view) &&
4793
		    drm_mm_node_allocated(&vma->node))
4794
			return true;
4795
 
4796
	return false;
4797
}
4798
 
4104 Serge 4799
bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
4800
{
4560 Serge 4801
	struct i915_vma *vma;
4104 Serge 4802
 
4560 Serge 4803
	list_for_each_entry(vma, &o->vma_list, vma_link)
4804
		if (drm_mm_node_allocated(&vma->node))
4104 Serge 4805
			return true;
4806
 
4807
	return false;
4808
}
4809
 
4810
unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
4811
				struct i915_address_space *vm)
4812
{
4813
	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
4814
	struct i915_vma *vma;
4815
 
5354 serge 4816
	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
4104 Serge 4817
 
4818
	BUG_ON(list_empty(&o->vma_list));
4819
 
6084 serge 4820
	list_for_each_entry(vma, &o->vma_list, vma_link) {
4821
		if (i915_is_ggtt(vma->vm) &&
4822
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4823
			continue;
4104 Serge 4824
		if (vma->vm == vm)
4825
			return vma->node.size;
6084 serge 4826
	}
4104 Serge 4827
	return 0;
4828
}
4560 Serge 4829
 
6084 serge 4830
bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
4831
{
4832
	struct i915_vma *vma;
4833
	list_for_each_entry(vma, &obj->vma_list, vma_link)
4834
		if (vma->pin_count > 0)
4835
			return true;
4560 Serge 4836
 
6084 serge 4837
	return false;
4838
}
5060 serge 4839
 
6084 serge 4840
/* Allocate a new GEM object and fill it with the supplied data */
4841
struct drm_i915_gem_object *
4842
i915_gem_object_create_from_data(struct drm_device *dev,
4843
			         const void *data, size_t size)
4104 Serge 4844
{
6084 serge 4845
	struct drm_i915_gem_object *obj;
4846
	struct sg_table *sg;
4847
	size_t bytes;
4848
	int ret;
4104 Serge 4849
 
6084 serge 4850
	obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE));
4851
	if (IS_ERR_OR_NULL(obj))
4852
		return obj;
4104 Serge 4853
 
6084 serge 4854
	ret = i915_gem_object_set_to_cpu_domain(obj, true);
4855
	if (ret)
4856
		goto fail;
4857
 
4858
	ret = i915_gem_object_get_pages(obj);
4859
	if (ret)
4860
		goto fail;
4861
 
4862
	i915_gem_object_pin_pages(obj);
4863
	sg = obj->pages;
4864
	bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
4865
	i915_gem_object_unpin_pages(obj);
4866
 
4867
	if (WARN_ON(bytes != size)) {
4868
		DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
4869
		ret = -EFAULT;
4870
		goto fail;
4871
	}
4872
 
4873
	return obj;
4874
 
4875
fail:
4876
	drm_gem_object_unreference(&obj->base);
4877
	return ERR_PTR(ret);
4104 Serge 4878
}