Subversion Repositories Kolibri OS

Rev 6660

Rev Author Line No. Line
2326 Serge 1
/*
6084 serge 2
 * Copyright © 2008-2015 Intel Corporation
2326 Serge 3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 * IN THE SOFTWARE.
22
 *
23
 * Authors:
24
 *    Eric Anholt <eric@anholt.net>
25
 *
26
 */
27
 
3031 serge 28
#include 
4280 Serge 29
#include 
3031 serge 30
#include 
2326 Serge 31
#include "i915_drv.h"
6084 serge 32
#include "i915_vgpu.h"
2351 Serge 33
#include "i915_trace.h"
2326 Serge 34
#include "intel_drv.h"
3260 Serge 35
#include 
2330 Serge 36
#include 
6660 serge 37
#include 
2326 Serge 38
#include 
6660 serge 39
#include 
40
 
6084 serge 41
#define RQ_BUG_ON(expr)
2326 Serge 42
 
2344 Serge 43
extern int x86_clflush_size;
6131 serge 44
#define __copy_to_user_inatomic __copy_to_user
2332 Serge 45
 
3263 Serge 46
#define PROT_READ       0x1             /* page can be read */
47
#define PROT_WRITE      0x2             /* page can be written */
48
#define MAP_SHARED      0x01            /* Share changes */
49
 
2344 Serge 50
 
5060 serge 51
 
3266 Serge 52
struct drm_i915_gem_object *get_fb_obj();
53
 
3263 Serge 54
unsigned long vm_mmap(struct file *file, unsigned long addr,
55
         unsigned long len, unsigned long prot,
56
         unsigned long flag, unsigned long offset);
57
 
2344 Serge 58
 
2332 Serge 59
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
6084 serge 60
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
5060 serge 61
static void
6084 serge 62
i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
63
static void
64
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
2326 Serge 65
 
4104 Serge 66
static bool cpu_cache_is_coherent(struct drm_device *dev,
67
				  enum i915_cache_level level)
68
{
69
	return HAS_LLC(dev) || level != I915_CACHE_NONE;
70
}
71
 
72
static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
73
{
74
	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
75
		return true;
76
 
77
	return obj->pin_display;
78
}
79
 
2332 Serge 80
/* some bookkeeping */
81
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
82
				  size_t size)
83
{
4104 Serge 84
	spin_lock(&dev_priv->mm.object_stat_lock);
2332 Serge 85
	dev_priv->mm.object_count++;
86
	dev_priv->mm.object_memory += size;
4104 Serge 87
	spin_unlock(&dev_priv->mm.object_stat_lock);
2332 Serge 88
}
89
 
90
static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
91
				     size_t size)
92
{
4104 Serge 93
	spin_lock(&dev_priv->mm.object_stat_lock);
2332 Serge 94
	dev_priv->mm.object_count--;
95
	dev_priv->mm.object_memory -= size;
4104 Serge 96
	spin_unlock(&dev_priv->mm.object_stat_lock);
2332 Serge 97
}
98
 
99
static int
3480 Serge 100
i915_gem_wait_for_error(struct i915_gpu_error *error)
2332 Serge 101
{
102
	int ret;
103
 
3480 Serge 104
#define EXIT_COND (!i915_reset_in_progress(error))
105
	if (EXIT_COND)
2332 Serge 106
		return 0;
3255 Serge 107
#if 0
3031 serge 108
	/*
109
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
110
	 * userspace. If it takes that long something really bad is going on and
111
	 * we should simply try to bail out and fail as gracefully as possible.
112
	 */
3480 Serge 113
	ret = wait_event_interruptible_timeout(error->reset_queue,
114
					       EXIT_COND,
115
					       10*HZ);
3031 serge 116
	if (ret == 0) {
117
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
118
		return -EIO;
119
	} else if (ret < 0) {
2332 Serge 120
		return ret;
3031 serge 121
	}
2332 Serge 122
 
3255 Serge 123
#endif
3480 Serge 124
#undef EXIT_COND
3255 Serge 125
 
2332 Serge 126
	return 0;
127
}
128
 
129
int i915_mutex_lock_interruptible(struct drm_device *dev)
130
{
3480 Serge 131
	struct drm_i915_private *dev_priv = dev->dev_private;
2332 Serge 132
	int ret;
133
 
3480 Serge 134
	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
2332 Serge 135
	if (ret)
136
		return ret;
137
 
3480 Serge 138
	ret = mutex_lock_interruptible(&dev->struct_mutex);
139
	if (ret)
140
		return ret;
2332 Serge 141
 
142
	WARN_ON(i915_verify_lists(dev));
143
	return 0;
144
}
145
 
146
int
147
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
148
			    struct drm_file *file)
149
{
150
	struct drm_i915_private *dev_priv = dev->dev_private;
151
	struct drm_i915_gem_get_aperture *args = data;
6084 serge 152
	struct i915_gtt *ggtt = &dev_priv->gtt;
153
	struct i915_vma *vma;
2332 Serge 154
	size_t pinned;
155
 
156
	pinned = 0;
157
	mutex_lock(&dev->struct_mutex);
6084 serge 158
	list_for_each_entry(vma, &ggtt->base.active_list, mm_list)
159
		if (vma->pin_count)
160
			pinned += vma->node.size;
161
	list_for_each_entry(vma, &ggtt->base.inactive_list, mm_list)
162
		if (vma->pin_count)
163
			pinned += vma->node.size;
2332 Serge 164
	mutex_unlock(&dev->struct_mutex);
165
 
4104 Serge 166
	args->aper_size = dev_priv->gtt.base.total;
2342 Serge 167
	args->aper_available_size = args->aper_size - pinned;
2332 Serge 168
 
169
	return 0;
170
}
171
 
6296 serge 172
static int
173
i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
174
{
175
	char *vaddr = obj->phys_handle->vaddr;
176
	struct sg_table *st;
177
	struct scatterlist *sg;
178
	int i;
179
 
180
	if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
181
		return -EINVAL;
182
 
183
 
184
	st = kmalloc(sizeof(*st), GFP_KERNEL);
185
	if (st == NULL)
186
		return -ENOMEM;
187
 
188
	if (sg_alloc_table(st, 1, GFP_KERNEL)) {
189
		kfree(st);
190
		return -ENOMEM;
191
	}
192
 
193
	sg = st->sgl;
194
	sg->offset = 0;
195
	sg->length = obj->base.size;
196
 
197
	sg_dma_address(sg) = obj->phys_handle->busaddr;
198
	sg_dma_len(sg) = obj->base.size;
199
 
200
	obj->pages = st;
201
	return 0;
202
}
203
 
204
static void
205
i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
206
{
207
	int ret;
208
 
209
	BUG_ON(obj->madv == __I915_MADV_PURGED);
210
 
211
	ret = i915_gem_object_set_to_cpu_domain(obj, true);
212
	if (ret) {
213
		/* In the event of a disaster, abandon all caches and
214
		 * hope for the best.
215
		 */
216
		WARN_ON(ret != -EIO);
217
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
218
	}
219
 
220
	if (obj->madv == I915_MADV_DONTNEED)
221
		obj->dirty = 0;
222
 
223
	if (obj->dirty) {
224
		obj->dirty = 0;
225
	}
226
 
227
	sg_free_table(obj->pages);
228
	kfree(obj->pages);
229
}
230
 
231
static void
232
i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
233
{
234
	drm_pci_free(obj->base.dev, obj->phys_handle);
235
}
236
 
237
static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
238
	.get_pages = i915_gem_object_get_pages_phys,
239
	.put_pages = i915_gem_object_put_pages_phys,
240
	.release = i915_gem_object_release_phys,
241
};
242
 
243
static int
244
drop_pages(struct drm_i915_gem_object *obj)
245
{
246
	struct i915_vma *vma, *next;
247
	int ret;
248
 
249
	drm_gem_object_reference(&obj->base);
250
	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
251
		if (i915_vma_unbind(vma))
252
			break;
253
 
254
	ret = i915_gem_object_put_pages(obj);
255
	drm_gem_object_unreference(&obj->base);
256
 
257
	return ret;
258
}
259
 
260
int
261
i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
262
			    int align)
263
{
264
	drm_dma_handle_t *phys;
265
	int ret;
266
 
267
	if (obj->phys_handle) {
268
		if ((unsigned long)obj->phys_handle->vaddr & (align -1))
269
			return -EBUSY;
270
 
271
		return 0;
272
	}
273
 
274
	if (obj->madv != I915_MADV_WILLNEED)
275
		return -EFAULT;
276
 
277
	if (obj->base.filp == NULL)
278
		return -EINVAL;
279
 
280
	ret = drop_pages(obj);
281
	if (ret)
282
		return ret;
283
 
284
	/* create a new object */
285
	phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
286
	if (!phys)
287
		return -ENOMEM;
288
 
289
	obj->phys_handle = phys;
290
	obj->ops = &i915_gem_phys_ops;
291
 
292
	return i915_gem_object_get_pages(obj);
293
}
3480 Serge 294
void *i915_gem_object_alloc(struct drm_device *dev)
295
{
296
	struct drm_i915_private *dev_priv = dev->dev_private;
5367 serge 297
    return kzalloc(sizeof(struct drm_i915_gem_object), 0);
3480 Serge 298
}
299
 
300
void i915_gem_object_free(struct drm_i915_gem_object *obj)
301
{
302
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
303
	kfree(obj);
304
}
305
 
3031 serge 306
static int
307
i915_gem_create(struct drm_file *file,
2332 Serge 308
		struct drm_device *dev,
309
		uint64_t size,
310
		uint32_t *handle_p)
311
{
312
	struct drm_i915_gem_object *obj;
313
	int ret;
314
	u32 handle;
315
 
316
	size = roundup(size, PAGE_SIZE);
2342 Serge 317
	if (size == 0)
318
		return -EINVAL;
2332 Serge 319
 
320
	/* Allocate the new object */
321
	obj = i915_gem_alloc_object(dev, size);
322
	if (obj == NULL)
323
		return -ENOMEM;
324
 
325
	ret = drm_gem_handle_create(file, &obj->base, &handle);
4104 Serge 326
	/* drop reference from allocate - handle holds it now */
327
	drm_gem_object_unreference_unlocked(&obj->base);
328
	if (ret)
2332 Serge 329
		return ret;
330
 
331
	*handle_p = handle;
332
	return 0;
333
}
334
 
335
int
336
i915_gem_dumb_create(struct drm_file *file,
337
		     struct drm_device *dev,
338
		     struct drm_mode_create_dumb *args)
339
{
340
	/* have to work out size/pitch and return them */
4560 Serge 341
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
2332 Serge 342
	args->size = args->pitch * args->height;
343
	return i915_gem_create(file, dev,
344
			       args->size, &args->handle);
345
}
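/*
 * Illustrative sketch, not part of the driver: the dumb-create pitch/size
 * arithmetic above, worked for a hypothetical 1024x768, 32 bpp buffer.
 * ALIGN() and DIV_ROUND_UP() are the usual kernel macros.
 */
#if 0
static void example_dumb_size(void)
{
	unsigned int width = 1024, height = 768, bpp = 32;
	unsigned int cpp   = DIV_ROUND_UP(bpp, 8);     /* 4 bytes per pixel */
	unsigned int pitch = ALIGN(width * cpp, 64);   /* 4096, already 64-byte aligned */
	unsigned int size  = pitch * height;           /* 3145728 bytes = 3 MiB */
	(void)size;
}
#endif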
346
 
2326 Serge 347
/**
2332 Serge 348
 * Creates a new mm object and returns a handle to it.
349
 */
350
int
351
i915_gem_create_ioctl(struct drm_device *dev, void *data,
352
		      struct drm_file *file)
353
{
354
	struct drm_i915_gem_create *args = data;
3031 serge 355
 
2332 Serge 356
	return i915_gem_create(file, dev,
357
			       args->size, &args->handle);
358
}
359
 
3031 serge 360
static inline int
361
__copy_to_user_swizzled(char __user *cpu_vaddr,
362
			const char *gpu_vaddr, int gpu_offset,
6084 serge 363
			int length)
2332 Serge 364
{
3031 serge 365
	int ret, cpu_offset = 0;
2332 Serge 366
 
3031 serge 367
	while (length > 0) {
368
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
369
		int this_length = min(cacheline_end - gpu_offset, length);
370
		int swizzled_gpu_offset = gpu_offset ^ 64;
2332 Serge 371
 
3031 serge 372
		ret = __copy_to_user(cpu_vaddr + cpu_offset,
373
				     gpu_vaddr + swizzled_gpu_offset,
374
				     this_length);
375
		if (ret)
376
			return ret + length;
2332 Serge 377
 
3031 serge 378
		cpu_offset += this_length;
379
		gpu_offset += this_length;
380
		length -= this_length;
381
	}
382
 
383
	return 0;
2332 Serge 384
}
385
 
3031 serge 386
static inline int
387
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
388
			  const char __user *cpu_vaddr,
389
			  int length)
2332 Serge 390
{
3031 serge 391
	int ret, cpu_offset = 0;
2332 Serge 392
 
393
	while (length > 0) {
394
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
395
		int this_length = min(cacheline_end - gpu_offset, length);
396
		int swizzled_gpu_offset = gpu_offset ^ 64;
397
 
3031 serge 398
		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
6084 serge 399
				       cpu_vaddr + cpu_offset,
400
				       this_length);
3031 serge 401
		if (ret)
402
			return ret + length;
403
 
2332 Serge 404
		cpu_offset += this_length;
405
		gpu_offset += this_length;
406
		length -= this_length;
407
	}
408
 
3031 serge 409
	return 0;
2332 Serge 410
}
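/*
 * Illustrative sketch, not part of the driver: how the swizzled copy helpers
 * above remap GPU offsets. They are used only when the page's physical
 * address has bit 17 set (see the callers below); within every 128-byte pair
 * of cachelines the two 64-byte halves are swapped, and each copy is clamped
 * so it never crosses a 64-byte cacheline boundary.
 */
#if 0
static int example_swizzle(int gpu_offset, int length)
{
	int cacheline_end = ALIGN(gpu_offset + 1, 64);        /* end of current cacheline */
	int this_length   = min(cacheline_end - gpu_offset, length);
	int swizzled      = gpu_offset ^ 64;                  /* swap 64-byte halves */

	(void)this_length;
	return swizzled;
}
#endif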
411
 
6131 serge 412
/*
413
 * Pins the specified object's pages and synchronizes the object with
414
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
415
 * flush the object from the CPU cache.
416
 */
417
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
418
				    int *needs_clflush)
419
{
420
	int ret;
421
 
422
	*needs_clflush = 0;
423
 
424
	if (!obj->base.filp)
425
		return -EINVAL;
426
 
427
	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
428
		/* If we're not in the cpu read domain, set ourself into the gtt
429
		 * read domain and manually flush cachelines (if required). This
430
		 * optimizes for the case when the gpu will dirty the data
431
		 * anyway again before the next pread happens. */
432
		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
433
							obj->cache_level);
434
		ret = i915_gem_object_wait_rendering(obj, true);
435
		if (ret)
436
			return ret;
437
	}
438
 
439
	ret = i915_gem_object_get_pages(obj);
440
	if (ret)
441
		return ret;
442
 
443
	i915_gem_object_pin_pages(obj);
444
 
445
	return ret;
446
}
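/*
 * Illustrative sketch, not part of the driver: the caller pattern expected by
 * i915_gem_obj_prepare_shmem_read() above - prepare (pin the pages and learn
 * whether a clflush is needed), touch the pages, then drop the pin. Error
 * handling is reduced to the bare minimum.
 */
#if 0
static int example_prepare_read(struct drm_i915_gem_object *obj)
{
	int needs_clflush = 0;
	int ret;

	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	if (ret)
		return ret;

	/* ... copy out of obj->pages, flushing first if needs_clflush ... */

	i915_gem_object_unpin_pages(obj);
	return 0;
}
#endif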
447
 
3031 serge 448
/* Per-page copy function for the shmem pread fastpath.
449
 * Flushes invalid cachelines before reading the target if
450
 * needs_clflush is set. */
2332 Serge 451
static int
3031 serge 452
shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
453
		 char __user *user_data,
454
		 bool page_do_bit17_swizzling, bool needs_clflush)
455
{
6084 serge 456
	char *vaddr;
457
	int ret;
3031 serge 458
 
459
	if (unlikely(page_do_bit17_swizzling))
460
		return -EINVAL;
461
 
6084 serge 462
	vaddr = kmap_atomic(page);
3031 serge 463
	if (needs_clflush)
464
		drm_clflush_virt_range(vaddr + shmem_page_offset,
465
				       page_length);
6084 serge 466
	ret = __copy_to_user_inatomic(user_data,
3031 serge 467
				      vaddr + shmem_page_offset,
6084 serge 468
				      page_length);
469
	kunmap_atomic(vaddr);
3031 serge 470
 
471
	return ret ? -EFAULT : 0;
472
}
473
 
474
static void
475
shmem_clflush_swizzled_range(char *addr, unsigned long length,
476
			     bool swizzled)
477
{
478
	if (unlikely(swizzled)) {
479
		unsigned long start = (unsigned long) addr;
480
		unsigned long end = (unsigned long) addr + length;
481
 
482
		/* For swizzling simply ensure that we always flush both
483
		 * channels. Lame, but simple and it works. Swizzled
484
		 * pwrite/pread is far from a hotpath - current userspace
485
		 * doesn't use it at all. */
486
		start = round_down(start, 128);
487
		end = round_up(end, 128);
488
 
489
		drm_clflush_virt_range((void *)start, end - start);
490
	} else {
491
		drm_clflush_virt_range(addr, length);
492
	}
493
 
494
}
495
 
496
/* Only difference to the fast-path function is that this can handle bit17
497
 * and uses non-atomic copy and kmap functions. */
498
static int
499
shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
500
		 char __user *user_data,
501
		 bool page_do_bit17_swizzling, bool needs_clflush)
502
{
503
	char *vaddr;
504
	int ret;
505
 
506
	vaddr = kmap(page);
507
	if (needs_clflush)
508
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
509
					     page_length,
510
					     page_do_bit17_swizzling);
511
 
512
	if (page_do_bit17_swizzling)
513
		ret = __copy_to_user_swizzled(user_data,
514
					      vaddr, shmem_page_offset,
515
					      page_length);
516
	else
517
		ret = __copy_to_user(user_data,
518
				     vaddr + shmem_page_offset,
519
				     page_length);
520
	kunmap(page);
521
 
522
	return ret ? - EFAULT : 0;
523
}
524
 
525
static int
526
i915_gem_shmem_pread(struct drm_device *dev,
6084 serge 527
		     struct drm_i915_gem_object *obj,
528
		     struct drm_i915_gem_pread *args,
529
		     struct drm_file *file)
2332 Serge 530
{
3031 serge 531
	char __user *user_data;
2332 Serge 532
	ssize_t remain;
533
	loff_t offset;
3031 serge 534
	int shmem_page_offset, page_length, ret = 0;
535
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
536
	int prefaulted = 0;
537
	int needs_clflush = 0;
3746 Serge 538
	struct sg_page_iter sg_iter;
2332 Serge 539
 
3746 Serge 540
	user_data = to_user_ptr(args->data_ptr);
2332 Serge 541
	remain = args->size;
542
 
3031 serge 543
	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
544
 
5060 serge 545
	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
3031 serge 546
	if (ret)
547
		return ret;
548
 
2332 Serge 549
	offset = args->offset;
550
 
3746 Serge 551
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
552
			 offset >> PAGE_SHIFT) {
553
		struct page *page = sg_page_iter_page(&sg_iter);
2332 Serge 554
 
3031 serge 555
		if (remain <= 0)
556
			break;
557
 
2332 Serge 558
		/* Operation in this page
559
		 *
3031 serge 560
		 * shmem_page_offset = offset within page in shmem file
2332 Serge 561
		 * page_length = bytes to copy for this page
562
		 */
3031 serge 563
		shmem_page_offset = offset_in_page(offset);
2332 Serge 564
		page_length = remain;
3031 serge 565
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
566
			page_length = PAGE_SIZE - shmem_page_offset;
2332 Serge 567
 
3031 serge 568
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
569
			(page_to_phys(page) & (1 << 17)) != 0;
2332 Serge 570
 
3031 serge 571
		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
572
				       user_data, page_do_bit17_swizzling,
573
				       needs_clflush);
574
		if (ret == 0)
575
			goto next_page;
2332 Serge 576
 
3031 serge 577
		mutex_unlock(&dev->struct_mutex);
578
 
579
		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
580
				       user_data, page_do_bit17_swizzling,
581
				       needs_clflush);
582
 
583
		mutex_lock(&dev->struct_mutex);
584
 
2332 Serge 585
		if (ret)
3031 serge 586
			goto out;
2332 Serge 587
 
5060 serge 588
next_page:
2332 Serge 589
		remain -= page_length;
590
		user_data += page_length;
591
		offset += page_length;
592
	}
593
 
3031 serge 594
out:
595
	i915_gem_object_unpin_pages(obj);
596
 
597
	return ret;
2332 Serge 598
}
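/*
 * Illustrative sketch, not part of the driver: the fast/slow shape shared by
 * the shmem pread and pwrite loops above - try the atomic per-page copy
 * first, and only drop struct_mutex for the sleeping fallback when the fast
 * path cannot finish the copy.
 */
#if 0
static int example_pread_one_page(struct drm_device *dev, struct page *page,
				  char __user *user_data, int offset, int len,
				  bool swizzled, bool needs_clflush)
{
	int ret;

	ret = shmem_pread_fast(page, offset, len, user_data,
			       swizzled, needs_clflush);
	if (ret == 0)
		return 0;			/* atomic copy worked, mutex kept held */

	mutex_unlock(&dev->struct_mutex);	/* slow path may sleep */
	ret = shmem_pread_slow(page, offset, len, user_data,
			       swizzled, needs_clflush);
	mutex_lock(&dev->struct_mutex);

	return ret;
}
#endif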
599
 
600
/**
3031 serge 601
 * Reads data from the object referenced by handle.
602
 *
603
 * On error, the contents of *data are undefined.
2332 Serge 604
 */
3031 serge 605
int
606
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
607
		     struct drm_file *file)
608
{
609
	struct drm_i915_gem_pread *args = data;
610
	struct drm_i915_gem_object *obj;
611
	int ret = 0;
612
 
613
	if (args->size == 0)
614
		return 0;
615
 
616
	ret = i915_mutex_lock_interruptible(dev);
617
	if (ret)
618
		return ret;
619
 
620
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
621
	if (&obj->base == NULL) {
622
		ret = -ENOENT;
623
		goto unlock;
624
	}
625
 
626
	/* Bounds check source.  */
627
	if (args->offset > obj->base.size ||
628
	    args->size > obj->base.size - args->offset) {
629
		ret = -EINVAL;
630
		goto out;
631
	}
632
 
633
	/* prime objects have no backing filp to GEM pread/pwrite
634
	 * pages from.
635
	 */
636
	if (!obj->base.filp) {
637
		ret = -EINVAL;
638
		goto out;
639
	}
640
 
641
	trace_i915_gem_object_pread(obj, args->offset, args->size);
642
 
643
	ret = i915_gem_shmem_pread(dev, obj, args, file);
644
 
645
out:
646
	drm_gem_object_unreference(&obj->base);
647
unlock:
648
	mutex_unlock(&dev->struct_mutex);
649
	return ret;
650
}
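/*
 * Illustrative note, not part of the driver: the bounds check in the
 * pread/pwrite ioctls above is written as two comparisons instead of
 * "offset + size > obj->base.size" so that a huge userspace offset or size
 * cannot wrap around and slip past the check.
 */
#if 0
static bool example_range_ok(uint64_t offset, uint64_t size, uint64_t obj_size)
{
	/* equivalent to offset + size <= obj_size, but immune to overflow */
	return offset <= obj_size && size <= obj_size - offset;
}
#endif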
651
 
652
/* This is the fast write path which cannot handle
653
 * page faults in the source data
654
 */
655
 
656
 
657
/**
658
 * This is the fast pwrite path, where we copy the data directly from the
659
 * user into the GTT, uncached.
660
 */
2332 Serge 661
static int
3031 serge 662
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
663
			 struct drm_i915_gem_object *obj,
664
			 struct drm_i915_gem_pwrite *args,
665
			 struct drm_file *file)
2332 Serge 666
{
5060 serge 667
	struct drm_i915_private *dev_priv = dev->dev_private;
2332 Serge 668
	ssize_t remain;
3031 serge 669
	loff_t offset, page_base;
670
	char __user *user_data;
671
	int page_offset, page_length, ret;
2332 Serge 672
 
5060 serge 673
	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
3031 serge 674
	if (ret)
675
		goto out;
676
 
677
	ret = i915_gem_object_set_to_gtt_domain(obj, true);
678
	if (ret)
679
		goto out_unpin;
680
 
681
	ret = i915_gem_object_put_fence(obj);
682
	if (ret)
683
		goto out_unpin;
684
 
4539 Serge 685
	user_data = to_user_ptr(args->data_ptr);
2332 Serge 686
	remain = args->size;
687
 
4104 Serge 688
	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
2332 Serge 689
 
6084 serge 690
	intel_fb_obj_invalidate(obj, ORIGIN_GTT);
691
 
3031 serge 692
	while (remain > 0) {
693
		/* Operation in this page
694
		 *
695
		 * page_base = page offset within aperture
696
		 * page_offset = offset within page
697
		 * page_length = bytes to copy for this page
698
		 */
699
		page_base = offset & PAGE_MASK;
700
		page_offset = offset_in_page(offset);
701
		page_length = remain;
702
		if ((page_offset + remain) > PAGE_SIZE)
703
			page_length = PAGE_SIZE - page_offset;
2332 Serge 704
 
6131 serge 705
		MapPage(dev_priv->gtt.mappable,
706
				dev_priv->gtt.mappable_base+page_base, PG_WRITEC|PG_SW);
3031 serge 707
 
6131 serge 708
		memcpy((char*)dev_priv->gtt.mappable+page_offset, user_data, page_length);
3260 Serge 709
 
3031 serge 710
		remain -= page_length;
711
		user_data += page_length;
712
		offset += page_length;
2332 Serge 713
	}
714
 
6084 serge 715
out_flush:
716
	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
3031 serge 717
out_unpin:
5060 serge 718
	i915_gem_object_ggtt_unpin(obj);
3031 serge 719
out:
6084 serge 720
	return ret;
3031 serge 721
}
722
 
723
/* Per-page copy function for the shmem pwrite fastpath.
724
 * Flushes invalid cachelines before writing to the target if
725
 * needs_clflush_before is set and flushes out any written cachelines after
726
 * writing if needs_clflush is set. */
727
static int
728
shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
729
		  char __user *user_data,
730
		  bool page_do_bit17_swizzling,
731
		  bool needs_clflush_before,
732
		  bool needs_clflush_after)
733
{
734
	char *vaddr;
5354 serge 735
	int ret = 0;	/* the memcpy path below cannot fail */
3031 serge 736
 
737
	if (unlikely(page_do_bit17_swizzling))
738
		return -EINVAL;
739
 
5354 serge 740
	vaddr = kmap_atomic(page);
3031 serge 741
	if (needs_clflush_before)
742
		drm_clflush_virt_range(vaddr + shmem_page_offset,
743
				       page_length);
3260 Serge 744
	memcpy(vaddr + shmem_page_offset,
3031 serge 745
						user_data,
746
						page_length);
747
	if (needs_clflush_after)
748
		drm_clflush_virt_range(vaddr + shmem_page_offset,
749
				       page_length);
5354 serge 750
	kunmap_atomic(vaddr);
3031 serge 751
 
752
	return ret ? -EFAULT : 0;
753
}
754
 
755
/* Only difference to the fast-path function is that this can handle bit17
756
 * and uses non-atomic copy and kmap functions. */
757
static int
758
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
759
		  char __user *user_data,
760
		  bool page_do_bit17_swizzling,
761
		  bool needs_clflush_before,
762
		  bool needs_clflush_after)
763
{
764
	char *vaddr;
765
	int ret;
766
 
767
	vaddr = kmap(page);
768
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
769
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
770
					     page_length,
771
					     page_do_bit17_swizzling);
772
	if (page_do_bit17_swizzling)
773
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
774
						user_data,
775
						page_length);
776
	else
777
		ret = __copy_from_user(vaddr + shmem_page_offset,
778
				       user_data,
779
				       page_length);
780
	if (needs_clflush_after)
781
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
782
					     page_length,
783
					     page_do_bit17_swizzling);
784
	kunmap(page);
785
 
786
	return ret ? -EFAULT : 0;
787
}
788
 
789
static int
790
i915_gem_shmem_pwrite(struct drm_device *dev,
791
		      struct drm_i915_gem_object *obj,
792
		      struct drm_i915_gem_pwrite *args,
793
		      struct drm_file *file)
794
{
795
	ssize_t remain;
796
	loff_t offset;
797
	char __user *user_data;
798
	int shmem_page_offset, page_length, ret = 0;
799
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
800
	int hit_slowpath = 0;
801
	int needs_clflush_after = 0;
802
	int needs_clflush_before = 0;
3746 Serge 803
	struct sg_page_iter sg_iter;
3031 serge 804
 
3746 Serge 805
	user_data = to_user_ptr(args->data_ptr);
3031 serge 806
	remain = args->size;
807
 
808
	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
809
 
810
	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
811
		/* If we're not in the cpu write domain, set ourself into the gtt
812
		 * write domain and manually flush cachelines (if required). This
813
		 * optimizes for the case when the gpu will use the data
814
		 * right away and we therefore have to clflush anyway. */
4104 Serge 815
		needs_clflush_after = cpu_write_needs_clflush(obj);
4560 Serge 816
		ret = i915_gem_object_wait_rendering(obj, false);
6084 serge 817
		if (ret)
818
			return ret;
819
	}
4104 Serge 820
	/* Same trick applies to invalidate partially written cachelines read
821
	 * before writing. */
822
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
823
		needs_clflush_before =
824
			!cpu_cache_is_coherent(dev, obj->cache_level);
3031 serge 825
 
826
	ret = i915_gem_object_get_pages(obj);
2332 Serge 827
	if (ret)
3031 serge 828
		return ret;
2332 Serge 829
 
6084 serge 830
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
831
 
3031 serge 832
	i915_gem_object_pin_pages(obj);
2332 Serge 833
 
834
	offset = args->offset;
3031 serge 835
	obj->dirty = 1;
2332 Serge 836
 
3746 Serge 837
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
838
			 offset >> PAGE_SHIFT) {
839
		struct page *page = sg_page_iter_page(&sg_iter);
3031 serge 840
		int partial_cacheline_write;
2332 Serge 841
 
3031 serge 842
		if (remain <= 0)
843
			break;
844
 
2332 Serge 845
		/* Operation in this page
846
		 *
847
		 * shmem_page_offset = offset within page in shmem file
848
		 * page_length = bytes to copy for this page
849
		 */
850
		shmem_page_offset = offset_in_page(offset);
851
 
852
		page_length = remain;
853
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
854
			page_length = PAGE_SIZE - shmem_page_offset;
855
 
3031 serge 856
		/* If we don't overwrite a cacheline completely we need to be
857
		 * careful to have up-to-date data by first clflushing. Don't
858
		 * overcomplicate things and flush the entire page. */
859
		partial_cacheline_write = needs_clflush_before &&
860
			((shmem_page_offset | page_length)
3260 Serge 861
				& (x86_clflush_size - 1));
2332 Serge 862
 
3031 serge 863
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
864
			(page_to_phys(page) & (1 << 17)) != 0;
2332 Serge 865
 
3031 serge 866
		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
867
					user_data, page_do_bit17_swizzling,
868
					partial_cacheline_write,
869
					needs_clflush_after);
870
		if (ret == 0)
871
			goto next_page;
872
 
873
		hit_slowpath = 1;
874
		mutex_unlock(&dev->struct_mutex);
6296 serge 875
		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
876
					user_data, page_do_bit17_swizzling,
877
					partial_cacheline_write,
878
					needs_clflush_after);
3031 serge 879
 
880
		mutex_lock(&dev->struct_mutex);
881
 
882
		if (ret)
883
			goto out;
884
 
5354 serge 885
next_page:
2332 Serge 886
		remain -= page_length;
3031 serge 887
		user_data += page_length;
2332 Serge 888
		offset += page_length;
889
	}
890
 
891
out:
3031 serge 892
	i915_gem_object_unpin_pages(obj);
893
 
894
	if (hit_slowpath) {
3480 Serge 895
		/*
896
		 * Fixup: Flush cpu caches in case we didn't flush the dirty
897
		 * cachelines in-line while writing and the object moved
898
		 * out of the cpu write domain while we've dropped the lock.
899
		 */
900
		if (!needs_clflush_after &&
901
		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
4104 Serge 902
			if (i915_gem_clflush_object(obj, obj->pin_display))
6084 serge 903
				needs_clflush_after = true;
3031 serge 904
		}
2332 Serge 905
	}
906
 
3031 serge 907
	if (needs_clflush_after)
3243 Serge 908
		i915_gem_chipset_flush(dev);
6084 serge 909
	else
910
		obj->cache_dirty = true;
3031 serge 911
 
6084 serge 912
	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
2332 Serge 913
	return ret;
914
}
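/*
 * Illustrative sketch, not part of the driver: the partial-cacheline test
 * used in the pwrite loop above. With a 64-byte clflush size a write only
 * avoids the pre-flush when both its start offset and its length are
 * multiples of the cacheline size.
 */
#if 0
static bool example_partial_cacheline(int offset, int length, int clflush_size)
{
	/* offset = 0,  length = 4096 -> false (whole cachelines touched)   */
	/* offset = 32, length = 4096 -> true  (first/last lines straddled) */
	return ((offset | length) & (clflush_size - 1)) != 0;
}
#endif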
3031 serge 915
 
916
/**
917
 * Writes data to the object referenced by handle.
918
 *
919
 * On error, the contents of the buffer that were to be modified are undefined.
920
 */
921
int
922
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
923
		      struct drm_file *file)
924
{
6084 serge 925
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 926
	struct drm_i915_gem_pwrite *args = data;
927
	struct drm_i915_gem_object *obj;
928
	int ret;
929
 
4104 Serge 930
	if (args->size == 0)
931
		return 0;
932
 
6084 serge 933
	intel_runtime_pm_get(dev_priv);
3480 Serge 934
 
3031 serge 935
	ret = i915_mutex_lock_interruptible(dev);
936
	if (ret)
6084 serge 937
		goto put_rpm;
3031 serge 938
 
939
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
940
	if (&obj->base == NULL) {
941
		ret = -ENOENT;
942
		goto unlock;
943
	}
944
 
945
	/* Bounds check destination. */
946
	if (args->offset > obj->base.size ||
947
	    args->size > obj->base.size - args->offset) {
948
		ret = -EINVAL;
949
		goto out;
950
	}
951
 
952
	/* prime objects have no backing filp to GEM pread/pwrite
953
	 * pages from.
954
	 */
955
	if (!obj->base.filp) {
956
		ret = -EINVAL;
957
		goto out;
958
	}
959
 
960
	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
961
 
962
	ret = -EFAULT;
963
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
964
	 * it would end up going through the fenced access, and we'll get
965
	 * different detiling behavior between reading and writing.
966
	 * pread/pwrite currently are reading and writing from the CPU
967
	 * perspective, requiring manual detiling by the client.
968
	 */
4104 Serge 969
	if (obj->tiling_mode == I915_TILING_NONE &&
970
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
971
	    cpu_write_needs_clflush(obj)) {
3031 serge 972
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
973
		/* Note that the gtt paths might fail with non-page-backed user
974
		 * pointers (e.g. gtt mappings when moving data between
975
		 * textures). Fallback to the shmem path in that case. */
976
	}
977
 
6296 serge 978
	if (ret == -EFAULT || ret == -ENOSPC) {
6084 serge 979
		ret = i915_gem_shmem_pwrite(dev, obj, args, file);
6296 serge 980
	}
3031 serge 981
 
982
out:
983
	drm_gem_object_unreference(&obj->base);
984
unlock:
985
	mutex_unlock(&dev->struct_mutex);
6084 serge 986
put_rpm:
987
	intel_runtime_pm_put(dev_priv);
988
 
3031 serge 989
	return ret;
990
}
991
 
992
int
3480 Serge 993
i915_gem_check_wedge(struct i915_gpu_error *error,
3031 serge 994
		     bool interruptible)
995
{
3480 Serge 996
	if (i915_reset_in_progress(error)) {
3031 serge 997
		/* Non-interruptible callers can't handle -EAGAIN, hence return
998
		 * -EIO unconditionally for these. */
999
		if (!interruptible)
1000
			return -EIO;
2332 Serge 1001
 
3480 Serge 1002
		/* Recovery complete, but the reset failed ... */
1003
		if (i915_terminally_wedged(error))
3031 serge 1004
			return -EIO;
2332 Serge 1005
 
6084 serge 1006
		/*
1007
		 * Check if GPU Reset is in progress - we need intel_ring_begin
1008
		 * to work properly to reinit the hw state while the gpu is
1009
		 * still marked as reset-in-progress. Handle this with a flag.
1010
		 */
1011
		if (!error->reload_in_reset)
1012
			return -EAGAIN;
3031 serge 1013
	}
2332 Serge 1014
 
3031 serge 1015
	return 0;
1016
}
2332 Serge 1017
 
4560 Serge 1018
static void fake_irq(unsigned long data)
1019
{
1020
//	wake_up_process((struct task_struct *)data);
1021
}
1022
 
1023
static bool missed_irq(struct drm_i915_private *dev_priv,
5060 serge 1024
		       struct intel_engine_cs *ring)
4560 Serge 1025
{
1026
	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
1027
}
1028
 
6084 serge 1029
static unsigned long local_clock_us(unsigned *cpu)
4560 Serge 1030
{
6084 serge 1031
	unsigned long t;
1032
 
1033
	/* Cheaply and approximately convert from nanoseconds to microseconds.
1034
	 * The result and subsequent calculations are also defined in the same
1035
	 * approximate microseconds units. The principal source of timing
1036
	 * error here is from the simple truncation.
1037
	 *
1038
	 * Note that local_clock() is only defined wrt to the current CPU;
1039
	 * the comparisons are no longer valid if we switch CPUs. Instead of
1040
	 * blocking preemption for the entire busywait, we can detect the CPU
1041
	 * switch and use that as indicator of system load and a reason to
1042
	 * stop busywaiting, see busywait_stop().
1043
	 */
1044
	t = GetClockNs() >> 10;
1045
 
1046
	return t;
1047
}
1048
 
1049
static bool busywait_stop(unsigned long timeout, unsigned cpu)
1050
{
1051
	unsigned this_cpu = 0;
1052
 
1053
	if (time_after(local_clock_us(&this_cpu), timeout))
4560 Serge 1054
		return true;
1055
 
6084 serge 1056
	return this_cpu != cpu;
4560 Serge 1057
}
1058
 
6084 serge 1059
static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
1060
{
1061
	unsigned long timeout;
1062
	unsigned cpu = 0;	/* never written by local_clock_us() in this port */
1063
 
1064
	/* When waiting for high frequency requests, e.g. during synchronous
1065
	 * rendering split between the CPU and GPU, the finite amount of time
1066
	 * required to set up the irq and wait upon it limits the response
1067
	 * rate. By busywaiting on the request completion for a short while we
1068
	 * can service the high frequency waits as quickly as possible. However,
1069
	 * if it is a slow request, we want to sleep as quickly as possible.
1070
	 * The tradeoff between waiting and sleeping is roughly the time it
1071
	 * takes to sleep on a request, on the order of a microsecond.
1072
	 */
1073
 
1074
	if (req->ring->irq_refcount)
1075
		return -EBUSY;
1076
 
1077
	/* Only spin if we know the GPU is processing this request */
1078
	if (!i915_gem_request_started(req, true))
1079
		return -EAGAIN;
1080
 
1081
	timeout = local_clock_us(&cpu) + 5;
1082
	while (1 /*!need_resched()*/) {
1083
		if (i915_gem_request_completed(req, true))
1084
			return 0;
1085
 
1086
		if (busywait_stop(timeout, cpu))
1087
			break;
1088
 
1089
		cpu_relax_lowlatency();
1090
	}
1091
 
1092
	if (i915_gem_request_completed(req, false))
1093
		return 0;
1094
 
1095
	return -EAGAIN;
1096
}
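/*
 * Illustrative sketch, not part of the driver: the bounded busy-wait pattern
 * used by __i915_spin_request() above - poll the completion condition for a
 * few microseconds before the caller falls back to sleeping on the ring
 * interrupt. The helpers are the ones defined earlier in this file; the 5 us
 * budget mirrors the "+ 5" above.
 */
#if 0
static int example_bounded_spin(struct drm_i915_gem_request *req)
{
	unsigned cpu = 0;
	unsigned long deadline = local_clock_us(&cpu) + 5;

	while (!busywait_stop(deadline, cpu)) {
		if (i915_gem_request_completed(req, true))
			return 0;		/* finished while spinning */
		cpu_relax_lowlatency();
	}

	return -EAGAIN;				/* caller should wait on the irq */
}
#endif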
1097
 
3031 serge 1098
/**
6084 serge 1099
 * __i915_wait_request - wait until execution of request has finished
1100
 * @req: duh!
1101
 * @reset_counter: reset sequence associated with the given request
3031 serge 1102
 * @interruptible: do an interruptible wait (normally yes)
1103
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
1104
 *
3480 Serge 1105
 * Note: It is of utmost importance that the passed in seqno and reset_counter
1106
 * values have been read by the caller in an smp safe manner. Where read-side
1107
 * locks are involved, it is sufficient to read the reset_counter before
1108
 * unlocking the lock that protects the seqno. For lockless tricks, the
1109
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
1110
 * inserted.
1111
 *
6084 serge 1112
 * Returns 0 if the request was found within the allotted time. Else returns the
3031 serge 1113
 * errno with remaining time filled in timeout argument.
1114
 */
6084 serge 1115
int __i915_wait_request(struct drm_i915_gem_request *req,
3480 Serge 1116
			unsigned reset_counter,
4560 Serge 1117
			bool interruptible,
5060 serge 1118
			s64 *timeout,
6084 serge 1119
			struct intel_rps_client *rps)
3031 serge 1120
{
6084 serge 1121
	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
5060 serge 1122
	struct drm_device *dev = ring->dev;
1123
	struct drm_i915_private *dev_priv = dev->dev_private;
4560 Serge 1124
	const bool irq_test_in_progress =
1125
		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
6084 serge 1126
	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
6088 serge 1127
	wait_queue_t wait;
5060 serge 1128
	unsigned long timeout_expire;
1129
	s64 before, now;
3031 serge 1130
	int ret;
2332 Serge 1131
 
5060 serge 1132
	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
4104 Serge 1133
 
6084 serge 1134
	if (list_empty(&req->list))
3031 serge 1135
		return 0;
2332 Serge 1136
 
6084 serge 1137
	if (i915_gem_request_completed(req, true))
1138
		return 0;
2332 Serge 1139
 
6084 serge 1140
	timeout_expire = 0;
1141
	if (timeout) {
1142
		if (WARN_ON(*timeout < 0))
1143
			return -EINVAL;
1144
 
1145
		if (*timeout == 0)
1146
			return -ETIME;
1147
 
1148
		timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
3031 serge 1149
	}
2332 Serge 1150
 
6084 serge 1151
	if (INTEL_INFO(dev_priv)->gen >= 6)
1152
		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
2332 Serge 1153
 
6084 serge 1154
	/* Record current time in case interrupted by signal, or wedged */
1155
	trace_i915_gem_request_wait_begin(req);
1156
	before = ktime_get_raw_ns();
1157
 
1158
	/* Optimistic spin for the next jiffie before touching IRQs */
1159
	ret = __i915_spin_request(req, state);
1160
	if (ret == 0)
1161
		goto out;
1162
 
1163
	if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
1164
		ret = -ENODEV;
1165
		goto out;
1166
	}
1167
 
6088 serge 1168
	INIT_LIST_HEAD(&wait.task_list);
1169
	wait.evnt = CreateEvent(NULL, MANUAL_DESTROY);
2332 Serge 1170
 
4560 Serge 1171
	for (;;) {
6103 serge 1172
		unsigned long flags;
4560 Serge 1173
 
3480 Serge 1174
		/* We need to check whether any gpu reset happened in between
1175
		 * the caller grabbing the seqno and now ... */
4560 Serge 1176
		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
1177
			/* ... but upgrade the -EAGAIN to an -EIO if the gpu
1178
			 * is truly gone. */
1179
			ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1180
			if (ret == 0)
1181
				ret = -EAGAIN;
1182
			break;
1183
		}
3480 Serge 1184
 
6084 serge 1185
		if (i915_gem_request_completed(req, false)) {
4560 Serge 1186
			ret = 0;
1187
			break;
1188
		}
2332 Serge 1189
 
6088 serge 1190
		if (timeout && time_after_eq(jiffies, timeout_expire)) {
4560 Serge 1191
			ret = -ETIME;
1192
			break;
1193
		}
2332 Serge 1194
 
4560 Serge 1195
        spin_lock_irqsave(&ring->irq_queue.lock, flags);
6088 serge 1196
        if (list_empty(&wait.task_list))
1197
            __add_wait_queue(&ring->irq_queue, &wait);
4560 Serge 1198
        spin_unlock_irqrestore(&ring->irq_queue.lock, flags);
1199
 
6088 serge 1200
            WaitEventTimeout(wait.evnt, 1);
4560 Serge 1201
 
6088 serge 1202
        if (!list_empty(&wait.task_list)) {
4560 Serge 1203
            spin_lock_irqsave(&ring->irq_queue.lock, flags);
6088 serge 1204
            list_del_init(&wait.task_list);
4560 Serge 1205
            spin_unlock_irqrestore(&ring->irq_queue.lock, flags);
1206
        }
1207
 
6088 serge 1208
	};
4560 Serge 1209
 
1210
	if (!irq_test_in_progress)
6084 serge 1211
		ring->irq_put(ring);
2332 Serge 1212
 
6088 serge 1213
    DestroyEvent(wait.evnt);
1214
 
6084 serge 1215
out:
1216
	now = ktime_get_raw_ns();
1217
	trace_i915_gem_request_wait_end(req);
1218
 
1219
	if (timeout) {
1220
		s64 tres = *timeout - (now - before);
1221
 
1222
		*timeout = tres < 0 ? 0 : tres;
1223
 
1224
		/*
1225
		 * Apparently ktime isn't accurate enough and occasionally has a
1226
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
1227
		 * things up to make the test happy. We allow up to 1 jiffy.
1228
		 *
1229
		 * This is a regression from the timespec->ktime conversion.
1230
		 */
1231
		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
1232
			*timeout = 0;
1233
	}
1234
 
4560 Serge 1235
	return ret;
3031 serge 1236
}
2332 Serge 1237
 
6084 serge 1238
int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
1239
				   struct drm_file *file)
1240
{
1241
	struct drm_i915_private *dev_private;
1242
	struct drm_i915_file_private *file_priv;
1243
 
1244
	WARN_ON(!req || !file || req->file_priv);
1245
 
1246
	if (!req || !file)
1247
		return -EINVAL;
1248
 
1249
	if (req->file_priv)
1250
		return -EINVAL;
1251
 
1252
	dev_private = req->ring->dev->dev_private;
1253
	file_priv = file->driver_priv;
1254
 
1255
	spin_lock(&file_priv->mm.lock);
1256
	req->file_priv = file_priv;
1257
	list_add_tail(&req->client_list, &file_priv->mm.request_list);
1258
	spin_unlock(&file_priv->mm.lock);
1259
 
6660 serge 1260
    req->pid = (struct pid*)1;
6084 serge 1261
 
1262
	return 0;
1263
}
1264
 
1265
static inline void
1266
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1267
{
1268
	struct drm_i915_file_private *file_priv = request->file_priv;
1269
 
1270
	if (!file_priv)
1271
		return;
1272
 
1273
	spin_lock(&file_priv->mm.lock);
1274
	list_del(&request->client_list);
1275
	request->file_priv = NULL;
1276
	spin_unlock(&file_priv->mm.lock);
6660 serge 1277
	request->pid = NULL;
6084 serge 1278
}
1279
 
1280
static void i915_gem_request_retire(struct drm_i915_gem_request *request)
1281
{
1282
	trace_i915_gem_request_retire(request);
1283
 
1284
	/* We know the GPU must have read the request to have
1285
	 * sent us the seqno + interrupt, so use the position
1286
	 * of tail of the request to update the last known position
1287
	 * of the GPU head.
1288
	 *
1289
	 * Note this requires that we are always called in request
1290
	 * completion order.
1291
	 */
1292
	request->ringbuf->last_retired_head = request->postfix;
1293
 
1294
	list_del_init(&request->list);
1295
	i915_gem_request_remove_from_client(request);
1296
 
1297
	i915_gem_request_unreference(request);
1298
}
1299
 
1300
static void
1301
__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
1302
{
1303
	struct intel_engine_cs *engine = req->ring;
1304
	struct drm_i915_gem_request *tmp;
1305
 
6660 serge 1306
	lockdep_assert_held(&engine->dev->struct_mutex);
6084 serge 1307
 
1308
	if (list_empty(&req->list))
1309
		return;
1310
 
1311
	do {
1312
		tmp = list_first_entry(&engine->request_list,
1313
				       typeof(*tmp), list);
1314
 
1315
		i915_gem_request_retire(tmp);
1316
	} while (tmp != req);
1317
 
1318
	WARN_ON(i915_verify_lists(engine->dev));
1319
}
1320
 
3031 serge 1321
/**
6084 serge 1322
 * Waits for a request to be signaled, and cleans up the
3031 serge 1323
 * request and object lists appropriately for that event.
1324
 */
1325
int
6084 serge 1326
i915_wait_request(struct drm_i915_gem_request *req)
3031 serge 1327
{
6084 serge 1328
	struct drm_device *dev;
1329
	struct drm_i915_private *dev_priv;
1330
	bool interruptible;
3031 serge 1331
	int ret;
2332 Serge 1332
 
6084 serge 1333
	BUG_ON(req == NULL);
1334
 
1335
	dev = req->ring->dev;
1336
	dev_priv = dev->dev_private;
1337
	interruptible = dev_priv->mm.interruptible;
1338
 
3031 serge 1339
	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
2332 Serge 1340
 
3480 Serge 1341
	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
3031 serge 1342
	if (ret)
1343
		return ret;
2332 Serge 1344
 
6084 serge 1345
	ret = __i915_wait_request(req,
1346
				  atomic_read(&dev_priv->gpu_error.reset_counter),
1347
				  interruptible, NULL, NULL);
3031 serge 1348
	if (ret)
1349
		return ret;
2332 Serge 1350
 
6084 serge 1351
	__i915_gem_request_retire__upto(req);
4104 Serge 1352
	return 0;
1353
}
1354
 
3031 serge 1355
/**
1356
 * Ensures that all rendering to the object has completed and the object is
1357
 * safe to unbind from the GTT or access from the CPU.
1358
 */
6084 serge 1359
int
3031 serge 1360
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1361
			       bool readonly)
1362
{
6084 serge 1363
	int ret, i;
2332 Serge 1364
 
6084 serge 1365
	if (!obj->active)
3031 serge 1366
		return 0;
2332 Serge 1367
 
6084 serge 1368
	if (readonly) {
1369
		if (obj->last_write_req != NULL) {
1370
			ret = i915_wait_request(obj->last_write_req);
1371
			if (ret)
1372
				return ret;
2332 Serge 1373
 
6084 serge 1374
			i = obj->last_write_req->ring->id;
1375
			if (obj->last_read_req[i] == obj->last_write_req)
1376
				i915_gem_object_retire__read(obj, i);
1377
			else
1378
				i915_gem_object_retire__write(obj);
1379
		}
1380
	} else {
1381
		for (i = 0; i < I915_NUM_RINGS; i++) {
1382
			if (obj->last_read_req[i] == NULL)
1383
				continue;
1384
 
1385
			ret = i915_wait_request(obj->last_read_req[i]);
1386
			if (ret)
1387
				return ret;
1388
 
1389
			i915_gem_object_retire__read(obj, i);
1390
		}
1391
		RQ_BUG_ON(obj->active);
1392
	}
1393
 
1394
	return 0;
3031 serge 1395
}
2332 Serge 1396
 
6084 serge 1397
static void
1398
i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
1399
			       struct drm_i915_gem_request *req)
1400
{
1401
	int ring = req->ring->id;
1402
 
1403
	if (obj->last_read_req[ring] == req)
1404
		i915_gem_object_retire__read(obj, ring);
1405
	else if (obj->last_write_req == req)
1406
		i915_gem_object_retire__write(obj);
1407
 
1408
	__i915_gem_request_retire__upto(req);
1409
}
1410
 
3260 Serge 1411
/* A nonblocking variant of the above wait. This is a highly dangerous routine
1412
 * as the object state may change during this call.
1413
 */
1414
static __must_check int
1415
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
6084 serge 1416
					    struct intel_rps_client *rps,
3260 Serge 1417
					    bool readonly)
1418
{
1419
	struct drm_device *dev = obj->base.dev;
1420
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 1421
	struct drm_i915_gem_request *requests[I915_NUM_RINGS];
3480 Serge 1422
	unsigned reset_counter;
6084 serge 1423
	int ret, i, n = 0;
2332 Serge 1424
 
3260 Serge 1425
	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1426
	BUG_ON(!dev_priv->mm.interruptible);
2332 Serge 1427
 
6084 serge 1428
	if (!obj->active)
3260 Serge 1429
		return 0;
2332 Serge 1430
 
3480 Serge 1431
	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
3260 Serge 1432
	if (ret)
1433
		return ret;
2332 Serge 1434
 
6084 serge 1435
	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
2332 Serge 1436
 
6084 serge 1437
	if (readonly) {
1438
		struct drm_i915_gem_request *req;
1439
 
1440
		req = obj->last_write_req;
1441
		if (req == NULL)
1442
			return 0;
1443
 
1444
		requests[n++] = i915_gem_request_reference(req);
1445
	} else {
1446
		for (i = 0; i < I915_NUM_RINGS; i++) {
1447
			struct drm_i915_gem_request *req;
1448
 
1449
			req = obj->last_read_req[i];
1450
			if (req == NULL)
1451
				continue;
1452
 
1453
			requests[n++] = i915_gem_request_reference(req);
1454
		}
1455
	}
1456
 
3260 Serge 1457
	mutex_unlock(&dev->struct_mutex);
6084 serge 1458
	for (i = 0; ret == 0 && i < n; i++)
1459
		ret = __i915_wait_request(requests[i], reset_counter, true,
1460
					  NULL, rps);
3260 Serge 1461
	mutex_lock(&dev->struct_mutex);
2332 Serge 1462
 
6084 serge 1463
	for (i = 0; i < n; i++) {
1464
		if (ret == 0)
1465
			i915_gem_object_retire_request(obj, requests[i]);
1466
		i915_gem_request_unreference(requests[i]);
1467
	}
1468
 
1469
	return ret;
3260 Serge 1470
}
2332 Serge 1471
 
6084 serge 1472
static struct intel_rps_client *to_rps_client(struct drm_file *file)
1473
{
1474
	struct drm_i915_file_private *fpriv = file->driver_priv;
1475
	return &fpriv->rps;
1476
}
1477
 
3260 Serge 1478
/**
1479
 * Called when user space prepares to use an object with the CPU, either
1480
 * through the mmap ioctl's mapping or a GTT mapping.
1481
 */
1482
int
1483
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1484
			  struct drm_file *file)
1485
{
1486
	struct drm_i915_gem_set_domain *args = data;
1487
	struct drm_i915_gem_object *obj;
1488
	uint32_t read_domains = args->read_domains;
1489
	uint32_t write_domain = args->write_domain;
1490
	int ret;
2332 Serge 1491
 
3260 Serge 1492
	/* Only handle setting domains to types used by the CPU. */
1493
	if (write_domain & I915_GEM_GPU_DOMAINS)
1494
		return -EINVAL;
2332 Serge 1495
 
3260 Serge 1496
	if (read_domains & I915_GEM_GPU_DOMAINS)
1497
		return -EINVAL;
2332 Serge 1498
 
3260 Serge 1499
	/* Having something in the write domain implies it's in the read
1500
	 * domain, and only that read domain.  Enforce that in the request.
1501
	 */
1502
	if (write_domain != 0 && read_domains != write_domain)
1503
		return -EINVAL;
2332 Serge 1504
 
3260 Serge 1505
	ret = i915_mutex_lock_interruptible(dev);
1506
	if (ret)
1507
		return ret;
2332 Serge 1508
 
3260 Serge 1509
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1510
	if (&obj->base == NULL) {
1511
		ret = -ENOENT;
1512
		goto unlock;
1513
	}
2332 Serge 1514
 
3260 Serge 1515
	/* Try to flush the object off the GPU without holding the lock.
1516
	 * We will repeat the flush holding the lock in the normal manner
1517
	 * to catch cases where we are gazumped.
1518
	 */
5060 serge 1519
	ret = i915_gem_object_wait_rendering__nonblocking(obj,
6084 serge 1520
							  to_rps_client(file),
5060 serge 1521
							  !write_domain);
3260 Serge 1522
	if (ret)
1523
		goto unref;
2332 Serge 1524
 
6084 serge 1525
	if (read_domains & I915_GEM_DOMAIN_GTT)
3260 Serge 1526
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
6084 serge 1527
	else
3260 Serge 1528
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
2332 Serge 1529
 
6084 serge 1530
	if (write_domain != 0)
1531
		intel_fb_obj_invalidate(obj,
1532
					write_domain == I915_GEM_DOMAIN_GTT ?
1533
					ORIGIN_GTT : ORIGIN_CPU);
1534
 
3260 Serge 1535
unref:
1536
	drm_gem_object_unreference(&obj->base);
1537
unlock:
1538
	mutex_unlock(&dev->struct_mutex);
1539
	return ret;
1540
}
2332 Serge 1541
 
4293 Serge 1542
/**
1543
 * Called when user space has done writes to this buffer
1544
 */
1545
int
1546
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1547
			 struct drm_file *file)
1548
{
1549
	struct drm_i915_gem_sw_finish *args = data;
1550
	struct drm_i915_gem_object *obj;
1551
	int ret = 0;
2332 Serge 1552
 
4293 Serge 1553
	ret = i915_mutex_lock_interruptible(dev);
1554
	if (ret)
1555
		return ret;
2332 Serge 1556
 
4293 Serge 1557
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1558
	if (&obj->base == NULL) {
1559
		ret = -ENOENT;
1560
		goto unlock;
1561
	}
2332 Serge 1562
 
4293 Serge 1563
	/* Pinned buffers may be scanout, so flush the cache */
1564
	if (obj->pin_display)
6084 serge 1565
		i915_gem_object_flush_cpu_write_domain(obj);
2332 Serge 1566
 
4293 Serge 1567
	drm_gem_object_unreference(&obj->base);
1568
unlock:
1569
	mutex_unlock(&dev->struct_mutex);
1570
	return ret;
1571
}
1572
 
3260 Serge 1573
/**
1574
 * Maps the contents of an object, returning the address it is mapped
1575
 * into.
1576
 *
1577
 * While the mapping holds a reference on the contents of the object, it doesn't
1578
 * imply a ref on the object itself.
5354 serge 1579
 *
1580
 * IMPORTANT:
1581
 *
1582
 * DRM driver writers who look a this function as an example for how to do GEM
1583
 * mmap support, please don't implement mmap support like here. The modern way
1584
 * to implement DRM mmap support is with an mmap offset ioctl (like
1585
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1586
 * That way debug tooling like valgrind will understand what's going on, hiding
1587
 * the mmap call in a driver private ioctl will break that. The i915 driver only
1588
 * does cpu mmaps this way because we didn't know better.
3260 Serge 1589
 */
1590
int
1591
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1592
		    struct drm_file *file)
1593
{
1594
	struct drm_i915_gem_mmap *args = data;
1595
	struct drm_gem_object *obj;
4392 Serge 1596
	unsigned long addr;
2332 Serge 1597
 
6084 serge 1598
//	if (args->flags & ~(I915_MMAP_WC))
1599
//		return -EINVAL;
3260 Serge 1600
	obj = drm_gem_object_lookup(dev, file, args->handle);
1601
	if (obj == NULL)
1602
		return -ENOENT;
4104 Serge 1603
 
3260 Serge 1604
	/* prime objects have no backing filp to GEM mmap
1605
	 * pages from.
1606
	 */
1607
	if (!obj->filp) {
1608
		drm_gem_object_unreference_unlocked(obj);
1609
		return -EINVAL;
1610
	}
2332 Serge 1611
 
6084 serge 1612
	addr = vm_mmap(obj->filp, 0, args->size,
1613
		       PROT_READ | PROT_WRITE, MAP_SHARED,
1614
		       args->offset);
3260 Serge 1615
	drm_gem_object_unreference_unlocked(obj);
6084 serge 1616
	if (IS_ERR((void *)addr))
1617
		return addr;
2332 Serge 1618
 
3260 Serge 1619
	args->addr_ptr = (uint64_t) addr;
2332 Serge 1620
 
6084 serge 1621
	return 0;
3260 Serge 1622
}
2332 Serge 1623
 
1624
 
1625
 
1626
 
1627
 
1628
 
1629
 
1630
 
3031 serge 1631
 
1632
 
1633
 
1634
 
1635
 
1636
/**
1637
 * i915_gem_release_mmap - remove physical page mappings
1638
 * @obj: obj in question
1639
 *
1640
 * Preserve the reservation of the mmapping with the DRM core code, but
1641
 * relinquish ownership of the pages back to the system.
1642
 *
1643
 * It is vital that we remove the page mapping if we have mapped a tiled
1644
 * object through the GTT and then lose the fence register due to
1645
 * resource pressure. Similarly if the object has been moved out of the
1646
 * aperture, then pages mapped into userspace must be revoked. Removing the
1647
 * mapping will then trigger a page fault on the next user access, allowing
1648
 * fixup by i915_gem_fault().
1649
 */
1650
void
1651
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1652
{
1653
	if (!obj->fault_mappable)
1654
		return;
1655
 
4104 Serge 1656
//	drm_vma_node_unmap(&obj->base.vma_node, obj->base.dev->dev_mapping);
3031 serge 1657
	obj->fault_mappable = false;
1658
}
1659
 
6084 serge 1660
void
1661
i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
1662
{
1663
	struct drm_i915_gem_object *obj;
1664
 
1665
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
1666
		i915_gem_release_mmap(obj);
1667
}
1668
 
3480 Serge 1669
uint32_t
2332 Serge 1670
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
1671
{
1672
	uint32_t gtt_size;
1673
 
1674
	if (INTEL_INFO(dev)->gen >= 4 ||
1675
	    tiling_mode == I915_TILING_NONE)
1676
		return size;
1677
 
1678
	/* Previous chips need a power-of-two fence region when tiling */
1679
	if (INTEL_INFO(dev)->gen == 3)
1680
		gtt_size = 1024*1024;
1681
	else
1682
		gtt_size = 512*1024;
1683
 
1684
	while (gtt_size < size)
1685
		gtt_size <<= 1;
1686
 
1687
	return gtt_size;
1688
}
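/*
 * Illustrative sketch, not part of the driver: the power-of-two fence sizing
 * performed above for pre-gen4 chips. A 600 KiB tiled object on gen3 stays
 * at the 1 MiB minimum, a 1.5 MiB object doubles once to 2 MiB, and gen4+
 * returns the object size unchanged.
 */
#if 0
static uint32_t example_fence_size(uint32_t size, uint32_t minimum)
{
	uint32_t gtt_size = minimum;	/* 1 MiB on gen3, 512 KiB earlier */

	while (gtt_size < size)
		gtt_size <<= 1;		/* e.g. 1.5 MiB needs one doubling to 2 MiB */

	return gtt_size;
}
#endif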
1689
 
1690
/**
1691
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1692
 * @obj: object to check
1693
 *
1694
 * Return the required GTT alignment for an object, taking into account
1695
 * potential fence register mapping.
1696
 */
3480 Serge 1697
uint32_t
1698
i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
1699
			   int tiling_mode, bool fenced)
2332 Serge 1700
{
1701
	/*
1702
	 * Minimum alignment is 4k (GTT page size), but might be greater
1703
	 * if a fence register is needed for the object.
1704
	 */
3480 Serge 1705
	if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
2332 Serge 1706
	    tiling_mode == I915_TILING_NONE)
1707
		return 4096;
1708
 
1709
	/*
1710
	 * Previous chips need to be aligned to the size of the smallest
1711
	 * fence register that can contain the object.
1712
	 */
1713
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
1714
}
1715
 
1716
 
1717
 
3480 Serge 1718
int
1719
i915_gem_mmap_gtt(struct drm_file *file,
1720
          struct drm_device *dev,
6084 serge 1721
		  uint32_t handle,
3480 Serge 1722
          uint64_t *offset)
1723
{
1724
    struct drm_i915_private *dev_priv = dev->dev_private;
1725
    struct drm_i915_gem_object *obj;
1726
    unsigned long pfn;
1727
    char *mem, *ptr;
1728
    int ret;
1729
 
1730
    ret = i915_mutex_lock_interruptible(dev);
1731
    if (ret)
1732
        return ret;
1733
 
1734
    obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
1735
    if (&obj->base == NULL) {
1736
        ret = -ENOENT;
1737
        goto unlock;
1738
    }
1739
 
1740
    if (obj->madv != I915_MADV_WILLNEED) {
5060 serge 1741
		DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
1742
		ret = -EFAULT;
3480 Serge 1743
        goto out;
1744
    }
1745
    /* Now bind it into the GTT if needed */
5060 serge 1746
	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
3480 Serge 1747
    if (ret)
1748
        goto out;
1749
 
1750
    ret = i915_gem_object_set_to_gtt_domain(obj, 1);
1751
    if (ret)
1752
        goto unpin;
1753
 
1754
    ret = i915_gem_object_get_fence(obj);
1755
    if (ret)
1756
        goto unpin;
1757
 
1758
    obj->fault_mappable = true;
1759
 
4104 Serge 1760
    pfn = dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj);
3480 Serge 1761
 
1762
    /* Finally, remap it using the new GTT offset */
1763
 
1764
    mem = UserAlloc(obj->base.size);
1765
    if(unlikely(mem == NULL))
1766
    {
1767
        ret = -ENOMEM;
1768
        goto unpin;
1769
    }
1770
 
1771
    for(ptr = mem; ptr < mem + obj->base.size; ptr+= 4096, pfn+= 4096)
1772
        MapPage(ptr, pfn, PG_SHARED|PG_UW);
1773
 
1774
unpin:
5060 serge 1775
    i915_gem_object_unpin_pages(obj);
3480 Serge 1776
 
1777
 
5367 serge 1778
    *offset = (uint32_t)mem;
3480 Serge 1779
 
1780
out:
6088 serge 1781
	drm_gem_object_unreference(&obj->base);
3480 Serge 1782
unlock:
6088 serge 1783
	mutex_unlock(&dev->struct_mutex);
1784
	return ret;
3480 Serge 1785
}
1786
 
1787
/**
1788
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1789
 * @dev: DRM device
1790
 * @data: GTT mapping ioctl data
1791
 * @file: GEM object info
1792
 *
1793
 * Simply returns the fake offset to userspace so it can mmap it.
1794
 * The mmap call will end up in drm_gem_mmap(), which will set things
1795
 * up so we can get faults in the handler above.
1796
 *
1797
 * The fault handler will take care of binding the object into the GTT
1798
 * (since it may have been evicted to make room for something), allocating
1799
 * a fence register, and mapping the appropriate aperture address into
1800
 * userspace.
1801
 */
1802
int
1803
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
6084 serge 1804
			struct drm_file *file)
3480 Serge 1805
{
6084 serge 1806
	struct drm_i915_gem_mmap_gtt *args = data;
3480 Serge 1807
 
6084 serge 1808
	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
3480 Serge 1809
}
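 
/*
 * Hedged usage sketch (not part of the driver): in this port the "offset"
 * returned by i915_gem_mmap_gtt() is already a user-visible mapping created
 * with UserAlloc()/MapPage(), not a fake mmap offset as on Linux. A caller
 * with the usual ioctl plumbing might consume it roughly like this; the
 * handle value is a placeholder.
 */
static int example_map_bo(struct drm_device *dev, struct drm_file *file,
			  uint32_t handle, void **ptr)
{
	struct drm_i915_gem_mmap_gtt args = { .handle = handle };
	int ret;
 
	ret = i915_gem_mmap_gtt(file, dev, args.handle, &args.offset);
	if (ret)
		return ret;
 
	/* direct CPU pointer in this port, usable without a second mmap step */
	*ptr = (void *)(unsigned long)args.offset;
	return 0;
}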
1810
 
3031 serge 1811
/* Immediately discard the backing storage */
1812
static void
1813
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
1814
{
1815
//	i915_gem_object_free_mmap_offset(obj);
2332 Serge 1816
 
3263 Serge 1817
	if (obj->base.filp == NULL)
1818
		return;
2332 Serge 1819
 
3031 serge 1820
	/* Our goal here is to return as much of the memory as
1821
	 * is possible back to the system as we are called from OOM.
1822
	 * To do this we must instruct the shmfs to drop all of its
1823
	 * backing pages, *now*.
1824
	 */
5060 serge 1825
//	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
3031 serge 1826
	obj->madv = __I915_MADV_PURGED;
1827
}
2332 Serge 1828
 
5060 serge 1829
/* Try to discard unwanted pages */
1830
static void
1831
i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
3031 serge 1832
{
5060 serge 1833
	struct address_space *mapping;
1834
 
1835
	switch (obj->madv) {
1836
	case I915_MADV_DONTNEED:
1837
		i915_gem_object_truncate(obj);
1838
	case __I915_MADV_PURGED:
1839
		return;
1840
	}
1841
 
1842
	if (obj->base.filp == NULL)
1843
		return;
1844
 
3031 serge 1845
}
2332 Serge 1846
 
3031 serge 1847
static void
1848
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
1849
{
3746 Serge 1850
	struct sg_page_iter sg_iter;
1851
	int ret;
2332 Serge 1852
 
3031 serge 1853
	BUG_ON(obj->madv == __I915_MADV_PURGED);
2332 Serge 1854
 
3031 serge 1855
	ret = i915_gem_object_set_to_cpu_domain(obj, true);
1856
	if (ret) {
1857
		/* In the event of a disaster, abandon all caches and
1858
		 * hope for the best.
1859
		 */
1860
		WARN_ON(ret != -EIO);
4104 Serge 1861
		i915_gem_clflush_object(obj, true);
3031 serge 1862
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
1863
	}
2332 Serge 1864
 
6084 serge 1865
	i915_gem_gtt_finish_object(obj);
6296 serge 1866
 
1867
	if (i915_gem_object_needs_bit17_swizzle(obj))
1868
		i915_gem_object_save_bit_17_swizzle(obj);
1869
 
3031 serge 1870
	if (obj->madv == I915_MADV_DONTNEED)
1871
		obj->dirty = 0;
2332 Serge 1872
 
3746 Serge 1873
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
1874
		struct page *page = sg_page_iter_page(&sg_iter);
2332 Serge 1875
 
6084 serge 1876
		page_cache_release(page);
3243 Serge 1877
	}
6084 serge 1878
	obj->dirty = 0;
3243 Serge 1879
 
1880
	sg_free_table(obj->pages);
1881
	kfree(obj->pages);
3031 serge 1882
}
2332 Serge 1883
 
3480 Serge 1884
int
3031 serge 1885
i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
1886
{
1887
	const struct drm_i915_gem_object_ops *ops = obj->ops;
2332 Serge 1888
 
3243 Serge 1889
	if (obj->pages == NULL)
3031 serge 1890
		return 0;
2332 Serge 1891
 
3031 serge 1892
	if (obj->pages_pin_count)
1893
		return -EBUSY;
1894
 
4104 Serge 1895
	BUG_ON(i915_gem_obj_bound_any(obj));
1896
 
3243 Serge 1897
	/* ->put_pages might need to allocate memory for the bit17 swizzle
1898
	 * array, hence protect them from being reaped by removing them from gtt
1899
	 * lists early. */
4104 Serge 1900
	list_del(&obj->global_list);
3243 Serge 1901
 
3031 serge 1902
	ops->put_pages(obj);
3243 Serge 1903
	obj->pages = NULL;
3031 serge 1904
 
5060 serge 1905
	i915_gem_object_invalidate(obj);
3031 serge 1906
 
1907
	return 0;
1908
}
1909
 
2332 Serge 1910
static int
3031 serge 1911
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2332 Serge 1912
{
3260 Serge 1913
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
6084 serge 1914
	int page_count, i;
1915
	struct address_space *mapping;
1916
	struct sg_table *st;
3243 Serge 1917
	struct scatterlist *sg;
3746 Serge 1918
	struct sg_page_iter sg_iter;
3243 Serge 1919
	struct page *page;
3746 Serge 1920
	unsigned long last_pfn = 0;	/* suppress gcc warning */
6084 serge 1921
	int ret;
3243 Serge 1922
	gfp_t gfp;
2332 Serge 1923
 
3243 Serge 1924
	/* Assert that the object is not currently in any GPU domain. As it
1925
	 * wasn't in the GTT, there shouldn't be any way it could have been in
1926
	 * a GPU cache
2332 Serge 1927
	 */
3243 Serge 1928
	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
1929
	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
1930
 
1931
	st = kmalloc(sizeof(*st), GFP_KERNEL);
1932
	if (st == NULL)
1933
		return -ENOMEM;
1934
 
2332 Serge 1935
	page_count = obj->base.size / PAGE_SIZE;
3243 Serge 1936
	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
1937
		kfree(st);
2332 Serge 1938
		return -ENOMEM;
3243 Serge 1939
	}
2332 Serge 1940
 
3243 Serge 1941
	/* Get the list of pages out of our struct file.  They'll be pinned
1942
	 * at this point until we release them.
1943
	 *
1944
	 * Fail silently without starting the shrinker
1945
	 */
3746 Serge 1946
	sg = st->sgl;
1947
	st->nents = 0;
1948
	for (i = 0; i < page_count; i++) {
4104 Serge 1949
        page = shmem_read_mapping_page_gfp(obj->base.filp, i, gfp);
3260 Serge 1950
		if (IS_ERR(page)) {
1951
            dbgprintf("%s invalid page %p\n", __FUNCTION__, page);
2332 Serge 1952
			goto err_pages;
3260 Serge 1953
		}
5354 serge 1954
#ifdef CONFIG_SWIOTLB
1955
		if (swiotlb_nr_tbl()) {
1956
			st->nents++;
1957
			sg_set_page(sg, page, PAGE_SIZE, 0);
1958
			sg = sg_next(sg);
1959
			continue;
1960
		}
1961
#endif
3746 Serge 1962
		if (!i || page_to_pfn(page) != last_pfn + 1) {
1963
			if (i)
1964
				sg = sg_next(sg);
1965
			st->nents++;
6084 serge 1966
			sg_set_page(sg, page, PAGE_SIZE, 0);
3746 Serge 1967
		} else {
1968
			sg->length += PAGE_SIZE;
1969
		}
1970
		last_pfn = page_to_pfn(page);
6937 serge 1971
 
1972
		/* Check that the i965g/gm workaround works. */
1973
		WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
3243 Serge 1974
	}
5354 serge 1975
#ifdef CONFIG_SWIOTLB
1976
	if (!swiotlb_nr_tbl())
1977
#endif
3746 Serge 1978
		sg_mark_end(sg);
3243 Serge 1979
	obj->pages = st;
3031 serge 1980
 
6084 serge 1981
	ret = i915_gem_gtt_prepare_object(obj);
1982
	if (ret)
1983
		goto err_pages;
5367 serge 1984
 
6296 serge 1985
	if (i915_gem_object_needs_bit17_swizzle(obj))
1986
		i915_gem_object_do_bit_17_swizzle(obj);
1987
 
5367 serge 1988
	if (obj->tiling_mode != I915_TILING_NONE &&
1989
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
1990
		i915_gem_object_pin_pages(obj);
1991
 
2332 Serge 1992
	return 0;
1993
 
1994
err_pages:
3746 Serge 1995
	sg_mark_end(sg);
1996
	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
1997
		page_cache_release(sg_page_iter_page(&sg_iter));
3243 Serge 1998
	sg_free_table(st);
1999
	kfree(st);
6084 serge 2000
 
3243 Serge 2001
	return PTR_ERR(page);
2332 Serge 2002
}
2003
 
3031 serge 2004
/* Ensure that the associated pages are gathered from the backing storage
2005
 * and pinned into our object. i915_gem_object_get_pages() may be called
2006
 * multiple times before they are released by a single call to
2007
 * i915_gem_object_put_pages() - once the pages are no longer referenced
2008
 * either as a result of memory pressure (reaping pages under the shrinker)
2009
 * or as the object is itself released.
2010
 */
2011
int
2012
i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2332 Serge 2013
{
3031 serge 2014
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
2015
	const struct drm_i915_gem_object_ops *ops = obj->ops;
2016
	int ret;
2332 Serge 2017
 
3243 Serge 2018
	if (obj->pages)
3031 serge 2019
		return 0;
2332 Serge 2020
 
4392 Serge 2021
	if (obj->madv != I915_MADV_WILLNEED) {
5060 serge 2022
		DRM_DEBUG("Attempting to obtain a purgeable object\n");
2023
		return -EFAULT;
4392 Serge 2024
	}
2025
 
3031 serge 2026
	BUG_ON(obj->pages_pin_count);
2332 Serge 2027
 
3031 serge 2028
	ret = ops->get_pages(obj);
2029
	if (ret)
2030
		return ret;
2344 Serge 2031
 
4104 Serge 2032
	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
6084 serge 2033
 
2034
	obj->get_page.sg = obj->pages->sgl;
2035
	obj->get_page.last = 0;
2036
 
2037
	return 0;
2332 Serge 2038
}
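 
/*
 * Illustrative sketch (not part of the driver): the usual discipline around
 * the helpers above, assuming struct_mutex is held. Callers that need
 * obj->pages to stay resident take a pin with i915_gem_object_pin_pages()
 * after a successful get_pages() and drop it when done; put_pages() only
 * succeeds once every pin is gone.
 */
static int example_hold_pages(struct drm_i915_gem_object *obj)
{
	int ret;
 
	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;
 
	i915_gem_object_pin_pages(obj);
	/* ... safe to walk obj->pages here ... */
	i915_gem_object_unpin_pages(obj);
 
	return 0;
}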
2039
 
6084 serge 2040
void i915_vma_move_to_active(struct i915_vma *vma,
2041
			     struct drm_i915_gem_request *req)
2332 Serge 2042
{
6084 serge 2043
	struct drm_i915_gem_object *obj = vma->obj;
2044
	struct intel_engine_cs *ring;
2332 Serge 2045
 
6084 serge 2046
	ring = i915_gem_request_get_ring(req);
2332 Serge 2047
 
2048
	/* Add a reference if we're newly entering the active list. */
6084 serge 2049
	if (obj->active == 0)
2344 Serge 2050
		drm_gem_object_reference(&obj->base);
6084 serge 2051
	obj->active |= intel_ring_flag(ring);
2332 Serge 2052
 
6084 serge 2053
	list_move_tail(&obj->ring_list[ring->id], &ring->active_list);
2054
	i915_gem_request_assign(&obj->last_read_req[ring->id], req);
2332 Serge 2055
 
6084 serge 2056
	list_move_tail(&vma->mm_list, &vma->vm->active_list);
2332 Serge 2057
}
2058
 
6084 serge 2059
static void
2060
i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
4560 Serge 2061
{
6084 serge 2062
	RQ_BUG_ON(obj->last_write_req == NULL);
2063
	RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring)));
2064
 
2065
	i915_gem_request_assign(&obj->last_write_req, NULL);
2066
	intel_fb_obj_flush(obj, true, ORIGIN_CS);
4560 Serge 2067
}
2068
 
2344 Serge 2069
static void
6084 serge 2070
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
2344 Serge 2071
{
5060 serge 2072
	struct i915_vma *vma;
2332 Serge 2073
 
6084 serge 2074
	RQ_BUG_ON(obj->last_read_req[ring] == NULL);
2075
	RQ_BUG_ON(!(obj->active & (1 << ring)));
2332 Serge 2076
 
6084 serge 2077
	list_del_init(&obj->ring_list[ring]);
2078
	i915_gem_request_assign(&obj->last_read_req[ring], NULL);
2344 Serge 2079
 
6084 serge 2080
	if (obj->last_write_req && obj->last_write_req->ring->id == ring)
2081
		i915_gem_object_retire__write(obj);
5354 serge 2082
 
6084 serge 2083
	obj->active &= ~(1 << ring);
2084
	if (obj->active)
2085
		return;
2344 Serge 2086
 
6084 serge 2087
	/* Bump our place on the bound list to keep it roughly in LRU order
2088
	 * so that we don't steal from recently used but inactive objects
2089
	 * (unless we are forced to ofc!)
2090
	 */
2091
	list_move_tail(&obj->global_list,
2092
		       &to_i915(obj->base.dev)->mm.bound_list);
3031 serge 2093
 
6084 serge 2094
	list_for_each_entry(vma, &obj->vma_list, vma_link) {
2095
		if (!list_empty(&vma->mm_list))
2096
			list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
2097
	}
2344 Serge 2098
 
6084 serge 2099
	i915_gem_request_assign(&obj->last_fenced_req, NULL);
2352 Serge 2100
	drm_gem_object_unreference(&obj->base);
2101
}
2102
 
3243 Serge 2103
static int
3480 Serge 2104
i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
2344 Serge 2105
{
3243 Serge 2106
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 2107
	struct intel_engine_cs *ring;
3243 Serge 2108
	int ret, i, j;
2344 Serge 2109
 
3480 Serge 2110
	/* Carefully retire all requests without writing to the rings */
3243 Serge 2111
	for_each_ring(ring, dev_priv, i) {
3480 Serge 2112
		ret = intel_ring_idle(ring);
6084 serge 2113
		if (ret)
2114
			return ret;
3480 Serge 2115
	}
2116
	i915_gem_retire_requests(dev);
3243 Serge 2117
 
3480 Serge 2118
	/* Finally reset hw state */
3243 Serge 2119
	for_each_ring(ring, dev_priv, i) {
3480 Serge 2120
		intel_ring_init_seqno(ring, seqno);
2121
 
5060 serge 2122
		for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++)
2123
			ring->semaphore.sync_seqno[j] = 0;
3243 Serge 2124
	}
2125
 
2126
	return 0;
2344 Serge 2127
}
2128
 
3480 Serge 2129
int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
2130
{
2131
	struct drm_i915_private *dev_priv = dev->dev_private;
2132
	int ret;
2133
 
2134
	if (seqno == 0)
2135
		return -EINVAL;
2136
 
2137
	/* HWS page needs to be set less than what we
2138
	 * will inject to ring
2139
	 */
2140
	ret = i915_gem_init_seqno(dev, seqno - 1);
2141
	if (ret)
2142
		return ret;
2143
 
2144
	/* Carefully set the last_seqno value so that wrap
2145
	 * detection still works
2146
	 */
2147
	dev_priv->next_seqno = seqno;
2148
	dev_priv->last_seqno = seqno - 1;
2149
	if (dev_priv->last_seqno == 0)
2150
		dev_priv->last_seqno--;
2151
 
2152
	return 0;
2153
}
2154
 
3243 Serge 2155
int
2156
i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
2344 Serge 2157
{
3243 Serge 2158
	struct drm_i915_private *dev_priv = dev->dev_private;
2344 Serge 2159
 
3243 Serge 2160
	/* reserve 0 for non-seqno */
2161
	if (dev_priv->next_seqno == 0) {
3480 Serge 2162
		int ret = i915_gem_init_seqno(dev, 0);
3243 Serge 2163
		if (ret)
2164
			return ret;
2165
 
2166
		dev_priv->next_seqno = 1;
2167
	}
2168
 
3480 Serge 2169
	*seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
3243 Serge 2170
	return 0;
2332 Serge 2171
}
2172
 
6084 serge 2173
/*
2174
 * NB: This function is not allowed to fail. Doing so would mean the
2175
 * request is not being tracked for completion but the work itself is
2176
 * going to happen on the hardware. This would be a Bad Thing(tm).
2177
 */
2178
void __i915_add_request(struct drm_i915_gem_request *request,
2179
			struct drm_i915_gem_object *obj,
2180
			bool flush_caches)
2352 Serge 2181
{
6084 serge 2182
	struct intel_engine_cs *ring;
2183
	struct drm_i915_private *dev_priv;
5354 serge 2184
	struct intel_ringbuffer *ringbuf;
6084 serge 2185
	u32 request_start;
2352 Serge 2186
	int ret;
2332 Serge 2187
 
5354 serge 2188
	if (WARN_ON(request == NULL))
6084 serge 2189
		return;
5354 serge 2190
 
6084 serge 2191
	ring = request->ring;
2192
	dev_priv = ring->dev->dev_private;
2193
	ringbuf = request->ringbuf;
5354 serge 2194
 
6084 serge 2195
	/*
2196
	 * To ensure that this call will not fail, space for its emissions
2197
	 * should already have been reserved in the ring buffer. Let the ring
2198
	 * know that it is time to use that space up.
2199
	 */
2200
	intel_ring_reserved_space_use(ringbuf);
2201
 
5354 serge 2202
	request_start = intel_ring_get_tail(ringbuf);
3031 serge 2203
	/*
2204
	 * Emit any outstanding flushes - execbuf can fail to emit the flush
2205
	 * after having emitted the batchbuffer command. Hence we need to fix
2206
	 * things up similar to emitting the lazy request. The difference here
2207
	 * is that the flush _must_ happen before the next request, no matter
2208
	 * what.
2209
	 */
6084 serge 2210
	if (flush_caches) {
2211
		if (i915.enable_execlists)
2212
			ret = logical_ring_flush_all_caches(request);
2213
		else
2214
			ret = intel_ring_flush_all_caches(request);
2215
		/* Not allowed to fail! */
2216
		WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
5354 serge 2217
	}
2332 Serge 2218
 
3031 serge 2219
	/* Record the position of the start of the request so that
2220
	 * should we detect the updated seqno part-way through the
6084 serge 2221
	 * GPU processing the request, we never over-estimate the
3031 serge 2222
	 * position of the head.
2223
	 */
6084 serge 2224
	request->postfix = intel_ring_get_tail(ringbuf);
3031 serge 2225
 
6084 serge 2226
	if (i915.enable_execlists)
2227
		ret = ring->emit_request(request);
2228
	else {
2229
		ret = ring->add_request(request);
2230
 
2231
		request->tail = intel_ring_get_tail(ringbuf);
5354 serge 2232
	}
6084 serge 2233
	/* Not allowed to fail! */
2234
	WARN(ret, "emit|add_request failed: %d!\n", ret);
2332 Serge 2235
 
4104 Serge 2236
	request->head = request_start;
2237
 
2238
	/* Whilst this request exists, batch_obj will be on the
2239
	 * active_list, and so will hold the active reference. Only when this
2240
 * request is retired will the batch_obj be moved onto the
2241
	 * inactive_list and lose its active reference. Hence we do not need
2242
	 * to explicitly hold another reference here.
2243
	 */
4560 Serge 2244
	request->batch_obj = obj;
4104 Serge 2245
 
5060 serge 2246
	request->emitted_jiffies = jiffies;
6084 serge 2247
	request->previous_seqno = ring->last_submitted_seqno;
2248
	ring->last_submitted_seqno = request->seqno;
2352 Serge 2249
	list_add_tail(&request->list, &ring->request_list);
2332 Serge 2250
 
6084 serge 2251
	trace_i915_gem_request_add(request);
2332 Serge 2252
 
6084 serge 2253
//	i915_queue_hangcheck(ring->dev);
3263 Serge 2254
 
6084 serge 2255
	queue_delayed_work(dev_priv->wq,
2256
			   &dev_priv->mm.retire_work,
2257
			   round_jiffies_up_relative(HZ));
2258
	intel_mark_busy(dev_priv->dev);
2332 Serge 2259
 
6084 serge 2260
	/* Sanity check that the reserved size was large enough. */
2261
	intel_ring_reserved_space_end(ringbuf);
2352 Serge 2262
}
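 
/*
 * Hedged sketch (not part of the driver) of the request lifecycle the NB
 * comment above relies on, assuming struct_mutex is held: ring space for
 * __i915_add_request() is reserved at allocation time, so once commands have
 * been emitted the add step cannot fail; a request that ends up not being
 * submitted must be cancelled instead so the reservation is returned. This
 * mirrors the pattern used by i915_gpu_idle() later in this file.
 */
static int example_submit_nop(struct intel_engine_cs *ring)
{
	struct drm_i915_gem_request *req;
	int ret;
 
	ret = i915_gem_request_alloc(ring, ring->default_context, &req);
	if (ret)
		return ret;
 
	ret = i915_switch_context(req);	/* any emission step may still fail */
	if (ret) {
		i915_gem_request_cancel(req);
		return ret;
	}
 
	i915_add_request_no_flush(req);	/* not allowed to fail from here on */
	return 0;
}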
2332 Serge 2263
 
5060 serge 2264
static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
2265
				   const struct intel_context *ctx)
4104 Serge 2266
{
5060 serge 2267
	unsigned long elapsed;
4104 Serge 2268
 
5060 serge 2269
    elapsed = GetTimerTicks()/100 - ctx->hang_stats.guilty_ts;
4104 Serge 2270
 
5060 serge 2271
	if (ctx->hang_stats.banned)
2272
		return true;
4104 Serge 2273
 
6084 serge 2274
	if (ctx->hang_stats.ban_period_seconds &&
2275
	    elapsed <= ctx->hang_stats.ban_period_seconds) {
5060 serge 2276
		if (!i915_gem_context_is_default(ctx)) {
2277
			DRM_DEBUG("context hanging too fast, banning!\n");
4104 Serge 2278
			return true;
5060 serge 2279
		} else if (i915_stop_ring_allow_ban(dev_priv)) {
2280
			if (i915_stop_ring_allow_warn(dev_priv))
6084 serge 2281
				DRM_ERROR("gpu hanging too fast, banning!\n");
4104 Serge 2282
			return true;
6084 serge 2283
		}
4104 Serge 2284
	}
2285
 
2286
	return false;
2287
}
2288
 
5060 serge 2289
static void i915_set_reset_status(struct drm_i915_private *dev_priv,
2290
				  struct intel_context *ctx,
2291
				  const bool guilty)
4560 Serge 2292
{
5060 serge 2293
	struct i915_ctx_hang_stats *hs;
4560 Serge 2294
 
5060 serge 2295
	if (WARN_ON(!ctx))
2296
		return;
4560 Serge 2297
 
5060 serge 2298
	hs = &ctx->hang_stats;
4560 Serge 2299
 
5060 serge 2300
	if (guilty) {
2301
		hs->banned = i915_context_is_banned(dev_priv, ctx);
2302
		hs->batch_active++;
2303
        hs->guilty_ts = GetTimerTicks()/100;
2304
	} else {
2305
		hs->batch_pending++;
4104 Serge 2306
	}
2307
}
2308
 
6084 serge 2309
void i915_gem_request_free(struct kref *req_ref)
4104 Serge 2310
{
6084 serge 2311
	struct drm_i915_gem_request *req = container_of(req_ref,
2312
						 typeof(*req), ref);
2313
	struct intel_context *ctx = req->ctx;
5354 serge 2314
 
6084 serge 2315
	if (req->file_priv)
2316
		i915_gem_request_remove_from_client(req);
4104 Serge 2317
 
5354 serge 2318
	if (ctx) {
2319
		if (i915.enable_execlists) {
6084 serge 2320
			if (ctx != req->ring->default_context)
2321
				intel_lr_context_unpin(req);
2322
		}
4104 Serge 2323
 
5354 serge 2324
		i915_gem_context_unreference(ctx);
2325
	}
6084 serge 2326
 
2327
	kfree(req);
4104 Serge 2328
}
2329
 
6084 serge 2330
int i915_gem_request_alloc(struct intel_engine_cs *ring,
2331
			   struct intel_context *ctx,
2332
			   struct drm_i915_gem_request **req_out)
2333
{
2334
	struct drm_i915_private *dev_priv = to_i915(ring->dev);
2335
	struct drm_i915_gem_request *req;
2336
	int ret;
2337
 
2338
	if (!req_out)
2339
		return -EINVAL;
2340
 
2341
	*req_out = NULL;
2342
 
2343
//	req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
2344
	req = kzalloc(sizeof(*req),0);
2345
	if (req == NULL)
2346
		return -ENOMEM;
2347
 
2348
	ret = i915_gem_get_seqno(ring->dev, &req->seqno);
2349
	if (ret)
2350
		goto err;
2351
 
2352
	kref_init(&req->ref);
2353
	req->i915 = dev_priv;
2354
	req->ring = ring;
2355
	req->ctx  = ctx;
2356
	i915_gem_context_reference(req->ctx);
2357
 
2358
	if (i915.enable_execlists)
2359
		ret = intel_logical_ring_alloc_request_extras(req);
2360
	else
2361
		ret = intel_ring_alloc_request_extras(req);
2362
	if (ret) {
2363
		i915_gem_context_unreference(req->ctx);
2364
		goto err;
2365
	}
2366
 
2367
	/*
2368
	 * Reserve space in the ring buffer for all the commands required to
2369
	 * eventually emit this request. This is to guarantee that the
2370
	 * i915_add_request() call can't fail. Note that the reserve may need
2371
	 * to be redone if the request is not actually submitted straight
2372
	 * away, e.g. because a GPU scheduler has deferred it.
2373
	 */
2374
	if (i915.enable_execlists)
2375
		ret = intel_logical_ring_reserve_space(req);
2376
	else
2377
		ret = intel_ring_reserve_space(req);
2378
	if (ret) {
2379
		/*
2380
		 * At this point, the request is fully allocated even if not
2381
		 * fully prepared. Thus it can be cleaned up using the proper
2382
		 * free code.
2383
		 */
2384
		i915_gem_request_cancel(req);
2385
		return ret;
2386
	}
2387
 
2388
	*req_out = req;
2389
	return 0;
2390
 
2391
err:
2392
	kfree(req);
2393
	return ret;
2394
}
2395
 
2396
void i915_gem_request_cancel(struct drm_i915_gem_request *req)
2397
{
2398
	intel_ring_reserved_space_cancel(req->ringbuf);
2399
 
2400
	i915_gem_request_unreference(req);
2401
}
2402
 
5060 serge 2403
struct drm_i915_gem_request *
2404
i915_gem_find_active_request(struct intel_engine_cs *ring)
3031 serge 2405
{
4539 Serge 2406
	struct drm_i915_gem_request *request;
4104 Serge 2407
 
4539 Serge 2408
	list_for_each_entry(request, &ring->request_list, list) {
6084 serge 2409
		if (i915_gem_request_completed(request, false))
4539 Serge 2410
			continue;
4104 Serge 2411
 
5060 serge 2412
		return request;
4539 Serge 2413
	}
5060 serge 2414
 
2415
	return NULL;
4539 Serge 2416
}
2417
 
5060 serge 2418
static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
2419
				       struct intel_engine_cs *ring)
2420
{
2421
	struct drm_i915_gem_request *request;
2422
	bool ring_hung;
2423
 
2424
	request = i915_gem_find_active_request(ring);
2425
 
2426
	if (request == NULL)
2427
		return;
2428
 
2429
	ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
2430
 
2431
	i915_set_reset_status(dev_priv, request->ctx, ring_hung);
2432
 
2433
	list_for_each_entry_continue(request, &ring->request_list, list)
2434
		i915_set_reset_status(dev_priv, request->ctx, false);
2435
}
2436
 
4539 Serge 2437
static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
5060 serge 2438
					struct intel_engine_cs *ring)
4539 Serge 2439
{
6937 serge 2440
	struct intel_ringbuffer *buffer;
2441
 
4560 Serge 2442
	while (!list_empty(&ring->active_list)) {
2443
		struct drm_i915_gem_object *obj;
2444
 
2445
		obj = list_first_entry(&ring->active_list,
2446
				       struct drm_i915_gem_object,
6084 serge 2447
				       ring_list[ring->id]);
4560 Serge 2448
 
6084 serge 2449
		i915_gem_object_retire__read(obj, ring->id);
4560 Serge 2450
	}
2451
 
2452
	/*
5354 serge 2453
	 * Clear the execlists queue up before freeing the requests, as those
2454
	 * are the ones that keep the context and ringbuffer backing objects
2455
	 * pinned in place.
2456
	 */
2457
 
6937 serge 2458
	if (i915.enable_execlists) {
2459
		spin_lock_irq(&ring->execlist_lock);
6084 serge 2460
 
6937 serge 2461
		/* list_splice_tail_init checks for empty lists */
2462
		list_splice_tail_init(&ring->execlist_queue,
2463
				      &ring->execlist_retired_req_list);
6084 serge 2464
 
6937 serge 2465
		spin_unlock_irq(&ring->execlist_lock);
2466
		intel_execlists_retire_requests(ring);
5354 serge 2467
	}
2468
 
2469
	/*
4560 Serge 2470
	 * We must free the requests after all the corresponding objects have
2471
	 * been moved off active lists. Which is the same order as the normal
2472
	 * retire_requests function does. This is important if object hold
2473
	 * implicit references on things like e.g. ppgtt address spaces through
2474
	 * the request.
2475
	 */
3031 serge 2476
	while (!list_empty(&ring->request_list)) {
2477
		struct drm_i915_gem_request *request;
2332 Serge 2478
 
3031 serge 2479
		request = list_first_entry(&ring->request_list,
2480
					   struct drm_i915_gem_request,
2481
					   list);
2332 Serge 2482
 
6084 serge 2483
		i915_gem_request_retire(request);
3031 serge 2484
	}
6937 serge 2485
 
2486
	/* Having flushed all requests from all queues, we know that all
2487
	 * ringbuffers must now be empty. However, since we do not reclaim
2488
	 * all space when retiring the request (to prevent HEADs colliding
2489
	 * with rapid ringbuffer wraparound) the amount of available space
2490
	 * upon reset is less than when we start. Do one more pass over
2491
	 * all the ringbuffers to reset last_retired_head.
2492
	 */
2493
	list_for_each_entry(buffer, &ring->buffers, link) {
2494
		buffer->last_retired_head = buffer->tail;
2495
		intel_ring_update_space(buffer);
2496
	}
3031 serge 2497
}
2332 Serge 2498
 
3031 serge 2499
void i915_gem_reset(struct drm_device *dev)
2500
{
2501
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 2502
	struct intel_engine_cs *ring;
3031 serge 2503
	int i;
2360 Serge 2504
 
4539 Serge 2505
	/*
2506
	 * Before we free the objects from the requests, we need to inspect
2507
	 * them for finding the guilty party. As the requests only borrow
2508
	 * their reference to the objects, the inspection must be done first.
2509
	 */
3031 serge 2510
	for_each_ring(ring, dev_priv, i)
4539 Serge 2511
		i915_gem_reset_ring_status(dev_priv, ring);
2360 Serge 2512
 
4539 Serge 2513
	for_each_ring(ring, dev_priv, i)
2514
		i915_gem_reset_ring_cleanup(dev_priv, ring);
2515
 
5060 serge 2516
	i915_gem_context_reset(dev);
4560 Serge 2517
 
3746 Serge 2518
	i915_gem_restore_fences(dev);
6084 serge 2519
 
2520
	WARN_ON(i915_verify_lists(dev));
3031 serge 2521
}
2360 Serge 2522
 
2352 Serge 2523
/**
2524
 * This function clears the request list as sequence numbers are passed.
2525
 */
3031 serge 2526
void
5060 serge 2527
i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
2352 Serge 2528
{
6084 serge 2529
	WARN_ON(i915_verify_lists(ring->dev));
2332 Serge 2530
 
6084 serge 2531
	/* Retire requests first as we use it above for the early return.
2532
	 * If we retire requests last, we may use a later seqno and so clear
2533
	 * the requests lists without clearing the active list, leading to
2534
	 * confusion.
2535
	 */
2536
	while (!list_empty(&ring->request_list)) {
2537
		struct drm_i915_gem_request *request;
2332 Serge 2538
 
6084 serge 2539
		request = list_first_entry(&ring->request_list,
2540
					   struct drm_i915_gem_request,
2541
					   list);
2332 Serge 2542
 
6084 serge 2543
		if (!i915_gem_request_completed(request, true))
2544
			break;
2332 Serge 2545
 
6084 serge 2546
		i915_gem_request_retire(request);
2547
	}
2548
 
5060 serge 2549
	/* Move any buffers on the active list that are no longer referenced
2550
	 * by the ringbuffer to the flushing/inactive lists as appropriate,
2551
	 * before we free the context associated with the requests.
2552
	 */
2553
	while (!list_empty(&ring->active_list)) {
2554
		struct drm_i915_gem_object *obj;
2555
 
2556
		obj = list_first_entry(&ring->active_list,
2557
				      struct drm_i915_gem_object,
6084 serge 2558
				      ring_list[ring->id]);
5060 serge 2559
 
6084 serge 2560
		if (!list_empty(&obj->last_read_req[ring->id]->list))
5060 serge 2561
			break;
2562
 
6084 serge 2563
		i915_gem_object_retire__read(obj, ring->id);
5060 serge 2564
	}
2565
 
6084 serge 2566
	if (unlikely(ring->trace_irq_req &&
2567
		     i915_gem_request_completed(ring->trace_irq_req, true))) {
2352 Serge 2568
		ring->irq_put(ring);
6084 serge 2569
		i915_gem_request_assign(&ring->trace_irq_req, NULL);
2352 Serge 2570
	}
2332 Serge 2571
 
2352 Serge 2572
	WARN_ON(i915_verify_lists(ring->dev));
2573
}
2332 Serge 2574
 
4560 Serge 2575
bool
2352 Serge 2576
i915_gem_retire_requests(struct drm_device *dev)
2577
{
5060 serge 2578
	struct drm_i915_private *dev_priv = dev->dev_private;
2579
	struct intel_engine_cs *ring;
4560 Serge 2580
	bool idle = true;
2352 Serge 2581
	int i;
2332 Serge 2582
 
4560 Serge 2583
	for_each_ring(ring, dev_priv, i) {
3031 serge 2584
		i915_gem_retire_requests_ring(ring);
4560 Serge 2585
		idle &= list_empty(&ring->request_list);
5354 serge 2586
		if (i915.enable_execlists) {
2587
			unsigned long flags;
2588
 
2589
			spin_lock_irqsave(&ring->execlist_lock, flags);
2590
			idle &= list_empty(&ring->execlist_queue);
2591
			spin_unlock_irqrestore(&ring->execlist_lock, flags);
2592
 
2593
			intel_execlists_retire_requests(ring);
2594
		}
4560 Serge 2595
	}
2596
 
6937 serge 2597
//	if (idle)
2598
//		mod_delayed_work(dev_priv->wq,
2599
//				   &dev_priv->mm.idle_work,
2600
//				   msecs_to_jiffies(100));
4560 Serge 2601
 
2602
	return idle;
2352 Serge 2603
}
2604
 
2360 Serge 2605
static void
2606
i915_gem_retire_work_handler(struct work_struct *work)
2607
{
4560 Serge 2608
	struct drm_i915_private *dev_priv =
2609
		container_of(work, typeof(*dev_priv), mm.retire_work.work);
2610
	struct drm_device *dev = dev_priv->dev;
2360 Serge 2611
	bool idle;
2352 Serge 2612
 
2360 Serge 2613
	/* Come back later if the device is busy... */
4560 Serge 2614
	idle = false;
2615
	if (mutex_trylock(&dev->struct_mutex)) {
2616
		idle = i915_gem_retire_requests(dev);
2617
		mutex_unlock(&dev->struct_mutex);
2618
	}
2619
	if (!idle)
3482 Serge 2620
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
2621
				   round_jiffies_up_relative(HZ));
4560 Serge 2622
}
2352 Serge 2623
 
4560 Serge 2624
static void
2625
i915_gem_idle_work_handler(struct work_struct *work)
2626
{
2627
	struct drm_i915_private *dev_priv =
2628
		container_of(work, typeof(*dev_priv), mm.idle_work.work);
6084 serge 2629
	struct drm_device *dev = dev_priv->dev;
2630
	struct intel_engine_cs *ring;
2631
	int i;
2352 Serge 2632
 
6084 serge 2633
	for_each_ring(ring, dev_priv, i)
2634
		if (!list_empty(&ring->request_list))
2635
			return;
2636
 
6937 serge 2637
	/* we probably should sync with hangcheck here, using cancel_work_sync.
2638
	 * Also locking seems to be fubar here, ring->request_list is protected
2639
	 * by dev->struct_mutex. */
2640
 
6084 serge 2641
	intel_mark_idle(dev);
2642
 
2643
	if (mutex_trylock(&dev->struct_mutex)) {
2644
		struct intel_engine_cs *ring;
2645
		int i;
2646
 
2647
		for_each_ring(ring, dev_priv, i)
2648
			i915_gem_batch_pool_fini(&ring->batch_pool);
2649
 
2650
		mutex_unlock(&dev->struct_mutex);
2651
	}
2360 Serge 2652
}
2653
 
2344 Serge 2654
/**
3031 serge 2655
 * Ensures that an object will eventually get non-busy by flushing any required
2656
 * write domains, emitting any outstanding lazy request and retiring and
2657
 * completed requests.
2352 Serge 2658
 */
3031 serge 2659
static int
2660
i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
2352 Serge 2661
{
6084 serge 2662
	int i;
2352 Serge 2663
 
6084 serge 2664
	if (!obj->active)
2665
		return 0;
2352 Serge 2666
 
6084 serge 2667
	for (i = 0; i < I915_NUM_RINGS; i++) {
2668
		struct drm_i915_gem_request *req;
2669
 
2670
		req = obj->last_read_req[i];
2671
		if (req == NULL)
2672
			continue;
2673
 
2674
		if (list_empty(&req->list))
2675
			goto retire;
2676
 
2677
		if (i915_gem_request_completed(req, true)) {
2678
			__i915_gem_request_retire__upto(req);
2679
retire:
2680
			i915_gem_object_retire__read(obj, i);
2681
		}
3031 serge 2682
	}
2352 Serge 2683
 
3031 serge 2684
	return 0;
2685
}
2352 Serge 2686
 
3243 Serge 2687
/**
2688
 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2689
 * @DRM_IOCTL_ARGS: standard ioctl arguments
2690
 *
2691
 * Returns 0 if successful, else an error is returned with the remaining time in
2692
 * the timeout parameter.
2693
 *  -ETIME: object is still busy after timeout
2694
 *  -ERESTARTSYS: signal interrupted the wait
2695
 *  -ENONENT: object doesn't exist
2696
 * Also possible, but rare:
2697
 *  -EAGAIN: GPU wedged
2698
 *  -ENOMEM: damn
2699
 *  -ENODEV: Internal IRQ fail
2700
 *  -E?: The add request failed
2701
 *
2702
 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2703
 * non-zero timeout parameter the wait ioctl will wait for the given number of
2704
 * nanoseconds on an object becoming unbusy. Since the wait itself does so
2705
 * without holding struct_mutex the object may become re-busied before this
2706
 * function completes. A similar but shorter race condition exists in the busy
2707
 * ioctl
2708
 */
4246 Serge 2709
int
2710
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2711
{
5060 serge 2712
	struct drm_i915_private *dev_priv = dev->dev_private;
4246 Serge 2713
	struct drm_i915_gem_wait *args = data;
2714
	struct drm_i915_gem_object *obj;
6084 serge 2715
	struct drm_i915_gem_request *req[I915_NUM_RINGS];
4246 Serge 2716
	unsigned reset_counter;
6084 serge 2717
	int i, n = 0;
2718
	int ret;
2352 Serge 2719
 
5354 serge 2720
	if (args->flags != 0)
2721
		return -EINVAL;
2722
 
4246 Serge 2723
	ret = i915_mutex_lock_interruptible(dev);
2724
	if (ret)
2725
		return ret;
2352 Serge 2726
 
4246 Serge 2727
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
2728
	if (&obj->base == NULL) {
2729
		mutex_unlock(&dev->struct_mutex);
2730
		return -ENOENT;
2731
	}
2352 Serge 2732
 
4246 Serge 2733
	/* Need to make sure the object gets inactive eventually. */
2734
	ret = i915_gem_object_flush_active(obj);
2735
	if (ret)
2736
		goto out;
2352 Serge 2737
 
6084 serge 2738
	if (!obj->active)
2739
		goto out;
2352 Serge 2740
 
4246 Serge 2741
	/* Do this after OLR check to make sure we make forward progress polling
6084 serge 2742
	 * on this IOCTL with a timeout == 0 (like busy ioctl)
4246 Serge 2743
	 */
6084 serge 2744
	if (args->timeout_ns == 0) {
4246 Serge 2745
		ret = -ETIME;
2746
		goto out;
2747
	}
2352 Serge 2748
 
4246 Serge 2749
	drm_gem_object_unreference(&obj->base);
2750
	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
6084 serge 2751
 
2752
	for (i = 0; i < I915_NUM_RINGS; i++) {
2753
		if (obj->last_read_req[i] == NULL)
2754
			continue;
2755
 
2756
		req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
2757
	}
2758
 
4246 Serge 2759
	mutex_unlock(&dev->struct_mutex);
2352 Serge 2760
 
6084 serge 2761
	for (i = 0; i < n; i++) {
2762
		if (ret == 0)
2763
			ret = __i915_wait_request(req[i], reset_counter, true,
2764
						  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
6937 serge 2765
						  to_rps_client(file));
6084 serge 2766
		i915_gem_request_unreference__unlocked(req[i]);
2767
	}
2768
	return ret;
3243 Serge 2769
 
4246 Serge 2770
out:
2771
	drm_gem_object_unreference(&obj->base);
2772
	mutex_unlock(&dev->struct_mutex);
2773
	return ret;
2774
}
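 
/*
 * Hedged usage sketch (not part of the driver): polling a buffer for
 * completion through the wait ioctl above. timeout_ns == 0 behaves like the
 * busy ioctl; a positive timeout is updated with the time remaining, and
 * -ETIME means the object was still busy when the timeout expired. The
 * handle value is a placeholder.
 */
static int example_wait_on_bo(struct drm_device *dev, struct drm_file *file,
			      uint32_t handle, s64 timeout_ns)
{
	struct drm_i915_gem_wait args;
 
	memset(&args, 0, sizeof(args));
	args.bo_handle  = handle;
	args.timeout_ns = timeout_ns;	/* 0 == just test for busyness */
 
	return i915_gem_wait_ioctl(dev, &args, file);
}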
3243 Serge 2775
 
6084 serge 2776
static int
2777
__i915_gem_object_sync(struct drm_i915_gem_object *obj,
2778
		       struct intel_engine_cs *to,
2779
		       struct drm_i915_gem_request *from_req,
2780
		       struct drm_i915_gem_request **to_req)
2781
{
2782
	struct intel_engine_cs *from;
2783
	int ret;
2784
 
2785
	from = i915_gem_request_get_ring(from_req);
2786
	if (to == from)
2787
		return 0;
2788
 
2789
	if (i915_gem_request_completed(from_req, true))
2790
		return 0;
2791
 
2792
	if (!i915_semaphore_is_enabled(obj->base.dev)) {
2793
		struct drm_i915_private *i915 = to_i915(obj->base.dev);
2794
		ret = __i915_wait_request(from_req,
2795
					  atomic_read(&i915->gpu_error.reset_counter),
2796
					  i915->mm.interruptible,
2797
					  NULL,
2798
					  &i915->rps.semaphores);
2799
		if (ret)
2800
			return ret;
2801
 
2802
		i915_gem_object_retire_request(obj, from_req);
2803
	} else {
2804
		int idx = intel_ring_sync_index(from, to);
2805
		u32 seqno = i915_gem_request_get_seqno(from_req);
2806
 
2807
		WARN_ON(!to_req);
2808
 
2809
		if (seqno <= from->semaphore.sync_seqno[idx])
2810
			return 0;
2811
 
2812
		if (*to_req == NULL) {
2813
			ret = i915_gem_request_alloc(to, to->default_context, to_req);
2814
			if (ret)
2815
				return ret;
2816
		}
2817
 
2818
		trace_i915_gem_ring_sync_to(*to_req, from, from_req);
2819
		ret = to->semaphore.sync_to(*to_req, from, seqno);
2820
		if (ret)
2821
			return ret;
2822
 
2823
		/* We use last_read_req because sync_to()
2824
		 * might have just caused seqno wrap under
2825
		 * the radar.
2826
		 */
2827
		from->semaphore.sync_seqno[idx] =
2828
			i915_gem_request_get_seqno(obj->last_read_req[from->id]);
2829
	}
2830
 
2831
	return 0;
2832
}
2833
 
2352 Serge 2834
/**
3031 serge 2835
 * i915_gem_object_sync - sync an object to a ring.
2836
 *
2837
 * @obj: object which may be in use on another ring.
2838
 * @to: ring we wish to use the object on. May be NULL.
6084 serge 2839
 * @to_req: request we wish to use the object for. See below.
2840
 *          This will be allocated and returned if a request is
2841
 *          required but not passed in.
3031 serge 2842
 *
2843
 * This code is meant to abstract object synchronization with the GPU.
2844
 * Calling with NULL implies synchronizing the object with the CPU
6084 serge 2845
 * rather than a particular GPU ring. Conceptually we serialise writes
2846
 * between engines inside the GPU. We only allow one engine to write
2847
 * into a buffer at any time, but multiple readers. To ensure each has
2848
 * a coherent view of memory, we must:
3031 serge 2849
 *
6084 serge 2850
 * - If there is an outstanding write request to the object, the new
2851
 *   request must wait for it to complete (either CPU or in hw, requests
2852
 *   on the same ring will be naturally ordered).
2853
 *
2854
 * - If we are a write request (pending_write_domain is set), the new
2855
 *   request must wait for outstanding read requests to complete.
2856
 *
2857
 * For CPU synchronisation (NULL to) no request is required. For syncing with
2858
 * rings to_req must be non-NULL. However, a request does not have to be
2859
 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a
2860
 * request will be allocated automatically and returned through *to_req. Note
2861
 * that it is not guaranteed that commands will be emitted (because the system
2862
 * might already be idle). Hence there is no need to create a request that
2863
 * might never have any work submitted. Note further that if a request is
2864
 * returned in *to_req, it is the responsibility of the caller to submit
2865
 * that request (after potentially adding more work to it).
2866
 *
3031 serge 2867
 * Returns 0 if successful, else propagates up the lower layer error.
2344 Serge 2868
 */
2869
int
3031 serge 2870
i915_gem_object_sync(struct drm_i915_gem_object *obj,
6084 serge 2871
		     struct intel_engine_cs *to,
2872
		     struct drm_i915_gem_request **to_req)
2344 Serge 2873
{
6084 serge 2874
	const bool readonly = obj->base.pending_write_domain == 0;
2875
	struct drm_i915_gem_request *req[I915_NUM_RINGS];
2876
	int ret, i, n;
2332 Serge 2877
 
6084 serge 2878
	if (!obj->active)
3031 serge 2879
		return 0;
2332 Serge 2880
 
6084 serge 2881
	if (to == NULL)
2882
		return i915_gem_object_wait_rendering(obj, readonly);
2332 Serge 2883
 
6084 serge 2884
	n = 0;
2885
	if (readonly) {
2886
		if (obj->last_write_req)
2887
			req[n++] = obj->last_write_req;
2888
	} else {
2889
		for (i = 0; i < I915_NUM_RINGS; i++)
2890
			if (obj->last_read_req[i])
2891
				req[n++] = obj->last_read_req[i];
2892
	}
2893
	for (i = 0; i < n; i++) {
2894
		ret = __i915_gem_object_sync(obj, to, req[i], to_req);
2895
		if (ret)
2896
			return ret;
2897
	}
3031 serge 2898
 
6084 serge 2899
	return 0;
2344 Serge 2900
}
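 
/*
 * Hedged sketch (not part of the driver): how execbuffer-style code is
 * expected to use i915_gem_object_sync() above. Passing a NULL ring syncs
 * with the CPU; otherwise *to_req may start out NULL and is only allocated
 * if semaphore commands actually need to be emitted, in which case the
 * caller still owns submitting that request.
 */
static int example_sync_for_ring(struct drm_i915_gem_object *obj,
				 struct intel_engine_cs *to)
{
	struct drm_i915_gem_request *req = NULL;
	int ret;
 
	ret = i915_gem_object_sync(obj, to, &req);
	if (ret)
		return ret;
 
	if (req)	/* semaphores were emitted; request must be submitted */
		i915_add_request_no_flush(req);
 
	return 0;
}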
2332 Serge 2901
 
2344 Serge 2902
static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2903
{
2904
	u32 old_write_domain, old_read_domains;
2332 Serge 2905
 
2344 Serge 2906
	/* Force a pagefault for domain tracking on next user access */
6084 serge 2907
	i915_gem_release_mmap(obj);
2332 Serge 2908
 
2344 Serge 2909
	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2910
		return;
2332 Serge 2911
 
3480 Serge 2912
	/* Wait for any direct GTT access to complete */
2913
	mb();
2914
 
2344 Serge 2915
	old_read_domains = obj->base.read_domains;
2916
	old_write_domain = obj->base.write_domain;
2351 Serge 2917
 
2344 Serge 2918
	obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2919
	obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
2332 Serge 2920
 
2351 Serge 2921
	trace_i915_gem_object_change_domain(obj,
2922
					    old_read_domains,
2923
					    old_write_domain);
2344 Serge 2924
}
2332 Serge 2925
 
6084 serge 2926
static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
2344 Serge 2927
{
4104 Serge 2928
	struct drm_i915_gem_object *obj = vma->obj;
5060 serge 2929
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3480 Serge 2930
	int ret;
3263 Serge 2931
 
4104 Serge 2932
	if (list_empty(&vma->vma_link))
2344 Serge 2933
		return 0;
2332 Serge 2934
 
4560 Serge 2935
	if (!drm_mm_node_allocated(&vma->node)) {
2936
		i915_gem_vma_destroy(vma);
2937
		return 0;
2938
	}
2939
 
5060 serge 2940
	if (vma->pin_count)
3031 serge 2941
		return -EBUSY;
2332 Serge 2942
 
3243 Serge 2943
	BUG_ON(obj->pages == NULL);
3031 serge 2944
 
6084 serge 2945
	if (wait) {
2946
		ret = i915_gem_object_wait_rendering(obj, false);
2947
		if (ret)
2948
			return ret;
2949
	}
2332 Serge 2950
 
6084 serge 2951
	if (i915_is_ggtt(vma->vm) &&
2952
	    vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
2953
		i915_gem_object_finish_gtt(obj);
5354 serge 2954
 
6084 serge 2955
		/* release the fence reg _after_ flushing */
2956
		ret = i915_gem_object_put_fence(obj);
2957
		if (ret)
2958
			return ret;
5060 serge 2959
	}
2332 Serge 2960
 
4104 Serge 2961
	trace_i915_vma_unbind(vma);
2332 Serge 2962
 
6084 serge 2963
	vma->vm->unbind_vma(vma);
2964
	vma->bound = 0;
2332 Serge 2965
 
5060 serge 2966
	list_del_init(&vma->mm_list);
6084 serge 2967
	if (i915_is_ggtt(vma->vm)) {
2968
		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
2969
			obj->map_and_fenceable = false;
2970
		} else if (vma->ggtt_view.pages) {
2971
			sg_free_table(vma->ggtt_view.pages);
2972
			kfree(vma->ggtt_view.pages);
2973
		}
2974
		vma->ggtt_view.pages = NULL;
2975
	}
2332 Serge 2976
 
4104 Serge 2977
	drm_mm_remove_node(&vma->node);
2978
	i915_gem_vma_destroy(vma);
2979
 
2980
	/* Since the unbound list is global, only move to that list if
4560 Serge 2981
	 * no more VMAs exist. */
6084 serge 2982
	if (list_empty(&obj->vma_list))
4104 Serge 2983
		list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
2984
 
4560 Serge 2985
	/* And finally now the object is completely decoupled from this vma,
2986
	 * we can drop its hold on the backing storage and allow it to be
2987
	 * reaped by the shrinker.
2988
	 */
2989
	i915_gem_object_unpin_pages(obj);
2990
 
2344 Serge 2991
	return 0;
2992
}
2332 Serge 2993
 
6084 serge 2994
int i915_vma_unbind(struct i915_vma *vma)
2995
{
2996
	return __i915_vma_unbind(vma, true);
2997
}
2998
 
2999
int __i915_vma_unbind_no_wait(struct i915_vma *vma)
3000
{
3001
	return __i915_vma_unbind(vma, false);
3002
}
3003
 
3031 serge 3004
int i915_gpu_idle(struct drm_device *dev)
2344 Serge 3005
{
5060 serge 3006
	struct drm_i915_private *dev_priv = dev->dev_private;
3007
	struct intel_engine_cs *ring;
2344 Serge 3008
	int ret, i;
2332 Serge 3009
 
2344 Serge 3010
	/* Flush everything onto the inactive list. */
3031 serge 3011
	for_each_ring(ring, dev_priv, i) {
5354 serge 3012
		if (!i915.enable_execlists) {
6084 serge 3013
			struct drm_i915_gem_request *req;
3031 serge 3014
 
6084 serge 3015
			ret = i915_gem_request_alloc(ring, ring->default_context, &req);
2352 Serge 3016
			if (ret)
3017
				return ret;
2344 Serge 3018
 
6084 serge 3019
			ret = i915_switch_context(req);
3020
			if (ret) {
3021
				i915_gem_request_cancel(req);
3022
				return ret;
3023
			}
2344 Serge 3024
 
6084 serge 3025
			i915_add_request_no_flush(req);
3026
		}
2332 Serge 3027
 
6084 serge 3028
		ret = intel_ring_idle(ring);
3031 serge 3029
		if (ret)
3030
			return ret;
3031
	}
2332 Serge 3032
 
6084 serge 3033
	WARN_ON(i915_verify_lists(dev));
3031 serge 3034
	return 0;
3035
}
2332 Serge 3036
 
5354 serge 3037
static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
3031 serge 3038
				     unsigned long cache_level)
3039
{
5354 serge 3040
	struct drm_mm_node *gtt_space = &vma->node;
3031 serge 3041
	struct drm_mm_node *other;
2332 Serge 3042
 
5354 serge 3043
	/*
3044
	 * On some machines we have to be careful when putting differing types
3045
	 * of snoopable memory together to avoid the prefetcher crossing memory
3046
	 * domains and dying. During vm initialisation, we decide whether or not
3047
	 * these constraints apply and set the drm_mm.color_adjust
3048
	 * appropriately.
3031 serge 3049
	 */
5354 serge 3050
	if (vma->vm->mm.color_adjust == NULL)
3031 serge 3051
		return true;
2332 Serge 3052
 
4104 Serge 3053
	if (!drm_mm_node_allocated(gtt_space))
3031 serge 3054
		return true;
2332 Serge 3055
 
3031 serge 3056
	if (list_empty(&gtt_space->node_list))
3057
		return true;
2332 Serge 3058
 
3031 serge 3059
	other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
3060
	if (other->allocated && !other->hole_follows && other->color != cache_level)
3061
		return false;
2344 Serge 3062
 
3031 serge 3063
	other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
3064
	if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
3065
		return false;
2344 Serge 3066
 
3031 serge 3067
	return true;
3068
}
2344 Serge 3069
 
2332 Serge 3070
/**
6084 serge 3071
 * Finds free space in the GTT aperture and binds the object or a view of it
3072
 * there.
2332 Serge 3073
 */
5060 serge 3074
static struct i915_vma *
4104 Serge 3075
i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
3076
			   struct i915_address_space *vm,
6084 serge 3077
			   const struct i915_ggtt_view *ggtt_view,
3078
			   unsigned alignment,
5060 serge 3079
			   uint64_t flags)
2332 Serge 3080
{
3081
	struct drm_device *dev = obj->base.dev;
5060 serge 3082
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 3083
	u32 fence_alignment, unfenced_alignment;
3084
	u32 search_flag, alloc_flag;
3085
	u64 start, end;
3086
	u64 size, fence_size;
4104 Serge 3087
	struct i915_vma *vma;
2332 Serge 3088
	int ret;
2326 Serge 3089
 
6084 serge 3090
	if (i915_is_ggtt(vm)) {
3091
		u32 view_size;
2332 Serge 3092
 
6084 serge 3093
		if (WARN_ON(!ggtt_view))
3094
			return ERR_PTR(-EINVAL);
3095
 
3096
		view_size = i915_ggtt_view_size(obj, ggtt_view);
3097
 
3098
		fence_size = i915_gem_get_gtt_size(dev,
3099
						   view_size,
3100
						   obj->tiling_mode);
3101
		fence_alignment = i915_gem_get_gtt_alignment(dev,
3102
							     view_size,
3103
							     obj->tiling_mode,
3104
							     true);
3105
		unfenced_alignment = i915_gem_get_gtt_alignment(dev,
3106
								view_size,
3107
								obj->tiling_mode,
3108
								false);
3109
		size = flags & PIN_MAPPABLE ? fence_size : view_size;
3110
	} else {
3111
		fence_size = i915_gem_get_gtt_size(dev,
3112
						   obj->base.size,
3113
						   obj->tiling_mode);
3114
		fence_alignment = i915_gem_get_gtt_alignment(dev,
3115
							     obj->base.size,
3116
							     obj->tiling_mode,
3117
							     true);
3118
		unfenced_alignment =
3119
			i915_gem_get_gtt_alignment(dev,
3120
						   obj->base.size,
3121
						   obj->tiling_mode,
3122
						   false);
3123
		size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
3124
	}
3125
 
3126
	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
3127
	end = vm->total;
3128
	if (flags & PIN_MAPPABLE)
3129
		end = min_t(u64, end, dev_priv->gtt.mappable_end);
3130
	if (flags & PIN_ZONE_4G)
6937 serge 3131
		end = min_t(u64, end, (1ULL << 32) - PAGE_SIZE);
6084 serge 3132
 
2332 Serge 3133
	if (alignment == 0)
5060 serge 3134
		alignment = flags & PIN_MAPPABLE ? fence_alignment :
2332 Serge 3135
						unfenced_alignment;
5060 serge 3136
	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
6084 serge 3137
		DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
3138
			  ggtt_view ? ggtt_view->type : 0,
3139
			  alignment);
5060 serge 3140
		return ERR_PTR(-EINVAL);
2332 Serge 3141
	}
3142
 
6084 serge 3143
	/* If binding the object/GGTT view requires more space than the entire
3144
	 * aperture has, reject it early before evicting everything in a vain
3145
	 * attempt to find space.
2332 Serge 3146
	 */
6084 serge 3147
	if (size > end) {
3148
		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
3149
			  ggtt_view ? ggtt_view->type : 0,
3150
			  size,
5060 serge 3151
			  flags & PIN_MAPPABLE ? "mappable" : "total",
3152
			  end);
3153
		return ERR_PTR(-E2BIG);
2332 Serge 3154
	}
3155
 
3031 serge 3156
	ret = i915_gem_object_get_pages(obj);
3157
	if (ret)
5060 serge 3158
		return ERR_PTR(ret);
3031 serge 3159
 
3243 Serge 3160
	i915_gem_object_pin_pages(obj);
3161
 
6084 serge 3162
	vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
3163
			  i915_gem_obj_lookup_or_create_vma(obj, vm);
3164
 
5060 serge 3165
	if (IS_ERR(vma))
4104 Serge 3166
		goto err_unpin;
3243 Serge 3167
 
6937 serge 3168
	if (flags & PIN_OFFSET_FIXED) {
3169
		uint64_t offset = flags & PIN_OFFSET_MASK;
3170
 
3171
		if (offset & (alignment - 1) || offset + size > end) {
3172
			ret = -EINVAL;
3173
			goto err_free_vma;
3174
		}
3175
		vma->node.start = offset;
3176
		vma->node.size = size;
3177
		vma->node.color = obj->cache_level;
3178
		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
3179
		if (ret)
3180
			goto err_free_vma;
3181
	} else {
6084 serge 3182
	if (flags & PIN_HIGH) {
3183
		search_flag = DRM_MM_SEARCH_BELOW;
3184
		alloc_flag = DRM_MM_CREATE_TOP;
3185
	} else {
3186
		search_flag = DRM_MM_SEARCH_DEFAULT;
3187
		alloc_flag = DRM_MM_CREATE_DEFAULT;
3188
	}
3189
 
4104 Serge 3190
search_free:
3191
	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
3192
						  size, alignment,
5060 serge 3193
						  obj->cache_level,
3194
						  start, end,
6084 serge 3195
						  search_flag,
3196
						  alloc_flag);
3243 Serge 3197
	if (ret) {
2332 Serge 3198
 
4104 Serge 3199
		goto err_free_vma;
2332 Serge 3200
	}
6937 serge 3201
	}
5354 serge 3202
	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
4104 Serge 3203
		ret = -EINVAL;
3204
		goto err_remove_node;
3031 serge 3205
	}
2332 Serge 3206
 
6084 serge 3207
	trace_i915_vma_bind(vma, flags);
3208
	ret = i915_vma_bind(vma, obj->cache_level, flags);
4104 Serge 3209
	if (ret)
3210
		goto err_remove_node;
2332 Serge 3211
 
4104 Serge 3212
	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
3213
	list_add_tail(&vma->mm_list, &vm->inactive_list);
2332 Serge 3214
 
5060 serge 3215
	return vma;
4104 Serge 3216
 
3217
err_remove_node:
3218
	drm_mm_remove_node(&vma->node);
3219
err_free_vma:
3220
	i915_gem_vma_destroy(vma);
5060 serge 3221
	vma = ERR_PTR(ret);
4104 Serge 3222
err_unpin:
3223
	i915_gem_object_unpin_pages(obj);
5060 serge 3224
	return vma;
2332 Serge 3225
}
3226
 
4104 Serge 3227
bool
3228
i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3229
			bool force)
2332 Serge 3230
{
3231
	/* If we don't have a page list set up, then we're not pinned
3232
	 * to GPU, and we can ignore the cache flush because it'll happen
3233
	 * again at bind time.
3234
	 */
3243 Serge 3235
	if (obj->pages == NULL)
4104 Serge 3236
		return false;
2332 Serge 3237
 
3480 Serge 3238
	/*
3239
	 * Stolen memory is always coherent with the GPU as it is explicitly
3240
	 * marked as wc by the system, or the system is cache-coherent.
3241
	 */
5354 serge 3242
	if (obj->stolen || obj->phys_handle)
4104 Serge 3243
		return false;
3480 Serge 3244
 
2332 Serge 3245
	/* If the GPU is snooping the contents of the CPU cache,
3246
	 * we do not need to manually clear the CPU cache lines.  However,
3247
	 * the caches are only snooped when the render cache is
3248
	 * flushed/invalidated.  As we always have to emit invalidations
3249
	 * and flushes when moving into and out of the RENDER domain, correct
3250
	 * snooping behaviour occurs naturally as the result of our domain
3251
	 * tracking.
3252
	 */
6084 serge 3253
	if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3254
		obj->cache_dirty = true;
4104 Serge 3255
		return false;
6084 serge 3256
	}
2332 Serge 3257
 
4293 Serge 3258
	trace_i915_gem_object_clflush(obj);
3259
	drm_clflush_sg(obj->pages);
6084 serge 3260
	obj->cache_dirty = false;
2344 Serge 3261
 
4104 Serge 3262
	return true;
2332 Serge 3263
}
3264
 
2344 Serge 3265
/** Flushes the GTT write domain for the object if it's dirty. */
3266
static void
3267
i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3268
{
3269
	uint32_t old_write_domain;
2332 Serge 3270
 
2344 Serge 3271
	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3272
		return;
2332 Serge 3273
 
2344 Serge 3274
	/* No actual flushing is required for the GTT write domain.  Writes
3275
	 * to it immediately go to main memory as far as we know, so there's
3276
	 * no chipset flush.  It also doesn't land in render cache.
3277
	 *
3278
	 * However, we do have to enforce the order so that all writes through
3279
	 * the GTT land before any writes to the device, such as updates to
3280
	 * the GATT itself.
3281
	 */
3282
	wmb();
2332 Serge 3283
 
2344 Serge 3284
	old_write_domain = obj->base.write_domain;
3285
	obj->base.write_domain = 0;
2332 Serge 3286
 
6084 serge 3287
	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
5354 serge 3288
 
2351 Serge 3289
	trace_i915_gem_object_change_domain(obj,
3290
					    obj->base.read_domains,
3291
					    old_write_domain);
2344 Serge 3292
}
2332 Serge 3293
 
3294
/** Flushes the CPU write domain for the object if it's dirty. */
2326 Serge 3295
static void
6084 serge 3296
i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
2332 Serge 3297
{
3298
	uint32_t old_write_domain;
3299
 
3300
	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
3301
		return;
3302
 
6084 serge 3303
	if (i915_gem_clflush_object(obj, obj->pin_display))
3304
		i915_gem_chipset_flush(obj->base.dev);
4104 Serge 3305
 
2332 Serge 3306
	old_write_domain = obj->base.write_domain;
3307
	obj->base.write_domain = 0;
3308
 
6084 serge 3309
	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
5354 serge 3310
 
2351 Serge 3311
	trace_i915_gem_object_change_domain(obj,
3312
					    obj->base.read_domains,
3313
					    old_write_domain);
2332 Serge 3314
}
3315
 
3316
/**
3317
 * Moves a single object to the GTT read, and possibly write domain.
3318
 *
3319
 * This function returns when the move is complete, including waiting on
3320
 * flushes to occur.
3321
 */
3322
int
3323
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3324
{
3325
	uint32_t old_write_domain, old_read_domains;
6084 serge 3326
	struct i915_vma *vma;
2332 Serge 3327
	int ret;
3328
 
3329
	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3330
		return 0;
3331
 
3031 serge 3332
	ret = i915_gem_object_wait_rendering(obj, !write);
6084 serge 3333
	if (ret)
3334
		return ret;
2332 Serge 3335
 
6084 serge 3336
	/* Flush and acquire obj->pages so that we are coherent through
3337
	 * direct access in memory with previous cached writes through
3338
	 * shmemfs and that our cache domain tracking remains valid.
3339
	 * For example, if the obj->filp was moved to swap without us
3340
	 * being notified and releasing the pages, we would mistakenly
3341
	 * continue to assume that the obj remained out of the CPU cached
3342
	 * domain.
3343
	 */
3344
	ret = i915_gem_object_get_pages(obj);
3345
	if (ret)
3346
		return ret;
2332 Serge 3347
 
6084 serge 3348
	i915_gem_object_flush_cpu_write_domain(obj);
3349
 
3480 Serge 3350
	/* Serialise direct access to this object with the barriers for
3351
	 * coherent writes from the GPU, by effectively invalidating the
3352
	 * GTT domain upon first access.
3353
	 */
3354
	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3355
		mb();
3356
 
2332 Serge 3357
	old_write_domain = obj->base.write_domain;
3358
	old_read_domains = obj->base.read_domains;
3359
 
3360
	/* It should now be out of any other write domains, and we can update
3361
	 * the domain values for our changes.
3362
	 */
3363
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3364
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3365
	if (write) {
3366
		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3367
		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3368
		obj->dirty = 1;
3369
	}
3370
 
2351 Serge 3371
	trace_i915_gem_object_change_domain(obj,
3372
					    old_read_domains,
3373
					    old_write_domain);
3374
 
3031 serge 3375
	/* And bump the LRU for this access */
6084 serge 3376
	vma = i915_gem_obj_to_ggtt(obj);
3377
	if (vma && drm_mm_node_allocated(&vma->node) && !obj->active)
3378
		list_move_tail(&vma->mm_list,
3379
			       &to_i915(obj->base.dev)->gtt.base.inactive_list);
3031 serge 3380
 
2332 Serge 3381
	return 0;
3382
}
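 
/* Illustrative sketch, kept out of the build: a typical GTT write path
 * implied by the helper above -- pin into the global GTT, move the object
 * to the GTT write domain, write through the aperture, then unpin. The
 * example function is hypothetical; it assumes the caller already holds
 * dev->struct_mutex and wants a mappable-aperture binding.
 */
#if 0
static int example_write_through_gtt(struct drm_i915_gem_object *obj)
{
	int ret;

	/* Bind the normal GGTT view into the mappable aperture. */
	ret = i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal,
				       0, PIN_MAPPABLE);
	if (ret)
		return ret;

	/* Invalidate/flush so that subsequent GTT writes are coherent. */
	ret = i915_gem_object_set_to_gtt_domain(obj, true);

	/* ... write through the GTT mapping here ... */

	i915_gem_object_ggtt_unpin_view(obj, &i915_ggtt_view_normal);
	return ret;
}
#endif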
3383
 
6084 serge 3384
/**
3385
 * Changes the cache-level of an object across all VMA.
3386
 *
3387
 * After this function returns, the object will be in the new cache-level
3388
 * across all GTT and the contents of the backing storage will be coherent,
3389
 * with respect to the new cache-level. In order to keep the backing storage
3390
 * coherent for all users, we only allow a single cache level to be set
3391
 * globally on the object and prevent it from being changed whilst the
3392
 * hardware is reading from the object. That is if the object is currently
3393
 * on the scanout it will be set to uncached (or equivalent display
3394
 * cache coherency) and all non-MOCS GPU access will also be uncached so
3395
 * that all direct access to the scanout remains coherent.
3396
 */
2335 Serge 3397
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3398
				    enum i915_cache_level cache_level)
3399
{
3031 serge 3400
	struct drm_device *dev = obj->base.dev;
5060 serge 3401
	struct i915_vma *vma, *next;
6084 serge 3402
	bool bound = false;
3403
	int ret = 0;
2332 Serge 3404
 
2335 Serge 3405
	if (obj->cache_level == cache_level)
6084 serge 3406
		goto out;
2332 Serge 3407
 
6084 serge 3408
	/* Inspect the list of currently bound VMA and unbind any that would
3409
	 * be invalid given the new cache-level. This is principally to
3410
	 * catch the issue of the CS prefetch crossing page boundaries and
3411
	 * reading an invalid PTE on older architectures.
3412
	 */
3413
	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
3414
		if (!drm_mm_node_allocated(&vma->node))
3415
			continue;
2332 Serge 3416
 
6084 serge 3417
		if (vma->pin_count) {
3418
			DRM_DEBUG("can not change the cache level of pinned objects\n");
3419
			return -EBUSY;
3420
		}
3421
 
5354 serge 3422
		if (!i915_gem_valid_gtt_space(vma, cache_level)) {
4104 Serge 3423
			ret = i915_vma_unbind(vma);
6084 serge 3424
			if (ret)
3425
				return ret;
3426
		} else
3427
			bound = true;
3031 serge 3428
	}
3429
 
6084 serge 3430
	/* We can reuse the existing drm_mm nodes but need to change the
3431
	 * cache-level on the PTE. We could simply unbind them all and
3432
	 * rebind with the correct cache-level on next use. However since
3433
	 * we already have a valid slot, dma mapping, pages etc., we may as well
3434
	 * rewrite the PTE in the belief that doing so tramples upon less
3435
	 * state and so involves less work.
3436
	 */
3437
	if (bound) {
3438
		/* Before we change the PTE, the GPU must not be accessing it.
3439
		 * If we wait upon the object, we know that all the bound
3440
		 * VMA are no longer active.
3441
		 */
3442
		ret = i915_gem_object_wait_rendering(obj, false);
2335 Serge 3443
		if (ret)
3444
			return ret;
2332 Serge 3445
 
6084 serge 3446
		if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
3447
			/* Access to snoopable pages through the GTT is
3448
			 * incoherent and on some machines causes a hard
3449
	 * lockup. Relinquish the CPU mmapping to force
3450
			 * userspace to refault in the pages and we can
3451
			 * then double check if the GTT mapping is still
3452
			 * valid for that pointer access.
3453
			 */
3454
			i915_gem_release_mmap(obj);
2332 Serge 3455
 
6084 serge 3456
			/* As we no longer need a fence for GTT access,
3457
			 * we can relinquish it now (and so prevent having
3458
			 * to steal a fence from someone else on the next
3459
			 * fence request). Note GPU activity would have
3460
			 * dropped the fence as all snoopable access is
3461
			 * supposed to be linear.
3462
			 */
2335 Serge 3463
			ret = i915_gem_object_put_fence(obj);
3464
			if (ret)
3465
				return ret;
6084 serge 3466
		} else {
3467
			/* We either have incoherent backing store and
3468
			 * so no GTT access or the architecture is fully
3469
			 * coherent. In such cases, existing GTT mmaps
3470
			 * ignore the cache bit in the PTE and we can
3471
			 * rewrite it without confusing the GPU or having
3472
			 * to force userspace to fault back in its mmaps.
3473
			 */
3474
		}
2332 Serge 3475
 
6084 serge 3476
		list_for_each_entry(vma, &obj->vma_list, vma_link) {
3477
			if (!drm_mm_node_allocated(&vma->node))
3478
				continue;
3479
 
3480
			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3481
			if (ret)
3482
				return ret;
3483
		}
2335 Serge 3484
	}
2332 Serge 3485
 
4104 Serge 3486
	list_for_each_entry(vma, &obj->vma_list, vma_link)
3487
		vma->node.color = cache_level;
3488
	obj->cache_level = cache_level;
3489
 
6084 serge 3490
out:
3491
	/* Flush the dirty CPU caches to the backing storage so that the
3492
	 * object is now coherent at its new cache level (with respect
3493
	 * to the access domain).
3494
	 */
3495
	if (obj->cache_dirty &&
3496
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
3497
	    cpu_write_needs_clflush(obj)) {
3498
		if (i915_gem_clflush_object(obj, true))
3499
			i915_gem_chipset_flush(obj->base.dev);
3500
	}
2332 Serge 3501
 
2335 Serge 3502
	return 0;
3503
}
2332 Serge 3504
 
3260 Serge 3505
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3506
			       struct drm_file *file)
3507
{
3508
	struct drm_i915_gem_caching *args = data;
3509
	struct drm_i915_gem_object *obj;
3510
 
3511
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
6084 serge 3512
	if (&obj->base == NULL)
3513
		return -ENOENT;
3260 Serge 3514
 
4104 Serge 3515
	switch (obj->cache_level) {
3516
	case I915_CACHE_LLC:
3517
	case I915_CACHE_L3_LLC:
3518
		args->caching = I915_CACHING_CACHED;
3519
		break;
3260 Serge 3520
 
4104 Serge 3521
	case I915_CACHE_WT:
3522
		args->caching = I915_CACHING_DISPLAY;
3523
		break;
3524
 
3525
	default:
3526
		args->caching = I915_CACHING_NONE;
3527
		break;
3528
	}
3529
 
6084 serge 3530
	drm_gem_object_unreference_unlocked(&obj->base);
3531
	return 0;
3260 Serge 3532
}
3533
 
3534
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3535
			       struct drm_file *file)
3536
{
6084 serge 3537
	struct drm_i915_private *dev_priv = dev->dev_private;
3260 Serge 3538
	struct drm_i915_gem_caching *args = data;
3539
	struct drm_i915_gem_object *obj;
3540
	enum i915_cache_level level;
3541
	int ret;
3542
 
3543
	switch (args->caching) {
3544
	case I915_CACHING_NONE:
3545
		level = I915_CACHE_NONE;
3546
		break;
3547
	case I915_CACHING_CACHED:
6084 serge 3548
		/*
3549
		 * Due to a HW issue on BXT A stepping, GPU stores via a
3550
		 * snooped mapping may leave stale data in a corresponding CPU
3551
		 * cacheline, whereas normally such cachelines would get
3552
		 * invalidated.
3553
		 */
6937 serge 3554
		if (IS_BXT_REVID(dev, 0, BXT_REVID_A1))
6084 serge 3555
			return -ENODEV;
3556
 
3260 Serge 3557
		level = I915_CACHE_LLC;
3558
		break;
4104 Serge 3559
	case I915_CACHING_DISPLAY:
3560
		level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
3561
		break;
3260 Serge 3562
	default:
3563
		return -EINVAL;
3564
	}
3565
 
6084 serge 3566
	intel_runtime_pm_get(dev_priv);
3567
 
3260 Serge 3568
	ret = i915_mutex_lock_interruptible(dev);
3569
	if (ret)
6084 serge 3570
		goto rpm_put;
3260 Serge 3571
 
3572
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3573
	if (&obj->base == NULL) {
3574
		ret = -ENOENT;
3575
		goto unlock;
3576
	}
3577
 
3578
	ret = i915_gem_object_set_cache_level(obj, level);
3579
 
3580
	drm_gem_object_unreference(&obj->base);
3581
unlock:
3582
	mutex_unlock(&dev->struct_mutex);
6084 serge 3583
rpm_put:
3584
	intel_runtime_pm_put(dev_priv);
3585
 
3260 Serge 3586
	return ret;
3587
}
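 
/* Illustrative sketch, not built here: how a userspace client might drive
 * the ioctl above. This belongs in a libdrm-based application, not in this
 * file; drmIoctl(), the uapi struct and DRM_IOCTL_I915_GEM_SET_CACHING come
 * from libdrm / i915_drm.h, and fd/handle are assumed to be an open DRM fd
 * and an existing GEM handle.
 */
#if 0
#include <stdint.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int example_set_caching(int fd, uint32_t handle)
{
	struct drm_i915_gem_caching arg = {
		.handle  = handle,
		.caching = I915_CACHING_CACHED,	/* request LLC snooping */
	};

	/* 0 on success; on failure errno holds the error, e.g. ENODEV on
	 * a BXT A stepping as rejected above.
	 */
	return drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg);
}
#endif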
3588
 
2335 Serge 3589
/*
3590
 * Prepare buffer for display plane (scanout, cursors, etc).
3591
 * Can be called from an uninterruptible phase (modesetting) and allows
3592
 * any flushes to be pipelined (for pageflips).
3593
 */
3594
int
3595
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3596
				     u32 alignment,
6084 serge 3597
				     const struct i915_ggtt_view *view)
2335 Serge 3598
{
3599
	u32 old_read_domains, old_write_domain;
3600
	int ret;
2332 Serge 3601
 
4104 Serge 3602
	/* Mark the pin_display early so that we account for the
3603
	 * display coherency whilst setting up the cache domains.
3604
	 */
6084 serge 3605
	obj->pin_display++;
4104 Serge 3606
 
2335 Serge 3607
	/* The display engine is not coherent with the LLC cache on gen6.  As
3608
	 * a result, we make sure that the pinning that is about to occur is
3609
	 * done with uncached PTEs. This is lowest common denominator for all
3610
	 * chipsets.
3611
	 *
3612
	 * However for gen6+, we could do better by using the GFDT bit instead
3613
	 * of uncaching, which would allow us to flush all the LLC-cached data
3614
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3615
	 */
4104 Serge 3616
	ret = i915_gem_object_set_cache_level(obj,
3617
					      HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
2360 Serge 3618
	if (ret)
4104 Serge 3619
		goto err_unpin_display;
2332 Serge 3620
 
2335 Serge 3621
	/* As the user may map the buffer once pinned in the display plane
3622
	 * (e.g. libkms for the bootup splash), we have to ensure that we
3623
	 * always use map_and_fenceable for all scanout buffers.
3624
	 */
6084 serge 3625
	ret = i915_gem_object_ggtt_pin(obj, view, alignment,
3626
				       view->type == I915_GGTT_VIEW_NORMAL ?
3627
				       PIN_MAPPABLE : 0);
2335 Serge 3628
	if (ret)
4104 Serge 3629
		goto err_unpin_display;
2332 Serge 3630
 
6084 serge 3631
	i915_gem_object_flush_cpu_write_domain(obj);
2332 Serge 3632
 
2335 Serge 3633
	old_write_domain = obj->base.write_domain;
3634
	old_read_domains = obj->base.read_domains;
2332 Serge 3635
 
2335 Serge 3636
	/* It should now be out of any other write domains, and we can update
3637
	 * the domain values for our changes.
3638
	 */
3031 serge 3639
	obj->base.write_domain = 0;
2335 Serge 3640
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2332 Serge 3641
 
2351 Serge 3642
	trace_i915_gem_object_change_domain(obj,
3643
					    old_read_domains,
3644
					    old_write_domain);
2332 Serge 3645
 
2335 Serge 3646
	return 0;
4104 Serge 3647
 
3648
err_unpin_display:
6084 serge 3649
	obj->pin_display--;
4104 Serge 3650
	return ret;
2335 Serge 3651
}
2332 Serge 3652
 
4104 Serge 3653
void
6084 serge 3654
i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
3655
					 const struct i915_ggtt_view *view)
4104 Serge 3656
{
6084 serge 3657
	if (WARN_ON(obj->pin_display == 0))
3658
		return;
4104 Serge 3659
 
6084 serge 3660
	i915_gem_object_ggtt_unpin_view(obj, view);
2332 Serge 3661
 
6084 serge 3662
	obj->pin_display--;
2344 Serge 3663
}
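 
/* Illustrative sketch, kept out of the build: the pin/unpin pairing the two
 * helpers above expect around a scanout. The example function is
 * hypothetical and assumes dev->struct_mutex is held and that the normal
 * GGTT view is being scanned out.
 */
#if 0
static int example_scanout_pin(struct drm_i915_gem_object *obj)
{
	int ret;

	ret = i915_gem_object_pin_to_display_plane(obj, 0,
						   &i915_ggtt_view_normal);
	if (ret)
		return ret;

	/* ... program the plane with the object's GGTT offset ... */

	/* Once the plane no longer scans out of this object: */
	i915_gem_object_unpin_from_display_plane(obj, &i915_ggtt_view_normal);
	return 0;
}
#endif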
2332 Serge 3664
 
2344 Serge 3665
/**
3666
 * Moves a single object to the CPU read, and possibly write domain.
3667
 *
3668
 * This function returns when the move is complete, including waiting on
3669
 * flushes to occur.
3670
 */
3031 serge 3671
int
2344 Serge 3672
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3673
{
3674
	uint32_t old_write_domain, old_read_domains;
3675
	int ret;
2332 Serge 3676
 
2344 Serge 3677
	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3678
		return 0;
2332 Serge 3679
 
3031 serge 3680
	ret = i915_gem_object_wait_rendering(obj, !write);
2344 Serge 3681
	if (ret)
3682
		return ret;
2332 Serge 3683
 
2344 Serge 3684
	i915_gem_object_flush_gtt_write_domain(obj);
2332 Serge 3685
 
2344 Serge 3686
	old_write_domain = obj->base.write_domain;
3687
	old_read_domains = obj->base.read_domains;
2332 Serge 3688
 
2344 Serge 3689
	/* Flush the CPU cache if it's still invalid. */
3690
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
4104 Serge 3691
		i915_gem_clflush_object(obj, false);
2332 Serge 3692
 
2344 Serge 3693
		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3694
	}
2332 Serge 3695
 
2344 Serge 3696
	/* It should now be out of any other write domains, and we can update
3697
	 * the domain values for our changes.
3698
	 */
3699
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2332 Serge 3700
 
2344 Serge 3701
	/* If we're writing through the CPU, then the GPU read domains will
3702
	 * need to be invalidated at next use.
3703
	 */
3704
	if (write) {
3705
		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3706
		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3707
	}
2332 Serge 3708
 
2351 Serge 3709
	trace_i915_gem_object_change_domain(obj,
3710
					    old_read_domains,
3711
					    old_write_domain);
2332 Serge 3712
 
2344 Serge 3713
	return 0;
3714
}
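 
/* Illustrative sketch, kept out of the build: a CPU read of the first dword
 * after moving the object to the CPU domain, using the page accessors used
 * elsewhere in this file. Hypothetical example function; assumes
 * dev->struct_mutex is held and that the usual kmap_atomic() helpers are
 * available in this port.
 */
#if 0
static int example_cpu_read_first_dword(struct drm_i915_gem_object *obj,
					uint32_t *out)
{
	struct page *page;
	void *vaddr;
	int ret;

	ret = i915_gem_object_set_to_cpu_domain(obj, false);
	if (ret)
		return ret;

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;
	i915_gem_object_pin_pages(obj);

	page = i915_gem_object_get_page(obj, 0);
	vaddr = kmap_atomic(page);
	*out = *(uint32_t *)vaddr;
	kunmap_atomic(vaddr);

	i915_gem_object_unpin_pages(obj);
	return 0;
}
#endif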
2332 Serge 3715
 
3031 serge 3716
/* Throttle our rendering by waiting until the ring has completed our requests
3717
 * emitted over 20 msec ago.
2344 Serge 3718
 *
3031 serge 3719
 * Note that if we were to use the current jiffies each time around the loop,
3720
 * we wouldn't escape the function with any frames outstanding if the time to
3721
 * render a frame was over 20ms.
3722
 *
3723
 * This should get us reasonable parallelism between CPU and GPU but also
3724
 * relatively low latency when blocking on a particular request to finish.
2344 Serge 3725
 */
3031 serge 3726
static int
3727
i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
2344 Serge 3728
{
3031 serge 3729
	struct drm_i915_private *dev_priv = dev->dev_private;
3730
	struct drm_i915_file_private *file_priv = file->driver_priv;
6084 serge 3731
	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
3732
	struct drm_i915_gem_request *request, *target = NULL;
3480 Serge 3733
	unsigned reset_counter;
3031 serge 3734
	int ret;
2332 Serge 3735
 
3480 Serge 3736
	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
3737
	if (ret)
3738
		return ret;
2332 Serge 3739
 
3480 Serge 3740
	ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
3741
	if (ret)
3742
		return ret;
3743
 
3031 serge 3744
	spin_lock(&file_priv->mm.lock);
3745
	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3746
		if (time_after_eq(request->emitted_jiffies, recent_enough))
3747
			break;
2332 Serge 3748
 
6084 serge 3749
		/*
3750
		 * Note that the request might not have been submitted yet,
3751
		 * in which case emitted_jiffies will be zero.
3752
		 */
3753
		if (!request->emitted_jiffies)
3754
			continue;
3755
 
3756
		target = request;
3031 serge 3757
	}
3480 Serge 3758
	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
6084 serge 3759
	if (target)
3760
		i915_gem_request_reference(target);
3031 serge 3761
	spin_unlock(&file_priv->mm.lock);
2332 Serge 3762
 
6084 serge 3763
	if (target == NULL)
3031 serge 3764
		return 0;
2332 Serge 3765
 
6084 serge 3766
	ret = __i915_wait_request(target, reset_counter, true, NULL, NULL);
3031 serge 3767
	if (ret == 0)
3768
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
2332 Serge 3769
 
6084 serge 3770
	i915_gem_request_unreference__unlocked(target);
3771
 
3031 serge 3772
	return ret;
2352 Serge 3773
}
2332 Serge 3774
 
5060 serge 3775
static bool
3776
i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
3777
{
3778
	struct drm_i915_gem_object *obj = vma->obj;
3779
 
3780
	if (alignment &&
3781
	    vma->node.start & (alignment - 1))
3782
		return true;
3783
 
3784
	if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
3785
		return true;
3786
 
3787
	if (flags & PIN_OFFSET_BIAS &&
3788
	    vma->node.start < (flags & PIN_OFFSET_MASK))
3789
		return true;
3790
 
6937 serge 3791
	if (flags & PIN_OFFSET_FIXED &&
3792
	    vma->node.start != (flags & PIN_OFFSET_MASK))
3793
		return true;
3794
 
5060 serge 3795
	return false;
3796
}
3797
 
6084 serge 3798
void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
2332 Serge 3799
{
6084 serge 3800
	struct drm_i915_gem_object *obj = vma->obj;
3801
	bool mappable, fenceable;
3802
	u32 fence_size, fence_alignment;
3803
 
3804
	fence_size = i915_gem_get_gtt_size(obj->base.dev,
3805
					   obj->base.size,
3806
					   obj->tiling_mode);
3807
	fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
3808
						     obj->base.size,
3809
						     obj->tiling_mode,
3810
						     true);
3811
 
3812
	fenceable = (vma->node.size == fence_size &&
3813
		     (vma->node.start & (fence_alignment - 1)) == 0);
3814
 
3815
	mappable = (vma->node.start + fence_size <=
3816
		    to_i915(obj->base.dev)->gtt.mappable_end);
3817
 
3818
	obj->map_and_fenceable = mappable && fenceable;
3819
}
3820
 
3821
static int
3822
i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
3823
		       struct i915_address_space *vm,
3824
		       const struct i915_ggtt_view *ggtt_view,
3825
		       uint32_t alignment,
3826
		       uint64_t flags)
3827
{
5060 serge 3828
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
4104 Serge 3829
	struct i915_vma *vma;
5354 serge 3830
	unsigned bound;
2332 Serge 3831
	int ret;
3832
 
5060 serge 3833
	if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
3834
		return -ENODEV;
2332 Serge 3835
 
5060 serge 3836
	if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
3837
		return -EINVAL;
4104 Serge 3838
 
5354 serge 3839
	if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
3840
		return -EINVAL;
3841
 
6084 serge 3842
	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
3843
		return -EINVAL;
3844
 
3845
	vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
3846
			  i915_gem_obj_to_vma(obj, vm);
3847
 
3848
	if (IS_ERR(vma))
3849
		return PTR_ERR(vma);
3850
 
5060 serge 3851
	if (vma) {
3852
		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
3853
			return -EBUSY;
4104 Serge 3854
 
5060 serge 3855
		if (i915_vma_misplaced(vma, alignment, flags)) {
3856
			WARN(vma->pin_count,
6084 serge 3857
			     "bo is already pinned in %s with incorrect alignment:"
3858
			     " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
2332 Serge 3859
			     " obj->map_and_fenceable=%d\n",
6084 serge 3860
			     ggtt_view ? "ggtt" : "ppgtt",
3861
			     upper_32_bits(vma->node.start),
3862
			     lower_32_bits(vma->node.start),
3863
			     alignment,
5060 serge 3864
			     !!(flags & PIN_MAPPABLE),
2332 Serge 3865
			     obj->map_and_fenceable);
4104 Serge 3866
			ret = i915_vma_unbind(vma);
2332 Serge 3867
			if (ret)
3868
				return ret;
5060 serge 3869
 
3870
			vma = NULL;
2332 Serge 3871
		}
3872
	}
3873
 
5354 serge 3874
	bound = vma ? vma->bound : 0;
5060 serge 3875
	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
6084 serge 3876
		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
3877
						 flags);
5060 serge 3878
		if (IS_ERR(vma))
3879
			return PTR_ERR(vma);
6084 serge 3880
	} else {
3881
		ret = i915_vma_bind(vma, obj->cache_level, flags);
3882
		if (ret)
3883
			return ret;
2332 Serge 3884
	}
3885
 
6084 serge 3886
	if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
3887
	    (bound ^ vma->bound) & GLOBAL_BIND) {
3888
		__i915_vma_set_map_and_fenceable(vma);
3889
		WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
5354 serge 3890
	}
3891
 
5060 serge 3892
	vma->pin_count++;
2332 Serge 3893
	return 0;
3894
}
3895
 
6084 serge 3896
int
3897
i915_gem_object_pin(struct drm_i915_gem_object *obj,
3898
		    struct i915_address_space *vm,
3899
		    uint32_t alignment,
3900
		    uint64_t flags)
2344 Serge 3901
{
6084 serge 3902
	return i915_gem_object_do_pin(obj, vm,
3903
				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
3904
				      alignment, flags);
2344 Serge 3905
}
2332 Serge 3906
 
6084 serge 3907
int
3908
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3909
			 const struct i915_ggtt_view *view,
3910
			 uint32_t alignment,
3911
			 uint64_t flags)
5060 serge 3912
{
6084 serge 3913
	if (WARN_ONCE(!view, "no view specified"))
3914
		return -EINVAL;
5060 serge 3915
 
6084 serge 3916
	return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view,
3917
				      alignment, flags | PIN_GLOBAL);
5060 serge 3918
}
3919
 
3920
void
6084 serge 3921
i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
3922
				const struct i915_ggtt_view *view)
5060 serge 3923
{
6084 serge 3924
	struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
5060 serge 3925
 
6084 serge 3926
	BUG_ON(!vma);
3927
	WARN_ON(vma->pin_count == 0);
3928
	WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
2332 Serge 3929
 
6084 serge 3930
	--vma->pin_count;
3031 serge 3931
}
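 
/* Illustrative sketch, kept out of the build: how the PIN_OFFSET_BIAS flag
 * checked in i915_vma_misplaced() above is encoded by a caller -- the bias
 * value is OR'ed into the flags alongside the flag bit and must be page
 * aligned and below PIN_OFFSET_MASK. Hypothetical example function; assumes
 * dev->struct_mutex is held.
 */
#if 0
static int example_pin_above_bias(struct drm_i915_gem_object *obj, u64 bias)
{
	return i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal, 0,
					PIN_MAPPABLE | PIN_OFFSET_BIAS | bias);
}
#endif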
2332 Serge 3932
 
3031 serge 3933
int
3934
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3935
		    struct drm_file *file)
3936
{
3937
	struct drm_i915_gem_busy *args = data;
3938
	struct drm_i915_gem_object *obj;
3939
	int ret;
2332 Serge 3940
 
3031 serge 3941
	ret = i915_mutex_lock_interruptible(dev);
3942
	if (ret)
3943
		return ret;
2332 Serge 3944
 
5060 serge 3945
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3031 serge 3946
	if (&obj->base == NULL) {
3947
		ret = -ENOENT;
3948
		goto unlock;
3949
	}
2332 Serge 3950
 
3031 serge 3951
	/* Count all active objects as busy, even if they are currently not used
3952
	 * by the gpu. Users of this interface expect objects to eventually
3953
	 * become non-busy without any further actions, therefore emit any
3954
	 * necessary flushes here.
3955
	 */
3956
	ret = i915_gem_object_flush_active(obj);
6084 serge 3957
	if (ret)
3958
		goto unref;
2332 Serge 3959
 
6084 serge 3960
	BUILD_BUG_ON(I915_NUM_RINGS > 16);
3961
	args->busy = obj->active << 16;
3962
	if (obj->last_write_req)
3963
		args->busy |= obj->last_write_req->ring->id;
2332 Serge 3964
 
6084 serge 3965
unref:
3031 serge 3966
	drm_gem_object_unreference(&obj->base);
3967
unlock:
3968
	mutex_unlock(&dev->struct_mutex);
3969
	return ret;
3970
}
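 
/* Illustrative sketch, not built here: decoding the value packed above from
 * a userspace client. The high 16 bits carry one bit per ring that is still
 * reading the object, the low 16 bits the id of the ring that last wrote it.
 * Belongs in a libdrm-based application; fd and handle are assumed to be an
 * open DRM fd and an existing GEM handle.
 */
#if 0
#include <stdint.h>
#include <xf86drm.h>
#include <i915_drm.h>

static int example_query_busy(int fd, uint32_t handle, int *busy)
{
	struct drm_i915_gem_busy arg = { .handle = handle };

	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &arg))
		return -1;

	*busy = arg.busy != 0;
	if (arg.busy) {
		uint32_t read_rings = arg.busy >> 16;	/* bitmask of readers */
		uint32_t write_ring = arg.busy & 0xffff;	/* last writer's id */
		(void)read_rings;
		(void)write_ring;
	}
	return 0;
}
#endif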
2332 Serge 3971
 
3031 serge 3972
int
3973
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3974
			struct drm_file *file_priv)
3975
{
3976
	return i915_gem_ring_throttle(dev, file_priv);
3977
}
2332 Serge 3978
 
3263 Serge 3979
#if 0
3980
 
3031 serge 3981
int
3982
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3983
		       struct drm_file *file_priv)
3984
{
5354 serge 3985
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 3986
	struct drm_i915_gem_madvise *args = data;
3987
	struct drm_i915_gem_object *obj;
3988
	int ret;
2332 Serge 3989
 
3031 serge 3990
	switch (args->madv) {
3991
	case I915_MADV_DONTNEED:
3992
	case I915_MADV_WILLNEED:
3993
	    break;
3994
	default:
3995
	    return -EINVAL;
3996
	}
2332 Serge 3997
 
3031 serge 3998
	ret = i915_mutex_lock_interruptible(dev);
3999
	if (ret)
4000
		return ret;
2332 Serge 4001
 
3031 serge 4002
	obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
4003
	if (&obj->base == NULL) {
4004
		ret = -ENOENT;
4005
		goto unlock;
4006
	}
2332 Serge 4007
 
5060 serge 4008
	if (i915_gem_obj_is_pinned(obj)) {
3031 serge 4009
		ret = -EINVAL;
4010
		goto out;
4011
	}
2332 Serge 4012
 
5354 serge 4013
	if (obj->pages &&
4014
	    obj->tiling_mode != I915_TILING_NONE &&
4015
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
4016
		if (obj->madv == I915_MADV_WILLNEED)
4017
			i915_gem_object_unpin_pages(obj);
4018
		if (args->madv == I915_MADV_WILLNEED)
4019
			i915_gem_object_pin_pages(obj);
4020
	}
4021
 
3031 serge 4022
	if (obj->madv != __I915_MADV_PURGED)
4023
		obj->madv = args->madv;
2332 Serge 4024
 
3031 serge 4025
	/* if the object is no longer attached, discard its backing storage */
6084 serge 4026
	if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
3031 serge 4027
		i915_gem_object_truncate(obj);
2332 Serge 4028
 
3031 serge 4029
	args->retained = obj->madv != __I915_MADV_PURGED;
2332 Serge 4030
 
3031 serge 4031
out:
4032
	drm_gem_object_unreference(&obj->base);
4033
unlock:
4034
	mutex_unlock(&dev->struct_mutex);
4035
	return ret;
4036
}
4037
#endif
2332 Serge 4038
 
3031 serge 4039
void i915_gem_object_init(struct drm_i915_gem_object *obj,
4040
			  const struct drm_i915_gem_object_ops *ops)
4041
{
6084 serge 4042
	int i;
4043
 
4104 Serge 4044
	INIT_LIST_HEAD(&obj->global_list);
6084 serge 4045
	for (i = 0; i < I915_NUM_RINGS; i++)
4046
		INIT_LIST_HEAD(&obj->ring_list[i]);
4104 Serge 4047
	INIT_LIST_HEAD(&obj->obj_exec_link);
4048
	INIT_LIST_HEAD(&obj->vma_list);
6084 serge 4049
	INIT_LIST_HEAD(&obj->batch_pool_link);
2332 Serge 4050
 
3031 serge 4051
	obj->ops = ops;
4052
 
4053
	obj->fence_reg = I915_FENCE_REG_NONE;
4054
	obj->madv = I915_MADV_WILLNEED;
4055
 
4056
	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
4057
}
4058
 
4059
static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
6937 serge 4060
	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
3031 serge 4061
	.get_pages = i915_gem_object_get_pages_gtt,
4062
	.put_pages = i915_gem_object_put_pages_gtt,
4063
};
4064
 
2332 Serge 4065
struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
4066
						  size_t size)
4067
{
4068
	struct drm_i915_gem_object *obj;
3031 serge 4069
	struct address_space *mapping;
3480 Serge 4070
	gfp_t mask;
2340 Serge 4071
 
3746 Serge 4072
	obj = i915_gem_object_alloc(dev);
2332 Serge 4073
	if (obj == NULL)
4074
		return NULL;
4075
 
4076
	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4104 Serge 4077
		i915_gem_object_free(obj);
2332 Serge 4078
		return NULL;
4079
	}
4080
 
4081
 
3031 serge 4082
	i915_gem_object_init(obj, &i915_gem_object_ops);
2332 Serge 4083
 
4084
	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4085
	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4086
 
3031 serge 4087
	if (HAS_LLC(dev)) {
4088
		/* On some devices, we can have the GPU use the LLC (the CPU
2332 Serge 4089
		 * cache) for about a 10% performance improvement
4090
		 * compared to uncached.  Graphics requests other than
4091
		 * display scanout are coherent with the CPU in
4092
		 * accessing this cache.  This means in this mode we
4093
		 * don't need to clflush on the CPU side, and on the
4094
		 * GPU side we only need to flush internal caches to
4095
		 * get data visible to the CPU.
4096
		 *
4097
		 * However, we maintain the display planes as UC, and so
4098
		 * need to rebind when first used as such.
4099
		 */
4100
		obj->cache_level = I915_CACHE_LLC;
4101
	} else
4102
		obj->cache_level = I915_CACHE_NONE;
4103
 
4560 Serge 4104
	trace_i915_gem_object_create(obj);
4105
 
2332 Serge 4106
	return obj;
4107
}
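 
/* Illustrative sketch, kept out of the build: allocating a kernel-internal
 * object with the helper above and preparing it for GTT access, releasing
 * the reference again on failure. Hypothetical example function; assumes
 * dev->struct_mutex is held.
 */
#if 0
static struct drm_i915_gem_object *
example_alloc_gtt_object(struct drm_device *dev, size_t size)
{
	struct drm_i915_gem_object *obj;
	int ret;

	/* Sizes are expected to be page aligned, as in
	 * i915_gem_object_create_from_data() below.
	 */
	obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE));
	if (obj == NULL)
		return NULL;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret) {
		drm_gem_object_unreference(&obj->base);
		return NULL;
	}

	return obj;
}
#endif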
4108
 
6283 serge 4109
static bool discard_backing_storage(struct drm_i915_gem_object *obj)
4110
{
4111
	/* If we are the last user of the backing storage (be it shmemfs
4112
	 * pages or stolen etc), we know that the pages are going to be
4113
	 * immediately released. In this case, we can then skip copying
4114
	 * back the contents from the GPU.
4115
	 */
4116
 
4117
	if (obj->madv != I915_MADV_WILLNEED)
4118
		return false;
4119
 
4120
	if (obj->base.filp == NULL)
4121
		return true;
4122
 
4123
//        printf("filp %p\n", obj->base.filp);
4124
	shmem_file_delete(obj->base.filp);
4125
	return true;
4126
}
4127
 
3031 serge 4128
void i915_gem_free_object(struct drm_gem_object *gem_obj)
2344 Serge 4129
{
3031 serge 4130
	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
2344 Serge 4131
	struct drm_device *dev = obj->base.dev;
5060 serge 4132
	struct drm_i915_private *dev_priv = dev->dev_private;
4104 Serge 4133
	struct i915_vma *vma, *next;
2332 Serge 4134
 
4560 Serge 4135
	intel_runtime_pm_get(dev_priv);
4136
 
3031 serge 4137
	trace_i915_gem_object_destroy(obj);
4138
 
5060 serge 4139
	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
4140
		int ret;
3031 serge 4141
 
5060 serge 4142
		vma->pin_count = 0;
4143
		ret = i915_vma_unbind(vma);
4104 Serge 4144
		if (WARN_ON(ret == -ERESTARTSYS)) {
6084 serge 4145
			bool was_interruptible;
3031 serge 4146
 
6084 serge 4147
			was_interruptible = dev_priv->mm.interruptible;
4148
			dev_priv->mm.interruptible = false;
3031 serge 4149
 
4104 Serge 4150
			WARN_ON(i915_vma_unbind(vma));
3031 serge 4151
 
6084 serge 4152
			dev_priv->mm.interruptible = was_interruptible;
4153
		}
2344 Serge 4154
	}
2332 Serge 4155
 
4104 Serge 4156
	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
4157
	 * before progressing. */
4158
	if (obj->stolen)
4159
		i915_gem_object_unpin_pages(obj);
4160
 
5060 serge 4161
	WARN_ON(obj->frontbuffer_bits);
4162
 
5354 serge 4163
	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
4164
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
4165
	    obj->tiling_mode != I915_TILING_NONE)
4166
		i915_gem_object_unpin_pages(obj);
4167
 
4104 Serge 4168
	if (WARN_ON(obj->pages_pin_count))
6084 serge 4169
		obj->pages_pin_count = 0;
6283 serge 4170
	if (discard_backing_storage(obj))
4171
		obj->madv = I915_MADV_DONTNEED;
3031 serge 4172
	i915_gem_object_put_pages(obj);
4173
//   i915_gem_object_free_mmap_offset(obj);
2332 Serge 4174
 
3243 Serge 4175
	BUG_ON(obj->pages);
2332 Serge 4176
 
6283 serge 4177
	if (obj->ops->release)
4178
		obj->ops->release(obj);
3031 serge 4179
 
2344 Serge 4180
	drm_gem_object_release(&obj->base);
4181
	i915_gem_info_remove_obj(dev_priv, obj->base.size);
2332 Serge 4182
 
2344 Serge 4183
	kfree(obj->bit_17);
4104 Serge 4184
	i915_gem_object_free(obj);
4560 Serge 4185
 
4186
	intel_runtime_pm_put(dev_priv);
2344 Serge 4187
}
2332 Serge 4188
 
4560 Serge 4189
struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
4104 Serge 4190
				     struct i915_address_space *vm)
4191
{
4560 Serge 4192
	struct i915_vma *vma;
6084 serge 4193
	list_for_each_entry(vma, &obj->vma_list, vma_link) {
6937 serge 4194
		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL &&
4195
		    vma->vm == vm)
4560 Serge 4196
			return vma;
6084 serge 4197
	}
4198
	return NULL;
4199
}
4560 Serge 4200
 
6084 serge 4201
struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
4202
					   const struct i915_ggtt_view *view)
4203
{
4204
	struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
4205
	struct i915_vma *vma;
4206
 
4207
	if (WARN_ONCE(!view, "no view specified"))
4208
		return ERR_PTR(-EINVAL);
4209
 
4210
	list_for_each_entry(vma, &obj->vma_list, vma_link)
4211
		if (vma->vm == ggtt &&
4212
		    i915_ggtt_view_equal(&vma->ggtt_view, view))
4213
			return vma;
4560 Serge 4214
	return NULL;
4215
}
4216
 
4104 Serge 4217
void i915_gem_vma_destroy(struct i915_vma *vma)
4218
{
5354 serge 4219
	struct i915_address_space *vm = NULL;
4104 Serge 4220
	WARN_ON(vma->node.allocated);
4560 Serge 4221
 
4222
	/* Keep the vma as a placeholder in the execbuffer reservation lists */
4223
	if (!list_empty(&vma->exec_list))
4224
		return;
4225
 
5354 serge 4226
	vm = vma->vm;
4227
 
4228
	if (!i915_is_ggtt(vm))
4229
		i915_ppgtt_put(i915_vm_to_ppgtt(vm));
4230
 
4104 Serge 4231
	list_del(&vma->vma_link);
4560 Serge 4232
 
4104 Serge 4233
	kfree(vma);
4234
}
4235
 
6084 serge 4236
static void
4237
i915_gem_stop_ringbuffers(struct drm_device *dev)
4238
{
4239
	struct drm_i915_private *dev_priv = dev->dev_private;
4240
	struct intel_engine_cs *ring;
4241
	int i;
4242
 
4243
	for_each_ring(ring, dev_priv, i)
4244
		dev_priv->gt.stop_ring(ring);
4245
}
4246
 
3031 serge 4247
#if 0
4248
int
4560 Serge 4249
i915_gem_suspend(struct drm_device *dev)
2344 Serge 4250
{
5060 serge 4251
	struct drm_i915_private *dev_priv = dev->dev_private;
4560 Serge 4252
	int ret = 0;
2332 Serge 4253
 
4560 Serge 4254
	mutex_lock(&dev->struct_mutex);
3031 serge 4255
	ret = i915_gpu_idle(dev);
4560 Serge 4256
	if (ret)
4257
		goto err;
4258
 
3031 serge 4259
	i915_gem_retire_requests(dev);
4260
 
5060 serge 4261
	i915_gem_stop_ringbuffers(dev);
4560 Serge 4262
	mutex_unlock(&dev->struct_mutex);
4263
 
6084 serge 4264
	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
3263 Serge 4265
	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
5060 serge 4266
	flush_delayed_work(&dev_priv->mm.idle_work);
3031 serge 4267
 
6084 serge 4268
	/* Assert that we successfully flushed all the work and
4269
	 * reset the GPU back to its idle, low power state.
4270
	 */
4271
	WARN_ON(dev_priv->mm.busy);
4272
 
3031 serge 4273
	return 0;
4560 Serge 4274
 
4275
err:
4276
	mutex_unlock(&dev->struct_mutex);
4277
	return ret;
2344 Serge 4278
}
3031 serge 4279
#endif
2332 Serge 4280
 
6084 serge 4281
int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
3031 serge 4282
{
6084 serge 4283
	struct intel_engine_cs *ring = req->ring;
4560 Serge 4284
	struct drm_device *dev = ring->dev;
5060 serge 4285
	struct drm_i915_private *dev_priv = dev->dev_private;
4560 Serge 4286
	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
4287
	int i, ret;
2332 Serge 4288
 
4560 Serge 4289
	if (!HAS_L3_DPF(dev) || !remap_info)
4290
		return 0;
2332 Serge 4291
 
6084 serge 4292
	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
4560 Serge 4293
	if (ret)
4294
		return ret;
2332 Serge 4295
 
4560 Serge 4296
	/*
4297
	 * Note: We do not worry about the concurrent register cacheline hang
4298
	 * here because no other code should access these registers other than
4299
	 * at initialization time.
4300
	 */
6937 serge 4301
	for (i = 0; i < GEN7_L3LOG_SIZE / 4; i++) {
4560 Serge 4302
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
6937 serge 4303
		intel_ring_emit_reg(ring, GEN7_L3LOG(slice, i));
4304
		intel_ring_emit(ring, remap_info[i]);
3031 serge 4305
	}
2332 Serge 4306
 
4560 Serge 4307
	intel_ring_advance(ring);
2332 Serge 4308
 
4560 Serge 4309
	return ret;
3031 serge 4310
}
2332 Serge 4311
 
3031 serge 4312
void i915_gem_init_swizzling(struct drm_device *dev)
4313
{
5060 serge 4314
	struct drm_i915_private *dev_priv = dev->dev_private;
2332 Serge 4315
 
3031 serge 4316
	if (INTEL_INFO(dev)->gen < 5 ||
4317
	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4318
		return;
2332 Serge 4319
 
3031 serge 4320
	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4321
				 DISP_TILE_SURFACE_SWIZZLING);
2332 Serge 4322
 
3031 serge 4323
	if (IS_GEN5(dev))
4324
		return;
2344 Serge 4325
 
3031 serge 4326
	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4327
	if (IS_GEN6(dev))
4328
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
3480 Serge 4329
	else if (IS_GEN7(dev))
4330
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4560 Serge 4331
	else if (IS_GEN8(dev))
4332
		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
3031 serge 4333
	else
3480 Serge 4334
		BUG();
3031 serge 4335
}
4336
 
5354 serge 4337
static void init_unused_ring(struct drm_device *dev, u32 base)
2332 Serge 4338
{
3480 Serge 4339
	struct drm_i915_private *dev_priv = dev->dev_private;
5354 serge 4340
 
4341
	I915_WRITE(RING_CTL(base), 0);
4342
	I915_WRITE(RING_HEAD(base), 0);
4343
	I915_WRITE(RING_TAIL(base), 0);
4344
	I915_WRITE(RING_START(base), 0);
4345
}
4346
 
4347
static void init_unused_rings(struct drm_device *dev)
4348
{
4349
	if (IS_I830(dev)) {
4350
		init_unused_ring(dev, PRB1_BASE);
4351
		init_unused_ring(dev, SRB0_BASE);
4352
		init_unused_ring(dev, SRB1_BASE);
4353
		init_unused_ring(dev, SRB2_BASE);
4354
		init_unused_ring(dev, SRB3_BASE);
4355
	} else if (IS_GEN2(dev)) {
4356
		init_unused_ring(dev, SRB0_BASE);
4357
		init_unused_ring(dev, SRB1_BASE);
4358
	} else if (IS_GEN3(dev)) {
4359
		init_unused_ring(dev, PRB1_BASE);
4360
		init_unused_ring(dev, PRB2_BASE);
4361
	}
4362
}
4363
 
4364
int i915_gem_init_rings(struct drm_device *dev)
4365
{
4366
	struct drm_i915_private *dev_priv = dev->dev_private;
2332 Serge 4367
	int ret;
2351 Serge 4368
 
2332 Serge 4369
	ret = intel_init_render_ring_buffer(dev);
4370
	if (ret)
4371
		return ret;
4372
 
6084 serge 4373
	if (HAS_BSD(dev)) {
2332 Serge 4374
		ret = intel_init_bsd_ring_buffer(dev);
4375
		if (ret)
4376
			goto cleanup_render_ring;
4377
	}
4378
 
6084 serge 4379
	if (HAS_BLT(dev)) {
2332 Serge 4380
		ret = intel_init_blt_ring_buffer(dev);
4381
		if (ret)
4382
			goto cleanup_bsd_ring;
4383
	}
4384
 
4104 Serge 4385
	if (HAS_VEBOX(dev)) {
4386
		ret = intel_init_vebox_ring_buffer(dev);
4387
		if (ret)
4388
			goto cleanup_blt_ring;
4389
	}
4390
 
5060 serge 4391
	if (HAS_BSD2(dev)) {
4392
		ret = intel_init_bsd2_ring_buffer(dev);
4393
		if (ret)
4394
			goto cleanup_vebox_ring;
4395
	}
4104 Serge 4396
 
2332 Serge 4397
	return 0;
4398
 
4104 Serge 4399
cleanup_vebox_ring:
4400
	intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
3480 Serge 4401
cleanup_blt_ring:
4402
	intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
2332 Serge 4403
cleanup_bsd_ring:
4404
	intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
4405
cleanup_render_ring:
4406
	intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
3480 Serge 4407
 
2332 Serge 4408
	return ret;
4409
}
4410
 
3480 Serge 4411
int
4412
i915_gem_init_hw(struct drm_device *dev)
3031 serge 4413
{
5060 serge 4414
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 4415
	struct intel_engine_cs *ring;
4416
	int ret, i, j;
3031 serge 4417
 
3480 Serge 4418
	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
4419
		return -EIO;
3031 serge 4420
 
6084 serge 4421
	/* Double layer security blanket, see i915_gem_init() */
4422
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4423
 
4104 Serge 4424
	if (dev_priv->ellc_size)
4425
		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
3480 Serge 4426
 
4560 Serge 4427
	if (IS_HASWELL(dev))
4428
		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
4429
			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
4430
 
3746 Serge 4431
	if (HAS_PCH_NOP(dev)) {
5060 serge 4432
		if (IS_IVYBRIDGE(dev)) {
6084 serge 4433
			u32 temp = I915_READ(GEN7_MSG_CTL);
4434
			temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
4435
			I915_WRITE(GEN7_MSG_CTL, temp);
5060 serge 4436
		} else if (INTEL_INFO(dev)->gen >= 7) {
4437
			u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
4438
			temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
4439
			I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
4440
		}
3746 Serge 4441
	}
4442
 
3480 Serge 4443
	i915_gem_init_swizzling(dev);
4444
 
6084 serge 4445
	/*
4446
	 * At least 830 can leave some of the unused rings
4447
	 * "active" (ie. head != tail) after resume which
4448
	 * will prevent c3 entry. Makes sure all unused rings
4449
	 * are totally idle.
4450
	 */
4451
	init_unused_rings(dev);
3480 Serge 4452
 
6084 serge 4453
	BUG_ON(!dev_priv->ring[RCS].default_context);
4560 Serge 4454
 
6084 serge 4455
	ret = i915_ppgtt_init_hw(dev);
4456
	if (ret) {
4457
		DRM_ERROR("PPGTT enable HW failed %d\n", ret);
4458
		goto out;
4459
	}
4460
 
4461
	/* Need to do basic initialisation of all rings first: */
4462
	for_each_ring(ring, dev_priv, i) {
4463
		ret = ring->init_hw(ring);
4464
		if (ret)
4465
			goto out;
4466
	}
4467
 
4468
	/* We can't enable contexts until all firmware is loaded */
4469
	if (HAS_GUC_UCODE(dev)) {
4470
		ret = intel_guc_ucode_load(dev);
4471
		if (ret) {
6937 serge 4472
			DRM_ERROR("Failed to initialize GuC, error %d\n", ret);
4473
			ret = -EIO;
6084 serge 4474
			goto out;
4475
		}
4476
	}
4477
 
3480 Serge 4478
	/*
6084 serge 4479
	 * Increment the next seqno by 0x100 so we have a visible break
4480
	 * on re-initialisation
3480 Serge 4481
	 */
6084 serge 4482
	ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100);
4483
	if (ret)
4484
		goto out;
5354 serge 4485
 
6084 serge 4486
	/* Now it is safe to go back round and do everything else: */
4487
	for_each_ring(ring, dev_priv, i) {
4488
		struct drm_i915_gem_request *req;
4560 Serge 4489
 
6084 serge 4490
		WARN_ON(!ring->default_context);
4491
 
4492
		ret = i915_gem_request_alloc(ring, ring->default_context, &req);
4493
		if (ret) {
4494
			i915_gem_cleanup_ringbuffer(dev);
4495
			goto out;
4496
		}
4497
 
4498
		if (ring->id == RCS) {
4499
			for (j = 0; j < NUM_L3_SLICES(dev); j++)
4500
				i915_gem_l3_remap(req, j);
4501
		}
4502
 
4503
		ret = i915_ppgtt_init_ring(req);
4504
		if (ret && ret != -EIO) {
4505
			DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
4506
			i915_gem_request_cancel(req);
4507
			i915_gem_cleanup_ringbuffer(dev);
4508
			goto out;
4509
		}
4510
 
4511
		ret = i915_gem_context_enable(req);
4512
		if (ret && ret != -EIO) {
4513
			DRM_ERROR("Context enable ring #%d failed %d\n", i, ret);
4514
			i915_gem_request_cancel(req);
4515
			i915_gem_cleanup_ringbuffer(dev);
4516
			goto out;
4517
		}
4518
 
4519
		i915_add_request_no_flush(req);
5354 serge 4520
	}
4521
 
6084 serge 4522
out:
4523
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5060 serge 4524
	return ret;
3031 serge 4525
}
4526
 
4527
int i915_gem_init(struct drm_device *dev)
4528
{
4529
	struct drm_i915_private *dev_priv = dev->dev_private;
4530
	int ret;
4531
 
5354 serge 4532
	i915.enable_execlists = intel_sanitize_enable_execlists(dev,
4533
			i915.enable_execlists);
4534
 
3031 serge 4535
	mutex_lock(&dev->struct_mutex);
3746 Serge 4536
 
5354 serge 4537
	if (!i915.enable_execlists) {
6084 serge 4538
		dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
5354 serge 4539
		dev_priv->gt.init_rings = i915_gem_init_rings;
4540
		dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer;
4541
		dev_priv->gt.stop_ring = intel_stop_ring_buffer;
4542
	} else {
6084 serge 4543
		dev_priv->gt.execbuf_submit = intel_execlists_submission;
5354 serge 4544
		dev_priv->gt.init_rings = intel_logical_rings_init;
4545
		dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup;
4546
		dev_priv->gt.stop_ring = intel_logical_ring_stop;
4547
	}
4548
 
6084 serge 4549
	/* This is just a security blanket to placate dragons.
4550
	 * On some systems, we very sporadically observe that the first TLBs
4551
	 * used by the CS may be stale, despite us poking the TLB reset. If
4552
	 * we hold the forcewake during initialisation these problems
4553
	 * just magically go away.
4554
	 */
4555
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5354 serge 4556
 
6084 serge 4557
//	ret = i915_gem_init_userptr(dev);
4558
//	if (ret)
4559
//		goto out_unlock;
3746 Serge 4560
 
6084 serge 4561
	i915_gem_init_global_gtt(dev);
4562
 
5060 serge 4563
	ret = i915_gem_context_init(dev);
6084 serge 4564
	if (ret)
4565
		goto out_unlock;
3031 serge 4566
 
6084 serge 4567
	ret = dev_priv->gt.init_rings(dev);
4568
	if (ret)
4569
		goto out_unlock;
4570
 
5060 serge 4571
	ret = i915_gem_init_hw(dev);
4572
	if (ret == -EIO) {
4573
		/* Allow ring initialisation to fail by marking the GPU as
4574
		 * wedged. But we only want to do this where the GPU is angry,
4575
		 * for all other failure, such as an allocation failure, bail.
4576
		 */
4577
		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
6084 serge 4578
		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
5060 serge 4579
		ret = 0;
4580
	}
6084 serge 4581
 
4582
out_unlock:
4583
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5060 serge 4584
	mutex_unlock(&dev->struct_mutex);
3746 Serge 4585
 
6084 serge 4586
	return ret;
3031 serge 4587
}
4588
 
2332 Serge 4589
void
4590
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4591
{
5060 serge 4592
	struct drm_i915_private *dev_priv = dev->dev_private;
4593
	struct intel_engine_cs *ring;
2332 Serge 4594
	int i;
4595
 
3031 serge 4596
	for_each_ring(ring, dev_priv, i)
5354 serge 4597
		dev_priv->gt.cleanup_ring(ring);
2332 Serge 4598
}
4599
 
4600
static void
5060 serge 4601
init_ring_lists(struct intel_engine_cs *ring)
2326 Serge 4602
{
6084 serge 4603
	INIT_LIST_HEAD(&ring->active_list);
4604
	INIT_LIST_HEAD(&ring->request_list);
2326 Serge 4605
}
4606
 
4607
void
4608
i915_gem_load(struct drm_device *dev)
4609
{
5060 serge 4610
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 4611
	int i;
2326 Serge 4612
 
4104 Serge 4613
	INIT_LIST_HEAD(&dev_priv->vm_list);
4560 Serge 4614
	INIT_LIST_HEAD(&dev_priv->context_list);
3031 serge 4615
	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
4616
	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
6084 serge 4617
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4618
	for (i = 0; i < I915_NUM_RINGS; i++)
4619
		init_ring_lists(&dev_priv->ring[i]);
2342 Serge 4620
	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
6084 serge 4621
		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
2360 Serge 4622
	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4623
			  i915_gem_retire_work_handler);
4560 Serge 4624
	INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
4625
			  i915_gem_idle_work_handler);
3480 Serge 4626
	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
2326 Serge 4627
 
6084 serge 4628
	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
2326 Serge 4629
 
6937 serge 4630
	if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev))
3746 Serge 4631
		dev_priv->num_fence_regs = 32;
4632
	else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
6084 serge 4633
		dev_priv->num_fence_regs = 16;
4634
	else
4635
		dev_priv->num_fence_regs = 8;
2326 Serge 4636
 
6084 serge 4637
	if (intel_vgpu_active(dev))
4638
		dev_priv->num_fence_regs =
4639
				I915_READ(vgtif_reg(avail_rs.fence_num));
4640
 
4641
	/*
4642
	 * Set initial sequence number for requests.
4643
	 * Using this number allows the wraparound to happen early,
4644
	 * catching any obvious problems.
4645
	 */
4646
	dev_priv->next_seqno = ((u32)~0 - 0x1100);
4647
	dev_priv->last_seqno = ((u32)~0 - 0x1101);
4648
 
4649
	/* Initialize fence registers to zero */
3746 Serge 4650
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4651
	i915_gem_restore_fences(dev);
2326 Serge 4652
 
6084 serge 4653
	i915_gem_detect_bit_6_swizzle(dev);
2326 Serge 4654
 
6084 serge 4655
	dev_priv->mm.interruptible = true;
2326 Serge 4656
 
5060 serge 4657
	mutex_init(&dev_priv->fb_tracking.lock);
2326 Serge 4658
}
4659
 
6084 serge 4660
void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4661
{
4662
	struct drm_i915_file_private *file_priv = file->driver_priv;
4663
 
4664
	/* Clean up our request list when the client is going away, so that
4665
	 * later retire_requests won't dereference our soon-to-be-gone
4666
	 * file_priv.
4667
	 */
4668
	spin_lock(&file_priv->mm.lock);
4669
	while (!list_empty(&file_priv->mm.request_list)) {
4670
		struct drm_i915_gem_request *request;
4671
 
4672
		request = list_first_entry(&file_priv->mm.request_list,
4673
					   struct drm_i915_gem_request,
4674
					   client_list);
4675
		list_del(&request->client_list);
4676
		request->file_priv = NULL;
4677
	}
4678
	spin_unlock(&file_priv->mm.lock);
4679
 
4680
	if (!list_empty(&file_priv->rps.link)) {
4681
		spin_lock(&to_i915(dev)->rps.client_lock);
4682
		list_del(&file_priv->rps.link);
4683
		spin_unlock(&to_i915(dev)->rps.client_lock);
4684
	}
4685
}
4686
 
5060 serge 4687
int i915_gem_open(struct drm_device *dev, struct drm_file *file)
4104 Serge 4688
{
5060 serge 4689
	struct drm_i915_file_private *file_priv;
4104 Serge 4690
	int ret;
2326 Serge 4691
 
5060 serge 4692
	DRM_DEBUG_DRIVER("\n");
4104 Serge 4693
 
5060 serge 4694
	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
4695
	if (!file_priv)
4104 Serge 4696
		return -ENOMEM;
4697
 
5060 serge 4698
	file->driver_priv = file_priv;
4699
	file_priv->dev_priv = dev->dev_private;
4700
	file_priv->file = file;
6084 serge 4701
	INIT_LIST_HEAD(&file_priv->rps.link);
4104 Serge 4702
 
5060 serge 4703
	spin_lock_init(&file_priv->mm.lock);
4704
	INIT_LIST_HEAD(&file_priv->mm.request_list);
4104 Serge 4705
 
5060 serge 4706
	ret = i915_gem_context_open(dev, file);
4707
	if (ret)
4708
		kfree(file_priv);
4104 Serge 4709
 
4710
	return ret;
4711
}
4712
 
5354 serge 4713
/**
4714
 * i915_gem_track_fb - update frontbuffer tracking
6084 serge 4715
 * @old: current GEM buffer for the frontbuffer slots
4716
 * @new: new GEM buffer for the frontbuffer slots
4717
 * @frontbuffer_bits: bitmask of frontbuffer slots
5354 serge 4718
 *
4719
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
4720
 * from @old and setting them in @new. Both @old and @new can be NULL.
4721
 */
5060 serge 4722
void i915_gem_track_fb(struct drm_i915_gem_object *old,
4723
		       struct drm_i915_gem_object *new,
4724
		       unsigned frontbuffer_bits)
4104 Serge 4725
{
5060 serge 4726
	if (old) {
4727
		WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
4728
		WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
4729
		old->frontbuffer_bits &= ~frontbuffer_bits;
4104 Serge 4730
	}
4731
 
5060 serge 4732
	if (new) {
4733
		WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
4734
		WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
4735
		new->frontbuffer_bits |= frontbuffer_bits;
4104 Serge 4736
	}
4737
}
4738
 
4739
/* All the new VM stuff */
6084 serge 4740
u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
4741
			struct i915_address_space *vm)
4104 Serge 4742
{
4743
	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
4744
	struct i915_vma *vma;
4745
 
5354 serge 4746
	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
4104 Serge 4747
 
4748
	list_for_each_entry(vma, &o->vma_list, vma_link) {
6084 serge 4749
		if (i915_is_ggtt(vma->vm) &&
4750
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4751
			continue;
4104 Serge 4752
		if (vma->vm == vm)
4753
			return vma->node.start;
6084 serge 4754
	}
4104 Serge 4755
 
5060 serge 4756
	WARN(1, "%s vma for this object not found.\n",
4757
	     i915_is_ggtt(vm) ? "global" : "ppgtt");
4758
	return -1;
4104 Serge 4759
}
4760
 
6084 serge 4761
u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
4762
				  const struct i915_ggtt_view *view)
4763
{
4764
	struct i915_address_space *ggtt = i915_obj_to_ggtt(o);
4765
	struct i915_vma *vma;
4766
 
4767
	list_for_each_entry(vma, &o->vma_list, vma_link)
4768
		if (vma->vm == ggtt &&
4769
		    i915_ggtt_view_equal(&vma->ggtt_view, view))
4770
			return vma->node.start;
4771
 
4772
	WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
4773
	return -1;
4774
}
4775
 
4104 Serge 4776
bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
4777
			struct i915_address_space *vm)
4778
{
4779
	struct i915_vma *vma;
4780
 
6084 serge 4781
	list_for_each_entry(vma, &o->vma_list, vma_link) {
4782
		if (i915_is_ggtt(vma->vm) &&
4783
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4784
			continue;
4104 Serge 4785
		if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
4786
			return true;
6084 serge 4787
	}
4104 Serge 4788
 
4789
	return false;
4790
}
4791
 
6084 serge 4792
bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
4793
				  const struct i915_ggtt_view *view)
4794
{
4795
	struct i915_address_space *ggtt = i915_obj_to_ggtt(o);
4796
	struct i915_vma *vma;
4797
 
4798
	list_for_each_entry(vma, &o->vma_list, vma_link)
4799
		if (vma->vm == ggtt &&
4800
		    i915_ggtt_view_equal(&vma->ggtt_view, view) &&
4801
		    drm_mm_node_allocated(&vma->node))
4802
			return true;
4803
 
4804
	return false;
4805
}
4806
 
4104 Serge 4807
bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
4808
{
4560 Serge 4809
	struct i915_vma *vma;
4104 Serge 4810
 
4560 Serge 4811
	list_for_each_entry(vma, &o->vma_list, vma_link)
4812
		if (drm_mm_node_allocated(&vma->node))
4104 Serge 4813
			return true;
4814
 
4815
	return false;
4816
}
4817
 
4818
unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
4819
				struct i915_address_space *vm)
4820
{
4821
	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
4822
	struct i915_vma *vma;
4823
 
5354 serge 4824
	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
4104 Serge 4825
 
4826
	BUG_ON(list_empty(&o->vma_list));
4827
 
6084 serge 4828
	list_for_each_entry(vma, &o->vma_list, vma_link) {
4829
		if (i915_is_ggtt(vma->vm) &&
4830
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4831
			continue;
4104 Serge 4832
		if (vma->vm == vm)
4833
			return vma->node.size;
6084 serge 4834
	}
4104 Serge 4835
	return 0;
4836
}
4560 Serge 4837
 
6084 serge 4838
bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
4839
{
4840
	struct i915_vma *vma;
4841
	list_for_each_entry(vma, &obj->vma_list, vma_link)
4842
		if (vma->pin_count > 0)
4843
			return true;
4560 Serge 4844
 
6084 serge 4845
	return false;
4846
}
5060 serge 4847
 
6937 serge 4848
/* Like i915_gem_object_get_page(), but mark the returned page dirty */
4849
struct page *
4850
i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n)
4851
{
4852
	struct page *page;
4853
 
4854
	/* Only default objects have per-page dirty tracking */
4855
	if (WARN_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) == 0))
4856
		return NULL;
4857
 
4858
	page = i915_gem_object_get_page(obj, n);
4859
	set_page_dirty(page);
4860
	return page;
4861
}
4862
 
6084 serge 4863
/* Allocate a new GEM object and fill it with the supplied data */
4864
struct drm_i915_gem_object *
4865
i915_gem_object_create_from_data(struct drm_device *dev,
4866
			         const void *data, size_t size)
4104 Serge 4867
{
6084 serge 4868
	struct drm_i915_gem_object *obj;
4869
	struct sg_table *sg;
4870
	size_t bytes;
4871
	int ret;
4104 Serge 4872
 
6084 serge 4873
	obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE));
4874
	if (IS_ERR_OR_NULL(obj))
4875
		return obj;
4104 Serge 4876
 
6084 serge 4877
	ret = i915_gem_object_set_to_cpu_domain(obj, true);
4878
	if (ret)
4879
		goto fail;
4880
 
4881
	ret = i915_gem_object_get_pages(obj);
4882
	if (ret)
4883
		goto fail;
4884
 
4885
	i915_gem_object_pin_pages(obj);
4886
	sg = obj->pages;
4887
	bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
6937 serge 4888
	obj->dirty = 1;		/* Backing store is now out of date */
6084 serge 4889
	i915_gem_object_unpin_pages(obj);
4890
 
4891
	if (WARN_ON(bytes != size)) {
4892
		DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
4893
		ret = -EFAULT;
4894
		goto fail;
4895
	}
4896
 
4897
	return obj;
4898
 
4899
fail:
4900
	drm_gem_object_unreference(&obj->base);
4901
	return ERR_PTR(ret);
4104 Serge 4902
}
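 
/* Illustrative sketch, kept out of the build: the intended use of the helper
 * above -- wrapping a firmware-style blob in a GEM object, as the GuC loader
 * does. Hypothetical example function; assumes dev->struct_mutex is held.
 */
#if 0
static struct drm_i915_gem_object *
example_wrap_blob(struct drm_device *dev, const void *blob, size_t size)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_from_data(dev, blob, size);
	if (IS_ERR_OR_NULL(obj))
		return NULL;

	/* The object now holds a coherent copy of the blob and can be
	 * pinned into the GGTT by the caller.
	 */
	return obj;
}
#endif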