Rev Author Line No. Line
2326 Serge 1
/*
6084 serge 2
 * Copyright © 2008-2015 Intel Corporation
2326 Serge 3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 * IN THE SOFTWARE.
22
 *
23
 * Authors:
24
 *    Eric Anholt 
25
 *
26
 */
27
 
3031 serge 28
#include 
4280 Serge 29
#include 
3031 serge 30
#include 
2326 Serge 31
#include "i915_drv.h"
6084 serge 32
#include "i915_vgpu.h"
2351 Serge 33
#include "i915_trace.h"
2326 Serge 34
#include "intel_drv.h"
3260 Serge 35
#include 
2330 Serge 36
#include 
2326 Serge 37
//#include 
3746 Serge 38
#include 
2326 Serge 39
#include 
6084 serge 40
#define RQ_BUG_ON(expr)
2326 Serge 41
 
2344 Serge 42
extern int x86_clflush_size;
6131 serge 43
#define __copy_to_user_inatomic __copy_to_user
2332 Serge 44
 
3263 Serge 45
#define PROT_READ       0x1             /* page can be read */
46
#define PROT_WRITE      0x2             /* page can be written */
47
#define MAP_SHARED      0x01            /* Share changes */
48
 
2344 Serge 49
 
5060 serge 50
 
3266 Serge 51
struct drm_i915_gem_object *get_fb_obj(void);
52
 
3263 Serge 53
unsigned long vm_mmap(struct file *file, unsigned long addr,
54
         unsigned long len, unsigned long prot,
55
         unsigned long flag, unsigned long offset);
56
 
2344 Serge 57
 
2332 Serge 58
#define MAX_ERRNO       4095
59
 
60
#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)
6131 serge 61
#define offset_in_page(p)       ((unsigned long)(p) & ~PAGE_MASK)
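/* The declarations and macros above are small KolibriOS stand-ins for Linux
 * kernel facilities (user-copy helpers, mman protection flags, vm_mmap(),
 * IS_ERR_VALUE() and offset_in_page()) that the ported GEM code below uses. */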
2332 Serge 62
 
63
static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj);
6084 serge 64
static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj);
5060 serge 65
static void
6084 serge 66
i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
67
static void
68
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
2326 Serge 69
 
4104 Serge 70
static bool cpu_cache_is_coherent(struct drm_device *dev,
71
				  enum i915_cache_level level)
72
{
73
	return HAS_LLC(dev) || level != I915_CACHE_NONE;
74
}
75
 
76
static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
77
{
78
	if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level))
79
		return true;
80
 
81
	return obj->pin_display;
82
}
83
 
2332 Serge 84
/* some bookkeeping */
85
static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv,
86
				  size_t size)
87
{
4104 Serge 88
	spin_lock(&dev_priv->mm.object_stat_lock);
2332 Serge 89
	dev_priv->mm.object_count++;
90
	dev_priv->mm.object_memory += size;
4104 Serge 91
	spin_unlock(&dev_priv->mm.object_stat_lock);
2332 Serge 92
}
93
 
94
static void i915_gem_info_remove_obj(struct drm_i915_private *dev_priv,
95
				     size_t size)
96
{
4104 Serge 97
	spin_lock(&dev_priv->mm.object_stat_lock);
2332 Serge 98
	dev_priv->mm.object_count--;
99
	dev_priv->mm.object_memory -= size;
4104 Serge 100
	spin_unlock(&dev_priv->mm.object_stat_lock);
2332 Serge 101
}
102
 
103
static int
3480 Serge 104
i915_gem_wait_for_error(struct i915_gpu_error *error)
2332 Serge 105
{
106
	int ret;
107
 
3480 Serge 108
#define EXIT_COND (!i915_reset_in_progress(error))
109
	if (EXIT_COND)
2332 Serge 110
		return 0;
3255 Serge 111
#if 0
3031 serge 112
	/*
113
	 * Only wait 10 seconds for the gpu reset to complete to avoid hanging
114
	 * userspace. If it takes that long something really bad is going on and
115
	 * we should simply try to bail out and fail as gracefully as possible.
116
	 */
3480 Serge 117
	ret = wait_event_interruptible_timeout(error->reset_queue,
118
					       EXIT_COND,
119
					       10*HZ);
3031 serge 120
	if (ret == 0) {
121
		DRM_ERROR("Timed out waiting for the gpu reset to complete\n");
122
		return -EIO;
123
	} else if (ret < 0) {
2332 Serge 124
		return ret;
3031 serge 125
	}
2332 Serge 126
 
3255 Serge 127
#endif
3480 Serge 128
#undef EXIT_COND
3255 Serge 129
 
2332 Serge 130
	return 0;
131
}
132
 
133
int i915_mutex_lock_interruptible(struct drm_device *dev)
134
{
3480 Serge 135
	struct drm_i915_private *dev_priv = dev->dev_private;
2332 Serge 136
	int ret;
137
 
3480 Serge 138
	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
2332 Serge 139
	if (ret)
140
		return ret;
141
 
3480 Serge 142
	ret = mutex_lock_interruptible(&dev->struct_mutex);
143
	if (ret)
144
		return ret;
2332 Serge 145
 
146
	WARN_ON(i915_verify_lists(dev));
147
	return 0;
148
}
149
 
150
int
151
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
152
			    struct drm_file *file)
153
{
154
	struct drm_i915_private *dev_priv = dev->dev_private;
155
	struct drm_i915_gem_get_aperture *args = data;
6084 serge 156
	struct i915_gtt *ggtt = &dev_priv->gtt;
157
	struct i915_vma *vma;
2332 Serge 158
	size_t pinned;
159
 
160
	pinned = 0;
161
	mutex_lock(&dev->struct_mutex);
6084 serge 162
	list_for_each_entry(vma, &ggtt->base.active_list, mm_list)
163
		if (vma->pin_count)
164
			pinned += vma->node.size;
165
	list_for_each_entry(vma, &ggtt->base.inactive_list, mm_list)
166
		if (vma->pin_count)
167
			pinned += vma->node.size;
2332 Serge 168
	mutex_unlock(&dev->struct_mutex);
169
 
4104 Serge 170
	args->aper_size = dev_priv->gtt.base.total;
2342 Serge 171
	args->aper_available_size = args->aper_size - pinned;
2332 Serge 172
 
173
	return 0;
174
}
175
 
3480 Serge 176
void *i915_gem_object_alloc(struct drm_device *dev)
177
{
178
	struct drm_i915_private *dev_priv = dev->dev_private;
5367 serge 179
    return kzalloc(sizeof(struct drm_i915_gem_object), 0);
3480 Serge 180
}
181
 
182
void i915_gem_object_free(struct drm_i915_gem_object *obj)
183
{
184
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
185
	kfree(obj);
186
}
187
 
3031 serge 188
static int
189
i915_gem_create(struct drm_file *file,
2332 Serge 190
		struct drm_device *dev,
191
		uint64_t size,
192
		uint32_t *handle_p)
193
{
194
	struct drm_i915_gem_object *obj;
195
	int ret;
196
	u32 handle;
197
 
198
	size = roundup(size, PAGE_SIZE);
2342 Serge 199
	if (size == 0)
200
		return -EINVAL;
2332 Serge 201
 
202
	/* Allocate the new object */
203
	obj = i915_gem_alloc_object(dev, size);
204
	if (obj == NULL)
205
		return -ENOMEM;
206
 
207
	ret = drm_gem_handle_create(file, &obj->base, &handle);
4104 Serge 208
	/* drop reference from allocate - handle holds it now */
209
	drm_gem_object_unreference_unlocked(&obj->base);
210
	if (ret)
2332 Serge 211
		return ret;
212
 
213
	*handle_p = handle;
214
	return 0;
215
}
216
 
217
int
218
i915_gem_dumb_create(struct drm_file *file,
219
		     struct drm_device *dev,
220
		     struct drm_mode_create_dumb *args)
221
{
222
	/* have to work out size/pitch and return them */
4560 Serge 223
	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
2332 Serge 224
	args->size = args->pitch * args->height;
225
	return i915_gem_create(file, dev,
226
			       args->size, &args->handle);
227
}
228
 
2326 Serge 229
/**
2332 Serge 230
 * Creates a new mm object and returns a handle to it.
231
 */
232
int
233
i915_gem_create_ioctl(struct drm_device *dev, void *data,
234
		      struct drm_file *file)
235
{
236
	struct drm_i915_gem_create *args = data;
3031 serge 237
 
2332 Serge 238
	return i915_gem_create(file, dev,
239
			       args->size, &args->handle);
240
}
241
 
3031 serge 242
static inline int
243
__copy_to_user_swizzled(char __user *cpu_vaddr,
244
			const char *gpu_vaddr, int gpu_offset,
6084 serge 245
			int length)
2332 Serge 246
{
3031 serge 247
	int ret, cpu_offset = 0;
2332 Serge 248
 
3031 serge 249
	while (length > 0) {
250
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
251
		int this_length = min(cacheline_end - gpu_offset, length);
252
		int swizzled_gpu_offset = gpu_offset ^ 64;
2332 Serge 253
 
3031 serge 254
		ret = __copy_to_user(cpu_vaddr + cpu_offset,
255
				     gpu_vaddr + swizzled_gpu_offset,
256
				     this_length);
257
		if (ret)
258
			return ret + length;
2332 Serge 259
 
3031 serge 260
		cpu_offset += this_length;
261
		gpu_offset += this_length;
262
		length -= this_length;
263
	}
264
 
265
	return 0;
2332 Serge 266
}
267
 
3031 serge 268
static inline int
269
__copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset,
270
			  const char __user *cpu_vaddr,
271
			  int length)
2332 Serge 272
{
3031 serge 273
	int ret, cpu_offset = 0;
2332 Serge 274
 
275
	while (length > 0) {
276
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
277
		int this_length = min(cacheline_end - gpu_offset, length);
278
		int swizzled_gpu_offset = gpu_offset ^ 64;
279
 
3031 serge 280
		ret = __copy_from_user(gpu_vaddr + swizzled_gpu_offset,
6084 serge 281
				       cpu_vaddr + cpu_offset,
282
				       this_length);
3031 serge 283
		if (ret)
284
			return ret + length;
285
 
2332 Serge 286
		cpu_offset += this_length;
287
		gpu_offset += this_length;
288
		length -= this_length;
289
	}
290
 
3031 serge 291
	return 0;
2332 Serge 292
}
293
 
6131 serge 294
/*
295
 * Pins the specified object's pages and synchronizes the object with
296
 * GPU accesses. Sets needs_clflush to non-zero if the caller should
297
 * flush the object from the CPU cache.
298
 */
299
int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
300
				    int *needs_clflush)
301
{
302
	int ret;
303
 
304
	*needs_clflush = 0;
305
 
306
	if (!obj->base.filp)
307
		return -EINVAL;
308
 
309
	if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) {
310
		/* If we're not in the cpu read domain, set ourself into the gtt
311
		 * read domain and manually flush cachelines (if required). This
312
		 * optimizes for the case when the gpu will dirty the data
313
		 * anyway again before the next pread happens. */
314
		*needs_clflush = !cpu_cache_is_coherent(obj->base.dev,
315
							obj->cache_level);
316
		ret = i915_gem_object_wait_rendering(obj, true);
317
		if (ret)
318
			return ret;
319
	}
320
 
321
	ret = i915_gem_object_get_pages(obj);
322
	if (ret)
323
		return ret;
324
 
325
	i915_gem_object_pin_pages(obj);
326
 
327
	return ret;
328
}
329
 
3031 serge 330
/* Per-page copy function for the shmem pread fastpath.
331
 * Flushes invalid cachelines before reading the target if
332
 * needs_clflush is set. */
2332 Serge 333
static int
3031 serge 334
shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length,
335
		 char __user *user_data,
336
		 bool page_do_bit17_swizzling, bool needs_clflush)
337
{
6084 serge 338
	char *vaddr;
339
	int ret;
3031 serge 340
 
341
	if (unlikely(page_do_bit17_swizzling))
342
		return -EINVAL;
343
 
6084 serge 344
	vaddr = kmap_atomic(page);
3031 serge 345
	if (needs_clflush)
346
		drm_clflush_virt_range(vaddr + shmem_page_offset,
347
				       page_length);
6084 serge 348
	ret = __copy_to_user_inatomic(user_data,
3031 serge 349
				      vaddr + shmem_page_offset,
6084 serge 350
				      page_length);
351
	kunmap_atomic(vaddr);
3031 serge 352
 
353
	return ret ? -EFAULT : 0;
354
}
355
 
356
static void
357
shmem_clflush_swizzled_range(char *addr, unsigned long length,
358
			     bool swizzled)
359
{
360
	if (unlikely(swizzled)) {
361
		unsigned long start = (unsigned long) addr;
362
		unsigned long end = (unsigned long) addr + length;
363
 
364
		/* For swizzling simply ensure that we always flush both
365
		 * channels. Lame, but simple and it works. Swizzled
366
		 * pwrite/pread is far from a hotpath - current userspace
367
		 * doesn't use it at all. */
368
		start = round_down(start, 128);
369
		end = round_up(end, 128);
370
 
371
		drm_clflush_virt_range((void *)start, end - start);
372
	} else {
373
		drm_clflush_virt_range(addr, length);
374
	}
375
 
376
}
377
 
378
/* Only difference to the fast-path function is that this can handle bit17
379
 * and uses non-atomic copy and kmap functions. */
380
static int
381
shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
382
		 char __user *user_data,
383
		 bool page_do_bit17_swizzling, bool needs_clflush)
384
{
385
	char *vaddr;
386
	int ret;
387
 
388
	vaddr = kmap(page);
389
	if (needs_clflush)
390
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
391
					     page_length,
392
					     page_do_bit17_swizzling);
393
 
394
	if (page_do_bit17_swizzling)
395
		ret = __copy_to_user_swizzled(user_data,
396
					      vaddr, shmem_page_offset,
397
					      page_length);
398
	else
399
		ret = __copy_to_user(user_data,
400
				     vaddr + shmem_page_offset,
401
				     page_length);
402
	kunmap(page);
403
 
404
	return ret ? -EFAULT : 0;
405
}
406
 
407
static int
408
i915_gem_shmem_pread(struct drm_device *dev,
6084 serge 409
		     struct drm_i915_gem_object *obj,
410
		     struct drm_i915_gem_pread *args,
411
		     struct drm_file *file)
2332 Serge 412
{
3031 serge 413
	char __user *user_data;
2332 Serge 414
	ssize_t remain;
415
	loff_t offset;
3031 serge 416
	int shmem_page_offset, page_length, ret = 0;
417
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
418
	int prefaulted = 0;
419
	int needs_clflush = 0;
3746 Serge 420
	struct sg_page_iter sg_iter;
2332 Serge 421
 
3746 Serge 422
	user_data = to_user_ptr(args->data_ptr);
2332 Serge 423
	remain = args->size;
424
 
3031 serge 425
	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
426
 
5060 serge 427
	ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
3031 serge 428
	if (ret)
429
		return ret;
430
 
2332 Serge 431
	offset = args->offset;
432
 
3746 Serge 433
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
434
			 offset >> PAGE_SHIFT) {
435
		struct page *page = sg_page_iter_page(&sg_iter);
2332 Serge 436
 
3031 serge 437
		if (remain <= 0)
438
			break;
439
 
2332 Serge 440
		/* Operation in this page
441
		 *
3031 serge 442
		 * shmem_page_offset = offset within page in shmem file
2332 Serge 443
		 * page_length = bytes to copy for this page
444
		 */
3031 serge 445
		shmem_page_offset = offset_in_page(offset);
2332 Serge 446
		page_length = remain;
3031 serge 447
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
448
			page_length = PAGE_SIZE - shmem_page_offset;
2332 Serge 449
 
3031 serge 450
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
451
			(page_to_phys(page) & (1 << 17)) != 0;
2332 Serge 452
 
3031 serge 453
		ret = shmem_pread_fast(page, shmem_page_offset, page_length,
454
				       user_data, page_do_bit17_swizzling,
455
				       needs_clflush);
456
		if (ret == 0)
457
			goto next_page;
2332 Serge 458
 
3031 serge 459
		mutex_unlock(&dev->struct_mutex);
460
 
461
		ret = shmem_pread_slow(page, shmem_page_offset, page_length,
462
				       user_data, page_do_bit17_swizzling,
463
				       needs_clflush);
464
 
465
		mutex_lock(&dev->struct_mutex);
466
 
2332 Serge 467
		if (ret)
3031 serge 468
			goto out;
2332 Serge 469
 
5060 serge 470
next_page:
2332 Serge 471
		remain -= page_length;
472
		user_data += page_length;
473
		offset += page_length;
474
	}
475
 
3031 serge 476
out:
477
	i915_gem_object_unpin_pages(obj);
478
 
479
	return ret;
2332 Serge 480
}
481
 
482
/**
3031 serge 483
 * Reads data from the object referenced by handle.
484
 *
485
 * On error, the contents of *data are undefined.
2332 Serge 486
 */
3031 serge 487
int
488
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
489
		     struct drm_file *file)
490
{
491
	struct drm_i915_gem_pread *args = data;
492
	struct drm_i915_gem_object *obj;
493
	int ret = 0;
494
 
495
	if (args->size == 0)
496
		return 0;
497
 
498
	ret = i915_mutex_lock_interruptible(dev);
499
	if (ret)
500
		return ret;
501
 
502
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
503
	if (&obj->base == NULL) {
504
		ret = -ENOENT;
505
		goto unlock;
506
	}
507
 
508
	/* Bounds check source.  */
509
	if (args->offset > obj->base.size ||
510
	    args->size > obj->base.size - args->offset) {
511
		ret = -EINVAL;
512
		goto out;
513
	}
514
 
515
	/* prime objects have no backing filp to GEM pread/pwrite
516
	 * pages from.
517
	 */
518
	if (!obj->base.filp) {
519
		ret = -EINVAL;
520
		goto out;
521
	}
522
 
523
	trace_i915_gem_object_pread(obj, args->offset, args->size);
524
 
525
	ret = i915_gem_shmem_pread(dev, obj, args, file);
526
 
527
out:
528
	drm_gem_object_unreference(&obj->base);
529
unlock:
530
	mutex_unlock(&dev->struct_mutex);
531
	return ret;
532
}
533
 
534
/* This is the fast write path which cannot handle
535
 * page faults in the source data
536
 */
537
 
538
 
539
/**
540
 * This is the fast pwrite path, where we copy the data directly from the
541
 * user into the GTT, uncached.
542
 */
2332 Serge 543
static int
3031 serge 544
i915_gem_gtt_pwrite_fast(struct drm_device *dev,
545
			 struct drm_i915_gem_object *obj,
546
			 struct drm_i915_gem_pwrite *args,
547
			 struct drm_file *file)
2332 Serge 548
{
5060 serge 549
	struct drm_i915_private *dev_priv = dev->dev_private;
2332 Serge 550
	ssize_t remain;
3031 serge 551
	loff_t offset, page_base;
552
	char __user *user_data;
553
	int page_offset, page_length, ret;
2332 Serge 554
 
5060 serge 555
	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
3031 serge 556
	if (ret)
557
		goto out;
558
 
559
	ret = i915_gem_object_set_to_gtt_domain(obj, true);
560
	if (ret)
561
		goto out_unpin;
562
 
563
	ret = i915_gem_object_put_fence(obj);
564
	if (ret)
565
		goto out_unpin;
566
 
4539 Serge 567
	user_data = to_user_ptr(args->data_ptr);
2332 Serge 568
	remain = args->size;
569
 
4104 Serge 570
	offset = i915_gem_obj_ggtt_offset(obj) + args->offset;
2332 Serge 571
 
6084 serge 572
	intel_fb_obj_invalidate(obj, ORIGIN_GTT);
573
 
3031 serge 574
	while (remain > 0) {
575
		/* Operation in this page
576
		 *
577
		 * page_base = page offset within aperture
578
		 * page_offset = offset within page
579
		 * page_length = bytes to copy for this page
580
		 */
581
		page_base = offset & PAGE_MASK;
582
		page_offset = offset_in_page(offset);
583
		page_length = remain;
584
		if ((page_offset + remain) > PAGE_SIZE)
585
			page_length = PAGE_SIZE - page_offset;
2332 Serge 586
 
6131 serge 587
		MapPage(dev_priv->gtt.mappable,
588
				dev_priv->gtt.mappable_base+page_base, PG_WRITEC|PG_SW);
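		/* KolibriOS port: the target aperture page is remapped into the
		 * driver's GTT window with MapPage() above and then filled with a
		 * plain memcpy() below, in place of the upstream io_mapping /
		 * fast user-copy path. */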
3031 serge 589
 
6131 serge 590
		memcpy((char*)dev_priv->gtt.mappable+page_offset, user_data, page_length);
3260 Serge 591
 
3031 serge 592
		remain -= page_length;
593
		user_data += page_length;
594
		offset += page_length;
2332 Serge 595
	}
596
 
6084 serge 597
out_flush:
598
	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
3031 serge 599
out_unpin:
5060 serge 600
	i915_gem_object_ggtt_unpin(obj);
3031 serge 601
out:
6084 serge 602
	return ret;
3031 serge 603
}
604
 
605
/* Per-page copy function for the shmem pwrite fastpath.
606
 * Flushes invalid cachelines before writing to the target if
607
 * needs_clflush_before is set and flushes out any written cachelines after
608
 * writing if needs_clflush is set. */
609
static int
610
shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
611
		  char __user *user_data,
612
		  bool page_do_bit17_swizzling,
613
		  bool needs_clflush_before,
614
		  bool needs_clflush_after)
615
{
616
	char *vaddr;
5354 serge 617
	int ret = 0; /* the memcpy path below cannot fail */
3031 serge 618
 
619
	if (unlikely(page_do_bit17_swizzling))
620
		return -EINVAL;
621
 
5354 serge 622
	vaddr = kmap_atomic(page);
3031 serge 623
	if (needs_clflush_before)
624
		drm_clflush_virt_range(vaddr + shmem_page_offset,
625
				       page_length);
3260 Serge 626
	memcpy(vaddr + shmem_page_offset,
3031 serge 627
						user_data,
628
						page_length);
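	/* KolibriOS port: a plain memcpy() stands in for the upstream
	 * __copy_from_user_inatomic() here, presumably because user buffers
	 * are directly addressable in this environment. */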
629
	if (needs_clflush_after)
630
		drm_clflush_virt_range(vaddr + shmem_page_offset,
631
				       page_length);
5354 serge 632
	kunmap_atomic(vaddr);
3031 serge 633
 
634
	return ret ? -EFAULT : 0;
635
}
3260 Serge 636
#if 0
3031 serge 637
 
638
/* Only difference to the fast-path function is that this can handle bit17
639
 * and uses non-atomic copy and kmap functions. */
640
static int
641
shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
642
		  char __user *user_data,
643
		  bool page_do_bit17_swizzling,
644
		  bool needs_clflush_before,
645
		  bool needs_clflush_after)
646
{
647
	char *vaddr;
648
	int ret;
649
 
650
	vaddr = kmap(page);
651
	if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
652
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
653
					     page_length,
654
					     page_do_bit17_swizzling);
655
	if (page_do_bit17_swizzling)
656
		ret = __copy_from_user_swizzled(vaddr, shmem_page_offset,
657
						user_data,
658
						page_length);
659
	else
660
		ret = __copy_from_user(vaddr + shmem_page_offset,
661
				       user_data,
662
				       page_length);
663
	if (needs_clflush_after)
664
		shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
665
					     page_length,
666
					     page_do_bit17_swizzling);
667
	kunmap(page);
668
 
669
	return ret ? -EFAULT : 0;
670
}
3260 Serge 671
#endif
3031 serge 672
 
3260 Serge 673
 
3031 serge 674
static int
675
i915_gem_shmem_pwrite(struct drm_device *dev,
676
		      struct drm_i915_gem_object *obj,
677
		      struct drm_i915_gem_pwrite *args,
678
		      struct drm_file *file)
679
{
680
	ssize_t remain;
681
	loff_t offset;
682
	char __user *user_data;
683
	int shmem_page_offset, page_length, ret = 0;
684
	int obj_do_bit17_swizzling, page_do_bit17_swizzling;
685
	int hit_slowpath = 0;
686
	int needs_clflush_after = 0;
687
	int needs_clflush_before = 0;
3746 Serge 688
	struct sg_page_iter sg_iter;
3031 serge 689
 
3746 Serge 690
	user_data = to_user_ptr(args->data_ptr);
3031 serge 691
	remain = args->size;
692
 
693
	obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
694
 
695
	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
696
		/* If we're not in the cpu write domain, set ourself into the gtt
697
		 * write domain and manually flush cachelines (if required). This
698
		 * optimizes for the case when the gpu will use the data
699
		 * right away and we therefore have to clflush anyway. */
4104 Serge 700
		needs_clflush_after = cpu_write_needs_clflush(obj);
4560 Serge 701
		ret = i915_gem_object_wait_rendering(obj, false);
6084 serge 702
		if (ret)
703
			return ret;
704
	}
4104 Serge 705
	/* Same trick applies to invalidate partially written cachelines read
706
	 * before writing. */
707
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0)
708
		needs_clflush_before =
709
			!cpu_cache_is_coherent(dev, obj->cache_level);
3031 serge 710
 
711
	ret = i915_gem_object_get_pages(obj);
2332 Serge 712
	if (ret)
3031 serge 713
		return ret;
2332 Serge 714
 
6084 serge 715
	intel_fb_obj_invalidate(obj, ORIGIN_CPU);
716
 
3031 serge 717
	i915_gem_object_pin_pages(obj);
2332 Serge 718
 
719
	offset = args->offset;
3031 serge 720
	obj->dirty = 1;
2332 Serge 721
 
3746 Serge 722
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents,
723
			 offset >> PAGE_SHIFT) {
724
		struct page *page = sg_page_iter_page(&sg_iter);
3031 serge 725
		int partial_cacheline_write;
2332 Serge 726
 
3031 serge 727
		if (remain <= 0)
728
			break;
729
 
2332 Serge 730
		/* Operation in this page
731
		 *
732
		 * shmem_page_offset = offset within page in shmem file
733
		 * page_length = bytes to copy for this page
734
		 */
735
		shmem_page_offset = offset_in_page(offset);
736
 
737
		page_length = remain;
738
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
739
			page_length = PAGE_SIZE - shmem_page_offset;
740
 
3031 serge 741
		/* If we don't overwrite a cacheline completely we need to be
742
		 * careful to have up-to-date data by first clflushing. Don't
743
		 * overcomplicate things and flush the entire patch. */
744
		partial_cacheline_write = needs_clflush_before &&
745
			((shmem_page_offset | page_length)
3260 Serge 746
				& (x86_clflush_size - 1));
2332 Serge 747
 
3031 serge 748
		page_do_bit17_swizzling = obj_do_bit17_swizzling &&
749
			(page_to_phys(page) & (1 << 17)) != 0;
2332 Serge 750
 
3031 serge 751
		ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
752
					user_data, page_do_bit17_swizzling,
753
					partial_cacheline_write,
754
					needs_clflush_after);
755
		if (ret == 0)
756
			goto next_page;
757
 
758
		hit_slowpath = 1;
759
		mutex_unlock(&dev->struct_mutex);
3260 Serge 760
		dbgprintf("%s need shmem_pwrite_slow\n",__FUNCTION__);
3031 serge 761
 
3260 Serge 762
//		ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
763
//					user_data, page_do_bit17_swizzling,
764
//					partial_cacheline_write,
765
//					needs_clflush_after);
766
 
3031 serge 767
		mutex_lock(&dev->struct_mutex);
768
 
769
		if (ret)
770
			goto out;
771
 
5354 serge 772
next_page:
2332 Serge 773
		remain -= page_length;
3031 serge 774
		user_data += page_length;
2332 Serge 775
		offset += page_length;
776
	}
777
 
778
out:
3031 serge 779
	i915_gem_object_unpin_pages(obj);
780
 
781
	if (hit_slowpath) {
3480 Serge 782
		/*
783
		 * Fixup: Flush cpu caches in case we didn't flush the dirty
784
		 * cachelines in-line while writing and the object moved
785
		 * out of the cpu write domain while we've dropped the lock.
786
		 */
787
		if (!needs_clflush_after &&
788
		    obj->base.write_domain != I915_GEM_DOMAIN_CPU) {
4104 Serge 789
			if (i915_gem_clflush_object(obj, obj->pin_display))
6084 serge 790
				needs_clflush_after = true;
3031 serge 791
		}
2332 Serge 792
	}
793
 
3031 serge 794
	if (needs_clflush_after)
3243 Serge 795
		i915_gem_chipset_flush(dev);
6084 serge 796
	else
797
		obj->cache_dirty = true;
3031 serge 798
 
6084 serge 799
	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
2332 Serge 800
	return ret;
801
}
3031 serge 802
 
803
/**
804
 * Writes data to the object referenced by handle.
805
 *
806
 * On error, the contents of the buffer that were to be modified are undefined.
807
 */
808
int
809
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
810
		      struct drm_file *file)
811
{
6084 serge 812
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 813
	struct drm_i915_gem_pwrite *args = data;
814
	struct drm_i915_gem_object *obj;
815
	int ret;
816
 
4104 Serge 817
	if (args->size == 0)
818
		return 0;
819
 
6084 serge 820
	intel_runtime_pm_get(dev_priv);
3480 Serge 821
 
3031 serge 822
	ret = i915_mutex_lock_interruptible(dev);
823
	if (ret)
6084 serge 824
		goto put_rpm;
3031 serge 825
 
826
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
827
	if (&obj->base == NULL) {
828
		ret = -ENOENT;
829
		goto unlock;
830
	}
831
 
832
	/* Bounds check destination. */
833
	if (args->offset > obj->base.size ||
834
	    args->size > obj->base.size - args->offset) {
835
		ret = -EINVAL;
836
		goto out;
837
	}
838
 
839
	/* prime objects have no backing filp to GEM pread/pwrite
840
	 * pages from.
841
	 */
842
	if (!obj->base.filp) {
843
		ret = -EINVAL;
844
		goto out;
845
	}
846
 
847
	trace_i915_gem_object_pwrite(obj, args->offset, args->size);
848
 
849
	ret = -EFAULT;
850
	/* We can only do the GTT pwrite on untiled buffers, as otherwise
851
	 * it would end up going through the fenced access, and we'll get
852
	 * different detiling behavior between reading and writing.
853
	 * pread/pwrite currently are reading and writing from the CPU
854
	 * perspective, requiring manual detiling by the client.
855
	 */
4104 Serge 856
	if (obj->tiling_mode == I915_TILING_NONE &&
857
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
858
	    cpu_write_needs_clflush(obj)) {
3031 serge 859
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file);
860
		/* Note that the gtt paths might fail with non-page-backed user
861
		 * pointers (e.g. gtt mappings when moving data between
862
		 * textures). Fallback to the shmem path in that case. */
863
	}
864
 
865
	if (ret == -EFAULT || ret == -ENOSPC)
6084 serge 866
			ret = i915_gem_shmem_pwrite(dev, obj, args, file);
3031 serge 867
 
868
out:
869
	drm_gem_object_unreference(&obj->base);
870
unlock:
871
	mutex_unlock(&dev->struct_mutex);
6084 serge 872
put_rpm:
873
	intel_runtime_pm_put(dev_priv);
874
 
3031 serge 875
	return ret;
876
}
877
 
878
int
3480 Serge 879
i915_gem_check_wedge(struct i915_gpu_error *error,
3031 serge 880
		     bool interruptible)
881
{
3480 Serge 882
	if (i915_reset_in_progress(error)) {
3031 serge 883
		/* Non-interruptible callers can't handle -EAGAIN, hence return
884
		 * -EIO unconditionally for these. */
885
		if (!interruptible)
886
			return -EIO;
2332 Serge 887
 
3480 Serge 888
		/* Recovery complete, but the reset failed ... */
889
		if (i915_terminally_wedged(error))
3031 serge 890
			return -EIO;
2332 Serge 891
 
6084 serge 892
		/*
893
		 * Check if GPU Reset is in progress - we need intel_ring_begin
894
		 * to work properly to reinit the hw state while the gpu is
895
		 * still marked as reset-in-progress. Handle this with a flag.
896
		 */
897
		if (!error->reload_in_reset)
898
			return -EAGAIN;
3031 serge 899
	}
2332 Serge 900
 
3031 serge 901
	return 0;
902
}
2332 Serge 903
 
4560 Serge 904
static void fake_irq(unsigned long data)
905
{
906
//	wake_up_process((struct task_struct *)data);
907
}
908
 
909
static bool missed_irq(struct drm_i915_private *dev_priv,
5060 serge 910
		       struct intel_engine_cs *ring)
4560 Serge 911
{
912
	return test_bit(ring->id, &dev_priv->gpu_error.missed_irq_rings);
913
}
914
 
6084 serge 915
static unsigned long local_clock_us(unsigned *cpu)
4560 Serge 916
{
6084 serge 917
	unsigned long t;
918
 
919
	/* Cheaply and approximately convert from nanoseconds to microseconds.
920
	 * The result and subsequent calculations are also defined in the same
921
	 * approximate microseconds units. The principal source of timing
922
	 * error here is from the simple truncation.
923
	 *
924
	 * Note that local_clock() is only defined w.r.t. the current CPU;
925
	 * the comparisons are no longer valid if we switch CPUs. Instead of
926
	 * blocking preemption for the entire busywait, we can detect the CPU
927
	 * switch and use that as indicator of system load and a reason to
928
	 * stop busywaiting, see busywait_stop().
929
	 */
930
	t = GetClockNs() >> 10;
931
 
932
	return t;
933
}
934
 
935
static bool busywait_stop(unsigned long timeout, unsigned cpu)
936
{
937
	unsigned this_cpu = 0;
938
 
939
	if (time_after(local_clock_us(&this_cpu), timeout))
4560 Serge 940
		return true;
941
 
6084 serge 942
	return this_cpu != cpu;
4560 Serge 943
}
944
 
6084 serge 945
static int __i915_spin_request(struct drm_i915_gem_request *req, int state)
946
{
947
	unsigned long timeout;
948
	unsigned cpu = 0; /* KolibriOS port: local_clock_us() does not identify the CPU */
949
 
950
	/* When waiting for high frequency requests, e.g. during synchronous
951
	 * rendering split between the CPU and GPU, the finite amount of time
952
	 * required to set up the irq and wait upon it limits the response
953
	 * rate. By busywaiting on the request completion for a short while we
954
	 * can service the high frequency waits as quick as possible. However,
955
	 * if it is a slow request, we want to sleep as quickly as possible.
956
	 * The tradeoff between waiting and sleeping is roughly the time it
957
	 * takes to sleep on a request, on the order of a microsecond.
958
	 */
959
 
960
	if (req->ring->irq_refcount)
961
		return -EBUSY;
962
 
963
	/* Only spin if we know the GPU is processing this request */
964
	if (!i915_gem_request_started(req, true))
965
		return -EAGAIN;
966
 
967
	timeout = local_clock_us(&cpu) + 5;
968
	while (1 /*!need_resched()*/) {
969
		if (i915_gem_request_completed(req, true))
970
			return 0;
971
 
972
		if (busywait_stop(timeout, cpu))
973
			break;
974
 
975
		cpu_relax_lowlatency();
976
	}
977
 
978
	if (i915_gem_request_completed(req, false))
979
		return 0;
980
 
981
	return -EAGAIN;
982
}
983
 
3031 serge 984
/**
6084 serge 985
 * __i915_wait_request - wait until execution of request has finished
986
 * @req: duh!
987
 * @reset_counter: reset sequence associated with the given request
3031 serge 988
 * @interruptible: do an interruptible wait (normally yes)
989
 * @timeout: in - how long to wait (NULL forever); out - how much time remaining
990
 *
3480 Serge 991
 * Note: It is of utmost importance that the passed in seqno and reset_counter
992
 * values have been read by the caller in an smp safe manner. Where read-side
993
 * locks are involved, it is sufficient to read the reset_counter before
994
 * unlocking the lock that protects the seqno. For lockless tricks, the
995
 * reset_counter _must_ be read before, and an appropriate smp_rmb must be
996
 * inserted.
997
 *
6084 serge 998
 * Returns 0 if the request was found within the allotted time. Else returns the
3031 serge 999
 * errno with remaining time filled in timeout argument.
1000
 */
6084 serge 1001
int __i915_wait_request(struct drm_i915_gem_request *req,
3480 Serge 1002
			unsigned reset_counter,
4560 Serge 1003
			bool interruptible,
5060 serge 1004
			s64 *timeout,
6084 serge 1005
			struct intel_rps_client *rps)
3031 serge 1006
{
6084 serge 1007
	struct intel_engine_cs *ring = i915_gem_request_get_ring(req);
5060 serge 1008
	struct drm_device *dev = ring->dev;
1009
	struct drm_i915_private *dev_priv = dev->dev_private;
4560 Serge 1010
	const bool irq_test_in_progress =
1011
		ACCESS_ONCE(dev_priv->gpu_error.test_irq_rings) & intel_ring_flag(ring);
6084 serge 1012
	int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
6088 serge 1013
	wait_queue_t wait;
5060 serge 1014
	unsigned long timeout_expire;
1015
	s64 before, now;
3031 serge 1016
	int ret;
2332 Serge 1017
 
5060 serge 1018
	WARN(!intel_irqs_enabled(dev_priv), "IRQs disabled");
4104 Serge 1019
 
6084 serge 1020
	if (list_empty(&req->list))
3031 serge 1021
		return 0;
2332 Serge 1022
 
6084 serge 1023
	if (i915_gem_request_completed(req, true))
1024
		return 0;
2332 Serge 1025
 
6084 serge 1026
	timeout_expire = 0;
1027
	if (timeout) {
1028
		if (WARN_ON(*timeout < 0))
1029
			return -EINVAL;
1030
 
1031
		if (*timeout == 0)
1032
			return -ETIME;
1033
 
1034
		timeout_expire = jiffies + nsecs_to_jiffies_timeout(*timeout);
3031 serge 1035
	}
2332 Serge 1036
 
6084 serge 1037
	if (INTEL_INFO(dev_priv)->gen >= 6)
1038
		gen6_rps_boost(dev_priv, rps, req->emitted_jiffies);
2332 Serge 1039
 
6084 serge 1040
	/* Record current time in case interrupted by signal, or wedged */
1041
	trace_i915_gem_request_wait_begin(req);
1042
	before = ktime_get_raw_ns();
1043
 
1044
	/* Optimistic spin for the next jiffie before touching IRQs */
1045
	ret = __i915_spin_request(req, state);
1046
	if (ret == 0)
1047
		goto out;
1048
 
1049
	if (!irq_test_in_progress && WARN_ON(!ring->irq_get(ring))) {
1050
		ret = -ENODEV;
1051
		goto out;
1052
	}
1053
 
6088 serge 1054
	INIT_LIST_HEAD(&wait.task_list);
1055
	wait.evnt = CreateEvent(NULL, MANUAL_DESTROY);
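	/* KolibriOS port: the wait loop below parks the caller on a native
	 * kernel event (CreateEvent/WaitEventTimeout/DestroyEvent) hung off
	 * ring->irq_queue, instead of sleeping on a Linux wait queue as the
	 * upstream driver does. */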
2332 Serge 1056
 
4560 Serge 1057
	for (;;) {
6103 serge 1058
		unsigned long flags;
4560 Serge 1059
 
3480 Serge 1060
		/* We need to check whether any gpu reset happened in between
1061
		 * the caller grabbing the seqno and now ... */
4560 Serge 1062
		if (reset_counter != atomic_read(&dev_priv->gpu_error.reset_counter)) {
1063
			/* ... but upgrade the -EAGAIN to an -EIO if the gpu
1064
			 * is truly gone. */
1065
			ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
1066
			if (ret == 0)
1067
				ret = -EAGAIN;
1068
			break;
1069
		}
3480 Serge 1070
 
6084 serge 1071
		if (i915_gem_request_completed(req, false)) {
4560 Serge 1072
			ret = 0;
1073
			break;
1074
		}
2332 Serge 1075
 
6088 serge 1076
		if (timeout && time_after_eq(jiffies, timeout_expire)) {
4560 Serge 1077
			ret = -ETIME;
1078
			break;
1079
		}
2332 Serge 1080
 
4560 Serge 1081
        spin_lock_irqsave(&ring->irq_queue.lock, flags);
6088 serge 1082
        if (list_empty(&wait.task_list))
1083
            __add_wait_queue(&ring->irq_queue, &wait);
4560 Serge 1084
        spin_unlock_irqrestore(&ring->irq_queue.lock, flags);
1085
 
6088 serge 1086
            WaitEventTimeout(wait.evnt, 1);
4560 Serge 1087
 
6088 serge 1088
        if (!list_empty(&wait.task_list)) {
4560 Serge 1089
            spin_lock_irqsave(&ring->irq_queue.lock, flags);
6088 serge 1090
            list_del_init(&wait.task_list);
4560 Serge 1091
            spin_unlock_irqrestore(&ring->irq_queue.lock, flags);
1092
        }
1093
 
6088 serge 1094
	}
4560 Serge 1095
 
1096
	if (!irq_test_in_progress)
6084 serge 1097
		ring->irq_put(ring);
2332 Serge 1098
 
6088 serge 1099
    DestroyEvent(wait.evnt);
1100
 
6084 serge 1101
out:
1102
	now = ktime_get_raw_ns();
1103
	trace_i915_gem_request_wait_end(req);
1104
 
1105
	if (timeout) {
1106
		s64 tres = *timeout - (now - before);
1107
 
1108
		*timeout = tres < 0 ? 0 : tres;
1109
 
1110
		/*
1111
		 * Apparently ktime isn't accurate enough and occasionally has a
1112
		 * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
1113
		 * things up to make the test happy. We allow up to 1 jiffy.
1114
		 *
1115
		 * This is a regression from the timespec->ktime conversion.
1116
		 */
1117
		if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
1118
			*timeout = 0;
1119
	}
1120
 
4560 Serge 1121
	return ret;
3031 serge 1122
}
2332 Serge 1123
 
6084 serge 1124
int i915_gem_request_add_to_client(struct drm_i915_gem_request *req,
1125
				   struct drm_file *file)
1126
{
1127
	struct drm_i915_private *dev_private;
1128
	struct drm_i915_file_private *file_priv;
1129
 
1130
	WARN_ON(!req || !file || req->file_priv);
1131
 
1132
	if (!req || !file)
1133
		return -EINVAL;
1134
 
1135
	if (req->file_priv)
1136
		return -EINVAL;
1137
 
1138
	dev_private = req->ring->dev->dev_private;
1139
	file_priv = file->driver_priv;
1140
 
1141
	spin_lock(&file_priv->mm.lock);
1142
	req->file_priv = file_priv;
1143
	list_add_tail(&req->client_list, &file_priv->mm.request_list);
1144
	spin_unlock(&file_priv->mm.lock);
1145
 
1146
	req->pid = 1;
1147
 
1148
	return 0;
1149
}
1150
 
1151
static inline void
1152
i915_gem_request_remove_from_client(struct drm_i915_gem_request *request)
1153
{
1154
	struct drm_i915_file_private *file_priv = request->file_priv;
1155
 
1156
	if (!file_priv)
1157
		return;
1158
 
1159
	spin_lock(&file_priv->mm.lock);
1160
	list_del(&request->client_list);
1161
	request->file_priv = NULL;
1162
	spin_unlock(&file_priv->mm.lock);
1163
}
1164
 
1165
static void i915_gem_request_retire(struct drm_i915_gem_request *request)
1166
{
1167
	trace_i915_gem_request_retire(request);
1168
 
1169
	/* We know the GPU must have read the request to have
1170
	 * sent us the seqno + interrupt, so use the position
1171
	 * of tail of the request to update the last known position
1172
	 * of the GPU head.
1173
	 *
1174
	 * Note this requires that we are always called in request
1175
	 * completion order.
1176
	 */
1177
	request->ringbuf->last_retired_head = request->postfix;
1178
 
1179
	list_del_init(&request->list);
1180
	i915_gem_request_remove_from_client(request);
1181
 
1182
	i915_gem_request_unreference(request);
1183
}
1184
 
1185
static void
1186
__i915_gem_request_retire__upto(struct drm_i915_gem_request *req)
1187
{
1188
	struct intel_engine_cs *engine = req->ring;
1189
	struct drm_i915_gem_request *tmp;
1190
 
1191
 
1192
	if (list_empty(&req->list))
1193
		return;
1194
 
1195
	do {
1196
		tmp = list_first_entry(&engine->request_list,
1197
				       typeof(*tmp), list);
1198
 
1199
		i915_gem_request_retire(tmp);
1200
	} while (tmp != req);
1201
 
1202
	WARN_ON(i915_verify_lists(engine->dev));
1203
}
1204
 
3031 serge 1205
/**
6084 serge 1206
 * Waits for a request to be signaled, and cleans up the
3031 serge 1207
 * request and object lists appropriately for that event.
1208
 */
1209
int
6084 serge 1210
i915_wait_request(struct drm_i915_gem_request *req)
3031 serge 1211
{
6084 serge 1212
	struct drm_device *dev;
1213
	struct drm_i915_private *dev_priv;
1214
	bool interruptible;
3031 serge 1215
	int ret;
2332 Serge 1216
 
6084 serge 1217
	BUG_ON(req == NULL);
1218
 
1219
	dev = req->ring->dev;
1220
	dev_priv = dev->dev_private;
1221
	interruptible = dev_priv->mm.interruptible;
1222
 
3031 serge 1223
	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
2332 Serge 1224
 
3480 Serge 1225
	ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
3031 serge 1226
	if (ret)
1227
		return ret;
2332 Serge 1228
 
6084 serge 1229
	ret = __i915_wait_request(req,
1230
				  atomic_read(&dev_priv->gpu_error.reset_counter),
1231
				  interruptible, NULL, NULL);
3031 serge 1232
	if (ret)
1233
		return ret;
2332 Serge 1234
 
6084 serge 1235
	__i915_gem_request_retire__upto(req);
4104 Serge 1236
	return 0;
1237
}
1238
 
3031 serge 1239
/**
1240
 * Ensures that all rendering to the object has completed and the object is
1241
 * safe to unbind from the GTT or access from the CPU.
1242
 */
6084 serge 1243
int
3031 serge 1244
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
1245
			       bool readonly)
1246
{
6084 serge 1247
	int ret, i;
2332 Serge 1248
 
6084 serge 1249
	if (!obj->active)
3031 serge 1250
		return 0;
2332 Serge 1251
 
6084 serge 1252
	if (readonly) {
1253
		if (obj->last_write_req != NULL) {
1254
			ret = i915_wait_request(obj->last_write_req);
1255
			if (ret)
1256
				return ret;
2332 Serge 1257
 
6084 serge 1258
			i = obj->last_write_req->ring->id;
1259
			if (obj->last_read_req[i] == obj->last_write_req)
1260
				i915_gem_object_retire__read(obj, i);
1261
			else
1262
				i915_gem_object_retire__write(obj);
1263
		}
1264
	} else {
1265
		for (i = 0; i < I915_NUM_RINGS; i++) {
1266
			if (obj->last_read_req[i] == NULL)
1267
				continue;
1268
 
1269
			ret = i915_wait_request(obj->last_read_req[i]);
1270
			if (ret)
1271
				return ret;
1272
 
1273
			i915_gem_object_retire__read(obj, i);
1274
		}
1275
		RQ_BUG_ON(obj->active);
1276
	}
1277
 
1278
	return 0;
3031 serge 1279
}
2332 Serge 1280
 
6084 serge 1281
static void
1282
i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
1283
			       struct drm_i915_gem_request *req)
1284
{
1285
	int ring = req->ring->id;
1286
 
1287
	if (obj->last_read_req[ring] == req)
1288
		i915_gem_object_retire__read(obj, ring);
1289
	else if (obj->last_write_req == req)
1290
		i915_gem_object_retire__write(obj);
1291
 
1292
	__i915_gem_request_retire__upto(req);
1293
}
1294
 
3260 Serge 1295
/* A nonblocking variant of the above wait. This is a highly dangerous routine
1296
 * as the object state may change during this call.
1297
 */
1298
static __must_check int
1299
i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
6084 serge 1300
					    struct intel_rps_client *rps,
3260 Serge 1301
					    bool readonly)
1302
{
1303
	struct drm_device *dev = obj->base.dev;
1304
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 1305
	struct drm_i915_gem_request *requests[I915_NUM_RINGS];
3480 Serge 1306
	unsigned reset_counter;
6084 serge 1307
	int ret, i, n = 0;
2332 Serge 1308
 
3260 Serge 1309
	BUG_ON(!mutex_is_locked(&dev->struct_mutex));
1310
	BUG_ON(!dev_priv->mm.interruptible);
2332 Serge 1311
 
6084 serge 1312
	if (!obj->active)
3260 Serge 1313
		return 0;
2332 Serge 1314
 
3480 Serge 1315
	ret = i915_gem_check_wedge(&dev_priv->gpu_error, true);
3260 Serge 1316
	if (ret)
1317
		return ret;
2332 Serge 1318
 
6084 serge 1319
	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
2332 Serge 1320
 
6084 serge 1321
	if (readonly) {
1322
		struct drm_i915_gem_request *req;
1323
 
1324
		req = obj->last_write_req;
1325
		if (req == NULL)
1326
			return 0;
1327
 
1328
		requests[n++] = i915_gem_request_reference(req);
1329
	} else {
1330
		for (i = 0; i < I915_NUM_RINGS; i++) {
1331
			struct drm_i915_gem_request *req;
1332
 
1333
			req = obj->last_read_req[i];
1334
			if (req == NULL)
1335
				continue;
1336
 
1337
			requests[n++] = i915_gem_request_reference(req);
1338
		}
1339
	}
1340
 
3260 Serge 1341
	mutex_unlock(&dev->struct_mutex);
6084 serge 1342
	for (i = 0; ret == 0 && i < n; i++)
1343
		ret = __i915_wait_request(requests[i], reset_counter, true,
1344
					  NULL, rps);
3260 Serge 1345
	mutex_lock(&dev->struct_mutex);
2332 Serge 1346
 
6084 serge 1347
	for (i = 0; i < n; i++) {
1348
		if (ret == 0)
1349
			i915_gem_object_retire_request(obj, requests[i]);
1350
		i915_gem_request_unreference(requests[i]);
1351
	}
1352
 
1353
	return ret;
3260 Serge 1354
}
2332 Serge 1355
 
6084 serge 1356
static struct intel_rps_client *to_rps_client(struct drm_file *file)
1357
{
1358
	struct drm_i915_file_private *fpriv = file->driver_priv;
1359
	return &fpriv->rps;
1360
}
1361
 
3260 Serge 1362
/**
1363
 * Called when user space prepares to use an object with the CPU, either
1364
 * through the mmap ioctl's mapping or a GTT mapping.
1365
 */
1366
int
1367
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
1368
			  struct drm_file *file)
1369
{
1370
	struct drm_i915_gem_set_domain *args = data;
1371
	struct drm_i915_gem_object *obj;
1372
	uint32_t read_domains = args->read_domains;
1373
	uint32_t write_domain = args->write_domain;
1374
	int ret;
2332 Serge 1375
 
3260 Serge 1376
	/* Only handle setting domains to types used by the CPU. */
1377
	if (write_domain & I915_GEM_GPU_DOMAINS)
1378
		return -EINVAL;
2332 Serge 1379
 
3260 Serge 1380
	if (read_domains & I915_GEM_GPU_DOMAINS)
1381
		return -EINVAL;
2332 Serge 1382
 
3260 Serge 1383
	/* Having something in the write domain implies it's in the read
1384
	 * domain, and only that read domain.  Enforce that in the request.
1385
	 */
1386
	if (write_domain != 0 && read_domains != write_domain)
1387
		return -EINVAL;
2332 Serge 1388
 
3260 Serge 1389
	ret = i915_mutex_lock_interruptible(dev);
1390
	if (ret)
1391
		return ret;
2332 Serge 1392
 
3260 Serge 1393
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1394
	if (&obj->base == NULL) {
1395
		ret = -ENOENT;
1396
		goto unlock;
1397
	}
2332 Serge 1398
 
3260 Serge 1399
	/* Try to flush the object off the GPU without holding the lock.
1400
	 * We will repeat the flush holding the lock in the normal manner
1401
	 * to catch cases where we are gazumped.
1402
	 */
5060 serge 1403
	ret = i915_gem_object_wait_rendering__nonblocking(obj,
6084 serge 1404
							  to_rps_client(file),
5060 serge 1405
							  !write_domain);
3260 Serge 1406
	if (ret)
1407
		goto unref;
2332 Serge 1408
 
6084 serge 1409
	if (read_domains & I915_GEM_DOMAIN_GTT)
3260 Serge 1410
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
6084 serge 1411
	else
3260 Serge 1412
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
2332 Serge 1413
 
6084 serge 1414
	if (write_domain != 0)
1415
		intel_fb_obj_invalidate(obj,
1416
					write_domain == I915_GEM_DOMAIN_GTT ?
1417
					ORIGIN_GTT : ORIGIN_CPU);
1418
 
3260 Serge 1419
unref:
1420
	drm_gem_object_unreference(&obj->base);
1421
unlock:
1422
	mutex_unlock(&dev->struct_mutex);
1423
	return ret;
1424
}
2332 Serge 1425
 
4293 Serge 1426
/**
1427
 * Called when user space has done writes to this buffer
1428
 */
1429
int
1430
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1431
			 struct drm_file *file)
1432
{
1433
	struct drm_i915_gem_sw_finish *args = data;
1434
	struct drm_i915_gem_object *obj;
1435
	int ret = 0;
2332 Serge 1436
 
4293 Serge 1437
	ret = i915_mutex_lock_interruptible(dev);
1438
	if (ret)
1439
		return ret;
2332 Serge 1440
 
4293 Serge 1441
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
1442
	if (&obj->base == NULL) {
1443
		ret = -ENOENT;
1444
		goto unlock;
1445
	}
2332 Serge 1446
 
4293 Serge 1447
	/* Pinned buffers may be scanout, so flush the cache */
1448
	if (obj->pin_display)
6084 serge 1449
		i915_gem_object_flush_cpu_write_domain(obj);
2332 Serge 1450
 
4293 Serge 1451
	drm_gem_object_unreference(&obj->base);
1452
unlock:
1453
	mutex_unlock(&dev->struct_mutex);
1454
	return ret;
1455
}
1456
 
3260 Serge 1457
/**
1458
 * Maps the contents of an object, returning the address it is mapped
1459
 * into.
1460
 *
1461
 * While the mapping holds a reference on the contents of the object, it doesn't
1462
 * imply a ref on the object itself.
5354 serge 1463
 *
1464
 * IMPORTANT:
1465
 *
1466
 * DRM driver writers who look at this function as an example for how to do GEM
1467
 * mmap support, please don't implement mmap support like here. The modern way
1468
 * to implement DRM mmap support is with an mmap offset ioctl (like
1469
 * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
1470
 * That way debug tooling like valgrind will understand what's going on; hiding
1471
 * the mmap call in a driver private ioctl will break that. The i915 driver only
1472
 * does cpu mmaps this way because we didn't know better.
3260 Serge 1473
 */
1474
int
1475
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1476
		    struct drm_file *file)
1477
{
1478
	struct drm_i915_gem_mmap *args = data;
1479
	struct drm_gem_object *obj;
4392 Serge 1480
	unsigned long addr;
2332 Serge 1481
 
6084 serge 1482
//	if (args->flags & ~(I915_MMAP_WC))
1483
//		return -EINVAL;
3260 Serge 1484
	obj = drm_gem_object_lookup(dev, file, args->handle);
1485
	if (obj == NULL)
1486
		return -ENOENT;
4104 Serge 1487
 
3260 Serge 1488
	/* prime objects have no backing filp to GEM mmap
1489
	 * pages from.
1490
	 */
1491
	if (!obj->filp) {
1492
		drm_gem_object_unreference_unlocked(obj);
1493
		return -EINVAL;
1494
	}
2332 Serge 1495
 
6084 serge 1496
	addr = vm_mmap(obj->filp, 0, args->size,
1497
		       PROT_READ | PROT_WRITE, MAP_SHARED,
1498
		       args->offset);
3260 Serge 1499
	drm_gem_object_unreference_unlocked(obj);
6084 serge 1500
	if (IS_ERR((void *)addr))
1501
		return addr;
2332 Serge 1502
 
3260 Serge 1503
	args->addr_ptr = (uint64_t) addr;
2332 Serge 1504
 
6084 serge 1505
	return 0;
3260 Serge 1506
}
2332 Serge 1507
 
1508
 
1509
 
1510
 
1511
 
1512
 
1513
 
1514
 
3031 serge 1515
 
1516
 
1517
 
1518
 
1519
 
1520
/**
1521
 * i915_gem_release_mmap - remove physical page mappings
1522
 * @obj: obj in question
1523
 *
1524
 * Preserve the reservation of the mmapping with the DRM core code, but
1525
 * relinquish ownership of the pages back to the system.
1526
 *
1527
 * It is vital that we remove the page mapping if we have mapped a tiled
1528
 * object through the GTT and then lose the fence register due to
1529
 * resource pressure. Similarly if the object has been moved out of the
1530
 * aperture, then pages mapped into userspace must be revoked. Removing the
1531
 * mapping will then trigger a page fault on the next user access, allowing
1532
 * fixup by i915_gem_fault().
1533
 */
1534
void
1535
i915_gem_release_mmap(struct drm_i915_gem_object *obj)
1536
{
1537
	if (!obj->fault_mappable)
1538
		return;
1539
 
4104 Serge 1540
//	drm_vma_node_unmap(&obj->base.vma_node, obj->base.dev->dev_mapping);
3031 serge 1541
	obj->fault_mappable = false;
1542
}
1543
 
6084 serge 1544
void
1545
i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv)
1546
{
1547
	struct drm_i915_gem_object *obj;
1548
 
1549
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
1550
		i915_gem_release_mmap(obj);
1551
}
1552
 
3480 Serge 1553
uint32_t
2332 Serge 1554
i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
1555
{
1556
	uint32_t gtt_size;
1557
 
1558
	if (INTEL_INFO(dev)->gen >= 4 ||
1559
	    tiling_mode == I915_TILING_NONE)
1560
		return size;
1561
 
1562
	/* Previous chips need a power-of-two fence region when tiling */
1563
	if (INTEL_INFO(dev)->gen == 3)
1564
		gtt_size = 1024*1024;
1565
	else
1566
		gtt_size = 512*1024;
1567
 
1568
	while (gtt_size < size)
1569
		gtt_size <<= 1;
1570
 
1571
	return gtt_size;
1572
}
1573
 
1574
/**
1575
 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1576
 * @obj: object to check
1577
 *
1578
 * Return the required GTT alignment for an object, taking into account
1579
 * potential fence register mapping.
1580
 */
3480 Serge 1581
uint32_t
1582
i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
1583
			   int tiling_mode, bool fenced)
2332 Serge 1584
{
1585
	/*
1586
	 * Minimum alignment is 4k (GTT page size), but might be greater
1587
	 * if a fence register is needed for the object.
1588
	 */
3480 Serge 1589
	if (INTEL_INFO(dev)->gen >= 4 || (!fenced && IS_G33(dev)) ||
2332 Serge 1590
	    tiling_mode == I915_TILING_NONE)
1591
		return 4096;
1592
 
1593
	/*
1594
	 * Previous chips need to be aligned to the size of the smallest
1595
	 * fence register that can contain the object.
1596
	 */
1597
	return i915_gem_get_gtt_size(dev, size, tiling_mode);
1598
}
1599
 
1600
 
1601
 
3480 Serge 1602
int
1603
i915_gem_mmap_gtt(struct drm_file *file,
1604
          struct drm_device *dev,
6084 serge 1605
		  uint32_t handle,
3480 Serge 1606
          uint64_t *offset)
1607
{
1608
    struct drm_i915_private *dev_priv = dev->dev_private;
1609
    struct drm_i915_gem_object *obj;
1610
    unsigned long pfn;
1611
    char *mem, *ptr;
1612
    int ret;
1613
 
1614
    ret = i915_mutex_lock_interruptible(dev);
1615
    if (ret)
1616
        return ret;
1617
 
1618
    obj = to_intel_bo(drm_gem_object_lookup(dev, file, handle));
1619
    if (&obj->base == NULL) {
1620
        ret = -ENOENT;
1621
        goto unlock;
1622
    }
1623
 
1624
    if (obj->madv != I915_MADV_WILLNEED) {
5060 serge 1625
		DRM_DEBUG("Attempting to mmap a purgeable buffer\n");
1626
		ret = -EFAULT;
3480 Serge 1627
        goto out;
1628
    }
1629
    /* Now bind it into the GTT if needed */
5060 serge 1630
	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
3480 Serge 1631
    if (ret)
1632
        goto out;
1633
 
1634
    ret = i915_gem_object_set_to_gtt_domain(obj, 1);
1635
    if (ret)
1636
        goto unpin;
1637
 
1638
    ret = i915_gem_object_get_fence(obj);
1639
    if (ret)
1640
        goto unpin;
1641
 
1642
    obj->fault_mappable = true;
1643
 
4104 Serge 1644
    pfn = dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj);
3480 Serge 1645
 
1646
    /* Finally, remap it using the new GTT offset */
1647
 
1648
    mem = UserAlloc(obj->base.size);
1649
    if(unlikely(mem == NULL))
1650
    {
1651
        ret = -ENOMEM;
1652
        goto unpin;
1653
    }
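    /* KolibriOS port: instead of returning a fake mmap offset and faulting
     * pages in on demand, the mapping is created eagerly: a user buffer is
     * reserved with UserAlloc() and every GTT aperture page is wired into it
     * with MapPage() in the loop below. */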
1654
 
1655
    for(ptr = mem; ptr < mem + obj->base.size; ptr+= 4096, pfn+= 4096)
1656
        MapPage(ptr, pfn, PG_SHARED|PG_UW);
1657
 
1658
unpin:
5060 serge 1659
    i915_gem_object_unpin_pages(obj);
3480 Serge 1660
 
1661
 
5367 serge 1662
    *offset = (uint32_t)mem;
3480 Serge 1663
 
1664
out:
6088 serge 1665
	drm_gem_object_unreference(&obj->base);
3480 Serge 1666
unlock:
6088 serge 1667
	mutex_unlock(&dev->struct_mutex);
1668
	return ret;
3480 Serge 1669
}
1670
 
1671
/**
1672
 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1673
 * @dev: DRM device
1674
 * @data: GTT mapping ioctl data
1675
 * @file: GEM object info
1676
 *
1677
 * Simply returns the fake offset to userspace so it can mmap it.
1678
 * The mmap call will end up in drm_gem_mmap(), which will set things
1679
 * up so we can get faults in the handler above.
1680
 *
1681
 * The fault handler will take care of binding the object into the GTT
1682
 * (since it may have been evicted to make room for something), allocating
1683
 * a fence register, and mapping the appropriate aperture address into
1684
 * userspace.
1685
 */
1686
int
1687
i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
6084 serge 1688
			struct drm_file *file)
3480 Serge 1689
{
6084 serge 1690
	struct drm_i915_gem_mmap_gtt *args = data;
3480 Serge 1691
 
6084 serge 1692
	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
3480 Serge 1693
}
1694
 
3031 serge 1695
/* Immediately discard the backing storage */
1696
static void
1697
i915_gem_object_truncate(struct drm_i915_gem_object *obj)
1698
{
1699
//	i915_gem_object_free_mmap_offset(obj);
2332 Serge 1700
 
3263 Serge 1701
	if (obj->base.filp == NULL)
1702
		return;
2332 Serge 1703
 
3031 serge 1704
	/* Our goal here is to return as much of the memory as
1705
	 * is possible back to the system as we are called from OOM.
1706
	 * To do this we must instruct the shmfs to drop all of its
1707
	 * backing pages, *now*.
1708
	 */
5060 serge 1709
//	shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
3031 serge 1710
	obj->madv = __I915_MADV_PURGED;
1711
}
2332 Serge 1712
 
5060 serge 1713
/* Try to discard unwanted pages */
1714
static void
1715
i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
3031 serge 1716
{
5060 serge 1717
	struct address_space *mapping;
1718
 
1719
	switch (obj->madv) {
1720
	case I915_MADV_DONTNEED:
1721
		i915_gem_object_truncate(obj);
1722
	case __I915_MADV_PURGED:
1723
		return;
1724
	}
1725
 
1726
	if (obj->base.filp == NULL)
1727
		return;
1728
 
3031 serge 1729
}
2332 Serge 1730
 
3031 serge 1731
static void
1732
i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
1733
{
3746 Serge 1734
	struct sg_page_iter sg_iter;
1735
	int ret;
2332 Serge 1736
 
3031 serge 1737
	BUG_ON(obj->madv == __I915_MADV_PURGED);
2332 Serge 1738
 
3031 serge 1739
	ret = i915_gem_object_set_to_cpu_domain(obj, true);
1740
	if (ret) {
1741
		/* In the event of a disaster, abandon all caches and
1742
		 * hope for the best.
1743
		 */
1744
		WARN_ON(ret != -EIO);
4104 Serge 1745
		i915_gem_clflush_object(obj, true);
3031 serge 1746
		obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
1747
	}
2332 Serge 1748
 
6084 serge 1749
	i915_gem_gtt_finish_object(obj);
3031 serge 1750
	if (obj->madv == I915_MADV_DONTNEED)
1751
		obj->dirty = 0;
2332 Serge 1752
 
3746 Serge 1753
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
1754
		struct page *page = sg_page_iter_page(&sg_iter);
2332 Serge 1755
 
6084 serge 1756
		page_cache_release(page);
3243 Serge 1757
	}
6084 serge 1758
	obj->dirty = 0;
3243 Serge 1759
 
1760
	sg_free_table(obj->pages);
1761
	kfree(obj->pages);
3031 serge 1762
}
2332 Serge 1763
 
3480 Serge 1764
int
3031 serge 1765
i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
1766
{
1767
	const struct drm_i915_gem_object_ops *ops = obj->ops;
2332 Serge 1768
 
3243 Serge 1769
	if (obj->pages == NULL)
3031 serge 1770
		return 0;
2332 Serge 1771
 
3031 serge 1772
	if (obj->pages_pin_count)
1773
		return -EBUSY;
1774
 
4104 Serge 1775
	BUG_ON(i915_gem_obj_bound_any(obj));
1776
 
3243 Serge 1777
	/* ->put_pages might need to allocate memory for the bit17 swizzle
1778
	 * array, hence protect them from being reaped by removing them from gtt
1779
	 * lists early. */
4104 Serge 1780
	list_del(&obj->global_list);
3243 Serge 1781
 
3031 serge 1782
	ops->put_pages(obj);
3243 Serge 1783
	obj->pages = NULL;
3031 serge 1784
 
5060 serge 1785
	i915_gem_object_invalidate(obj);
3031 serge 1786
 
1787
	return 0;
1788
}
1789
 
2332 Serge 1790
static int
3031 serge 1791
i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
2332 Serge 1792
{
3260 Serge 1793
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
6084 serge 1794
	int page_count, i;
1795
	struct address_space *mapping;
1796
	struct sg_table *st;
3243 Serge 1797
	struct scatterlist *sg;
3746 Serge 1798
	struct sg_page_iter sg_iter;
3243 Serge 1799
	struct page *page;
3746 Serge 1800
	unsigned long last_pfn = 0;	/* suppress gcc warning */
6084 serge 1801
	int ret;
3243 Serge 1802
	gfp_t gfp;
2332 Serge 1803
 
3243 Serge 1804
	/* Assert that the object is not currently in any GPU domain. As it
1805
	 * wasn't in the GTT, there shouldn't be any way it could have been in
1806
	 * a GPU cache
2332 Serge 1807
	 */
3243 Serge 1808
	BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
1809
	BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
1810
 
1811
	st = kmalloc(sizeof(*st), GFP_KERNEL);
1812
	if (st == NULL)
1813
		return -ENOMEM;
1814
 
2332 Serge 1815
	page_count = obj->base.size / PAGE_SIZE;
3243 Serge 1816
	if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
1817
		kfree(st);
2332 Serge 1818
		return -ENOMEM;
3243 Serge 1819
	}
2332 Serge 1820
 
3243 Serge 1821
	/* Get the list of pages out of our struct file.  They'll be pinned
1822
	 * at this point until we release them.
1823
	 *
1824
	 * Fail silently without starting the shrinker
1825
	 */
3746 Serge 1826
	sg = st->sgl;
1827
	st->nents = 0;
1828
	for (i = 0; i < page_count; i++) {
4104 Serge 1829
        page = shmem_read_mapping_page_gfp(obj->base.filp, i, gfp);
3260 Serge 1830
		if (IS_ERR(page)) {
1831
            dbgprintf("%s invalid page %p\n", __FUNCTION__, page);
2332 Serge 1832
			goto err_pages;
3260 Serge 1833
		}
5354 serge 1834
#ifdef CONFIG_SWIOTLB
1835
		if (swiotlb_nr_tbl()) {
1836
			st->nents++;
1837
			sg_set_page(sg, page, PAGE_SIZE, 0);
1838
			sg = sg_next(sg);
1839
			continue;
1840
		}
1841
#endif
3746 Serge 1842
		if (!i || page_to_pfn(page) != last_pfn + 1) {
1843
			if (i)
1844
				sg = sg_next(sg);
1845
			st->nents++;
6084 serge 1846
			sg_set_page(sg, page, PAGE_SIZE, 0);
3746 Serge 1847
		} else {
1848
			sg->length += PAGE_SIZE;
1849
		}
1850
		last_pfn = page_to_pfn(page);
3243 Serge 1851
	}
5354 serge 1852
#ifdef CONFIG_SWIOTLB
1853
	if (!swiotlb_nr_tbl())
1854
#endif
3746 Serge 1855
		sg_mark_end(sg);
3243 Serge 1856
	obj->pages = st;
3031 serge 1857
 
6084 serge 1858
	ret = i915_gem_gtt_prepare_object(obj);
1859
	if (ret)
1860
		goto err_pages;
5367 serge 1861
 
1862
	if (obj->tiling_mode != I915_TILING_NONE &&
1863
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
1864
		i915_gem_object_pin_pages(obj);
1865
 
2332 Serge 1866
	return 0;
1867
 
1868
err_pages:
3746 Serge 1869
	sg_mark_end(sg);
1870
	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0)
1871
		page_cache_release(sg_page_iter_page(&sg_iter));
3243 Serge 1872
	sg_free_table(st);
1873
	kfree(st);
6084 serge 1874
 
3243 Serge 1875
	return PTR_ERR(page);
2332 Serge 1876
}
1877
 
3031 serge 1878
/* Ensure that the associated pages are gathered from the backing storage
1879
 * and pinned into our object. i915_gem_object_get_pages() may be called
1880
 * multiple times before they are released by a single call to
1881
 * i915_gem_object_put_pages() - once the pages are no longer referenced
1882
 * either as a result of memory pressure (reaping pages under the shrinker)
1883
 * or as the object is itself released.
1884
 */
1885
int
1886
i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
2332 Serge 1887
{
3031 serge 1888
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
1889
	const struct drm_i915_gem_object_ops *ops = obj->ops;
1890
	int ret;
2332 Serge 1891
 
3243 Serge 1892
	if (obj->pages)
3031 serge 1893
		return 0;
2332 Serge 1894
 
4392 Serge 1895
	if (obj->madv != I915_MADV_WILLNEED) {
5060 serge 1896
		DRM_DEBUG("Attempting to obtain a purgeable object\n");
1897
		return -EFAULT;
4392 Serge 1898
	}
1899
 
3031 serge 1900
	BUG_ON(obj->pages_pin_count);
2332 Serge 1901
 
3031 serge 1902
	ret = ops->get_pages(obj);
1903
	if (ret)
1904
		return ret;
2344 Serge 1905
 
4104 Serge 1906
	list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
6084 serge 1907
 
1908
	obj->get_page.sg = obj->pages->sgl;
1909
	obj->get_page.last = 0;
1910
 
1911
	return 0;
2332 Serge 1912
}
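
/* Minimal sketch (not from the original call sites) of the pin/unpin
 * discipline that get_pages/put_pages rely on: a caller that walks
 * obj->pages must hold a pages_pin_count, with struct_mutex held, so a
 * concurrent i915_gem_object_put_pages() cannot reap the backing store.
 * The example_walk_pages() name is hypothetical. */
static int __maybe_unused example_walk_pages(struct drm_i915_gem_object *obj)
{
	int ret;

	ret = i915_gem_object_get_pages(obj);
	if (ret)
		return ret;

	i915_gem_object_pin_pages(obj);		/* blocks put_pages() */
	/* ... iterate obj->pages->sgl here ... */
	i915_gem_object_unpin_pages(obj);	/* reaping allowed again */

	return 0;
}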
1913
 
6084 serge 1914
void i915_vma_move_to_active(struct i915_vma *vma,
1915
			     struct drm_i915_gem_request *req)
2332 Serge 1916
{
6084 serge 1917
	struct drm_i915_gem_object *obj = vma->obj;
1918
	struct intel_engine_cs *ring;
2332 Serge 1919
 
6084 serge 1920
	ring = i915_gem_request_get_ring(req);
2332 Serge 1921
 
1922
	/* Add a reference if we're newly entering the active list. */
6084 serge 1923
	if (obj->active == 0)
2344 Serge 1924
		drm_gem_object_reference(&obj->base);
6084 serge 1925
	obj->active |= intel_ring_flag(ring);
2332 Serge 1926
 
6084 serge 1927
	list_move_tail(&obj->ring_list[ring->id], &ring->active_list);
1928
	i915_gem_request_assign(&obj->last_read_req[ring->id], req);
2332 Serge 1929
 
6084 serge 1930
	list_move_tail(&vma->mm_list, &vma->vm->active_list);
2332 Serge 1931
}
1932
 
6084 serge 1933
static void
1934
i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
4560 Serge 1935
{
6084 serge 1936
	RQ_BUG_ON(obj->last_write_req == NULL);
1937
	RQ_BUG_ON(!(obj->active & intel_ring_flag(obj->last_write_req->ring)));
1938
 
1939
	i915_gem_request_assign(&obj->last_write_req, NULL);
1940
	intel_fb_obj_flush(obj, true, ORIGIN_CS);
4560 Serge 1941
}
1942
 
2344 Serge 1943
static void
6084 serge 1944
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
2344 Serge 1945
{
5060 serge 1946
	struct i915_vma *vma;
2332 Serge 1947
 
6084 serge 1948
	RQ_BUG_ON(obj->last_read_req[ring] == NULL);
1949
	RQ_BUG_ON(!(obj->active & (1 << ring)));
2332 Serge 1950
 
6084 serge 1951
	list_del_init(&obj->ring_list[ring]);
1952
	i915_gem_request_assign(&obj->last_read_req[ring], NULL);
2344 Serge 1953
 
6084 serge 1954
	if (obj->last_write_req && obj->last_write_req->ring->id == ring)
1955
		i915_gem_object_retire__write(obj);
5354 serge 1956
 
6084 serge 1957
	obj->active &= ~(1 << ring);
1958
	if (obj->active)
1959
		return;
2344 Serge 1960
 
6084 serge 1961
	/* Bump our place on the bound list to keep it roughly in LRU order
1962
	 * so that we don't steal from recently used but inactive objects
1963
	 * (unless we are forced to ofc!)
1964
	 */
1965
	list_move_tail(&obj->global_list,
1966
		       &to_i915(obj->base.dev)->mm.bound_list);
3031 serge 1967
 
6084 serge 1968
	list_for_each_entry(vma, &obj->vma_list, vma_link) {
1969
		if (!list_empty(&vma->mm_list))
1970
			list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
1971
	}
2344 Serge 1972
 
6084 serge 1973
	i915_gem_request_assign(&obj->last_fenced_req, NULL);
2352 Serge 1974
	drm_gem_object_unreference(&obj->base);
1975
}
1976
 
3243 Serge 1977
static int
3480 Serge 1978
i915_gem_init_seqno(struct drm_device *dev, u32 seqno)
2344 Serge 1979
{
3243 Serge 1980
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 1981
	struct intel_engine_cs *ring;
3243 Serge 1982
	int ret, i, j;
2344 Serge 1983
 
3480 Serge 1984
	/* Carefully retire all requests without writing to the rings */
3243 Serge 1985
	for_each_ring(ring, dev_priv, i) {
3480 Serge 1986
		ret = intel_ring_idle(ring);
6084 serge 1987
		if (ret)
1988
			return ret;
3480 Serge 1989
	}
1990
	i915_gem_retire_requests(dev);
3243 Serge 1991
 
3480 Serge 1992
	/* Finally reset hw state */
3243 Serge 1993
	for_each_ring(ring, dev_priv, i) {
3480 Serge 1994
		intel_ring_init_seqno(ring, seqno);
1995
 
5060 serge 1996
		for (j = 0; j < ARRAY_SIZE(ring->semaphore.sync_seqno); j++)
1997
			ring->semaphore.sync_seqno[j] = 0;
3243 Serge 1998
	}
1999
 
2000
	return 0;
2344 Serge 2001
}
2002
 
3480 Serge 2003
int i915_gem_set_seqno(struct drm_device *dev, u32 seqno)
2004
{
2005
	struct drm_i915_private *dev_priv = dev->dev_private;
2006
	int ret;
2007
 
2008
	if (seqno == 0)
2009
		return -EINVAL;
2010
 
2011
	/* HWS page needs to be set less than what we
2012
	 * will inject to ring
2013
	 */
2014
	ret = i915_gem_init_seqno(dev, seqno - 1);
2015
	if (ret)
2016
		return ret;
2017
 
2018
	/* Carefully set the last_seqno value so that wrap
2019
	 * detection still works
2020
	 */
2021
	dev_priv->next_seqno = seqno;
2022
	dev_priv->last_seqno = seqno - 1;
2023
	if (dev_priv->last_seqno == 0)
2024
		dev_priv->last_seqno--;
2025
 
2026
	return 0;
2027
}
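
/* Illustrative only, assuming struct_mutex is held (a debugfs-style
 * caller): forcing the next seqno close to the 32-bit wrap exercises the
 * guards above.  Afterwards next_seqno == 0xfffffff0, last_seqno ==
 * 0xffffffef, and every ring's HWS page has been initialised to
 * 0xffffffef by i915_gem_init_seqno().  example_force_seqno_wrap() is a
 * hypothetical name. */
static int __maybe_unused example_force_seqno_wrap(struct drm_device *dev)
{
	return i915_gem_set_seqno(dev, 0xfffffff0);
}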
2028
 
3243 Serge 2029
int
2030
i915_gem_get_seqno(struct drm_device *dev, u32 *seqno)
2344 Serge 2031
{
3243 Serge 2032
	struct drm_i915_private *dev_priv = dev->dev_private;
2344 Serge 2033
 
3243 Serge 2034
	/* reserve 0 for non-seqno */
2035
	if (dev_priv->next_seqno == 0) {
3480 Serge 2036
		int ret = i915_gem_init_seqno(dev, 0);
3243 Serge 2037
		if (ret)
2038
			return ret;
2039
 
2040
		dev_priv->next_seqno = 1;
2041
	}
2042
 
3480 Serge 2043
	*seqno = dev_priv->last_seqno = dev_priv->next_seqno++;
3243 Serge 2044
	return 0;
2332 Serge 2045
}
2046
 
6084 serge 2047
/*
2048
 * NB: This function is not allowed to fail. Doing so would mean the
2049
 * request is not being tracked for completion but the work itself is
2050
 * going to happen on the hardware. This would be a Bad Thing(tm).
2051
 */
2052
void __i915_add_request(struct drm_i915_gem_request *request,
2053
			struct drm_i915_gem_object *obj,
2054
			bool flush_caches)
2352 Serge 2055
{
6084 serge 2056
	struct intel_engine_cs *ring;
2057
	struct drm_i915_private *dev_priv;
5354 serge 2058
	struct intel_ringbuffer *ringbuf;
6084 serge 2059
	u32 request_start;
2352 Serge 2060
	int ret;
2332 Serge 2061
 
5354 serge 2062
	if (WARN_ON(request == NULL))
6084 serge 2063
		return;
5354 serge 2064
 
6084 serge 2065
	ring = request->ring;
2066
	dev_priv = ring->dev->dev_private;
2067
	ringbuf = request->ringbuf;
5354 serge 2068
 
6084 serge 2069
	/*
2070
	 * To ensure that this call will not fail, space for its emissions
2071
	 * should already have been reserved in the ring buffer. Let the ring
2072
	 * know that it is time to use that space up.
2073
	 */
2074
	intel_ring_reserved_space_use(ringbuf);
2075
 
5354 serge 2076
	request_start = intel_ring_get_tail(ringbuf);
3031 serge 2077
	/*
2078
	 * Emit any outstanding flushes - execbuf can fail to emit the flush
2079
	 * after having emitted the batchbuffer command. Hence we need to fix
2080
	 * things up similar to emitting the lazy request. The difference here
2081
	 * is that the flush _must_ happen before the next request, no matter
2082
	 * what.
2083
	 */
6084 serge 2084
	if (flush_caches) {
2085
		if (i915.enable_execlists)
2086
			ret = logical_ring_flush_all_caches(request);
2087
		else
2088
			ret = intel_ring_flush_all_caches(request);
2089
		/* Not allowed to fail! */
2090
		WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret);
5354 serge 2091
	}
2332 Serge 2092
 
3031 serge 2093
	/* Record the position of the start of the request so that
2094
	 * should we detect the updated seqno part-way through the
6084 serge 2095
	 * GPU processing the request, we never over-estimate the
3031 serge 2096
	 * position of the head.
2097
	 */
6084 serge 2098
	request->postfix = intel_ring_get_tail(ringbuf);
3031 serge 2099
 
6084 serge 2100
	if (i915.enable_execlists)
2101
		ret = ring->emit_request(request);
2102
	else {
2103
		ret = ring->add_request(request);
2104
 
2105
		request->tail = intel_ring_get_tail(ringbuf);
5354 serge 2106
	}
6084 serge 2107
	/* Not allowed to fail! */
2108
	WARN(ret, "emit|add_request failed: %d!\n", ret);
2332 Serge 2109
 
4104 Serge 2110
	request->head = request_start;
2111
 
2112
	/* Whilst this request exists, batch_obj will be on the
2113
	 * active_list, and so will hold the active reference. Only when this
2114
	 * request is retired will the batch_obj be moved onto the
2115
	 * inactive_list and lose its active reference. Hence we do not need
2116
	 * to explicitly hold another reference here.
2117
	 */
4560 Serge 2118
	request->batch_obj = obj;
4104 Serge 2119
 
5060 serge 2120
	request->emitted_jiffies = jiffies;
6084 serge 2121
	request->previous_seqno = ring->last_submitted_seqno;
2122
	ring->last_submitted_seqno = request->seqno;
2352 Serge 2123
	list_add_tail(&request->list, &ring->request_list);
2332 Serge 2124
 
6084 serge 2125
	trace_i915_gem_request_add(request);
2332 Serge 2126
 
6084 serge 2127
//	i915_queue_hangcheck(ring->dev);
3263 Serge 2128
 
6084 serge 2129
	queue_delayed_work(dev_priv->wq,
2130
			   &dev_priv->mm.retire_work,
2131
			   round_jiffies_up_relative(HZ));
2132
	intel_mark_busy(dev_priv->dev);
2332 Serge 2133
 
6084 serge 2134
	/* Sanity check that the reserved size was large enough. */
2135
	intel_ring_reserved_space_end(ringbuf);
2352 Serge 2136
}
2332 Serge 2137
 
5060 serge 2138
static bool i915_context_is_banned(struct drm_i915_private *dev_priv,
2139
				   const struct intel_context *ctx)
4104 Serge 2140
{
5060 serge 2141
	unsigned long elapsed;
4104 Serge 2142
 
5060 serge 2143
    elapsed = GetTimerTicks()/100 - ctx->hang_stats.guilty_ts;
4104 Serge 2144
 
5060 serge 2145
	if (ctx->hang_stats.banned)
2146
		return true;
4104 Serge 2147
 
6084 serge 2148
	if (ctx->hang_stats.ban_period_seconds &&
2149
	    elapsed <= ctx->hang_stats.ban_period_seconds) {
5060 serge 2150
		if (!i915_gem_context_is_default(ctx)) {
2151
			DRM_DEBUG("context hanging too fast, banning!\n");
4104 Serge 2152
			return true;
5060 serge 2153
		} else if (i915_stop_ring_allow_ban(dev_priv)) {
2154
			if (i915_stop_ring_allow_warn(dev_priv))
6084 serge 2155
				DRM_ERROR("gpu hanging too fast, banning!\n");
4104 Serge 2156
			return true;
6084 serge 2157
		}
4104 Serge 2158
	}
2159
 
2160
	return false;
2161
}
2162
 
5060 serge 2163
static void i915_set_reset_status(struct drm_i915_private *dev_priv,
2164
				  struct intel_context *ctx,
2165
				  const bool guilty)
4560 Serge 2166
{
5060 serge 2167
	struct i915_ctx_hang_stats *hs;
4560 Serge 2168
 
5060 serge 2169
	if (WARN_ON(!ctx))
2170
		return;
4560 Serge 2171
 
5060 serge 2172
	hs = &ctx->hang_stats;
4560 Serge 2173
 
5060 serge 2174
	if (guilty) {
2175
		hs->banned = i915_context_is_banned(dev_priv, ctx);
2176
		hs->batch_active++;
2177
        hs->guilty_ts = GetTimerTicks()/100;
2178
	} else {
2179
		hs->batch_pending++;
4104 Serge 2180
	}
2181
}
2182
 
6084 serge 2183
void i915_gem_request_free(struct kref *req_ref)
4104 Serge 2184
{
6084 serge 2185
	struct drm_i915_gem_request *req = container_of(req_ref,
2186
						 typeof(*req), ref);
2187
	struct intel_context *ctx = req->ctx;
5354 serge 2188
 
6084 serge 2189
	if (req->file_priv)
2190
		i915_gem_request_remove_from_client(req);
4104 Serge 2191
 
5354 serge 2192
	if (ctx) {
2193
		if (i915.enable_execlists) {
6084 serge 2194
			if (ctx != req->ring->default_context)
2195
				intel_lr_context_unpin(req);
2196
		}
4104 Serge 2197
 
5354 serge 2198
		i915_gem_context_unreference(ctx);
2199
	}
6084 serge 2200
 
2201
	kfree(req);
4104 Serge 2202
}
2203
 
6084 serge 2204
int i915_gem_request_alloc(struct intel_engine_cs *ring,
2205
			   struct intel_context *ctx,
2206
			   struct drm_i915_gem_request **req_out)
2207
{
2208
	struct drm_i915_private *dev_priv = to_i915(ring->dev);
2209
	struct drm_i915_gem_request *req;
2210
	int ret;
2211
 
2212
	if (!req_out)
2213
		return -EINVAL;
2214
 
2215
	*req_out = NULL;
2216
 
2217
//	req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL);
2218
	req = kzalloc(sizeof(*req),0);
2219
	if (req == NULL)
2220
		return -ENOMEM;
2221
 
2222
	ret = i915_gem_get_seqno(ring->dev, &req->seqno);
2223
	if (ret)
2224
		goto err;
2225
 
2226
	kref_init(&req->ref);
2227
	req->i915 = dev_priv;
2228
	req->ring = ring;
2229
	req->ctx  = ctx;
2230
	i915_gem_context_reference(req->ctx);
2231
 
2232
	if (i915.enable_execlists)
2233
		ret = intel_logical_ring_alloc_request_extras(req);
2234
	else
2235
		ret = intel_ring_alloc_request_extras(req);
2236
	if (ret) {
2237
		i915_gem_context_unreference(req->ctx);
2238
		goto err;
2239
	}
2240
 
2241
	/*
2242
	 * Reserve space in the ring buffer for all the commands required to
2243
	 * eventually emit this request. This is to guarantee that the
2244
	 * i915_add_request() call can't fail. Note that the reserve may need
2245
	 * to be redone if the request is not actually submitted straight
2246
	 * away, e.g. because a GPU scheduler has deferred it.
2247
	 */
2248
	if (i915.enable_execlists)
2249
		ret = intel_logical_ring_reserve_space(req);
2250
	else
2251
		ret = intel_ring_reserve_space(req);
2252
	if (ret) {
2253
		/*
2254
		 * At this point, the request is fully allocated even if not
2255
		 * fully prepared. Thus it can be cleaned up using the proper
2256
		 * free code.
2257
		 */
2258
		i915_gem_request_cancel(req);
2259
		return ret;
2260
	}
2261
 
2262
	*req_out = req;
2263
	return 0;
2264
 
2265
err:
2266
	kfree(req);
2267
	return ret;
2268
}
2269
 
2270
void i915_gem_request_cancel(struct drm_i915_gem_request *req)
2271
{
2272
	intel_ring_reserved_space_cancel(req->ringbuf);
2273
 
2274
	i915_gem_request_unreference(req);
2275
}
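
/* Illustrative request lifecycle, assuming struct_mutex is held:
 * allocation reserves ring space up front, so the final submission via
 * i915_add_request() is not allowed to fail, and a request that is never
 * submitted must be cancelled to release that reservation.  The
 * example_submit_request() name and the emit step are hypothetical. */
static int __maybe_unused example_submit_request(struct intel_engine_cs *ring)
{
	struct drm_i915_gem_request *req;
	int ret;

	ret = i915_gem_request_alloc(ring, ring->default_context, &req);
	if (ret)
		return ret;

	ret = 0;	/* result of emitting commands into req->ringbuf */
	if (ret) {
		i915_gem_request_cancel(req);	/* drops the reservation */
		return ret;
	}

	i915_add_request(req);	/* cannot fail thanks to the reserve */
	return 0;
}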
2276
 
5060 serge 2277
struct drm_i915_gem_request *
2278
i915_gem_find_active_request(struct intel_engine_cs *ring)
3031 serge 2279
{
4539 Serge 2280
	struct drm_i915_gem_request *request;
4104 Serge 2281
 
4539 Serge 2282
	list_for_each_entry(request, &ring->request_list, list) {
6084 serge 2283
		if (i915_gem_request_completed(request, false))
4539 Serge 2284
			continue;
4104 Serge 2285
 
5060 serge 2286
		return request;
4539 Serge 2287
	}
5060 serge 2288
 
2289
	return NULL;
4539 Serge 2290
}
2291
 
5060 serge 2292
static void i915_gem_reset_ring_status(struct drm_i915_private *dev_priv,
2293
				       struct intel_engine_cs *ring)
2294
{
2295
	struct drm_i915_gem_request *request;
2296
	bool ring_hung;
2297
 
2298
	request = i915_gem_find_active_request(ring);
2299
 
2300
	if (request == NULL)
2301
		return;
2302
 
2303
	ring_hung = ring->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG;
2304
 
2305
	i915_set_reset_status(dev_priv, request->ctx, ring_hung);
2306
 
2307
	list_for_each_entry_continue(request, &ring->request_list, list)
2308
		i915_set_reset_status(dev_priv, request->ctx, false);
2309
}
2310
 
4539 Serge 2311
static void i915_gem_reset_ring_cleanup(struct drm_i915_private *dev_priv,
5060 serge 2312
					struct intel_engine_cs *ring)
4539 Serge 2313
{
4560 Serge 2314
	while (!list_empty(&ring->active_list)) {
2315
		struct drm_i915_gem_object *obj;
2316
 
2317
		obj = list_first_entry(&ring->active_list,
2318
				       struct drm_i915_gem_object,
6084 serge 2319
				       ring_list[ring->id]);
4560 Serge 2320
 
6084 serge 2321
		i915_gem_object_retire__read(obj, ring->id);
4560 Serge 2322
	}
2323
 
2324
	/*
5354 serge 2325
	 * Clear the execlists queue up before freeing the requests, as those
2326
	 * are the ones that keep the context and ringbuffer backing objects
2327
	 * pinned in place.
2328
	 */
2329
	while (!list_empty(&ring->execlist_queue)) {
6084 serge 2330
		struct drm_i915_gem_request *submit_req;
5354 serge 2331
 
2332
		submit_req = list_first_entry(&ring->execlist_queue,
6084 serge 2333
				struct drm_i915_gem_request,
5354 serge 2334
				execlist_link);
2335
		list_del(&submit_req->execlist_link);
6084 serge 2336
 
2337
		if (submit_req->ctx != ring->default_context)
2338
			intel_lr_context_unpin(submit_req);
2339
 
2340
		i915_gem_request_unreference(submit_req);
5354 serge 2341
	}
2342
 
2343
	/*
4560 Serge 2344
	 * We must free the requests after all the corresponding objects have
2345
	 * been moved off active lists. Which is the same order as the normal
2346
	 * retire_requests function does. This is important if object hold
2347
	 * implicit references on things like e.g. ppgtt address spaces through
2348
	 * the request.
2349
	 */
3031 serge 2350
	while (!list_empty(&ring->request_list)) {
2351
		struct drm_i915_gem_request *request;
2332 Serge 2352
 
3031 serge 2353
		request = list_first_entry(&ring->request_list,
2354
					   struct drm_i915_gem_request,
2355
					   list);
2332 Serge 2356
 
6084 serge 2357
		i915_gem_request_retire(request);
3031 serge 2358
	}
2359
}
2332 Serge 2360
 
3031 serge 2361
void i915_gem_reset(struct drm_device *dev)
2362
{
2363
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 2364
	struct intel_engine_cs *ring;
3031 serge 2365
	int i;
2360 Serge 2366
 
4539 Serge 2367
	/*
2368
	 * Before we free the objects from the requests, we need to inspect
2369
	 * them for finding the guilty party. As the requests only borrow
2370
	 * their reference to the objects, the inspection must be done first.
2371
	 */
3031 serge 2372
	for_each_ring(ring, dev_priv, i)
4539 Serge 2373
		i915_gem_reset_ring_status(dev_priv, ring);
2360 Serge 2374
 
4539 Serge 2375
	for_each_ring(ring, dev_priv, i)
2376
		i915_gem_reset_ring_cleanup(dev_priv, ring);
2377
 
5060 serge 2378
	i915_gem_context_reset(dev);
4560 Serge 2379
 
3746 Serge 2380
	i915_gem_restore_fences(dev);
6084 serge 2381
 
2382
	WARN_ON(i915_verify_lists(dev));
3031 serge 2383
}
2360 Serge 2384
 
2352 Serge 2385
/**
2386
 * This function clears the request list as sequence numbers are passed.
2387
 */
3031 serge 2388
void
5060 serge 2389
i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
2352 Serge 2390
{
6084 serge 2391
	WARN_ON(i915_verify_lists(ring->dev));
2332 Serge 2392
 
6084 serge 2393
	/* Retire requests first as we use it above for the early return.
2394
	 * If we retire requests last, we may use a later seqno and so clear
2395
	 * the requests lists without clearing the active list, leading to
2396
	 * confusion.
2397
	 */
2398
	while (!list_empty(&ring->request_list)) {
2399
		struct drm_i915_gem_request *request;
2332 Serge 2400
 
6084 serge 2401
		request = list_first_entry(&ring->request_list,
2402
					   struct drm_i915_gem_request,
2403
					   list);
2332 Serge 2404
 
6084 serge 2405
		if (!i915_gem_request_completed(request, true))
2406
			break;
2332 Serge 2407
 
6084 serge 2408
		i915_gem_request_retire(request);
2409
	}
2410
 
5060 serge 2411
	/* Move any buffers on the active list that are no longer referenced
2412
	 * by the ringbuffer to the flushing/inactive lists as appropriate,
2413
	 * before we free the context associated with the requests.
2414
	 */
2415
	while (!list_empty(&ring->active_list)) {
2416
		struct drm_i915_gem_object *obj;
2417
 
2418
		obj = list_first_entry(&ring->active_list,
2419
				      struct drm_i915_gem_object,
6084 serge 2420
				      ring_list[ring->id]);
5060 serge 2421
 
6084 serge 2422
		if (!list_empty(&obj->last_read_req[ring->id]->list))
5060 serge 2423
			break;
2424
 
6084 serge 2425
		i915_gem_object_retire__read(obj, ring->id);
5060 serge 2426
	}
2427
 
6084 serge 2428
	if (unlikely(ring->trace_irq_req &&
2429
		     i915_gem_request_completed(ring->trace_irq_req, true))) {
2352 Serge 2430
		ring->irq_put(ring);
6084 serge 2431
		i915_gem_request_assign(&ring->trace_irq_req, NULL);
2352 Serge 2432
	}
2332 Serge 2433
 
2352 Serge 2434
	WARN_ON(i915_verify_lists(ring->dev));
2435
}
2332 Serge 2436
 
4560 Serge 2437
bool
2352 Serge 2438
i915_gem_retire_requests(struct drm_device *dev)
2439
{
5060 serge 2440
	struct drm_i915_private *dev_priv = dev->dev_private;
2441
	struct intel_engine_cs *ring;
4560 Serge 2442
	bool idle = true;
2352 Serge 2443
	int i;
2332 Serge 2444
 
4560 Serge 2445
	for_each_ring(ring, dev_priv, i) {
3031 serge 2446
		i915_gem_retire_requests_ring(ring);
4560 Serge 2447
		idle &= list_empty(&ring->request_list);
5354 serge 2448
		if (i915.enable_execlists) {
2449
			unsigned long flags;
2450
 
2451
			spin_lock_irqsave(&ring->execlist_lock, flags);
2452
			idle &= list_empty(&ring->execlist_queue);
2453
			spin_unlock_irqrestore(&ring->execlist_lock, flags);
2454
 
2455
			intel_execlists_retire_requests(ring);
2456
		}
4560 Serge 2457
	}
2458
 
2459
	if (idle)
2460
		mod_delayed_work(dev_priv->wq,
2461
				   &dev_priv->mm.idle_work,
2462
				   msecs_to_jiffies(100));
2463
 
2464
	return idle;
2352 Serge 2465
}
2466
 
2360 Serge 2467
static void
2468
i915_gem_retire_work_handler(struct work_struct *work)
2469
{
4560 Serge 2470
	struct drm_i915_private *dev_priv =
2471
		container_of(work, typeof(*dev_priv), mm.retire_work.work);
2472
	struct drm_device *dev = dev_priv->dev;
2360 Serge 2473
	bool idle;
2352 Serge 2474
 
2360 Serge 2475
	/* Come back later if the device is busy... */
4560 Serge 2476
	idle = false;
2477
	if (mutex_trylock(&dev->struct_mutex)) {
2478
		idle = i915_gem_retire_requests(dev);
2479
		mutex_unlock(&dev->struct_mutex);
2480
	}
2481
	if (!idle)
3482 Serge 2482
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work,
2483
				   round_jiffies_up_relative(HZ));
4560 Serge 2484
}
2352 Serge 2485
 
4560 Serge 2486
static void
2487
i915_gem_idle_work_handler(struct work_struct *work)
2488
{
2489
	struct drm_i915_private *dev_priv =
2490
		container_of(work, typeof(*dev_priv), mm.idle_work.work);
6084 serge 2491
	struct drm_device *dev = dev_priv->dev;
2492
	struct intel_engine_cs *ring;
2493
	int i;
2352 Serge 2494
 
6084 serge 2495
	for_each_ring(ring, dev_priv, i)
2496
		if (!list_empty(&ring->request_list))
2497
			return;
2498
 
2499
	intel_mark_idle(dev);
2500
 
2501
	if (mutex_trylock(&dev->struct_mutex)) {
2502
		struct intel_engine_cs *ring;
2503
		int i;
2504
 
2505
		for_each_ring(ring, dev_priv, i)
2506
			i915_gem_batch_pool_fini(&ring->batch_pool);
2507
 
2508
		mutex_unlock(&dev->struct_mutex);
2509
	}
2360 Serge 2510
}
2511
 
2344 Serge 2512
/**
3031 serge 2513
 * Ensures that an object will eventually get non-busy by flushing any required
2514
 * write domains, emitting any outstanding lazy request and retiring and
2515
 * completed requests.
2352 Serge 2516
 */
3031 serge 2517
static int
2518
i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
2352 Serge 2519
{
6084 serge 2520
	int i;
2352 Serge 2521
 
6084 serge 2522
	if (!obj->active)
2523
		return 0;
2352 Serge 2524
 
6084 serge 2525
	for (i = 0; i < I915_NUM_RINGS; i++) {
2526
		struct drm_i915_gem_request *req;
2527
 
2528
		req = obj->last_read_req[i];
2529
		if (req == NULL)
2530
			continue;
2531
 
2532
		if (list_empty(&req->list))
2533
			goto retire;
2534
 
2535
		if (i915_gem_request_completed(req, true)) {
2536
			__i915_gem_request_retire__upto(req);
2537
retire:
2538
			i915_gem_object_retire__read(obj, i);
2539
		}
3031 serge 2540
	}
2352 Serge 2541
 
3031 serge 2542
	return 0;
2543
}
2352 Serge 2544
 
3243 Serge 2545
/**
2546
 * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
2547
 * @DRM_IOCTL_ARGS: standard ioctl arguments
2548
 *
2549
 * Returns 0 if successful, else an error is returned with the remaining time in
2550
 * the timeout parameter.
2551
 *  -ETIME: object is still busy after timeout
2552
 *  -ERESTARTSYS: signal interrupted the wait
2553
 *  -ENOENT: object doesn't exist
2554
 * Also possible, but rare:
2555
 *  -EAGAIN: GPU wedged
2556
 *  -ENOMEM: damn
2557
 *  -ENODEV: Internal IRQ fail
2558
 *  -E?: The add request failed
2559
 *
2560
 * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
2561
 * non-zero timeout parameter the wait ioctl will wait for the given number of
2562
 * nanoseconds on an object becoming unbusy. Since the wait itself does so
2563
 * without holding struct_mutex the object may become re-busied before this
2564
 * function completes. A similar but shorter race condition exists in the busy
2565
 * ioctl
2566
 */
4246 Serge 2567
int
2568
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
2569
{
5060 serge 2570
	struct drm_i915_private *dev_priv = dev->dev_private;
4246 Serge 2571
	struct drm_i915_gem_wait *args = data;
2572
	struct drm_i915_gem_object *obj;
6084 serge 2573
	struct drm_i915_gem_request *req[I915_NUM_RINGS];
4246 Serge 2574
	unsigned reset_counter;
6084 serge 2575
	int i, n = 0;
2576
	int ret;
2352 Serge 2577
 
5354 serge 2578
	if (args->flags != 0)
2579
		return -EINVAL;
2580
 
4246 Serge 2581
	ret = i915_mutex_lock_interruptible(dev);
2582
	if (ret)
2583
		return ret;
2352 Serge 2584
 
4246 Serge 2585
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->bo_handle));
2586
	if (&obj->base == NULL) {
2587
		mutex_unlock(&dev->struct_mutex);
2588
		return -ENOENT;
2589
	}
2352 Serge 2590
 
4246 Serge 2591
	/* Need to make sure the object gets inactive eventually. */
2592
	ret = i915_gem_object_flush_active(obj);
2593
	if (ret)
2594
		goto out;
2352 Serge 2595
 
6084 serge 2596
	if (!obj->active)
2597
		goto out;
2352 Serge 2598
 
4246 Serge 2599
	/* Do this after OLR check to make sure we make forward progress polling
6084 serge 2600
	 * on this IOCTL with a timeout == 0 (like busy ioctl)
4246 Serge 2601
	 */
6084 serge 2602
	if (args->timeout_ns == 0) {
4246 Serge 2603
		ret = -ETIME;
2604
		goto out;
2605
	}
2352 Serge 2606
 
4246 Serge 2607
	drm_gem_object_unreference(&obj->base);
2608
	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
6084 serge 2609
 
2610
	for (i = 0; i < I915_NUM_RINGS; i++) {
2611
		if (obj->last_read_req[i] == NULL)
2612
			continue;
2613
 
2614
		req[n++] = i915_gem_request_reference(obj->last_read_req[i]);
2615
	}
2616
 
4246 Serge 2617
	mutex_unlock(&dev->struct_mutex);
2352 Serge 2618
 
6084 serge 2619
	for (i = 0; i < n; i++) {
2620
		if (ret == 0)
2621
			ret = __i915_wait_request(req[i], reset_counter, true,
2622
						  args->timeout_ns > 0 ? &args->timeout_ns : NULL,
2623
						  file->driver_priv);
2624
		i915_gem_request_unreference__unlocked(req[i]);
2625
	}
2626
	return ret;
3243 Serge 2627
 
4246 Serge 2628
out:
2629
	drm_gem_object_unreference(&obj->base);
2630
	mutex_unlock(&dev->struct_mutex);
2631
	return ret;
2632
}
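
/* Illustrative userspace use of the wait ioctl above, assuming a
 * libdrm-style drmIoctl() wrapper; fd and bo_handle are hypothetical.
 * A timeout of 0 reimplements the busy ioctl; a positive timeout blocks
 * for at most that many nanoseconds and fails with ETIME if the object
 * is still busy:
 *
 *	struct drm_i915_gem_wait wait = {
 *		.bo_handle  = bo_handle,
 *		.flags      = 0,
 *		.timeout_ns = 1000000,		// 1 ms
 *	};
 *
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait) && errno == ETIME)
 *		;	// still busy after 1 ms
 */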
3243 Serge 2633
 
6084 serge 2634
static int
2635
__i915_gem_object_sync(struct drm_i915_gem_object *obj,
2636
		       struct intel_engine_cs *to,
2637
		       struct drm_i915_gem_request *from_req,
2638
		       struct drm_i915_gem_request **to_req)
2639
{
2640
	struct intel_engine_cs *from;
2641
	int ret;
2642
 
2643
	from = i915_gem_request_get_ring(from_req);
2644
	if (to == from)
2645
		return 0;
2646
 
2647
	if (i915_gem_request_completed(from_req, true))
2648
		return 0;
2649
 
2650
	if (!i915_semaphore_is_enabled(obj->base.dev)) {
2651
		struct drm_i915_private *i915 = to_i915(obj->base.dev);
2652
		ret = __i915_wait_request(from_req,
2653
					  atomic_read(&i915->gpu_error.reset_counter),
2654
					  i915->mm.interruptible,
2655
					  NULL,
2656
					  &i915->rps.semaphores);
2657
		if (ret)
2658
			return ret;
2659
 
2660
		i915_gem_object_retire_request(obj, from_req);
2661
	} else {
2662
		int idx = intel_ring_sync_index(from, to);
2663
		u32 seqno = i915_gem_request_get_seqno(from_req);
2664
 
2665
		WARN_ON(!to_req);
2666
 
2667
		if (seqno <= from->semaphore.sync_seqno[idx])
2668
			return 0;
2669
 
2670
		if (*to_req == NULL) {
2671
			ret = i915_gem_request_alloc(to, to->default_context, to_req);
2672
			if (ret)
2673
				return ret;
2674
		}
2675
 
2676
		trace_i915_gem_ring_sync_to(*to_req, from, from_req);
2677
		ret = to->semaphore.sync_to(*to_req, from, seqno);
2678
		if (ret)
2679
			return ret;
2680
 
2681
		/* We use last_read_req because sync_to()
2682
		 * might have just caused seqno wrap under
2683
		 * the radar.
2684
		 */
2685
		from->semaphore.sync_seqno[idx] =
2686
			i915_gem_request_get_seqno(obj->last_read_req[from->id]);
2687
	}
2688
 
2689
	return 0;
2690
}
2691
 
2352 Serge 2692
/**
3031 serge 2693
 * i915_gem_object_sync - sync an object to a ring.
2694
 *
2695
 * @obj: object which may be in use on another ring.
2696
 * @to: ring we wish to use the object on. May be NULL.
6084 serge 2697
 * @to_req: request we wish to use the object for. See below.
2698
 *          This will be allocated and returned if a request is
2699
 *          required but not passed in.
3031 serge 2700
 *
2701
 * This code is meant to abstract object synchronization with the GPU.
2702
 * Calling with NULL implies synchronizing the object with the CPU
6084 serge 2703
 * rather than a particular GPU ring. Conceptually we serialise writes
2704
 * between engines inside the GPU. We only allow one engine to write
2705
 * into a buffer at any time, but multiple readers. To ensure each has
2706
 * a coherent view of memory, we must:
3031 serge 2707
 *
6084 serge 2708
 * - If there is an outstanding write request to the object, the new
2709
 *   request must wait for it to complete (either CPU or in hw, requests
2710
 *   on the same ring will be naturally ordered).
2711
 *
2712
 * - If we are a write request (pending_write_domain is set), the new
2713
 *   request must wait for outstanding read requests to complete.
2714
 *
2715
 * For CPU synchronisation (NULL to) no request is required. For syncing with
2716
 * rings to_req must be non-NULL. However, a request does not have to be
2717
 * pre-allocated. If *to_req is NULL and sync commands will be emitted then a
2718
 * request will be allocated automatically and returned through *to_req. Note
2719
 * that it is not guaranteed that commands will be emitted (because the system
2720
 * might already be idle). Hence there is no need to create a request that
2721
 * might never have any work submitted. Note further that if a request is
2722
 * returned in *to_req, it is the responsibility of the caller to submit
2723
 * that request (after potentially adding more work to it).
2724
 *
3031 serge 2725
 * Returns 0 if successful, else propagates up the lower layer error.
2344 Serge 2726
 */
2727
int
3031 serge 2728
i915_gem_object_sync(struct drm_i915_gem_object *obj,
6084 serge 2729
		     struct intel_engine_cs *to,
2730
		     struct drm_i915_gem_request **to_req)
2344 Serge 2731
{
6084 serge 2732
	const bool readonly = obj->base.pending_write_domain == 0;
2733
	struct drm_i915_gem_request *req[I915_NUM_RINGS];
2734
	int ret, i, n;
2332 Serge 2735
 
6084 serge 2736
	if (!obj->active)
3031 serge 2737
		return 0;
2332 Serge 2738
 
6084 serge 2739
	if (to == NULL)
2740
		return i915_gem_object_wait_rendering(obj, readonly);
2332 Serge 2741
 
6084 serge 2742
	n = 0;
2743
	if (readonly) {
2744
		if (obj->last_write_req)
2745
			req[n++] = obj->last_write_req;
2746
	} else {
2747
		for (i = 0; i < I915_NUM_RINGS; i++)
2748
			if (obj->last_read_req[i])
2749
				req[n++] = obj->last_read_req[i];
2750
	}
2751
	for (i = 0; i < n; i++) {
2752
		ret = __i915_gem_object_sync(obj, to, req[i], to_req);
2753
		if (ret)
2754
			return ret;
2755
	}
3031 serge 2756
 
6084 serge 2757
	return 0;
2344 Serge 2758
}
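
/* Minimal sketch of the calling convention documented above, assuming the
 * caller is about to use 'obj' on ring 'to' and holds struct_mutex: pass
 * a NULL request pointer in, and submit the request afterwards if the
 * semaphore path had to allocate one.  example_sync_before_use() is a
 * hypothetical name. */
static int __maybe_unused example_sync_before_use(struct drm_i915_gem_object *obj,
						  struct intel_engine_cs *to)
{
	struct drm_i915_gem_request *to_req = NULL;
	int ret;

	ret = i915_gem_object_sync(obj, to, &to_req);
	if (ret)
		return ret;

	if (to_req)
		i915_add_request_no_flush(to_req);	/* caller owns submission */

	return 0;
}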
2332 Serge 2759
 
2344 Serge 2760
static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj)
2761
{
2762
	u32 old_write_domain, old_read_domains;
2332 Serge 2763
 
2344 Serge 2764
	/* Force a pagefault for domain tracking on next user access */
6084 serge 2765
	i915_gem_release_mmap(obj);
2332 Serge 2766
 
2344 Serge 2767
	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
2768
		return;
2332 Serge 2769
 
3480 Serge 2770
	/* Wait for any direct GTT access to complete */
2771
	mb();
2772
 
2344 Serge 2773
	old_read_domains = obj->base.read_domains;
2774
	old_write_domain = obj->base.write_domain;
2351 Serge 2775
 
2344 Serge 2776
	obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT;
2777
	obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
2332 Serge 2778
 
2351 Serge 2779
	trace_i915_gem_object_change_domain(obj,
2780
					    old_read_domains,
2781
					    old_write_domain);
2344 Serge 2782
}
2332 Serge 2783
 
6084 serge 2784
static int __i915_vma_unbind(struct i915_vma *vma, bool wait)
2344 Serge 2785
{
4104 Serge 2786
	struct drm_i915_gem_object *obj = vma->obj;
5060 serge 2787
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
3480 Serge 2788
	int ret;
3263 Serge 2789
 
4104 Serge 2790
	if (list_empty(&vma->vma_link))
2344 Serge 2791
		return 0;
2332 Serge 2792
 
4560 Serge 2793
	if (!drm_mm_node_allocated(&vma->node)) {
2794
		i915_gem_vma_destroy(vma);
2795
		return 0;
2796
	}
2797
 
5060 serge 2798
	if (vma->pin_count)
3031 serge 2799
		return -EBUSY;
2332 Serge 2800
 
3243 Serge 2801
	BUG_ON(obj->pages == NULL);
3031 serge 2802
 
6084 serge 2803
	if (wait) {
2804
		ret = i915_gem_object_wait_rendering(obj, false);
2805
		if (ret)
2806
			return ret;
2807
	}
2332 Serge 2808
 
6084 serge 2809
	if (i915_is_ggtt(vma->vm) &&
2810
	    vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
2811
		i915_gem_object_finish_gtt(obj);
5354 serge 2812
 
6084 serge 2813
		/* release the fence reg _after_ flushing */
2814
		ret = i915_gem_object_put_fence(obj);
2815
		if (ret)
2816
			return ret;
5060 serge 2817
	}
2332 Serge 2818
 
4104 Serge 2819
	trace_i915_vma_unbind(vma);
2332 Serge 2820
 
6084 serge 2821
	vma->vm->unbind_vma(vma);
2822
	vma->bound = 0;
2332 Serge 2823
 
5060 serge 2824
	list_del_init(&vma->mm_list);
6084 serge 2825
	if (i915_is_ggtt(vma->vm)) {
2826
		if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) {
2827
			obj->map_and_fenceable = false;
2828
		} else if (vma->ggtt_view.pages) {
2829
			sg_free_table(vma->ggtt_view.pages);
2830
			kfree(vma->ggtt_view.pages);
2831
		}
2832
		vma->ggtt_view.pages = NULL;
2833
	}
2332 Serge 2834
 
4104 Serge 2835
	drm_mm_remove_node(&vma->node);
2836
	i915_gem_vma_destroy(vma);
2837
 
2838
	/* Since the unbound list is global, only move to that list if
4560 Serge 2839
	 * no more VMAs exist. */
6084 serge 2840
	if (list_empty(&obj->vma_list))
4104 Serge 2841
		list_move_tail(&obj->global_list, &dev_priv->mm.unbound_list);
2842
 
4560 Serge 2843
	/* And finally now the object is completely decoupled from this vma,
2844
	 * we can drop its hold on the backing storage and allow it to be
2845
	 * reaped by the shrinker.
2846
	 */
2847
	i915_gem_object_unpin_pages(obj);
2848
 
2344 Serge 2849
	return 0;
2850
}
2332 Serge 2851
 
6084 serge 2852
int i915_vma_unbind(struct i915_vma *vma)
2853
{
2854
	return __i915_vma_unbind(vma, true);
2855
}
2856
 
2857
int __i915_vma_unbind_no_wait(struct i915_vma *vma)
2858
{
2859
	return __i915_vma_unbind(vma, false);
2860
}
2861
 
3031 serge 2862
int i915_gpu_idle(struct drm_device *dev)
2344 Serge 2863
{
5060 serge 2864
	struct drm_i915_private *dev_priv = dev->dev_private;
2865
	struct intel_engine_cs *ring;
2344 Serge 2866
	int ret, i;
2332 Serge 2867
 
2344 Serge 2868
	/* Flush everything onto the inactive list. */
3031 serge 2869
	for_each_ring(ring, dev_priv, i) {
5354 serge 2870
		if (!i915.enable_execlists) {
6084 serge 2871
			struct drm_i915_gem_request *req;
3031 serge 2872
 
6084 serge 2873
			ret = i915_gem_request_alloc(ring, ring->default_context, &req);
2352 Serge 2874
			if (ret)
2875
				return ret;
2344 Serge 2876
 
6084 serge 2877
			ret = i915_switch_context(req);
2878
			if (ret) {
2879
				i915_gem_request_cancel(req);
2880
				return ret;
2881
			}
2344 Serge 2882
 
6084 serge 2883
			i915_add_request_no_flush(req);
2884
		}
2332 Serge 2885
 
6084 serge 2886
		ret = intel_ring_idle(ring);
3031 serge 2887
		if (ret)
2888
			return ret;
2889
	}
2332 Serge 2890
 
6084 serge 2891
	WARN_ON(i915_verify_lists(dev));
3031 serge 2892
	return 0;
2893
}
2332 Serge 2894
 
5354 serge 2895
static bool i915_gem_valid_gtt_space(struct i915_vma *vma,
3031 serge 2896
				     unsigned long cache_level)
2897
{
5354 serge 2898
	struct drm_mm_node *gtt_space = &vma->node;
3031 serge 2899
	struct drm_mm_node *other;
2332 Serge 2900
 
5354 serge 2901
	/*
2902
	 * On some machines we have to be careful when putting differing types
2903
	 * of snoopable memory together to avoid the prefetcher crossing memory
2904
	 * domains and dying. During vm initialisation, we decide whether or not
2905
	 * these constraints apply and set the drm_mm.color_adjust
2906
	 * appropriately.
3031 serge 2907
	 */
5354 serge 2908
	if (vma->vm->mm.color_adjust == NULL)
3031 serge 2909
		return true;
2332 Serge 2910
 
4104 Serge 2911
	if (!drm_mm_node_allocated(gtt_space))
3031 serge 2912
		return true;
2332 Serge 2913
 
3031 serge 2914
	if (list_empty(&gtt_space->node_list))
2915
		return true;
2332 Serge 2916
 
3031 serge 2917
	other = list_entry(gtt_space->node_list.prev, struct drm_mm_node, node_list);
2918
	if (other->allocated && !other->hole_follows && other->color != cache_level)
2919
		return false;
2344 Serge 2920
 
3031 serge 2921
	other = list_entry(gtt_space->node_list.next, struct drm_mm_node, node_list);
2922
	if (other->allocated && !gtt_space->hole_follows && other->color != cache_level)
2923
		return false;
2344 Serge 2924
 
3031 serge 2925
	return true;
2926
}
2344 Serge 2927
 
2332 Serge 2928
/**
6084 serge 2929
 * Finds free space in the GTT aperture and binds the object or a view of it
2930
 * there.
2332 Serge 2931
 */
5060 serge 2932
static struct i915_vma *
4104 Serge 2933
i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
2934
			   struct i915_address_space *vm,
6084 serge 2935
			   const struct i915_ggtt_view *ggtt_view,
2936
			   unsigned alignment,
5060 serge 2937
			   uint64_t flags)
2332 Serge 2938
{
2939
	struct drm_device *dev = obj->base.dev;
5060 serge 2940
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 2941
	u32 fence_alignment, unfenced_alignment;
2942
	u32 search_flag, alloc_flag;
2943
	u64 start, end;
2944
	u64 size, fence_size;
4104 Serge 2945
	struct i915_vma *vma;
2332 Serge 2946
	int ret;
2326 Serge 2947
 
6084 serge 2948
	if (i915_is_ggtt(vm)) {
2949
		u32 view_size;
2332 Serge 2950
 
6084 serge 2951
		if (WARN_ON(!ggtt_view))
2952
			return ERR_PTR(-EINVAL);
2953
 
2954
		view_size = i915_ggtt_view_size(obj, ggtt_view);
2955
 
2956
		fence_size = i915_gem_get_gtt_size(dev,
2957
						   view_size,
2958
						   obj->tiling_mode);
2959
		fence_alignment = i915_gem_get_gtt_alignment(dev,
2960
							     view_size,
2961
							     obj->tiling_mode,
2962
							     true);
2963
		unfenced_alignment = i915_gem_get_gtt_alignment(dev,
2964
								view_size,
2965
								obj->tiling_mode,
2966
								false);
2967
		size = flags & PIN_MAPPABLE ? fence_size : view_size;
2968
	} else {
2969
		fence_size = i915_gem_get_gtt_size(dev,
2970
						   obj->base.size,
2971
						   obj->tiling_mode);
2972
		fence_alignment = i915_gem_get_gtt_alignment(dev,
2973
							     obj->base.size,
2974
							     obj->tiling_mode,
2975
							     true);
2976
		unfenced_alignment =
2977
			i915_gem_get_gtt_alignment(dev,
2978
						   obj->base.size,
2979
						   obj->tiling_mode,
2980
						   false);
2981
		size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
2982
	}
2983
 
2984
	start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
2985
	end = vm->total;
2986
	if (flags & PIN_MAPPABLE)
2987
		end = min_t(u64, end, dev_priv->gtt.mappable_end);
2988
	if (flags & PIN_ZONE_4G)
2989
		end = min_t(u64, end, (1ULL << 32));
2990
 
2332 Serge 2991
	if (alignment == 0)
5060 serge 2992
		alignment = flags & PIN_MAPPABLE ? fence_alignment :
2332 Serge 2993
						unfenced_alignment;
5060 serge 2994
	if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
6084 serge 2995
		DRM_DEBUG("Invalid object (view type=%u) alignment requested %u\n",
2996
			  ggtt_view ? ggtt_view->type : 0,
2997
			  alignment);
5060 serge 2998
		return ERR_PTR(-EINVAL);
2332 Serge 2999
	}
3000
 
6084 serge 3001
	/* If binding the object/GGTT view requires more space than the entire
3002
	 * aperture has, reject it early before evicting everything in a vain
3003
	 * attempt to find space.
2332 Serge 3004
	 */
6084 serge 3005
	if (size > end) {
3006
		DRM_DEBUG("Attempting to bind an object (view type=%u) larger than the aperture: size=%llu > %s aperture=%llu\n",
3007
			  ggtt_view ? ggtt_view->type : 0,
3008
			  size,
5060 serge 3009
			  flags & PIN_MAPPABLE ? "mappable" : "total",
3010
			  end);
3011
		return ERR_PTR(-E2BIG);
2332 Serge 3012
	}
3013
 
3031 serge 3014
	ret = i915_gem_object_get_pages(obj);
3015
	if (ret)
5060 serge 3016
		return ERR_PTR(ret);
3031 serge 3017
 
3243 Serge 3018
	i915_gem_object_pin_pages(obj);
3019
 
6084 serge 3020
	vma = ggtt_view ? i915_gem_obj_lookup_or_create_ggtt_vma(obj, ggtt_view) :
3021
			  i915_gem_obj_lookup_or_create_vma(obj, vm);
3022
 
5060 serge 3023
	if (IS_ERR(vma))
4104 Serge 3024
		goto err_unpin;
3243 Serge 3025
 
6084 serge 3026
	if (flags & PIN_HIGH) {
3027
		search_flag = DRM_MM_SEARCH_BELOW;
3028
		alloc_flag = DRM_MM_CREATE_TOP;
3029
	} else {
3030
		search_flag = DRM_MM_SEARCH_DEFAULT;
3031
		alloc_flag = DRM_MM_CREATE_DEFAULT;
3032
	}
3033
 
4104 Serge 3034
search_free:
3035
	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
3036
						  size, alignment,
5060 serge 3037
						  obj->cache_level,
3038
						  start, end,
6084 serge 3039
						  search_flag,
3040
						  alloc_flag);
3243 Serge 3041
	if (ret) {
2332 Serge 3042
 
4104 Serge 3043
		goto err_free_vma;
2332 Serge 3044
	}
5354 serge 3045
	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
4104 Serge 3046
		ret = -EINVAL;
3047
		goto err_remove_node;
3031 serge 3048
	}
2332 Serge 3049
 
6084 serge 3050
	trace_i915_vma_bind(vma, flags);
3051
	ret = i915_vma_bind(vma, obj->cache_level, flags);
4104 Serge 3052
	if (ret)
3053
		goto err_remove_node;
2332 Serge 3054
 
4104 Serge 3055
	list_move_tail(&obj->global_list, &dev_priv->mm.bound_list);
3056
	list_add_tail(&vma->mm_list, &vm->inactive_list);
2332 Serge 3057
 
5060 serge 3058
	return vma;
4104 Serge 3059
 
3060
err_remove_node:
3061
	drm_mm_remove_node(&vma->node);
3062
err_free_vma:
3063
	i915_gem_vma_destroy(vma);
5060 serge 3064
	vma = ERR_PTR(ret);
4104 Serge 3065
err_unpin:
3066
	i915_gem_object_unpin_pages(obj);
5060 serge 3067
	return vma;
2332 Serge 3068
}
3069
 
4104 Serge 3070
bool
3071
i915_gem_clflush_object(struct drm_i915_gem_object *obj,
3072
			bool force)
2332 Serge 3073
{
3074
	/* If we don't have a page list set up, then we're not pinned
3075
	 * to GPU, and we can ignore the cache flush because it'll happen
3076
	 * again at bind time.
3077
	 */
3243 Serge 3078
	if (obj->pages == NULL)
4104 Serge 3079
		return false;
2332 Serge 3080
 
3480 Serge 3081
	/*
3082
	 * Stolen memory is always coherent with the GPU as it is explicitly
3083
	 * marked as wc by the system, or the system is cache-coherent.
3084
	 */
5354 serge 3085
	if (obj->stolen || obj->phys_handle)
4104 Serge 3086
		return false;
3480 Serge 3087
 
2332 Serge 3088
	/* If the GPU is snooping the contents of the CPU cache,
3089
	 * we do not need to manually clear the CPU cache lines.  However,
3090
	 * the caches are only snooped when the render cache is
3091
	 * flushed/invalidated.  As we always have to emit invalidations
3092
	 * and flushes when moving into and out of the RENDER domain, correct
3093
	 * snooping behaviour occurs naturally as the result of our domain
3094
	 * tracking.
3095
	 */
6084 serge 3096
	if (!force && cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) {
3097
		obj->cache_dirty = true;
4104 Serge 3098
		return false;
6084 serge 3099
	}
2332 Serge 3100
 
4293 Serge 3101
	trace_i915_gem_object_clflush(obj);
3102
	drm_clflush_sg(obj->pages);
6084 serge 3103
	obj->cache_dirty = false;
2344 Serge 3104
 
4104 Serge 3105
	return true;
2332 Serge 3106
}
3107
 
2344 Serge 3108
/** Flushes the GTT write domain for the object if it's dirty. */
3109
static void
3110
i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
3111
{
3112
	uint32_t old_write_domain;
2332 Serge 3113
 
2344 Serge 3114
	if (obj->base.write_domain != I915_GEM_DOMAIN_GTT)
3115
		return;
2332 Serge 3116
 
2344 Serge 3117
	/* No actual flushing is required for the GTT write domain.  Writes
3118
	 * to it immediately go to main memory as far as we know, so there's
3119
	 * no chipset flush.  It also doesn't land in render cache.
3120
	 *
3121
	 * However, we do have to enforce the order so that all writes through
3122
	 * the GTT land before any writes to the device, such as updates to
3123
	 * the GATT itself.
3124
	 */
3125
	wmb();
2332 Serge 3126
 
2344 Serge 3127
	old_write_domain = obj->base.write_domain;
3128
	obj->base.write_domain = 0;
2332 Serge 3129
 
6084 serge 3130
	intel_fb_obj_flush(obj, false, ORIGIN_GTT);
5354 serge 3131
 
2351 Serge 3132
	trace_i915_gem_object_change_domain(obj,
3133
					    obj->base.read_domains,
3134
					    old_write_domain);
2344 Serge 3135
}
2332 Serge 3136
 
3137
/** Flushes the CPU write domain for the object if it's dirty. */
2326 Serge 3138
static void
6084 serge 3139
i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj)
2332 Serge 3140
{
3141
	uint32_t old_write_domain;
3142
 
3143
	if (obj->base.write_domain != I915_GEM_DOMAIN_CPU)
3144
		return;
3145
 
6084 serge 3146
	if (i915_gem_clflush_object(obj, obj->pin_display))
3147
		i915_gem_chipset_flush(obj->base.dev);
4104 Serge 3148
 
2332 Serge 3149
	old_write_domain = obj->base.write_domain;
3150
	obj->base.write_domain = 0;
3151
 
6084 serge 3152
	intel_fb_obj_flush(obj, false, ORIGIN_CPU);
5354 serge 3153
 
2351 Serge 3154
	trace_i915_gem_object_change_domain(obj,
3155
					    obj->base.read_domains,
3156
					    old_write_domain);
2332 Serge 3157
}
3158
 
3159
/**
3160
 * Moves a single object to the GTT read, and possibly write domain.
3161
 *
3162
 * This function returns when the move is complete, including waiting on
3163
 * flushes to occur.
3164
 */
3165
int
3166
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
3167
{
3168
	uint32_t old_write_domain, old_read_domains;
6084 serge 3169
	struct i915_vma *vma;
2332 Serge 3170
	int ret;
3171
 
3172
	if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
3173
		return 0;
3174
 
3031 serge 3175
	ret = i915_gem_object_wait_rendering(obj, !write);
6084 serge 3176
	if (ret)
3177
		return ret;
2332 Serge 3178
 
6084 serge 3179
	/* Flush and acquire obj->pages so that we are coherent through
3180
	 * direct access in memory with previous cached writes through
3181
	 * shmemfs and that our cache domain tracking remains valid.
3182
	 * For example, if the obj->filp was moved to swap without us
3183
	 * being notified and releasing the pages, we would mistakenly
3184
	 * continue to assume that the obj remained out of the CPU cached
3185
	 * domain.
3186
	 */
3187
	ret = i915_gem_object_get_pages(obj);
3188
	if (ret)
3189
		return ret;
2332 Serge 3190
 
6084 serge 3191
	i915_gem_object_flush_cpu_write_domain(obj);
3192
 
3480 Serge 3193
	/* Serialise direct access to this object with the barriers for
3194
	 * coherent writes from the GPU, by effectively invalidating the
3195
	 * GTT domain upon first access.
3196
	 */
3197
	if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0)
3198
		mb();
3199
 
2332 Serge 3200
	old_write_domain = obj->base.write_domain;
3201
	old_read_domains = obj->base.read_domains;
3202
 
3203
	/* It should now be out of any other write domains, and we can update
3204
	 * the domain values for our changes.
3205
	 */
3206
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
3207
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
3208
	if (write) {
3209
		obj->base.read_domains = I915_GEM_DOMAIN_GTT;
3210
		obj->base.write_domain = I915_GEM_DOMAIN_GTT;
3211
		obj->dirty = 1;
3212
	}
3213
 
2351 Serge 3214
	trace_i915_gem_object_change_domain(obj,
3215
					    old_read_domains,
3216
					    old_write_domain);
3217
 
3031 serge 3218
	/* And bump the LRU for this access */
6084 serge 3219
	vma = i915_gem_obj_to_ggtt(obj);
3220
	if (vma && drm_mm_node_allocated(&vma->node) && !obj->active)
3221
		list_move_tail(&vma->mm_list,
3222
			       &to_i915(obj->base.dev)->gtt.base.inactive_list);
3031 serge 3223
 
2332 Serge 3224
	return 0;
3225
}
3226
 
6084 serge 3227
/**
3228
 * Changes the cache-level of an object across all VMA.
3229
 *
3230
 * After this function returns, the object will be in the new cache-level
3231
 * across all GTT and the contents of the backing storage will be coherent,
3232
 * with respect to the new cache-level. In order to keep the backing storage
3233
 * coherent for all users, we only allow a single cache level to be set
3234
 * globally on the object and prevent it from being changed whilst the
3235
 * hardware is reading from the object. That is if the object is currently
3236
 * on the scanout it will be set to uncached (or equivalent display
3237
 * cache coherency) and all non-MOCS GPU access will also be uncached so
3238
 * that all direct access to the scanout remains coherent.
3239
 */
2335 Serge 3240
int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
3241
				    enum i915_cache_level cache_level)
3242
{
3031 serge 3243
	struct drm_device *dev = obj->base.dev;
5060 serge 3244
	struct i915_vma *vma, *next;
6084 serge 3245
	bool bound = false;
3246
	int ret = 0;
2332 Serge 3247
 
2335 Serge 3248
	if (obj->cache_level == cache_level)
6084 serge 3249
		goto out;
2332 Serge 3250
 
6084 serge 3251
	/* Inspect the list of currently bound VMA and unbind any that would
3252
	 * be invalid given the new cache-level. This is principally to
3253
	 * catch the issue of the CS prefetch crossing page boundaries and
3254
	 * reading an invalid PTE on older architectures.
3255
	 */
3256
	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
3257
		if (!drm_mm_node_allocated(&vma->node))
3258
			continue;
2332 Serge 3259
 
6084 serge 3260
		if (vma->pin_count) {
3261
			DRM_DEBUG("can not change the cache level of pinned objects\n");
3262
			return -EBUSY;
3263
		}
3264
 
5354 serge 3265
		if (!i915_gem_valid_gtt_space(vma, cache_level)) {
4104 Serge 3266
			ret = i915_vma_unbind(vma);
6084 serge 3267
			if (ret)
3268
				return ret;
3269
		} else
3270
			bound = true;
3031 serge 3271
	}
3272
 
6084 serge 3273
	/* We can reuse the existing drm_mm nodes but need to change the
3274
	 * cache-level on the PTE. We could simply unbind them all and
3275
	 * rebind with the correct cache-level on next use. However since
3276
	 * we already have a valid slot, dma mapping, pages etc, we may as well
3277
	 * rewrite the PTE in the belief that doing so tramples upon less
3278
	 * state and so involves less work.
3279
	 */
3280
	if (bound) {
3281
		/* Before we change the PTE, the GPU must not be accessing it.
3282
		 * If we wait upon the object, we know that all the bound
3283
		 * VMA are no longer active.
3284
		 */
3285
		ret = i915_gem_object_wait_rendering(obj, false);
2335 Serge 3286
		if (ret)
3287
			return ret;
2332 Serge 3288
 
6084 serge 3289
		if (!HAS_LLC(dev) && cache_level != I915_CACHE_NONE) {
3290
			/* Access to snoopable pages through the GTT is
3291
			 * incoherent and on some machines causes a hard
3292
			 * lockup. Relinquish the CPU mmapping to force
3293
			 * userspace to refault in the pages and we can
3294
			 * then double check if the GTT mapping is still
3295
			 * valid for that pointer access.
3296
			 */
3297
			i915_gem_release_mmap(obj);
2332 Serge 3298
 
6084 serge 3299
			/* As we no longer need a fence for GTT access,
3300
			 * we can relinquish it now (and so prevent having
3301
			 * to steal a fence from someone else on the next
3302
			 * fence request). Note GPU activity would have
3303
			 * dropped the fence as all snoopable access is
3304
			 * supposed to be linear.
3305
			 */
2335 Serge 3306
			ret = i915_gem_object_put_fence(obj);
3307
			if (ret)
3308
				return ret;
6084 serge 3309
		} else {
3310
			/* We either have incoherent backing store and
3311
			 * so no GTT access or the architecture is fully
3312
			 * coherent. In such cases, existing GTT mmaps
3313
			 * ignore the cache bit in the PTE and we can
3314
			 * rewrite it without confusing the GPU or having
3315
			 * to force userspace to fault back in its mmaps.
3316
			 */
3317
		}
2332 Serge 3318
 
6084 serge 3319
		list_for_each_entry(vma, &obj->vma_list, vma_link) {
3320
			if (!drm_mm_node_allocated(&vma->node))
3321
				continue;
3322
 
3323
			ret = i915_vma_bind(vma, cache_level, PIN_UPDATE);
3324
			if (ret)
3325
				return ret;
3326
		}
2335 Serge 3327
	}
2332 Serge 3328
 
4104 Serge 3329
	list_for_each_entry(vma, &obj->vma_list, vma_link)
3330
		vma->node.color = cache_level;
3331
	obj->cache_level = cache_level;
3332
 
6084 serge 3333
out:
3334
	/* Flush the dirty CPU caches to the backing storage so that the
3335
	 * object is now coherent at its new cache level (with respect
3336
	 * to the access domain).
3337
	 */
3338
	if (obj->cache_dirty &&
3339
	    obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
3340
	    cpu_write_needs_clflush(obj)) {
3341
		if (i915_gem_clflush_object(obj, true))
3342
			i915_gem_chipset_flush(obj->base.dev);
3343
	}
2332 Serge 3344
 
2335 Serge 3345
	return 0;
3346
}
2332 Serge 3347
 
3260 Serge 3348
int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
3349
			       struct drm_file *file)
3350
{
3351
	struct drm_i915_gem_caching *args = data;
3352
	struct drm_i915_gem_object *obj;
3353
 
3354
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
6084 serge 3355
	if (&obj->base == NULL)
3356
		return -ENOENT;
3260 Serge 3357
 
4104 Serge 3358
	switch (obj->cache_level) {
3359
	case I915_CACHE_LLC:
3360
	case I915_CACHE_L3_LLC:
3361
		args->caching = I915_CACHING_CACHED;
3362
		break;
3260 Serge 3363
 
4104 Serge 3364
	case I915_CACHE_WT:
3365
		args->caching = I915_CACHING_DISPLAY;
3366
		break;
3367
 
3368
	default:
3369
		args->caching = I915_CACHING_NONE;
3370
		break;
3371
	}
3372
 
6084 serge 3373
	drm_gem_object_unreference_unlocked(&obj->base);
3374
	return 0;
3260 Serge 3375
}
3376
 
3377
int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
3378
			       struct drm_file *file)
3379
{
6084 serge 3380
	struct drm_i915_private *dev_priv = dev->dev_private;
3260 Serge 3381
	struct drm_i915_gem_caching *args = data;
3382
	struct drm_i915_gem_object *obj;
3383
	enum i915_cache_level level;
3384
	int ret;
3385
 
3386
	switch (args->caching) {
3387
	case I915_CACHING_NONE:
3388
		level = I915_CACHE_NONE;
3389
		break;
3390
	case I915_CACHING_CACHED:
6084 serge 3391
		/*
3392
		 * Due to a HW issue on BXT A stepping, GPU stores via a
3393
		 * snooped mapping may leave stale data in a corresponding CPU
3394
		 * cacheline, whereas normally such cachelines would get
3395
		 * invalidated.
3396
		 */
3397
		if (IS_BROXTON(dev) && INTEL_REVID(dev) < BXT_REVID_B0)
3398
			return -ENODEV;
3399
 
3260 Serge 3400
		level = I915_CACHE_LLC;
3401
		break;
4104 Serge 3402
	case I915_CACHING_DISPLAY:
3403
		level = HAS_WT(dev) ? I915_CACHE_WT : I915_CACHE_NONE;
3404
		break;
3260 Serge 3405
	default:
3406
		return -EINVAL;
3407
	}
3408
 
6084 serge 3409
	intel_runtime_pm_get(dev_priv);
3410
 
3260 Serge 3411
	ret = i915_mutex_lock_interruptible(dev);
3412
	if (ret)
6084 serge 3413
		goto rpm_put;
3260 Serge 3414
 
3415
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3416
	if (&obj->base == NULL) {
3417
		ret = -ENOENT;
3418
		goto unlock;
3419
	}
3420
 
3421
	ret = i915_gem_object_set_cache_level(obj, level);
3422
 
3423
	drm_gem_object_unreference(&obj->base);
3424
unlock:
3425
	mutex_unlock(&dev->struct_mutex);
6084 serge 3426
rpm_put:
3427
	intel_runtime_pm_put(dev_priv);
3428
 
3260 Serge 3429
	return ret;
3430
}
3431
 
2335 Serge 3432
/*
3433
 * Prepare buffer for display plane (scanout, cursors, etc).
3434
 * Can be called from an uninterruptible phase (modesetting) and allows
3435
 * any flushes to be pipelined (for pageflips).
3436
 */
3437
int
3438
i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
3439
				     u32 alignment,
6084 serge 3440
				     struct intel_engine_cs *pipelined,
3441
				     struct drm_i915_gem_request **pipelined_request,
3442
				     const struct i915_ggtt_view *view)
2335 Serge 3443
{
3444
	u32 old_read_domains, old_write_domain;
3445
	int ret;
2332 Serge 3446
 
6084 serge 3447
	ret = i915_gem_object_sync(obj, pipelined, pipelined_request);
2335 Serge 3448
	if (ret)
3449
		return ret;
2332 Serge 3450
 
4104 Serge 3451
	/* Mark the pin_display early so that we account for the
3452
	 * display coherency whilst setting up the cache domains.
3453
	 */
6084 serge 3454
	obj->pin_display++;
4104 Serge 3455
 
2335 Serge 3456
	/* The display engine is not coherent with the LLC cache on gen6.  As
3457
	 * a result, we make sure that the pinning that is about to occur is
3458
	 * done with uncached PTEs. This is the lowest common denominator for all
3459
	 * chipsets.
3460
	 *
3461
	 * However for gen6+, we could do better by using the GFDT bit instead
3462
	 * of uncaching, which would allow us to flush all the LLC-cached data
3463
	 * with that bit in the PTE to main memory with just one PIPE_CONTROL.
3464
	 */
4104 Serge 3465
	ret = i915_gem_object_set_cache_level(obj,
3466
					      HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE);
2360 Serge 3467
	if (ret)
4104 Serge 3468
		goto err_unpin_display;
2332 Serge 3469
 
2335 Serge 3470
	/* As the user may map the buffer once pinned in the display plane
3471
	 * (e.g. libkms for the bootup splash), we have to ensure that we
3472
	 * always use map_and_fenceable for all scanout buffers.
3473
	 */
6084 serge 3474
	ret = i915_gem_object_ggtt_pin(obj, view, alignment,
3475
				       view->type == I915_GGTT_VIEW_NORMAL ?
3476
				       PIN_MAPPABLE : 0);
2335 Serge 3477
	if (ret)
4104 Serge 3478
		goto err_unpin_display;
2332 Serge 3479
 
6084 serge 3480
	i915_gem_object_flush_cpu_write_domain(obj);
2332 Serge 3481
 
2335 Serge 3482
	old_write_domain = obj->base.write_domain;
3483
	old_read_domains = obj->base.read_domains;
2332 Serge 3484
 
2335 Serge 3485
	/* It should now be out of any other write domains, and we can update
3486
	 * the domain values for our changes.
3487
	 */
3031 serge 3488
	obj->base.write_domain = 0;
2335 Serge 3489
	obj->base.read_domains |= I915_GEM_DOMAIN_GTT;
2332 Serge 3490
 
2351 Serge 3491
	trace_i915_gem_object_change_domain(obj,
3492
					    old_read_domains,
3493
					    old_write_domain);
2332 Serge 3494
 
2335 Serge 3495
	return 0;
4104 Serge 3496
 
3497
err_unpin_display:
6084 serge 3498
	obj->pin_display--;
4104 Serge 3499
	return ret;
2335 Serge 3500
}
2332 Serge 3501
 
4104 Serge 3502
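/* Undo a successful i915_gem_object_pin_to_display_plane(): drop the GGTT
 * view pin taken for scanout and release the pin_display accounting.
 */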
void
6084 serge 3503
i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj,
3504
					 const struct i915_ggtt_view *view)
4104 Serge 3505
{
6084 serge 3506
	if (WARN_ON(obj->pin_display == 0))
3507
		return;
4104 Serge 3508
 
6084 serge 3509
	i915_gem_object_ggtt_unpin_view(obj, view);
2332 Serge 3510
 
6084 serge 3511
	obj->pin_display--;
2344 Serge 3512
}
2332 Serge 3513
 
2344 Serge 3514
/**
3515
 * Moves a single object to the CPU read, and possibly write domain.
3516
 *
3517
 * This function returns when the move is complete, including waiting on
3518
 * flushes to occur.
3519
 */
3031 serge 3520
int
2344 Serge 3521
i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
3522
{
3523
	uint32_t old_write_domain, old_read_domains;
3524
	int ret;
2332 Serge 3525
 
2344 Serge 3526
	if (obj->base.write_domain == I915_GEM_DOMAIN_CPU)
3527
		return 0;
2332 Serge 3528
 
3031 serge 3529
	ret = i915_gem_object_wait_rendering(obj, !write);
2344 Serge 3530
	if (ret)
3531
		return ret;
2332 Serge 3532
 
2344 Serge 3533
	i915_gem_object_flush_gtt_write_domain(obj);
2332 Serge 3534
 
2344 Serge 3535
	old_write_domain = obj->base.write_domain;
3536
	old_read_domains = obj->base.read_domains;
2332 Serge 3537
 
2344 Serge 3538
	/* Flush the CPU cache if it's still invalid. */
3539
	if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) {
4104 Serge 3540
		i915_gem_clflush_object(obj, false);
2332 Serge 3541
 
2344 Serge 3542
		obj->base.read_domains |= I915_GEM_DOMAIN_CPU;
3543
	}
2332 Serge 3544
 
2344 Serge 3545
	/* It should now be out of any other write domains, and we can update
3546
	 * the domain values for our changes.
3547
	 */
3548
	BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2332 Serge 3549
 
2344 Serge 3550
	/* If we're writing through the CPU, then the GPU read domains will
3551
	 * need to be invalidated at next use.
3552
	 */
3553
	if (write) {
3554
		obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3555
		obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3556
	}
2332 Serge 3557
 
2351 Serge 3558
	trace_i915_gem_object_change_domain(obj,
3559
					    old_read_domains,
3560
					    old_write_domain);
2332 Serge 3561
 
2344 Serge 3562
	return 0;
3563
}
2332 Serge 3564
 
3031 serge 3565
/* Throttle our rendering by waiting until the ring has completed our requests
3566
 * emitted over 20 msec ago.
2344 Serge 3567
 *
3031 serge 3568
 * Note that if we were to use the current jiffies each time around the loop,
3569
 * we wouldn't escape the function with any frames outstanding if the time to
3570
 * render a frame was over 20ms.
3571
 *
3572
 * This should get us reasonable parallelism between CPU and GPU but also
3573
 * relatively low latency when blocking on a particular request to finish.
2344 Serge 3574
 */
3031 serge 3575
static int
3576
i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
2344 Serge 3577
{
3031 serge 3578
	struct drm_i915_private *dev_priv = dev->dev_private;
3579
	struct drm_i915_file_private *file_priv = file->driver_priv;
6084 serge 3580
	unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
3581
	struct drm_i915_gem_request *request, *target = NULL;
3480 Serge 3582
	unsigned reset_counter;
3031 serge 3583
	int ret;
2332 Serge 3584
 
3480 Serge 3585
	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
3586
	if (ret)
3587
		return ret;
2332 Serge 3588
 
3480 Serge 3589
	ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
3590
	if (ret)
3591
		return ret;
3592
 
3031 serge 3593
	spin_lock(&file_priv->mm.lock);
3594
	list_for_each_entry(request, &file_priv->mm.request_list, client_list) {
3595
		if (time_after_eq(request->emitted_jiffies, recent_enough))
3596
			break;
2332 Serge 3597
 
6084 serge 3598
		/*
3599
		 * Note that the request might not have been submitted yet,
3600
		 * in which case emitted_jiffies will be zero.
3601
		 */
3602
		if (!request->emitted_jiffies)
3603
			continue;
3604
 
3605
		target = request;
3031 serge 3606
	}
3480 Serge 3607
	reset_counter = atomic_read(&dev_priv->gpu_error.reset_counter);
6084 serge 3608
	if (target)
3609
		i915_gem_request_reference(target);
3031 serge 3610
	spin_unlock(&file_priv->mm.lock);
2332 Serge 3611
 
6084 serge 3612
	if (target == NULL)
3031 serge 3613
		return 0;
2332 Serge 3614
 
6084 serge 3615
	ret = __i915_wait_request(target, reset_counter, true, NULL, NULL);
3031 serge 3616
	if (ret == 0)
3617
		queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, 0);
2332 Serge 3618
 
6084 serge 3619
	i915_gem_request_unreference__unlocked(target);
3620
 
3031 serge 3621
	return ret;
2352 Serge 3622
}
2332 Serge 3623
 
5060 serge 3624
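/* Check whether an already-bound VMA violates the constraints of a new pin
 * request: wrong alignment, not mappable although PIN_MAPPABLE is requested,
 * or placed below the requested offset bias. A misplaced VMA has to be
 * unbound and rebound before it can be pinned again.
 */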
static bool
3625
i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
3626
{
3627
	struct drm_i915_gem_object *obj = vma->obj;
3628
 
3629
	if (alignment &&
3630
	    vma->node.start & (alignment - 1))
3631
		return true;
3632
 
3633
	if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
3634
		return true;
3635
 
3636
	if (flags & PIN_OFFSET_BIAS &&
3637
	    vma->node.start < (flags & PIN_OFFSET_MASK))
3638
		return true;
3639
 
3640
	return false;
3641
}
3642
 
6084 serge 3643
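/* Recompute obj->map_and_fenceable for @vma: the object qualifies only if
 * the VMA is exactly fence-sized, fence-aligned and lies entirely within
 * the mappable part of the global GTT aperture.
 */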
void __i915_vma_set_map_and_fenceable(struct i915_vma *vma)
2332 Serge 3644
{
6084 serge 3645
	struct drm_i915_gem_object *obj = vma->obj;
3646
	bool mappable, fenceable;
3647
	u32 fence_size, fence_alignment;
3648
 
3649
	fence_size = i915_gem_get_gtt_size(obj->base.dev,
3650
					   obj->base.size,
3651
					   obj->tiling_mode);
3652
	fence_alignment = i915_gem_get_gtt_alignment(obj->base.dev,
3653
						     obj->base.size,
3654
						     obj->tiling_mode,
3655
						     true);
3656
 
3657
	fenceable = (vma->node.size == fence_size &&
3658
		     (vma->node.start & (fence_alignment - 1)) == 0);
3659
 
3660
	mappable = (vma->node.start + fence_size <=
3661
		    to_i915(obj->base.dev)->gtt.mappable_end);
3662
 
3663
	obj->map_and_fenceable = mappable && fenceable;
3664
}
3665
 
3666
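/* Common backend for i915_gem_object_pin() and i915_gem_object_ggtt_pin():
 * look up the VMA for the given address space or GGTT view, unbind it if it
 * is misplaced for this request, (re)bind it with the requested flags and
 * finally raise its pin_count.
 */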
static int
3667
i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
3668
		       struct i915_address_space *vm,
3669
		       const struct i915_ggtt_view *ggtt_view,
3670
		       uint32_t alignment,
3671
		       uint64_t flags)
3672
{
5060 serge 3673
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
4104 Serge 3674
	struct i915_vma *vma;
5354 serge 3675
	unsigned bound;
2332 Serge 3676
	int ret;
3677
 
5060 serge 3678
	if (WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base))
3679
		return -ENODEV;
2332 Serge 3680
 
5060 serge 3681
	if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
3682
		return -EINVAL;
4104 Serge 3683
 
5354 serge 3684
	if (WARN_ON((flags & (PIN_MAPPABLE | PIN_GLOBAL)) == PIN_MAPPABLE))
3685
		return -EINVAL;
3686
 
6084 serge 3687
	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
3688
		return -EINVAL;
3689
 
3690
	vma = ggtt_view ? i915_gem_obj_to_ggtt_view(obj, ggtt_view) :
3691
			  i915_gem_obj_to_vma(obj, vm);
3692
 
3693
	if (IS_ERR(vma))
3694
		return PTR_ERR(vma);
3695
 
5060 serge 3696
	if (vma) {
3697
		if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
3698
			return -EBUSY;
4104 Serge 3699
 
5060 serge 3700
		if (i915_vma_misplaced(vma, alignment, flags)) {
3701
			WARN(vma->pin_count,
6084 serge 3702
			     "bo is already pinned in %s with incorrect alignment:"
3703
			     " offset=%08x %08x, req.alignment=%x, req.map_and_fenceable=%d,"
2332 Serge 3704
			     " obj->map_and_fenceable=%d\n",
6084 serge 3705
			     ggtt_view ? "ggtt" : "ppgtt",
3706
			     upper_32_bits(vma->node.start),
3707
			     lower_32_bits(vma->node.start),
3708
			     alignment,
5060 serge 3709
			     !!(flags & PIN_MAPPABLE),
2332 Serge 3710
			     obj->map_and_fenceable);
4104 Serge 3711
			ret = i915_vma_unbind(vma);
2332 Serge 3712
			if (ret)
3713
				return ret;
5060 serge 3714
 
3715
			vma = NULL;
2332 Serge 3716
		}
3717
	}
3718
 
5354 serge 3719
	bound = vma ? vma->bound : 0;
5060 serge 3720
	if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
6084 serge 3721
		vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
3722
						 flags);
5060 serge 3723
		if (IS_ERR(vma))
3724
			return PTR_ERR(vma);
6084 serge 3725
	} else {
3726
		ret = i915_vma_bind(vma, obj->cache_level, flags);
3727
		if (ret)
3728
			return ret;
2332 Serge 3729
	}
3730
 
6084 serge 3731
	if (ggtt_view && ggtt_view->type == I915_GGTT_VIEW_NORMAL &&
3732
	    (bound ^ vma->bound) & GLOBAL_BIND) {
3733
		__i915_vma_set_map_and_fenceable(vma);
3734
		WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
5354 serge 3735
	}
3736
 
5060 serge 3737
	vma->pin_count++;
2332 Serge 3738
	return 0;
3739
}
3740
 
6084 serge 3741
int
3742
i915_gem_object_pin(struct drm_i915_gem_object *obj,
3743
		    struct i915_address_space *vm,
3744
		    uint32_t alignment,
3745
		    uint64_t flags)
2344 Serge 3746
{
6084 serge 3747
	return i915_gem_object_do_pin(obj, vm,
3748
				      i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
3749
				      alignment, flags);
2344 Serge 3750
}
2332 Serge 3751
 
6084 serge 3752
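/* Pin @obj into the global GTT with the given view. As a rough usage sketch
 * (error handling elided; alignment and flags chosen only for illustration),
 * a pin is balanced by an unpin on the same view:
 *
 *	ret = i915_gem_object_ggtt_pin(obj, &i915_ggtt_view_normal,
 *				       0, PIN_MAPPABLE);
 *	... access the object through the mappable aperture ...
 *	i915_gem_object_ggtt_unpin_view(obj, &i915_ggtt_view_normal);
 */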
int
3753
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
3754
			 const struct i915_ggtt_view *view,
3755
			 uint32_t alignment,
3756
			 uint64_t flags)
5060 serge 3757
{
6084 serge 3758
	if (WARN_ONCE(!view, "no view specified"))
3759
		return -EINVAL;
5060 serge 3760
 
6084 serge 3761
	return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view,
3762
				      alignment, flags | PIN_GLOBAL);
5060 serge 3763
}
3764
 
3765
void
6084 serge 3766
i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj,
3767
				const struct i915_ggtt_view *view)
5060 serge 3768
{
6084 serge 3769
	struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view);
5060 serge 3770
 
6084 serge 3771
	BUG_ON(!vma);
3772
	WARN_ON(vma->pin_count == 0);
3773
	WARN_ON(!i915_gem_obj_ggtt_bound_view(obj, view));
2332 Serge 3774
 
6084 serge 3775
	--vma->pin_count;
3031 serge 3776
}
2332 Serge 3777
 
3031 serge 3778
int
3779
i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3780
		    struct drm_file *file)
3781
{
3782
	struct drm_i915_gem_busy *args = data;
3783
	struct drm_i915_gem_object *obj;
3784
	int ret;
2332 Serge 3785
 
3031 serge 3786
	ret = i915_mutex_lock_interruptible(dev);
3787
	if (ret)
3788
		return ret;
2332 Serge 3789
 
5060 serge 3790
	obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
3031 serge 3791
	if (&obj->base == NULL) {
3792
		ret = -ENOENT;
3793
		goto unlock;
3794
	}
2332 Serge 3795
 
3031 serge 3796
	/* Count all active objects as busy, even if they are currently not used
3797
	 * by the gpu. Users of this interface expect objects to eventually
3798
	 * become non-busy without any further actions, therefore emit any
3799
	 * necessary flushes here.
3800
	 */
3801
	ret = i915_gem_object_flush_active(obj);
6084 serge 3802
	if (ret)
3803
		goto unref;
2332 Serge 3804
 
6084 serge 3805
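	/* Report the active state in the upper 16 bits and, if there is an
	 * outstanding write, the id of the last ring to write the object in
	 * the low bits.
	 */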
	BUILD_BUG_ON(I915_NUM_RINGS > 16);
3806
	args->busy = obj->active << 16;
3807
	if (obj->last_write_req)
3808
		args->busy |= obj->last_write_req->ring->id;
2332 Serge 3809
 
6084 serge 3810
unref:
3031 serge 3811
	drm_gem_object_unreference(&obj->base);
3812
unlock:
3813
	mutex_unlock(&dev->struct_mutex);
3814
	return ret;
3815
}
2332 Serge 3816
 
3031 serge 3817
int
3818
i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3819
			struct drm_file *file_priv)
3820
{
3821
	return i915_gem_ring_throttle(dev, file_priv);
3822
}
2332 Serge 3823
 
3263 Serge 3824
#if 0
3825
 
3031 serge 3826
int
3827
i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3828
		       struct drm_file *file_priv)
3829
{
5354 serge 3830
	struct drm_i915_private *dev_priv = dev->dev_private;
3031 serge 3831
	struct drm_i915_gem_madvise *args = data;
3832
	struct drm_i915_gem_object *obj;
3833
	int ret;
2332 Serge 3834
 
3031 serge 3835
	switch (args->madv) {
3836
	case I915_MADV_DONTNEED:
3837
	case I915_MADV_WILLNEED:
3838
	    break;
3839
	default:
3840
	    return -EINVAL;
3841
	}
2332 Serge 3842
 
3031 serge 3843
	ret = i915_mutex_lock_interruptible(dev);
3844
	if (ret)
3845
		return ret;
2332 Serge 3846
 
3031 serge 3847
	obj = to_intel_bo(drm_gem_object_lookup(dev, file_priv, args->handle));
3848
	if (&obj->base == NULL) {
3849
		ret = -ENOENT;
3850
		goto unlock;
3851
	}
2332 Serge 3852
 
5060 serge 3853
	if (i915_gem_obj_is_pinned(obj)) {
3031 serge 3854
		ret = -EINVAL;
3855
		goto out;
3856
	}
2332 Serge 3857
 
5354 serge 3858
	if (obj->pages &&
3859
	    obj->tiling_mode != I915_TILING_NONE &&
3860
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) {
3861
		if (obj->madv == I915_MADV_WILLNEED)
3862
			i915_gem_object_unpin_pages(obj);
3863
		if (args->madv == I915_MADV_WILLNEED)
3864
			i915_gem_object_pin_pages(obj);
3865
	}
3866
 
3031 serge 3867
	if (obj->madv != __I915_MADV_PURGED)
3868
		obj->madv = args->madv;
2332 Serge 3869
 
3031 serge 3870
	/* if the object is no longer attached, discard its backing storage */
6084 serge 3871
	if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL)
3031 serge 3872
		i915_gem_object_truncate(obj);
2332 Serge 3873
 
3031 serge 3874
	args->retained = obj->madv != __I915_MADV_PURGED;
2332 Serge 3875
 
3031 serge 3876
out:
3877
	drm_gem_object_unreference(&obj->base);
3878
unlock:
3879
	mutex_unlock(&dev->struct_mutex);
3880
	return ret;
3881
}
3882
#endif
2332 Serge 3883
 
3031 serge 3884
void i915_gem_object_init(struct drm_i915_gem_object *obj,
3885
			  const struct drm_i915_gem_object_ops *ops)
3886
{
6084 serge 3887
	int i;
3888
 
4104 Serge 3889
	INIT_LIST_HEAD(&obj->global_list);
6084 serge 3890
	for (i = 0; i < I915_NUM_RINGS; i++)
3891
		INIT_LIST_HEAD(&obj->ring_list[i]);
4104 Serge 3892
	INIT_LIST_HEAD(&obj->obj_exec_link);
3893
	INIT_LIST_HEAD(&obj->vma_list);
6084 serge 3894
	INIT_LIST_HEAD(&obj->batch_pool_link);
2332 Serge 3895
 
3031 serge 3896
	obj->ops = ops;
3897
 
3898
	obj->fence_reg = I915_FENCE_REG_NONE;
3899
	obj->madv = I915_MADV_WILLNEED;
3900
 
3901
	i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
3902
}
3903
 
3904
static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
3905
	.get_pages = i915_gem_object_get_pages_gtt,
3906
	.put_pages = i915_gem_object_put_pages_gtt,
3907
};
3908
 
2332 Serge 3909
struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
3910
						  size_t size)
3911
{
3912
	struct drm_i915_gem_object *obj;
3031 serge 3913
	struct address_space *mapping;
3480 Serge 3914
	gfp_t mask;
2340 Serge 3915
 
3746 Serge 3916
	obj = i915_gem_object_alloc(dev);
2332 Serge 3917
	if (obj == NULL)
3918
		return NULL;
3919
 
3920
	if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4104 Serge 3921
		i915_gem_object_free(obj);
2332 Serge 3922
		return NULL;
3923
	}
3924
 
3925
 
3031 serge 3926
	i915_gem_object_init(obj, &i915_gem_object_ops);
2332 Serge 3927
 
3928
	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
3929
	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
3930
 
3031 serge 3931
	if (HAS_LLC(dev)) {
3932
		/* On some devices, we can have the GPU use the LLC (the CPU
2332 Serge 3933
		 * cache) for about a 10% performance improvement
3934
		 * compared to uncached.  Graphics requests other than
3935
		 * display scanout are coherent with the CPU in
3936
		 * accessing this cache.  This means in this mode we
3937
		 * don't need to clflush on the CPU side, and on the
3938
		 * GPU side we only need to flush internal caches to
3939
		 * get data visible to the CPU.
3940
		 *
3941
		 * However, we maintain the display planes as UC, and so
3942
		 * need to rebind when first used as such.
3943
		 */
3944
		obj->cache_level = I915_CACHE_LLC;
3945
	} else
3946
		obj->cache_level = I915_CACHE_NONE;
3947
 
4560 Serge 3948
	trace_i915_gem_object_create(obj);
3949
 
2332 Serge 3950
	return obj;
3951
}
3952
 
6283 serge 3953
static bool discard_backing_storage(struct drm_i915_gem_object *obj)
3954
{
3955
	/* If we are the last user of the backing storage (be it shmemfs
3956
	 * pages or stolen etc), we know that the pages are going to be
3957
	 * immediately released. In this case, we can then skip copying
3958
	 * back the contents from the GPU.
3959
	 */
3960
 
3961
	if (obj->madv != I915_MADV_WILLNEED)
3962
		return false;
3963
 
3964
	if (obj->base.filp == NULL)
3965
		return true;
3966
 
3967
//        printf("filp %p\n", obj->base.filp);
3968
	shmem_file_delete(obj->base.filp);
3969
	return true;
3970
}
3971
 
3031 serge 3972
void i915_gem_free_object(struct drm_gem_object *gem_obj)
2344 Serge 3973
{
3031 serge 3974
	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
2344 Serge 3975
	struct drm_device *dev = obj->base.dev;
5060 serge 3976
	struct drm_i915_private *dev_priv = dev->dev_private;
4104 Serge 3977
	struct i915_vma *vma, *next;
2332 Serge 3978
 
4560 Serge 3979
	intel_runtime_pm_get(dev_priv);
3980
 
3031 serge 3981
	trace_i915_gem_object_destroy(obj);
3982
 
5060 serge 3983
	list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) {
3984
		int ret;
3031 serge 3985
 
5060 serge 3986
		vma->pin_count = 0;
3987
		ret = i915_vma_unbind(vma);
4104 Serge 3988
		if (WARN_ON(ret == -ERESTARTSYS)) {
6084 serge 3989
			bool was_interruptible;
3031 serge 3990
 
6084 serge 3991
			was_interruptible = dev_priv->mm.interruptible;
3992
			dev_priv->mm.interruptible = false;
3031 serge 3993
 
4104 Serge 3994
			WARN_ON(i915_vma_unbind(vma));
3031 serge 3995
 
6084 serge 3996
			dev_priv->mm.interruptible = was_interruptible;
3997
		}
2344 Serge 3998
	}
2332 Serge 3999
 
4104 Serge 4000
	/* Stolen objects don't hold a ref, but do hold pin count. Fix that up
4001
	 * before progressing. */
4002
	if (obj->stolen)
4003
		i915_gem_object_unpin_pages(obj);
4004
 
5060 serge 4005
	WARN_ON(obj->frontbuffer_bits);
4006
 
5354 serge 4007
	if (obj->pages && obj->madv == I915_MADV_WILLNEED &&
4008
	    dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES &&
4009
	    obj->tiling_mode != I915_TILING_NONE)
4010
		i915_gem_object_unpin_pages(obj);
4011
 
4104 Serge 4012
	if (WARN_ON(obj->pages_pin_count))
6084 serge 4013
		obj->pages_pin_count = 0;
6283 serge 4014
	if (discard_backing_storage(obj))
4015
		obj->madv = I915_MADV_DONTNEED;
3031 serge 4016
	i915_gem_object_put_pages(obj);
4017
//   i915_gem_object_free_mmap_offset(obj);
2332 Serge 4018
 
3243 Serge 4019
	BUG_ON(obj->pages);
2332 Serge 4020
 
6283 serge 4021
	if (obj->ops->release)
4022
		obj->ops->release(obj);
3031 serge 4023
 
2344 Serge 4024
	drm_gem_object_release(&obj->base);
4025
	i915_gem_info_remove_obj(dev_priv, obj->base.size);
2332 Serge 4026
 
2344 Serge 4027
	kfree(obj->bit_17);
4104 Serge 4028
	i915_gem_object_free(obj);
4560 Serge 4029
 
4030
	intel_runtime_pm_put(dev_priv);
2344 Serge 4031
}
2332 Serge 4032
 
4560 Serge 4033
struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
4104 Serge 4034
				     struct i915_address_space *vm)
4035
{
4560 Serge 4036
	struct i915_vma *vma;
6084 serge 4037
	list_for_each_entry(vma, &obj->vma_list, vma_link) {
4038
		if (i915_is_ggtt(vma->vm) &&
4039
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4040
			continue;
4560 Serge 4041
		if (vma->vm == vm)
4042
			return vma;
6084 serge 4043
	}
4044
	return NULL;
4045
}
4560 Serge 4046
 
6084 serge 4047
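/* Return the VMA of @obj that is bound into the global GTT with a view equal
 * to @view, ERR_PTR(-EINVAL) if no view is supplied, or NULL if no such VMA
 * exists.
 */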
struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj,
4048
					   const struct i915_ggtt_view *view)
4049
{
4050
	struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
4051
	struct i915_vma *vma;
4052
 
4053
	if (WARN_ONCE(!view, "no view specified"))
4054
		return ERR_PTR(-EINVAL);
4055
 
4056
	list_for_each_entry(vma, &obj->vma_list, vma_link)
4057
		if (vma->vm == ggtt &&
4058
		    i915_ggtt_view_equal(&vma->ggtt_view, view))
4059
			return vma;
4560 Serge 4060
	return NULL;
4061
}
4062
 
4104 Serge 4063
void i915_gem_vma_destroy(struct i915_vma *vma)
4064
{
5354 serge 4065
	struct i915_address_space *vm = NULL;
4104 Serge 4066
	WARN_ON(vma->node.allocated);
4560 Serge 4067
 
4068
	/* Keep the vma as a placeholder in the execbuffer reservation lists */
4069
	if (!list_empty(&vma->exec_list))
4070
		return;
4071
 
5354 serge 4072
	vm = vma->vm;
4073
 
4074
	if (!i915_is_ggtt(vm))
4075
		i915_ppgtt_put(i915_vm_to_ppgtt(vm));
4076
 
4104 Serge 4077
	list_del(&vma->vma_link);
4560 Serge 4078
 
4104 Serge 4079
	kfree(vma);
4080
}
4081
 
6084 serge 4082
static void
4083
i915_gem_stop_ringbuffers(struct drm_device *dev)
4084
{
4085
	struct drm_i915_private *dev_priv = dev->dev_private;
4086
	struct intel_engine_cs *ring;
4087
	int i;
4088
 
4089
	for_each_ring(ring, dev_priv, i)
4090
		dev_priv->gt.stop_ring(ring);
4091
}
4092
 
3031 serge 4093
#if 0
4094
int
4560 Serge 4095
i915_gem_suspend(struct drm_device *dev)
2344 Serge 4096
{
5060 serge 4097
	struct drm_i915_private *dev_priv = dev->dev_private;
4560 Serge 4098
	int ret = 0;
2332 Serge 4099
 
4560 Serge 4100
	mutex_lock(&dev->struct_mutex);
3031 serge 4101
	ret = i915_gpu_idle(dev);
4560 Serge 4102
	if (ret)
4103
		goto err;
4104
 
3031 serge 4105
	i915_gem_retire_requests(dev);
4106
 
5060 serge 4107
	i915_gem_stop_ringbuffers(dev);
4560 Serge 4108
	mutex_unlock(&dev->struct_mutex);
4109
 
6084 serge 4110
	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
3263 Serge 4111
	cancel_delayed_work_sync(&dev_priv->mm.retire_work);
5060 serge 4112
	flush_delayed_work(&dev_priv->mm.idle_work);
3031 serge 4113
 
6084 serge 4114
	/* Assert that we successfully flushed all the work and
4115
	 * reset the GPU back to its idle, low power state.
4116
	 */
4117
	WARN_ON(dev_priv->mm.busy);
4118
 
3031 serge 4119
	return 0;
4560 Serge 4120
 
4121
err:
4122
	mutex_unlock(&dev->struct_mutex);
4123
	return ret;
2344 Serge 4124
}
3031 serge 4125
#endif
2332 Serge 4126
 
6084 serge 4127
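/* Replay the saved L3 remapping registers for @slice by emitting a stream of
 * MI_LOAD_REGISTER_IMM commands on @req's ring. This is a no-op on parts
 * without L3 DPF or when no remap information has been captured.
 */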
int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice)
3031 serge 4128
{
6084 serge 4129
	struct intel_engine_cs *ring = req->ring;
4560 Serge 4130
	struct drm_device *dev = ring->dev;
5060 serge 4131
	struct drm_i915_private *dev_priv = dev->dev_private;
4560 Serge 4132
	u32 reg_base = GEN7_L3LOG_BASE + (slice * 0x200);
4133
	u32 *remap_info = dev_priv->l3_parity.remap_info[slice];
4134
	int i, ret;
2332 Serge 4135
 
4560 Serge 4136
	if (!HAS_L3_DPF(dev) || !remap_info)
4137
		return 0;
2332 Serge 4138
 
6084 serge 4139
	ret = intel_ring_begin(req, GEN7_L3LOG_SIZE / 4 * 3);
4560 Serge 4140
	if (ret)
4141
		return ret;
2332 Serge 4142
 
4560 Serge 4143
	/*
4144
	 * Note: We do not worry about the concurrent register cacheline hang
4145
	 * here because no other code should access these registers other than
4146
	 * at initialization time.
4147
	 */
3031 serge 4148
	for (i = 0; i < GEN7_L3LOG_SIZE; i += 4) {
4560 Serge 4149
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
4150
		intel_ring_emit(ring, reg_base + i);
4151
		intel_ring_emit(ring, remap_info[i/4]);
3031 serge 4152
	}
2332 Serge 4153
 
4560 Serge 4154
	intel_ring_advance(ring);
2332 Serge 4155
 
4560 Serge 4156
	return ret;
3031 serge 4157
}
2332 Serge 4158
 
3031 serge 4159
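/* Program the display arbiter and tiling control registers so that surface
 * swizzling matches the detected bit-6 swizzle mode. Nothing to do on
 * gen < 5 or when swizzling is not used at all.
 */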
void i915_gem_init_swizzling(struct drm_device *dev)
4160
{
5060 serge 4161
	struct drm_i915_private *dev_priv = dev->dev_private;
2332 Serge 4162
 
3031 serge 4163
	if (INTEL_INFO(dev)->gen < 5 ||
4164
	    dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_NONE)
4165
		return;
2332 Serge 4166
 
3031 serge 4167
	I915_WRITE(DISP_ARB_CTL, I915_READ(DISP_ARB_CTL) |
4168
				 DISP_TILE_SURFACE_SWIZZLING);
2332 Serge 4169
 
3031 serge 4170
	if (IS_GEN5(dev))
4171
		return;
2344 Serge 4172
 
3031 serge 4173
	I915_WRITE(TILECTL, I915_READ(TILECTL) | TILECTL_SWZCTL);
4174
	if (IS_GEN6(dev))
4175
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB));
3480 Serge 4176
	else if (IS_GEN7(dev))
4177
		I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB));
4560 Serge 4178
	else if (IS_GEN8(dev))
4179
		I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW));
3031 serge 4180
	else
3480 Serge 4181
		BUG();
3031 serge 4182
}
4183
 
5354 serge 4184
static void init_unused_ring(struct drm_device *dev, u32 base)
2332 Serge 4185
{
3480 Serge 4186
	struct drm_i915_private *dev_priv = dev->dev_private;
5354 serge 4187
 
4188
	I915_WRITE(RING_CTL(base), 0);
4189
	I915_WRITE(RING_HEAD(base), 0);
4190
	I915_WRITE(RING_TAIL(base), 0);
4191
	I915_WRITE(RING_START(base), 0);
4192
}
4193
 
4194
static void init_unused_rings(struct drm_device *dev)
4195
{
4196
	if (IS_I830(dev)) {
4197
		init_unused_ring(dev, PRB1_BASE);
4198
		init_unused_ring(dev, SRB0_BASE);
4199
		init_unused_ring(dev, SRB1_BASE);
4200
		init_unused_ring(dev, SRB2_BASE);
4201
		init_unused_ring(dev, SRB3_BASE);
4202
	} else if (IS_GEN2(dev)) {
4203
		init_unused_ring(dev, SRB0_BASE);
4204
		init_unused_ring(dev, SRB1_BASE);
4205
	} else if (IS_GEN3(dev)) {
4206
		init_unused_ring(dev, PRB1_BASE);
4207
		init_unused_ring(dev, PRB2_BASE);
4208
	}
4209
}
4210
 
4211
int i915_gem_init_rings(struct drm_device *dev)
4212
{
4213
	struct drm_i915_private *dev_priv = dev->dev_private;
2332 Serge 4214
	int ret;
2351 Serge 4215
 
2332 Serge 4216
	ret = intel_init_render_ring_buffer(dev);
4217
	if (ret)
4218
		return ret;
4219
 
6084 serge 4220
	if (HAS_BSD(dev)) {
2332 Serge 4221
		ret = intel_init_bsd_ring_buffer(dev);
4222
		if (ret)
4223
			goto cleanup_render_ring;
4224
	}
4225
 
6084 serge 4226
	if (HAS_BLT(dev)) {
2332 Serge 4227
		ret = intel_init_blt_ring_buffer(dev);
4228
		if (ret)
4229
			goto cleanup_bsd_ring;
4230
	}
4231
 
4104 Serge 4232
	if (HAS_VEBOX(dev)) {
4233
		ret = intel_init_vebox_ring_buffer(dev);
4234
		if (ret)
4235
			goto cleanup_blt_ring;
4236
	}
4237
 
5060 serge 4238
	if (HAS_BSD2(dev)) {
4239
		ret = intel_init_bsd2_ring_buffer(dev);
4240
		if (ret)
4241
			goto cleanup_vebox_ring;
4242
	}
4104 Serge 4243
 
2332 Serge 4244
	return 0;
4245
 
4104 Serge 4246
cleanup_vebox_ring:
4247
	intel_cleanup_ring_buffer(&dev_priv->ring[VECS]);
3480 Serge 4248
cleanup_blt_ring:
4249
	intel_cleanup_ring_buffer(&dev_priv->ring[BCS]);
2332 Serge 4250
cleanup_bsd_ring:
4251
	intel_cleanup_ring_buffer(&dev_priv->ring[VCS]);
4252
cleanup_render_ring:
4253
	intel_cleanup_ring_buffer(&dev_priv->ring[RCS]);
3480 Serge 4254
 
2332 Serge 4255
	return ret;
4256
}
4257
 
3480 Serge 4258
int
4259
i915_gem_init_hw(struct drm_device *dev)
3031 serge 4260
{
5060 serge 4261
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 4262
	struct intel_engine_cs *ring;
4263
	int ret, i, j;
3031 serge 4264
 
3480 Serge 4265
	if (INTEL_INFO(dev)->gen < 6 && !intel_enable_gtt())
4266
		return -EIO;
3031 serge 4267
 
6084 serge 4268
	/* Double layer security blanket, see i915_gem_init() */
4269
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
4270
 
4104 Serge 4271
	if (dev_priv->ellc_size)
4272
		I915_WRITE(HSW_IDICR, I915_READ(HSW_IDICR) | IDIHASHMSK(0xf));
3480 Serge 4273
 
4560 Serge 4274
	if (IS_HASWELL(dev))
4275
		I915_WRITE(MI_PREDICATE_RESULT_2, IS_HSW_GT3(dev) ?
4276
			   LOWER_SLICE_ENABLED : LOWER_SLICE_DISABLED);
4277
 
3746 Serge 4278
	if (HAS_PCH_NOP(dev)) {
5060 serge 4279
		if (IS_IVYBRIDGE(dev)) {
6084 serge 4280
			u32 temp = I915_READ(GEN7_MSG_CTL);
4281
			temp &= ~(WAIT_FOR_PCH_FLR_ACK | WAIT_FOR_PCH_RESET_ACK);
4282
			I915_WRITE(GEN7_MSG_CTL, temp);
5060 serge 4283
		} else if (INTEL_INFO(dev)->gen >= 7) {
4284
			u32 temp = I915_READ(HSW_NDE_RSTWRN_OPT);
4285
			temp &= ~RESET_PCH_HANDSHAKE_ENABLE;
4286
			I915_WRITE(HSW_NDE_RSTWRN_OPT, temp);
4287
		}
3746 Serge 4288
	}
4289
 
3480 Serge 4290
	i915_gem_init_swizzling(dev);
4291
 
6084 serge 4292
	/*
4293
	 * At least 830 can leave some of the unused rings
4294
	 * "active" (ie. head != tail) after resume which
4295
	 * will prevent C3 entry. Make sure all unused rings
4296
	 * are totally idle.
4297
	 */
4298
	init_unused_rings(dev);
3480 Serge 4299
 
6084 serge 4300
	BUG_ON(!dev_priv->ring[RCS].default_context);
4560 Serge 4301
 
6084 serge 4302
	ret = i915_ppgtt_init_hw(dev);
4303
	if (ret) {
4304
		DRM_ERROR("PPGTT enable HW failed %d\n", ret);
4305
		goto out;
4306
	}
4307
 
4308
	/* Need to do basic initialisation of all rings first: */
4309
	for_each_ring(ring, dev_priv, i) {
4310
		ret = ring->init_hw(ring);
4311
		if (ret)
4312
			goto out;
4313
	}
4314
 
4315
	/* We can't enable contexts until all firmware is loaded */
4316
	if (HAS_GUC_UCODE(dev)) {
4317
		ret = intel_guc_ucode_load(dev);
4318
		if (ret) {
4319
			/*
4320
			 * If we got an error and GuC submission is enabled, map
4321
			 * the error to -EIO so the GPU will be declared wedged.
4322
			 * OTOH, if we didn't intend to use the GuC anyway, just
4323
			 * discard the error and carry on.
4324
			 */
4325
			DRM_ERROR("Failed to initialize GuC, error %d%s\n", ret,
4326
				  i915.enable_guc_submission ? "" :
4327
				  " (ignored)");
4328
			ret = i915.enable_guc_submission ? -EIO : 0;
4329
			if (ret)
4330
				goto out;
4331
		}
4332
	}
4333
 
3480 Serge 4334
	/*
6084 serge 4335
	 * Increment the next seqno by 0x100 so we have a visible break
4336
	 * on re-initialisation
3480 Serge 4337
	 */
6084 serge 4338
	ret = i915_gem_set_seqno(dev, dev_priv->next_seqno+0x100);
4339
	if (ret)
4340
		goto out;
5354 serge 4341
 
6084 serge 4342
	/* Now it is safe to go back round and do everything else: */
4343
	for_each_ring(ring, dev_priv, i) {
4344
		struct drm_i915_gem_request *req;
4560 Serge 4345
 
6084 serge 4346
		WARN_ON(!ring->default_context);
4347
 
4348
		ret = i915_gem_request_alloc(ring, ring->default_context, &req);
4349
		if (ret) {
4350
			i915_gem_cleanup_ringbuffer(dev);
4351
			goto out;
4352
		}
4353
 
4354
		if (ring->id == RCS) {
4355
			for (j = 0; j < NUM_L3_SLICES(dev); j++)
4356
				i915_gem_l3_remap(req, j);
4357
		}
4358
 
4359
		ret = i915_ppgtt_init_ring(req);
4360
		if (ret && ret != -EIO) {
4361
			DRM_ERROR("PPGTT enable ring #%d failed %d\n", i, ret);
4362
			i915_gem_request_cancel(req);
4363
			i915_gem_cleanup_ringbuffer(dev);
4364
			goto out;
4365
		}
4366
 
4367
		ret = i915_gem_context_enable(req);
4368
		if (ret && ret != -EIO) {
4369
			DRM_ERROR("Context enable ring #%d failed %d\n", i, ret);
4370
			i915_gem_request_cancel(req);
4371
			i915_gem_cleanup_ringbuffer(dev);
4372
			goto out;
4373
		}
4374
 
4375
		i915_add_request_no_flush(req);
5354 serge 4376
	}
4377
 
6084 serge 4378
out:
4379
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5060 serge 4380
	return ret;
3031 serge 4381
}
4382
 
4383
int i915_gem_init(struct drm_device *dev)
4384
{
4385
	struct drm_i915_private *dev_priv = dev->dev_private;
4386
	int ret;
4387
 
5354 serge 4388
	i915.enable_execlists = intel_sanitize_enable_execlists(dev,
4389
			i915.enable_execlists);
4390
 
3031 serge 4391
	mutex_lock(&dev->struct_mutex);
3746 Serge 4392
 
4393
	if (IS_VALLEYVIEW(dev)) {
4394
		/* VLVA0 (potential hack), BIOS isn't actually waking us */
5060 serge 4395
		I915_WRITE(VLV_GTLC_WAKE_CTRL, VLV_GTLC_ALLOWWAKEREQ);
4396
		if (wait_for((I915_READ(VLV_GTLC_PW_STATUS) &
4397
			      VLV_GTLC_ALLOWWAKEACK), 10))
3746 Serge 4398
			DRM_DEBUG_DRIVER("allow wake ack timed out\n");
4399
	}
4400
 
5354 serge 4401
	if (!i915.enable_execlists) {
6084 serge 4402
		dev_priv->gt.execbuf_submit = i915_gem_ringbuffer_submission;
5354 serge 4403
		dev_priv->gt.init_rings = i915_gem_init_rings;
4404
		dev_priv->gt.cleanup_ring = intel_cleanup_ring_buffer;
4405
		dev_priv->gt.stop_ring = intel_stop_ring_buffer;
4406
	} else {
6084 serge 4407
		dev_priv->gt.execbuf_submit = intel_execlists_submission;
5354 serge 4408
		dev_priv->gt.init_rings = intel_logical_rings_init;
4409
		dev_priv->gt.cleanup_ring = intel_logical_ring_cleanup;
4410
		dev_priv->gt.stop_ring = intel_logical_ring_stop;
4411
	}
4412
 
6084 serge 4413
	/* This is just a security blanket to placate dragons.
4414
	 * On some systems, we very sporadically observe that the first TLBs
4415
	 * used by the CS may be stale, despite us poking the TLB reset. If
4416
	 * we hold the forcewake during initialisation these problems
4417
	 * just magically go away.
4418
	 */
4419
	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
5354 serge 4420
 
6084 serge 4421
//	ret = i915_gem_init_userptr(dev);
4422
//	if (ret)
4423
//		goto out_unlock;
3746 Serge 4424
 
6084 serge 4425
	i915_gem_init_global_gtt(dev);
4426
 
5060 serge 4427
	ret = i915_gem_context_init(dev);
6084 serge 4428
	if (ret)
4429
		goto out_unlock;
3031 serge 4430
 
6084 serge 4431
	ret = dev_priv->gt.init_rings(dev);
4432
	if (ret)
4433
		goto out_unlock;
4434
 
5060 serge 4435
	ret = i915_gem_init_hw(dev);
4436
	if (ret == -EIO) {
4437
		/* Allow ring initialisation to fail by marking the GPU as
4438
		 * wedged. But we only want to do this where the GPU is angry,
4439
		 * for all other failure, such as an allocation failure, bail.
4440
		 */
4441
		DRM_ERROR("Failed to initialize GPU, declaring it wedged\n");
6084 serge 4442
		atomic_or(I915_WEDGED, &dev_priv->gpu_error.reset_counter);
5060 serge 4443
		ret = 0;
4444
	}
6084 serge 4445
 
4446
out_unlock:
4447
	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
5060 serge 4448
	mutex_unlock(&dev->struct_mutex);
3746 Serge 4449
 
6084 serge 4450
	return ret;
3031 serge 4451
}
4452
 
2332 Serge 4453
void
4454
i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4455
{
5060 serge 4456
	struct drm_i915_private *dev_priv = dev->dev_private;
4457
	struct intel_engine_cs *ring;
2332 Serge 4458
	int i;
4459
 
3031 serge 4460
	for_each_ring(ring, dev_priv, i)
5354 serge 4461
		dev_priv->gt.cleanup_ring(ring);
2332 Serge 4462
}
4463
 
4464
static void
5060 serge 4465
init_ring_lists(struct intel_engine_cs *ring)
2326 Serge 4466
{
6084 serge 4467
	INIT_LIST_HEAD(&ring->active_list);
4468
	INIT_LIST_HEAD(&ring->request_list);
2326 Serge 4469
}
4470
 
4471
void
4472
i915_gem_load(struct drm_device *dev)
4473
{
5060 serge 4474
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 4475
	int i;
2326 Serge 4476
 
4104 Serge 4477
	INIT_LIST_HEAD(&dev_priv->vm_list);
4560 Serge 4478
	INIT_LIST_HEAD(&dev_priv->context_list);
3031 serge 4479
	INIT_LIST_HEAD(&dev_priv->mm.unbound_list);
4480
	INIT_LIST_HEAD(&dev_priv->mm.bound_list);
6084 serge 4481
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4482
	for (i = 0; i < I915_NUM_RINGS; i++)
4483
		init_ring_lists(&dev_priv->ring[i]);
2342 Serge 4484
	for (i = 0; i < I915_MAX_NUM_FENCES; i++)
6084 serge 4485
		INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
2360 Serge 4486
	INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4487
			  i915_gem_retire_work_handler);
4560 Serge 4488
	INIT_DELAYED_WORK(&dev_priv->mm.idle_work,
4489
			  i915_gem_idle_work_handler);
3480 Serge 4490
	init_waitqueue_head(&dev_priv->gpu_error.reset_queue);
2326 Serge 4491
 
6084 serge 4492
	dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL;
2326 Serge 4493
 
3746 Serge 4494
	if (INTEL_INFO(dev)->gen >= 7 && !IS_VALLEYVIEW(dev))
4495
		dev_priv->num_fence_regs = 32;
4496
	else if (INTEL_INFO(dev)->gen >= 4 || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
6084 serge 4497
		dev_priv->num_fence_regs = 16;
4498
	else
4499
		dev_priv->num_fence_regs = 8;
2326 Serge 4500
 
6084 serge 4501
	if (intel_vgpu_active(dev))
4502
		dev_priv->num_fence_regs =
4503
				I915_READ(vgtif_reg(avail_rs.fence_num));
4504
 
4505
	/*
4506
	 * Set initial sequence number for requests.
4507
	 * Using this number allows the wraparound to happen early,
4508
	 * catching any obvious problems.
4509
	 */
4510
	dev_priv->next_seqno = ((u32)~0 - 0x1100);
4511
	dev_priv->last_seqno = ((u32)~0 - 0x1101);
4512
 
4513
	/* Initialize fence registers to zero */
3746 Serge 4514
	INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4515
	i915_gem_restore_fences(dev);
2326 Serge 4516
 
6084 serge 4517
	i915_gem_detect_bit_6_swizzle(dev);
2326 Serge 4518
 
6084 serge 4519
	dev_priv->mm.interruptible = true;
2326 Serge 4520
 
5060 serge 4521
	mutex_init(&dev_priv->fb_tracking.lock);
2326 Serge 4522
}
4523
 
6084 serge 4524
void i915_gem_release(struct drm_device *dev, struct drm_file *file)
4525
{
4526
	struct drm_i915_file_private *file_priv = file->driver_priv;
4527
 
4528
	/* Clean up our request list when the client is going away, so that
4529
	 * later retire_requests won't dereference our soon-to-be-gone
4530
	 * file_priv.
4531
	 */
4532
	spin_lock(&file_priv->mm.lock);
4533
	while (!list_empty(&file_priv->mm.request_list)) {
4534
		struct drm_i915_gem_request *request;
4535
 
4536
		request = list_first_entry(&file_priv->mm.request_list,
4537
					   struct drm_i915_gem_request,
4538
					   client_list);
4539
		list_del(&request->client_list);
4540
		request->file_priv = NULL;
4541
	}
4542
	spin_unlock(&file_priv->mm.lock);
4543
 
4544
	if (!list_empty(&file_priv->rps.link)) {
4545
		spin_lock(&to_i915(dev)->rps.client_lock);
4546
		list_del(&file_priv->rps.link);
4547
		spin_unlock(&to_i915(dev)->rps.client_lock);
4548
	}
4549
}
4550
 
5060 serge 4551
int i915_gem_open(struct drm_device *dev, struct drm_file *file)
4104 Serge 4552
{
5060 serge 4553
	struct drm_i915_file_private *file_priv;
4104 Serge 4554
	int ret;
2326 Serge 4555
 
5060 serge 4556
	DRM_DEBUG_DRIVER("\n");
4104 Serge 4557
 
5060 serge 4558
	file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
4559
	if (!file_priv)
4104 Serge 4560
		return -ENOMEM;
4561
 
5060 serge 4562
	file->driver_priv = file_priv;
4563
	file_priv->dev_priv = dev->dev_private;
4564
	file_priv->file = file;
6084 serge 4565
	INIT_LIST_HEAD(&file_priv->rps.link);
4104 Serge 4566
 
5060 serge 4567
	spin_lock_init(&file_priv->mm.lock);
4568
	INIT_LIST_HEAD(&file_priv->mm.request_list);
4104 Serge 4569
 
5060 serge 4570
	ret = i915_gem_context_open(dev, file);
4571
	if (ret)
4572
		kfree(file_priv);
4104 Serge 4573
 
4574
	return ret;
4575
}
4576
 
5354 serge 4577
/**
4578
 * i915_gem_track_fb - update frontbuffer tracking
6084 serge 4579
 * @old: current GEM buffer for the frontbuffer slots
4580
 * @new: new GEM buffer for the frontbuffer slots
4581
 * @frontbuffer_bits: bitmask of frontbuffer slots
5354 serge 4582
 *
4583
 * This updates the frontbuffer tracking bits @frontbuffer_bits by clearing them
4584
 * from @old and setting them in @new. Both @old and @new can be NULL.
4585
 */
5060 serge 4586
void i915_gem_track_fb(struct drm_i915_gem_object *old,
4587
		       struct drm_i915_gem_object *new,
4588
		       unsigned frontbuffer_bits)
4104 Serge 4589
{
5060 serge 4590
	if (old) {
4591
		WARN_ON(!mutex_is_locked(&old->base.dev->struct_mutex));
4592
		WARN_ON(!(old->frontbuffer_bits & frontbuffer_bits));
4593
		old->frontbuffer_bits &= ~frontbuffer_bits;
4104 Serge 4594
	}
4595
 
5060 serge 4596
	if (new) {
4597
		WARN_ON(!mutex_is_locked(&new->base.dev->struct_mutex));
4598
		WARN_ON(new->frontbuffer_bits & frontbuffer_bits);
4599
		new->frontbuffer_bits |= frontbuffer_bits;
4104 Serge 4600
	}
4601
}
4602
 
4603
/* All the new VM stuff */
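/* Note that the helpers below which take a plain address space deliberately
 * skip GGTT VMAs bound with a non-normal view; the *_view variants handle
 * those.
 */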
6084 serge 4604
u64 i915_gem_obj_offset(struct drm_i915_gem_object *o,
4605
			struct i915_address_space *vm)
4104 Serge 4606
{
4607
	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
4608
	struct i915_vma *vma;
4609
 
5354 serge 4610
	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
4104 Serge 4611
 
4612
	list_for_each_entry(vma, &o->vma_list, vma_link) {
6084 serge 4613
		if (i915_is_ggtt(vma->vm) &&
4614
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4615
			continue;
4104 Serge 4616
		if (vma->vm == vm)
4617
			return vma->node.start;
6084 serge 4618
	}
4104 Serge 4619
 
5060 serge 4620
	WARN(1, "%s vma for this object not found.\n",
4621
	     i915_is_ggtt(vm) ? "global" : "ppgtt");
4622
	return -1;
4104 Serge 4623
}
4624
 
6084 serge 4625
u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o,
4626
				  const struct i915_ggtt_view *view)
4627
{
4628
	struct i915_address_space *ggtt = i915_obj_to_ggtt(o);
4629
	struct i915_vma *vma;
4630
 
4631
	list_for_each_entry(vma, &o->vma_list, vma_link)
4632
		if (vma->vm == ggtt &&
4633
		    i915_ggtt_view_equal(&vma->ggtt_view, view))
4634
			return vma->node.start;
4635
 
4636
	WARN(1, "global vma for this object not found. (view=%u)\n", view->type);
4637
	return -1;
4638
}
4639
 
4104 Serge 4640
bool i915_gem_obj_bound(struct drm_i915_gem_object *o,
4641
			struct i915_address_space *vm)
4642
{
4643
	struct i915_vma *vma;
4644
 
6084 serge 4645
	list_for_each_entry(vma, &o->vma_list, vma_link) {
4646
		if (i915_is_ggtt(vma->vm) &&
4647
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4648
			continue;
4104 Serge 4649
		if (vma->vm == vm && drm_mm_node_allocated(&vma->node))
4650
			return true;
6084 serge 4651
	}
4104 Serge 4652
 
4653
	return false;
4654
}
4655
 
6084 serge 4656
bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o,
4657
				  const struct i915_ggtt_view *view)
4658
{
4659
	struct i915_address_space *ggtt = i915_obj_to_ggtt(o);
4660
	struct i915_vma *vma;
4661
 
4662
	list_for_each_entry(vma, &o->vma_list, vma_link)
4663
		if (vma->vm == ggtt &&
4664
		    i915_ggtt_view_equal(&vma->ggtt_view, view) &&
4665
		    drm_mm_node_allocated(&vma->node))
4666
			return true;
4667
 
4668
	return false;
4669
}
4670
 
4104 Serge 4671
bool i915_gem_obj_bound_any(struct drm_i915_gem_object *o)
4672
{
4560 Serge 4673
	struct i915_vma *vma;
4104 Serge 4674
 
4560 Serge 4675
	list_for_each_entry(vma, &o->vma_list, vma_link)
4676
		if (drm_mm_node_allocated(&vma->node))
4104 Serge 4677
			return true;
4678
 
4679
	return false;
4680
}
4681
 
4682
unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
4683
				struct i915_address_space *vm)
4684
{
4685
	struct drm_i915_private *dev_priv = o->base.dev->dev_private;
4686
	struct i915_vma *vma;
4687
 
5354 serge 4688
	WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base);
4104 Serge 4689
 
4690
	BUG_ON(list_empty(&o->vma_list));
4691
 
6084 serge 4692
	list_for_each_entry(vma, &o->vma_list, vma_link) {
4693
		if (i915_is_ggtt(vma->vm) &&
4694
		    vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL)
4695
			continue;
4104 Serge 4696
		if (vma->vm == vm)
4697
			return vma->node.size;
6084 serge 4698
	}
4104 Serge 4699
	return 0;
4700
}
4560 Serge 4701
 
6084 serge 4702
bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj)
4703
{
4704
	struct i915_vma *vma;
4705
	list_for_each_entry(vma, &obj->vma_list, vma_link)
4706
		if (vma->pin_count > 0)
4707
			return true;
4560 Serge 4708
 
6084 serge 4709
	return false;
4710
}
5060 serge 4711
 
6084 serge 4712
/* Allocate a new GEM object and fill it with the supplied data */
4713
struct drm_i915_gem_object *
4714
i915_gem_object_create_from_data(struct drm_device *dev,
4715
			         const void *data, size_t size)
4104 Serge 4716
{
6084 serge 4717
	struct drm_i915_gem_object *obj;
4718
	struct sg_table *sg;
4719
	size_t bytes;
4720
	int ret;
4104 Serge 4721
 
6084 serge 4722
	obj = i915_gem_alloc_object(dev, round_up(size, PAGE_SIZE));
4723
	if (IS_ERR_OR_NULL(obj))
4724
		return obj;
4104 Serge 4725
 
6084 serge 4726
	ret = i915_gem_object_set_to_cpu_domain(obj, true);
4727
	if (ret)
4728
		goto fail;
4729
 
4730
	ret = i915_gem_object_get_pages(obj);
4731
	if (ret)
4732
		goto fail;
4733
 
4734
	i915_gem_object_pin_pages(obj);
4735
	sg = obj->pages;
4736
	bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size);
4737
	i915_gem_object_unpin_pages(obj);
4738
 
4739
	if (WARN_ON(bytes != size)) {
4740
		DRM_ERROR("Incomplete copy, wrote %zu of %zu", bytes, size);
4741
		ret = -EFAULT;
4742
		goto fail;
4743
	}
4744
 
4745
	return obj;
4746
 
4747
fail:
4748
	drm_gem_object_unreference(&obj->base);
4749
	return ERR_PTR(ret);
4104 Serge 4750
}