Rev Author Line No. Line
2332 Serge 1
/*
2
 * Copyright © 2010 Daniel Vetter
5060 serge 3
 * Copyright © 2011-2014 Intel Corporation
2332 Serge 4
 *
5
 * Permission is hereby granted, free of charge, to any person obtaining a
6
 * copy of this software and associated documentation files (the "Software"),
7
 * to deal in the Software without restriction, including without limitation
8
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
 * and/or sell copies of the Software, and to permit persons to whom the
10
 * Software is furnished to do so, subject to the following conditions:
11
 *
12
 * The above copyright notice and this permission notice (including the next
13
 * paragraph) shall be included in all copies or substantial portions of the
14
 * Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
 * IN THE SOFTWARE.
23
 *
24
 */
25
 
5354 serge 26
#include <linux/seq_file.h>
3031 serge 27
#include <drm/drmP.h>
28
#include <drm/i915_drm.h>
2332 Serge 29
#include "i915_drv.h"
6084 serge 30
#include "i915_vgpu.h"
2351 Serge 31
#include "i915_trace.h"
2332 Serge 32
#include "intel_drv.h"
33
 
6084 serge 34
/**
35
 * DOC: Global GTT views
36
 *
37
 * Background and previous state
38
 *
39
 * Historically objects could exist (be bound) in global GTT space only as
40
 * singular instances with a view representing all of the object's backing pages
41
 * in a linear fashion. This view will be called a normal view.
42
 *
43
 * To support multiple views of the same object, where the number of mapped
44
 * pages is not equal to the backing store, or where the layout of the pages
45
 * is not linear, the concept of a GGTT view was added.
46
 *
47
 * One example of an alternative view is a stereo display driven by a single
48
 * image. In this case we would have a framebuffer looking like this
49
 * (2x2 pages):
50
 *
51
 *    12
52
 *    34
53
 *
54
 * Above would represent a normal GGTT view as normally mapped for GPU or CPU
55
 * rendering. In contrast, fed to the display engine would be an alternative
56
 * view which could look something like this:
57
 *
58
 *   1212
59
 *   3434
60
 *
61
 * In this example both the size and layout of pages in the alternative view are
62
 * different from the normal view.
63
 *
64
 * Implementation and usage
65
 *
66
 * GGTT views are implemented using VMAs and are distinguished via enum
67
 * i915_ggtt_view_type and struct i915_ggtt_view.
68
 *
69
 * A new flavour of core GEM functions which work with GGTT bound objects was
70
 * added with the _ggtt_ infix, and sometimes with a _view postfix, to avoid
71
 * renaming in large amounts of code. They take the struct i915_ggtt_view
72
 * parameter encapsulating all metadata required to implement a view.
73
 *
74
 * As a helper for callers which are only interested in the normal view,
75
 * a globally const i915_ggtt_view_normal singleton instance exists. All old core
76
 * GEM API functions, the ones not taking the view parameter, operate on,
77
 * or with, the normal GGTT view.
78
 *
79
 * Code wanting to add or use a new GGTT view needs to:
80
 *
81
 * 1. Add a new enum with a suitable name.
82
 * 2. Extend the metadata in the i915_ggtt_view structure if required.
83
 * 3. Add support to i915_get_vma_pages().
84
 *
85
 * New views are required to build a scatter-gather table from within the
86
 * i915_get_vma_pages function. This table is stored in the vma.ggtt_view and
87
 * exists for the lifetime of a VMA.
88
 *
89
 * Core API is designed to have copy semantics, which means that the passed-in
90
 * struct i915_ggtt_view does not need to be persistent (left around after
91
 * calling the core API functions).
92
 *
93
 */
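/*
 * Illustrative sketch only (not part of the original file): how a caller
 * might pick between the two views described above. Everything except
 * struct i915_ggtt_view, i915_ggtt_view_normal and I915_GGTT_VIEW_ROTATED
 * is hypothetical, and the block is kept under "#if 0" so it is never
 * compiled into the driver.
 */
#if 0
static struct i915_ggtt_view example_pick_view(bool wants_rotated)
{
	/* A display path that needs the remapped layout builds its own
	 * descriptor; thanks to the copy semantics noted above it can
	 * simply live on the stack.
	 */
	if (wants_rotated) {
		struct i915_ggtt_view view = {
			.type = I915_GGTT_VIEW_ROTATED,
		};
		return view;
	}

	/* Everyone else just copies the global normal-view singleton. */
	return i915_ggtt_view_normal;
}
#endif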
3243 Serge 94
 
6084 serge 95
static int
96
i915_get_ggtt_vma_pages(struct i915_vma *vma);
97
 
7144 serge 98
const struct i915_ggtt_view i915_ggtt_view_normal = {
99
	.type = I915_GGTT_VIEW_NORMAL,
100
};
6084 serge 101
const struct i915_ggtt_view i915_ggtt_view_rotated = {
7144 serge 102
	.type = I915_GGTT_VIEW_ROTATED,
6084 serge 103
};
104
 
5354 serge 105
static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
5060 serge 106
{
5354 serge 107
	bool has_aliasing_ppgtt;
108
	bool has_full_ppgtt;
6937 serge 109
	bool has_full_48bit_ppgtt;
3243 Serge 110
 
5354 serge 111
	has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
112
	has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;
6937 serge 113
	has_full_48bit_ppgtt = IS_BROADWELL(dev) || INTEL_INFO(dev)->gen >= 9;
3243 Serge 114
 
6084 serge 115
	if (intel_vgpu_active(dev))
116
		has_full_ppgtt = false; /* emulation is too hard */
117
 
5354 serge 118
	/*
119
	 * We don't allow disabling PPGTT for gen9+ as it's a requirement for
120
	 * execlists, the sole mechanism available to submit work.
121
	 */
122
	if (INTEL_INFO(dev)->gen < 9 &&
123
	    (enable_ppgtt == 0 || !has_aliasing_ppgtt))
5060 serge 124
		return 0;
4104 Serge 125
 
5060 serge 126
	if (enable_ppgtt == 1)
127
		return 1;
4560 Serge 128
 
5354 serge 129
	if (enable_ppgtt == 2 && has_full_ppgtt)
5060 serge 130
		return 2;
4560 Serge 131
 
6937 serge 132
	if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
133
		return 3;
134
 
5060 serge 135
#ifdef CONFIG_INTEL_IOMMU
136
	/* Disable ppgtt on SNB if VT-d is on. */
137
	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
138
		DRM_INFO("Disabling PPGTT because VT-d is on\n");
139
		return 0;
140
	}
141
#endif
142
 
143
	/* Early VLV doesn't have this */
6937 serge 144
	if (IS_VALLEYVIEW(dev) && dev->pdev->revision < 0xb) {
5060 serge 145
		DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
146
		return 0;
147
	}
148
 
6084 serge 149
	if (INTEL_INFO(dev)->gen >= 8 && i915.enable_execlists)
6937 serge 150
		return has_full_48bit_ppgtt ? 3 : 2;
6084 serge 151
	else
152
		return has_aliasing_ppgtt ? 1 : 0;
5060 serge 153
}
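/*
 * Usage note (an assumption, based on how the rest of the driver is expected
 * to consume this helper; the call site is not part of this excerpt): the
 * sanitized value is meant to be written back over the module parameter once
 * at init time, e.g.
 *
 *	i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
 *
 * so that 0/1/2/3 consistently mean disabled, aliasing, full and full 48bit
 * PPGTT everywhere else in the driver.
 */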
154
 
6084 serge 155
static int ppgtt_bind_vma(struct i915_vma *vma,
156
			  enum i915_cache_level cache_level,
157
			  u32 unused)
158
{
159
	u32 pte_flags = 0;
5060 serge 160
 
6084 serge 161
	/* Currently applicable only to VLV */
162
	if (vma->obj->gt_ro)
163
		pte_flags |= PTE_READ_ONLY;
5060 serge 164
 
6084 serge 165
	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
166
				cache_level, pte_flags);
167
 
168
	return 0;
169
}
170
 
171
static void ppgtt_unbind_vma(struct i915_vma *vma)
4560 Serge 172
{
6084 serge 173
	vma->vm->clear_range(vma->vm,
174
			     vma->node.start,
175
			     vma->obj->base.size,
176
			     true);
177
}
178
 
179
static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
180
				  enum i915_cache_level level,
181
				  bool valid)
182
{
183
	gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
4560 Serge 184
	pte |= addr;
5060 serge 185
 
186
	switch (level) {
187
	case I915_CACHE_NONE:
188
		pte |= PPAT_UNCACHED_INDEX;
189
		break;
190
	case I915_CACHE_WT:
191
		pte |= PPAT_DISPLAY_ELLC_INDEX;
192
		break;
193
	default:
4560 Serge 194
		pte |= PPAT_CACHED_INDEX;
5060 serge 195
		break;
196
	}
197
 
4560 Serge 198
	return pte;
199
}
200
 
6084 serge 201
static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
202
				  const enum i915_cache_level level)
4560 Serge 203
{
6084 serge 204
	gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
4560 Serge 205
	pde |= addr;
206
	if (level != I915_CACHE_NONE)
207
		pde |= PPAT_CACHED_PDE_INDEX;
208
	else
209
		pde |= PPAT_UNCACHED_INDEX;
210
	return pde;
211
}
212
 
6084 serge 213
#define gen8_pdpe_encode gen8_pde_encode
214
#define gen8_pml4e_encode gen8_pde_encode
215
 
216
static gen6_pte_t snb_pte_encode(dma_addr_t addr,
217
				 enum i915_cache_level level,
218
				 bool valid, u32 unused)
4104 Serge 219
{
6084 serge 220
	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
4104 Serge 221
	pte |= GEN6_PTE_ADDR_ENCODE(addr);
222
 
223
	switch (level) {
224
	case I915_CACHE_L3_LLC:
225
	case I915_CACHE_LLC:
226
		pte |= GEN6_PTE_CACHE_LLC;
227
		break;
228
	case I915_CACHE_NONE:
229
		pte |= GEN6_PTE_UNCACHED;
230
		break;
231
	default:
6084 serge 232
		MISSING_CASE(level);
4104 Serge 233
	}
234
 
235
	return pte;
236
}
237
 
6084 serge 238
static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
239
				 enum i915_cache_level level,
240
				 bool valid, u32 unused)
3243 Serge 241
{
6084 serge 242
	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
3243 Serge 243
	pte |= GEN6_PTE_ADDR_ENCODE(addr);
244
 
245
	switch (level) {
4104 Serge 246
	case I915_CACHE_L3_LLC:
247
		pte |= GEN7_PTE_CACHE_L3_LLC;
3243 Serge 248
		break;
249
	case I915_CACHE_LLC:
250
		pte |= GEN6_PTE_CACHE_LLC;
251
		break;
252
	case I915_CACHE_NONE:
6084 serge 253
		pte |= GEN6_PTE_UNCACHED;
3243 Serge 254
		break;
255
	default:
6084 serge 256
		MISSING_CASE(level);
3243 Serge 257
	}
258
 
259
	return pte;
260
}
261
 
6084 serge 262
static gen6_pte_t byt_pte_encode(dma_addr_t addr,
263
				 enum i915_cache_level level,
264
				 bool valid, u32 flags)
3746 Serge 265
{
6084 serge 266
	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
4104 Serge 267
	pte |= GEN6_PTE_ADDR_ENCODE(addr);
268
 
5060 serge 269
	if (!(flags & PTE_READ_ONLY))
6084 serge 270
		pte |= BYT_PTE_WRITEABLE;
4104 Serge 271
 
272
	if (level != I915_CACHE_NONE)
273
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
274
 
275
	return pte;
276
}
277
 
6084 serge 278
static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
279
				 enum i915_cache_level level,
280
				 bool valid, u32 unused)
4104 Serge 281
{
6084 serge 282
	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
4104 Serge 283
	pte |= HSW_PTE_ADDR_ENCODE(addr);
284
 
285
	if (level != I915_CACHE_NONE)
286
		pte |= HSW_WB_LLC_AGE3;
287
 
288
	return pte;
289
}
290
 
6084 serge 291
static gen6_pte_t iris_pte_encode(dma_addr_t addr,
292
				  enum i915_cache_level level,
293
				  bool valid, u32 unused)
4104 Serge 294
{
6084 serge 295
	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
4104 Serge 296
	pte |= HSW_PTE_ADDR_ENCODE(addr);
297
 
298
	switch (level) {
299
	case I915_CACHE_NONE:
300
		break;
301
	case I915_CACHE_WT:
4560 Serge 302
		pte |= HSW_WT_ELLC_LLC_AGE3;
4104 Serge 303
		break;
304
	default:
4560 Serge 305
		pte |= HSW_WB_ELLC_LLC_AGE3;
4104 Serge 306
		break;
307
	}
308
 
309
	return pte;
310
}
311
 
6084 serge 312
static int __setup_page_dma(struct drm_device *dev,
313
			    struct i915_page_dma *p, gfp_t flags)
314
{
315
	struct device *device = &dev->pdev->dev;
316
 
317
	p->page = alloc_page(flags);
318
	if (!p->page)
319
		return -ENOMEM;
320
 
321
	p->daddr = page_to_phys(p->page);
322
 
323
	return 0;
324
}
325
 
326
static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
327
{
328
	return __setup_page_dma(dev, p, GFP_KERNEL);
329
}
330
 
331
static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
332
{
333
	if (WARN_ON(!p->page))
334
		return;
335
 
336
	__free_page(p->page);
337
	memset(p, 0, sizeof(*p));
338
}
339
 
340
static void *kmap_page_dma(struct i915_page_dma *p)
341
{
342
	return kmap_atomic(p->page);
343
}
344
 
345
/* We use the flushing unmap only with ppgtt structures:
346
 * page directories, page tables and scratch pages.
347
 */
348
static void kunmap_page_dma(struct drm_device *dev, void *vaddr)
349
{
350
	/* There are only a few exceptions for gen >= 6: chv and bxt.
351
	 * And we are not sure about the latter so play safe for now.
352
	 */
353
	if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
354
		drm_clflush_virt_range(vaddr, PAGE_SIZE);
355
 
356
	kunmap_atomic(vaddr);
357
}
358
 
359
#define kmap_px(px) kmap_page_dma(px_base(px))
360
#define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr))
361
 
362
#define setup_px(dev, px) setup_page_dma((dev), px_base(px))
363
#define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
364
#define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v))
365
#define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v))
366
 
367
static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
368
			  const uint64_t val)
369
{
370
	int i;
371
	uint64_t * const vaddr = kmap_page_dma(p);
372
 
373
	for (i = 0; i < 512; i++)
374
		vaddr[i] = val;
375
 
376
	kunmap_page_dma(dev, vaddr);
377
}
378
 
379
static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
380
			     const uint32_t val32)
381
{
382
	uint64_t v = val32;
383
 
384
	v = v << 32 | val32;
385
 
386
	fill_page_dma(dev, p, v);
387
}
388
 
389
static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev)
390
{
391
	struct i915_page_scratch *sp;
392
	int ret;
393
 
394
	sp = kzalloc(sizeof(*sp), GFP_KERNEL);
395
	if (sp == NULL)
396
		return ERR_PTR(-ENOMEM);
397
 
398
	ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO);
399
	if (ret) {
400
		kfree(sp);
401
		return ERR_PTR(ret);
402
	}
403
 
404
//   set_pages_uc(px_page(sp), 1);
405
 
406
	return sp;
407
}
408
 
409
static void free_scratch_page(struct drm_device *dev,
410
			      struct i915_page_scratch *sp)
411
{
412
//   set_pages_wb(px_page(sp), 1);
413
 
414
	cleanup_px(dev, sp);
415
	kfree(sp);
416
}
417
 
418
static struct i915_page_table *alloc_pt(struct drm_device *dev)
419
{
420
	struct i915_page_table *pt;
421
	const size_t count = INTEL_INFO(dev)->gen >= 8 ?
422
		GEN8_PTES : GEN6_PTES;
423
	int ret = -ENOMEM;
424
 
425
	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
426
	if (!pt)
427
		return ERR_PTR(-ENOMEM);
428
 
429
	pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
430
				GFP_KERNEL);
431
 
432
	if (!pt->used_ptes)
433
		goto fail_bitmap;
434
 
435
	ret = setup_px(dev, pt);
436
	if (ret)
437
		goto fail_page_m;
438
 
439
	return pt;
440
 
441
fail_page_m:
442
	kfree(pt->used_ptes);
443
fail_bitmap:
444
	kfree(pt);
445
 
446
	return ERR_PTR(ret);
447
}
448
 
449
static void free_pt(struct drm_device *dev, struct i915_page_table *pt)
450
{
451
	cleanup_px(dev, pt);
452
	kfree(pt->used_ptes);
453
	kfree(pt);
454
}
455
 
456
static void gen8_initialize_pt(struct i915_address_space *vm,
457
			       struct i915_page_table *pt)
458
{
459
	gen8_pte_t scratch_pte;
460
 
461
	scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
462
				      I915_CACHE_LLC, true);
463
 
464
	fill_px(vm->dev, pt, scratch_pte);
465
}
466
 
467
static void gen6_initialize_pt(struct i915_address_space *vm,
468
			       struct i915_page_table *pt)
469
{
470
	gen6_pte_t scratch_pte;
471
 
472
	WARN_ON(px_dma(vm->scratch_page) == 0);
473
 
474
	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
475
				     I915_CACHE_LLC, true, 0);
476
 
477
	fill32_px(vm->dev, pt, scratch_pte);
478
}
479
 
480
static struct i915_page_directory *alloc_pd(struct drm_device *dev)
481
{
482
	struct i915_page_directory *pd;
483
	int ret = -ENOMEM;
484
 
485
	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
486
	if (!pd)
487
		return ERR_PTR(-ENOMEM);
488
 
489
	pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
490
				sizeof(*pd->used_pdes), GFP_KERNEL);
491
	if (!pd->used_pdes)
492
		goto fail_bitmap;
493
 
494
	ret = setup_px(dev, pd);
495
	if (ret)
496
		goto fail_page_m;
497
 
498
	return pd;
499
 
500
fail_page_m:
501
	kfree(pd->used_pdes);
502
fail_bitmap:
503
	kfree(pd);
504
 
505
	return ERR_PTR(ret);
506
}
507
 
508
static void free_pd(struct drm_device *dev, struct i915_page_directory *pd)
509
{
510
	if (px_page(pd)) {
511
		cleanup_px(dev, pd);
512
		kfree(pd->used_pdes);
513
		kfree(pd);
514
	}
515
}
516
 
517
static void gen8_initialize_pd(struct i915_address_space *vm,
518
			       struct i915_page_directory *pd)
519
{
520
	gen8_pde_t scratch_pde;
521
 
522
	scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
523
 
524
	fill_px(vm->dev, pd, scratch_pde);
525
}
526
 
527
static int __pdp_init(struct drm_device *dev,
528
		      struct i915_page_directory_pointer *pdp)
529
{
530
	size_t pdpes = I915_PDPES_PER_PDP(dev);
531
 
532
	pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
533
				  sizeof(unsigned long),
534
				  GFP_KERNEL);
535
	if (!pdp->used_pdpes)
536
		return -ENOMEM;
537
 
538
	pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
539
				      GFP_KERNEL);
540
	if (!pdp->page_directory) {
541
		kfree(pdp->used_pdpes);
542
		/* the PDP might be the statically allocated top level. Keep it
543
		 * as clean as possible */
544
		pdp->used_pdpes = NULL;
545
		return -ENOMEM;
546
	}
547
 
548
	return 0;
549
}
550
 
551
static void __pdp_fini(struct i915_page_directory_pointer *pdp)
552
{
553
	kfree(pdp->used_pdpes);
554
	kfree(pdp->page_directory);
555
	pdp->page_directory = NULL;
556
}
557
 
558
static struct
559
i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
560
{
561
	struct i915_page_directory_pointer *pdp;
562
	int ret = -ENOMEM;
563
 
564
	WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
565
 
566
	pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
567
	if (!pdp)
568
		return ERR_PTR(-ENOMEM);
569
 
570
	ret = __pdp_init(dev, pdp);
571
	if (ret)
572
		goto fail_bitmap;
573
 
574
	ret = setup_px(dev, pdp);
575
	if (ret)
576
		goto fail_page_m;
577
 
578
	return pdp;
579
 
580
fail_page_m:
581
	__pdp_fini(pdp);
582
fail_bitmap:
583
	kfree(pdp);
584
 
585
	return ERR_PTR(ret);
586
}
587
 
588
static void free_pdp(struct drm_device *dev,
589
		     struct i915_page_directory_pointer *pdp)
590
{
591
	__pdp_fini(pdp);
592
	if (USES_FULL_48BIT_PPGTT(dev)) {
593
		cleanup_px(dev, pdp);
594
		kfree(pdp);
595
	}
596
}
597
 
598
static void gen8_initialize_pdp(struct i915_address_space *vm,
599
				struct i915_page_directory_pointer *pdp)
600
{
601
	gen8_ppgtt_pdpe_t scratch_pdpe;
602
 
603
	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
604
 
605
	fill_px(vm->dev, pdp, scratch_pdpe);
606
}
607
 
608
static void gen8_initialize_pml4(struct i915_address_space *vm,
609
				 struct i915_pml4 *pml4)
610
{
611
	gen8_ppgtt_pml4e_t scratch_pml4e;
612
 
613
	scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
614
					  I915_CACHE_LLC);
615
 
616
	fill_px(vm->dev, pml4, scratch_pml4e);
617
}
618
 
619
static void
620
gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
621
			  struct i915_page_directory_pointer *pdp,
622
			  struct i915_page_directory *pd,
623
			  int index)
624
{
625
	gen8_ppgtt_pdpe_t *page_directorypo;
626
 
627
	if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
628
		return;
629
 
630
	page_directorypo = kmap_px(pdp);
631
	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
632
	kunmap_px(ppgtt, page_directorypo);
633
}
634
 
635
static void
636
gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
637
				  struct i915_pml4 *pml4,
638
				  struct i915_page_directory_pointer *pdp,
639
				  int index)
640
{
641
	gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
642
 
643
	WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
644
	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
645
	kunmap_px(ppgtt, pagemap);
646
}
647
 
4560 Serge 648
/* Broadwell Page Directory Pointer Descriptors */
6084 serge 649
static int gen8_write_pdp(struct drm_i915_gem_request *req,
650
			  unsigned entry,
651
			  dma_addr_t addr)
4560 Serge 652
{
6084 serge 653
	struct intel_engine_cs *ring = req->ring;
4560 Serge 654
	int ret;
655
 
656
	BUG_ON(entry >= 4);
657
 
6084 serge 658
	ret = intel_ring_begin(req, 6);
4560 Serge 659
	if (ret)
660
		return ret;
661
 
662
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
6937 serge 663
	intel_ring_emit_reg(ring, GEN8_RING_PDP_UDW(ring, entry));
6084 serge 664
	intel_ring_emit(ring, upper_32_bits(addr));
4560 Serge 665
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
6937 serge 666
	intel_ring_emit_reg(ring, GEN8_RING_PDP_LDW(ring, entry));
6084 serge 667
	intel_ring_emit(ring, lower_32_bits(addr));
4560 Serge 668
	intel_ring_advance(ring);
669
 
670
	return 0;
671
}
672
 
6084 serge 673
static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
674
				 struct drm_i915_gem_request *req)
4560 Serge 675
{
5060 serge 676
	int i, ret;
4560 Serge 677
 
6084 serge 678
	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
679
		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
4560 Serge 680
 
6084 serge 681
		ret = gen8_write_pdp(req, i, pd_daddr);
682
		if (ret)
5060 serge 683
			return ret;
4560 Serge 684
	}
5060 serge 685
 
4560 Serge 686
	return 0;
687
}
688
 
6084 serge 689
static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
690
			      struct drm_i915_gem_request *req)
4560 Serge 691
{
6084 serge 692
	return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
693
}
694
 
695
static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
696
				       struct i915_page_directory_pointer *pdp,
697
				       uint64_t start,
698
				       uint64_t length,
699
				       gen8_pte_t scratch_pte)
700
{
4560 Serge 701
	struct i915_hw_ppgtt *ppgtt =
702
		container_of(vm, struct i915_hw_ppgtt, base);
6084 serge 703
	gen8_pte_t *pt_vaddr;
704
	unsigned pdpe = gen8_pdpe_index(start);
705
	unsigned pde = gen8_pde_index(start);
706
	unsigned pte = gen8_pte_index(start);
5060 serge 707
	unsigned num_entries = length >> PAGE_SHIFT;
4560 Serge 708
	unsigned last_pte, i;
709
 
6084 serge 710
	if (WARN_ON(!pdp))
711
		return;
4560 Serge 712
 
713
	while (num_entries) {
6084 serge 714
		struct i915_page_directory *pd;
715
		struct i915_page_table *pt;
4560 Serge 716
 
6084 serge 717
		if (WARN_ON(!pdp->page_directory[pdpe]))
718
			break;
719
 
720
		pd = pdp->page_directory[pdpe];
721
 
722
		if (WARN_ON(!pd->page_table[pde]))
723
			break;
724
 
725
		pt = pd->page_table[pde];
726
 
727
		if (WARN_ON(!px_page(pt)))
728
			break;
729
 
5060 serge 730
		last_pte = pte + num_entries;
6084 serge 731
		if (last_pte > GEN8_PTES)
732
			last_pte = GEN8_PTES;
4560 Serge 733
 
6084 serge 734
		pt_vaddr = kmap_px(pt);
4560 Serge 735
 
5060 serge 736
		for (i = pte; i < last_pte; i++) {
4560 Serge 737
			pt_vaddr[i] = scratch_pte;
5060 serge 738
			num_entries--;
739
		}
4560 Serge 740
 
6084 serge 741
		kunmap_px(ppgtt, pt);
5060 serge 742
 
743
		pte = 0;
6084 serge 744
		if (++pde == I915_PDES) {
745
			if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
746
				break;
5060 serge 747
			pde = 0;
748
		}
4560 Serge 749
	}
750
}
751
 
6084 serge 752
static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
753
				   uint64_t start,
754
				   uint64_t length,
755
				   bool use_scratch)
4560 Serge 756
{
757
	struct i915_hw_ppgtt *ppgtt =
758
		container_of(vm, struct i915_hw_ppgtt, base);
6084 serge 759
	gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
760
						 I915_CACHE_LLC, use_scratch);
4560 Serge 761
 
6084 serge 762
	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
763
		gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
764
					   scratch_pte);
765
	} else {
6937 serge 766
		uint64_t pml4e;
6084 serge 767
		struct i915_page_directory_pointer *pdp;
768
 
6937 serge 769
		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
6084 serge 770
			gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
771
						   scratch_pte);
772
		}
773
	}
774
}
775
 
776
static void
777
gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
778
			      struct i915_page_directory_pointer *pdp,
779
			      struct sg_page_iter *sg_iter,
780
			      uint64_t start,
781
			      enum i915_cache_level cache_level)
782
{
783
	struct i915_hw_ppgtt *ppgtt =
784
		container_of(vm, struct i915_hw_ppgtt, base);
785
	gen8_pte_t *pt_vaddr;
786
	unsigned pdpe = gen8_pdpe_index(start);
787
	unsigned pde = gen8_pde_index(start);
788
	unsigned pte = gen8_pte_index(start);
789
 
5354 serge 790
	pt_vaddr = NULL;
4560 Serge 791
 
6084 serge 792
	while (__sg_page_iter_next(sg_iter)) {
793
		if (pt_vaddr == NULL) {
794
			struct i915_page_directory *pd = pdp->page_directory[pdpe];
795
			struct i915_page_table *pt = pd->page_table[pde];
796
			pt_vaddr = kmap_px(pt);
797
		}
4560 Serge 798
 
5060 serge 799
		pt_vaddr[pte] =
6084 serge 800
			gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
4560 Serge 801
					cache_level, true);
6084 serge 802
		if (++pte == GEN8_PTES) {
803
			kunmap_px(ppgtt, pt_vaddr);
5354 serge 804
			pt_vaddr = NULL;
6084 serge 805
			if (++pde == I915_PDES) {
806
				if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
807
					break;
5060 serge 808
				pde = 0;
809
			}
810
			pte = 0;
4560 Serge 811
		}
812
	}
6084 serge 813
 
814
	if (pt_vaddr)
815
		kunmap_px(ppgtt, pt_vaddr);
816
}
817
 
818
static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
819
				      struct sg_table *pages,
820
				      uint64_t start,
821
				      enum i915_cache_level cache_level,
822
				      u32 unused)
823
{
824
	struct i915_hw_ppgtt *ppgtt =
825
		container_of(vm, struct i915_hw_ppgtt, base);
826
	struct sg_page_iter sg_iter;
827
 
828
	__sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
829
 
830
	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
831
		gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
832
					      cache_level);
833
	} else {
834
		struct i915_page_directory_pointer *pdp;
6937 serge 835
		uint64_t pml4e;
6084 serge 836
		uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
837
 
6937 serge 838
		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, pml4e) {
6084 serge 839
			gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
840
						      start, cache_level);
841
		}
5354 serge 842
	}
4560 Serge 843
}
844
 
6084 serge 845
static void gen8_free_page_tables(struct drm_device *dev,
846
				  struct i915_page_directory *pd)
4560 Serge 847
{
5060 serge 848
	int i;
849
 
6084 serge 850
	if (!px_page(pd))
5060 serge 851
		return;
852
 
6084 serge 853
	for_each_set_bit(i, pd->used_pdes, I915_PDES) {
854
		if (WARN_ON(!pd->page_table[i]))
855
			continue;
856
 
857
		free_pt(dev, pd->page_table[i]);
858
		pd->page_table[i] = NULL;
859
	}
5060 serge 860
}
861
 
6084 serge 862
static int gen8_init_scratch(struct i915_address_space *vm)
5060 serge 863
{
6084 serge 864
	struct drm_device *dev = vm->dev;
865
 
866
	vm->scratch_page = alloc_scratch_page(dev);
867
	if (IS_ERR(vm->scratch_page))
868
		return PTR_ERR(vm->scratch_page);
869
 
870
	vm->scratch_pt = alloc_pt(dev);
871
	if (IS_ERR(vm->scratch_pt)) {
872
		free_scratch_page(dev, vm->scratch_page);
873
		return PTR_ERR(vm->scratch_pt);
874
	}
875
 
876
	vm->scratch_pd = alloc_pd(dev);
877
	if (IS_ERR(vm->scratch_pd)) {
878
		free_pt(dev, vm->scratch_pt);
879
		free_scratch_page(dev, vm->scratch_page);
880
		return PTR_ERR(vm->scratch_pd);
881
	}
882
 
883
	if (USES_FULL_48BIT_PPGTT(dev)) {
884
		vm->scratch_pdp = alloc_pdp(dev);
885
		if (IS_ERR(vm->scratch_pdp)) {
886
			free_pd(dev, vm->scratch_pd);
887
			free_pt(dev, vm->scratch_pt);
888
			free_scratch_page(dev, vm->scratch_page);
889
			return PTR_ERR(vm->scratch_pdp);
890
		}
891
	}
892
 
893
	gen8_initialize_pt(vm, vm->scratch_pt);
894
	gen8_initialize_pd(vm, vm->scratch_pd);
895
	if (USES_FULL_48BIT_PPGTT(dev))
896
		gen8_initialize_pdp(vm, vm->scratch_pdp);
897
 
898
	return 0;
899
}
900
 
901
static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
902
{
903
	enum vgt_g2v_type msg;
904
	struct drm_device *dev = ppgtt->base.dev;
905
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 906
	int i;
907
 
6084 serge 908
	if (USES_FULL_48BIT_PPGTT(dev)) {
909
		u64 daddr = px_dma(&ppgtt->pml4);
910
 
6937 serge 911
		I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
912
		I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
6084 serge 913
 
914
		msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
915
				VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
916
	} else {
917
		for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
918
			u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
919
 
6937 serge 920
			I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
921
			I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
6084 serge 922
		}
923
 
924
		msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
925
				VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
5060 serge 926
	}
927
 
6084 serge 928
	I915_WRITE(vgtif_reg(g2v_notify), msg);
929
 
930
	return 0;
5060 serge 931
}
932
 
6084 serge 933
static void gen8_free_scratch(struct i915_address_space *vm)
5060 serge 934
{
6084 serge 935
	struct drm_device *dev = vm->dev;
4560 Serge 936
 
6084 serge 937
	if (USES_FULL_48BIT_PPGTT(dev))
938
		free_pdp(dev, vm->scratch_pdp);
939
	free_pd(dev, vm->scratch_pd);
940
	free_pt(dev, vm->scratch_pt);
941
	free_scratch_page(dev, vm->scratch_page);
942
}
943
 
944
static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
945
				    struct i915_page_directory_pointer *pdp)
946
{
947
	int i;
948
 
949
	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
950
		if (WARN_ON(!pdp->page_directory[i]))
5060 serge 951
			continue;
4560 Serge 952
 
6084 serge 953
		gen8_free_page_tables(dev, pdp->page_directory[i]);
954
		free_pd(dev, pdp->page_directory[i]);
955
	}
4560 Serge 956
 
6084 serge 957
	free_pdp(dev, pdp);
958
}
959
 
960
static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
961
{
962
	int i;
963
 
964
	for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
965
		if (WARN_ON(!ppgtt->pml4.pdps[i]))
966
			continue;
967
 
968
		gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]);
5060 serge 969
	}
6084 serge 970
 
971
	cleanup_px(ppgtt->base.dev, &ppgtt->pml4);
5060 serge 972
}
4560 Serge 973
 
5060 serge 974
static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
975
{
976
	struct i915_hw_ppgtt *ppgtt =
977
		container_of(vm, struct i915_hw_ppgtt, base);
978
 
6084 serge 979
	if (intel_vgpu_active(vm->dev))
980
		gen8_ppgtt_notify_vgt(ppgtt, false);
981
 
982
	if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
983
		gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp);
984
	else
985
		gen8_ppgtt_cleanup_4lvl(ppgtt);
986
 
987
	gen8_free_scratch(vm);
5060 serge 988
}
989
 
6084 serge 990
/**
991
 * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
992
 * @vm:	Master vm structure.
993
 * @pd:	Page directory for this address range.
994
 * @start:	Starting virtual address to begin allocations.
995
 * @length:	Size of the allocations.
996
 * @new_pts:	Bitmap set by function with new allocations. Likely used by the
997
 *		caller to free on error.
998
 *
999
 * Allocate the required number of page tables. Extremely similar to
1000
 * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
1001
 * the page directory boundary (instead of the page directory pointer). That
1002
 * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
1003
 * possible, and likely that the caller will need to use multiple calls of this
1004
 * function to achieve the appropriate allocation.
1005
 *
1006
 * Return: 0 if success; negative error code otherwise.
1007
 */
1008
static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
1009
				     struct i915_page_directory *pd,
1010
				     uint64_t start,
1011
				     uint64_t length,
1012
				     unsigned long *new_pts)
5060 serge 1013
{
6084 serge 1014
	struct drm_device *dev = vm->dev;
1015
	struct i915_page_table *pt;
1016
	uint32_t pde;
5060 serge 1017
 
6937 serge 1018
	gen8_for_each_pde(pt, pd, start, length, pde) {
6084 serge 1019
		/* Don't reallocate page tables */
1020
		if (test_bit(pde, pd->used_pdes)) {
1021
			/* Scratch is never allocated this way */
1022
			WARN_ON(pt == vm->scratch_pt);
1023
			continue;
1024
		}
5060 serge 1025
 
6084 serge 1026
		pt = alloc_pt(dev);
1027
		if (IS_ERR(pt))
1028
			goto unwind_out;
5060 serge 1029
 
6084 serge 1030
		gen8_initialize_pt(vm, pt);
1031
		pd->page_table[pde] = pt;
1032
		__set_bit(pde, new_pts);
1033
		trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
1034
	}
5060 serge 1035
 
6084 serge 1036
	return 0;
1037
 
1038
unwind_out:
1039
	for_each_set_bit(pde, new_pts, I915_PDES)
1040
		free_pt(dev, pd->page_table[pde]);
1041
 
1042
	return -ENOMEM;
5060 serge 1043
}
1044
 
6084 serge 1045
/**
1046
 * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
1047
 * @vm:	Master vm structure.
1048
 * @pdp:	Page directory pointer for this address range.
1049
 * @start:	Starting virtual address to begin allocations.
1050
 * @length:	Size of the allocations.
1051
 * @new_pds:	Bitmap set by function with new allocations. Likely used by the
1052
 *		caller to free on error.
1053
 *
1054
 * Allocate the required number of page directories starting at the pde index of
1055
 * @start, and ending at the pde index @start + @length. This function will skip
1056
 * over already allocated page directories within the range, and only allocate
1057
 * new ones, setting the appropriate pointer within the pdp as well as the
1058
 * correct position in the bitmap @new_pds.
1059
 *
1060
 * The function will only allocate the pages within the range for a given page
1061
 * directory pointer. In other words, if @start + @length straddles a virtually
1062
 * addressed PDP boundary (512GB for 4k pages), there will be more allocations
1063
 * required by the caller. This is not currently possible, and the BUG in the
1064
 * code will prevent it.
1065
 *
1066
 * Return: 0 if success; negative error code otherwise.
1067
 */
1068
static int
1069
gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
1070
				  struct i915_page_directory_pointer *pdp,
1071
				  uint64_t start,
1072
				  uint64_t length,
1073
				  unsigned long *new_pds)
5060 serge 1074
{
6084 serge 1075
	struct drm_device *dev = vm->dev;
1076
	struct i915_page_directory *pd;
1077
	uint32_t pdpe;
1078
	uint32_t pdpes = I915_PDPES_PER_PDP(dev);
5060 serge 1079
 
6084 serge 1080
	WARN_ON(!bitmap_empty(new_pds, pdpes));
1081
 
6937 serge 1082
	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
6084 serge 1083
		if (test_bit(pdpe, pdp->used_pdpes))
1084
			continue;
1085
 
1086
		pd = alloc_pd(dev);
1087
		if (IS_ERR(pd))
5060 serge 1088
			goto unwind_out;
6084 serge 1089
 
1090
		gen8_initialize_pd(vm, pd);
1091
		pdp->page_directory[pdpe] = pd;
1092
		__set_bit(pdpe, new_pds);
1093
		trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
4560 Serge 1094
	}
1095
 
5060 serge 1096
	return 0;
1097
 
1098
unwind_out:
6084 serge 1099
	for_each_set_bit(pdpe, new_pds, pdpes)
1100
		free_pd(dev, pdp->page_directory[pdpe]);
5060 serge 1101
 
6084 serge 1102
	return -ENOMEM;
4560 Serge 1103
}
1104
 
6084 serge 1105
/**
1106
 * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
1107
 * @vm:	Master vm structure.
1108
 * @pml4:	Page map level 4 for this address range.
1109
 * @start:	Starting virtual address to begin allocations.
1110
 * @length:	Size of the allocations.
1111
 * @new_pdps:	Bitmap set by function with new allocations. Likely used by the
1112
 *		caller to free on error.
1113
 *
1114
 * Allocate the required number of page directory pointers. Extremely similar to
1115
 * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
1116
 * The main difference is here we are limited by the pml4 boundary (instead of
1117
 * the page directory pointer).
1118
 *
1119
 * Return: 0 if success; negative error code otherwise.
1120
 */
1121
static int
1122
gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
1123
				  struct i915_pml4 *pml4,
1124
				  uint64_t start,
1125
				  uint64_t length,
1126
				  unsigned long *new_pdps)
5060 serge 1127
{
6084 serge 1128
	struct drm_device *dev = vm->dev;
1129
	struct i915_page_directory_pointer *pdp;
1130
	uint32_t pml4e;
5060 serge 1131
 
6084 serge 1132
	WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
1133
 
6937 serge 1134
	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
6084 serge 1135
		if (!test_bit(pml4e, pml4->used_pml4es)) {
1136
			pdp = alloc_pdp(dev);
1137
			if (IS_ERR(pdp))
1138
				goto unwind_out;
1139
 
1140
			gen8_initialize_pdp(vm, pdp);
1141
			pml4->pdps[pml4e] = pdp;
1142
			__set_bit(pml4e, new_pdps);
1143
			trace_i915_page_directory_pointer_entry_alloc(vm,
1144
								      pml4e,
1145
								      start,
1146
								      GEN8_PML4E_SHIFT);
5060 serge 1147
		}
6084 serge 1148
	}
5060 serge 1149
 
1150
	return 0;
6084 serge 1151
 
1152
unwind_out:
1153
	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1154
		free_pdp(dev, pml4->pdps[pml4e]);
1155
 
1156
	return -ENOMEM;
5060 serge 1157
}
1158
 
6084 serge 1159
static void
1160
free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
5060 serge 1161
{
6084 serge 1162
	kfree(new_pts);
1163
	kfree(new_pds);
1164
}
1165
 
1166
/* Fills in the page directory bitmap, and the array of page tables bitmap. Both
1167
 * of these are based on the number of PDPEs in the system.
1168
 */
1169
static
1170
int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
1171
					 unsigned long **new_pts,
1172
					 uint32_t pdpes)
1173
{
1174
	unsigned long *pds;
1175
	unsigned long *pts;
1176
 
1177
	pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
1178
	if (!pds)
5060 serge 1179
		return -ENOMEM;
1180
 
6084 serge 1181
	pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
1182
		      GFP_TEMPORARY);
1183
	if (!pts)
1184
		goto err_out;
5060 serge 1185
 
6084 serge 1186
	*new_pds = pds;
1187
	*new_pts = pts;
1188
 
5060 serge 1189
	return 0;
6084 serge 1190
 
1191
err_out:
1192
	free_gen8_temp_bitmaps(pds, pts);
1193
	return -ENOMEM;
5060 serge 1194
}
1195
 
6084 serge 1196
/* PDE TLBs are a pain to invalidate on GEN8+. When we modify
1197
 * the page table structures, we mark them dirty so that
1198
 * context switching/execlist queuing code takes extra steps
1199
 * to ensure that tlbs are flushed.
1200
 */
1201
static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
5060 serge 1202
{
6084 serge 1203
	ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
1204
}
1205
 
1206
static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
1207
				    struct i915_page_directory_pointer *pdp,
1208
				    uint64_t start,
1209
				    uint64_t length)
1210
{
1211
	struct i915_hw_ppgtt *ppgtt =
1212
		container_of(vm, struct i915_hw_ppgtt, base);
1213
	unsigned long *new_page_dirs, *new_page_tables;
1214
	struct drm_device *dev = vm->dev;
1215
	struct i915_page_directory *pd;
1216
	const uint64_t orig_start = start;
1217
	const uint64_t orig_length = length;
1218
	uint32_t pdpe;
1219
	uint32_t pdpes = I915_PDPES_PER_PDP(dev);
5060 serge 1220
	int ret;
1221
 
6084 serge 1222
	/* Wrap is never okay since we can only represent 48b, and we don't
1223
	 * actually use the other side of the canonical address space.
1224
	 */
1225
	if (WARN_ON(start + length < start))
1226
		return -ENODEV;
1227
 
1228
	if (WARN_ON(start + length > vm->total))
1229
		return -ENODEV;
1230
 
1231
	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
5060 serge 1232
	if (ret)
1233
		return ret;
1234
 
6084 serge 1235
	/* Do the allocations first so we can easily bail out */
1236
	ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
1237
						new_page_dirs);
5060 serge 1238
	if (ret) {
6084 serge 1239
		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
5060 serge 1240
		return ret;
1241
	}
1242
 
6084 serge 1243
	/* For every page directory referenced, allocate page tables */
6937 serge 1244
	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
6084 serge 1245
		ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
1246
						new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
1247
		if (ret)
1248
			goto err_out;
1249
	}
5060 serge 1250
 
6084 serge 1251
	start = orig_start;
1252
	length = orig_length;
5060 serge 1253
 
6084 serge 1254
	/* Allocations have completed successfully, so set the bitmaps, and do
1255
	 * the mappings. */
6937 serge 1256
	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
6084 serge 1257
		gen8_pde_t *const page_directory = kmap_px(pd);
1258
		struct i915_page_table *pt;
1259
		uint64_t pd_len = length;
1260
		uint64_t pd_start = start;
1261
		uint32_t pde;
1262
 
1263
		/* Every pd should be allocated, we just did that above. */
1264
		WARN_ON(!pd);
1265
 
6937 serge 1266
		gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
6084 serge 1267
			/* Same reasoning as pd */
1268
			WARN_ON(!pt);
1269
			WARN_ON(!pd_len);
1270
			WARN_ON(!gen8_pte_count(pd_start, pd_len));
1271
 
1272
			/* Set our used ptes within the page table */
1273
			bitmap_set(pt->used_ptes,
1274
				   gen8_pte_index(pd_start),
1275
				   gen8_pte_count(pd_start, pd_len));
1276
 
1277
			/* Our pde is now pointing to the pagetable, pt */
1278
			__set_bit(pde, pd->used_pdes);
1279
 
1280
			/* Map the PDE to the page table */
1281
			page_directory[pde] = gen8_pde_encode(px_dma(pt),
1282
							      I915_CACHE_LLC);
1283
			trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
1284
							gen8_pte_index(start),
1285
							gen8_pte_count(start, length),
1286
							GEN8_PTES);
1287
 
1288
			/* NB: We haven't yet mapped ptes to pages. At this
1289
			 * point we're still relying on insert_entries() */
1290
		}
1291
 
1292
		kunmap_px(ppgtt, page_directory);
1293
		__set_bit(pdpe, pdp->used_pdpes);
1294
		gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
1295
	}
1296
 
1297
	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1298
	mark_tlbs_dirty(ppgtt);
1299
	return 0;
1300
 
1301
err_out:
1302
	while (pdpe--) {
6937 serge 1303
		unsigned long temp;
1304
 
6084 serge 1305
		for_each_set_bit(temp, new_page_tables + pdpe *
1306
				BITS_TO_LONGS(I915_PDES), I915_PDES)
1307
			free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
1308
	}
1309
 
1310
	for_each_set_bit(pdpe, new_page_dirs, pdpes)
1311
		free_pd(dev, pdp->page_directory[pdpe]);
1312
 
1313
	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1314
	mark_tlbs_dirty(ppgtt);
5060 serge 1315
	return ret;
1316
}
1317
 
6084 serge 1318
static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
1319
				    struct i915_pml4 *pml4,
1320
				    uint64_t start,
1321
				    uint64_t length)
5060 serge 1322
{
6084 serge 1323
	DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
1324
	struct i915_hw_ppgtt *ppgtt =
1325
			container_of(vm, struct i915_hw_ppgtt, base);
1326
	struct i915_page_directory_pointer *pdp;
6937 serge 1327
	uint64_t pml4e;
6084 serge 1328
	int ret = 0;
5060 serge 1329
 
6084 serge 1330
	/* Do the pml4 allocations first, so we don't need to track the newly
1331
	 * allocated tables below the pdp */
1332
	bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
5060 serge 1333
 
6084 serge 1334
	/* The pagedirectory and pagetable allocations are done in the shared 3
1335
	 * and 4 level code. Just allocate the pdps.
1336
	 */
1337
	ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
1338
						new_pdps);
1339
	if (ret)
1340
		return ret;
5060 serge 1341
 
6084 serge 1342
	WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
1343
	     "The allocation has spanned more than 512GB. "
1344
	     "It is highly likely this is incorrect.");
5060 serge 1345
 
6937 serge 1346
	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
6084 serge 1347
		WARN_ON(!pdp);
1348
 
1349
		ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
1350
		if (ret)
1351
			goto err_out;
1352
 
1353
		gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e);
1354
	}
1355
 
1356
	bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
1357
		  GEN8_PML4ES_PER_PML4);
1358
 
5060 serge 1359
	return 0;
6084 serge 1360
 
1361
err_out:
1362
	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1363
		gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]);
1364
 
1365
	return ret;
5060 serge 1366
}
1367
 
6084 serge 1368
static int gen8_alloc_va_range(struct i915_address_space *vm,
1369
			       uint64_t start, uint64_t length)
5060 serge 1370
{
6084 serge 1371
	struct i915_hw_ppgtt *ppgtt =
1372
		container_of(vm, struct i915_hw_ppgtt, base);
1373
 
1374
	if (USES_FULL_48BIT_PPGTT(vm->dev))
1375
		return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
1376
	else
1377
		return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
1378
}
1379
 
1380
static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
1381
			  uint64_t start, uint64_t length,
1382
			  gen8_pte_t scratch_pte,
1383
			  struct seq_file *m)
1384
{
1385
	struct i915_page_directory *pd;
1386
	uint32_t pdpe;
1387
 
6937 serge 1388
	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
6084 serge 1389
		struct i915_page_table *pt;
1390
		uint64_t pd_len = length;
1391
		uint64_t pd_start = start;
1392
		uint32_t pde;
1393
 
1394
		if (!test_bit(pdpe, pdp->used_pdpes))
1395
			continue;
1396
 
1397
		seq_printf(m, "\tPDPE #%d\n", pdpe);
6937 serge 1398
		gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
6084 serge 1399
			uint32_t  pte;
1400
			gen8_pte_t *pt_vaddr;
1401
 
1402
			if (!test_bit(pde, pd->used_pdes))
1403
				continue;
1404
 
1405
			pt_vaddr = kmap_px(pt);
1406
			for (pte = 0; pte < GEN8_PTES; pte += 4) {
1407
				uint64_t va =
1408
					(pdpe << GEN8_PDPE_SHIFT) |
1409
					(pde << GEN8_PDE_SHIFT) |
1410
					(pte << GEN8_PTE_SHIFT);
1411
				int i;
1412
				bool found = false;
1413
 
1414
				for (i = 0; i < 4; i++)
1415
					if (pt_vaddr[pte + i] != scratch_pte)
1416
						found = true;
1417
				if (!found)
1418
					continue;
1419
 
1420
				seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
1421
				for (i = 0; i < 4; i++) {
1422
					if (pt_vaddr[pte + i] != scratch_pte)
1423
						seq_printf(m, " %llx", pt_vaddr[pte + i]);
1424
					else
1425
						seq_puts(m, "  SCRATCH ");
1426
				}
1427
				seq_puts(m, "\n");
1428
			}
1429
			/* don't use kunmap_px, it could trigger
1430
			 * an unnecessary flush.
1431
			 */
1432
			kunmap_atomic(pt_vaddr);
1433
		}
1434
	}
1435
}
1436
 
1437
static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1438
{
1439
	struct i915_address_space *vm = &ppgtt->base;
1440
	uint64_t start = ppgtt->base.start;
1441
	uint64_t length = ppgtt->base.total;
1442
	gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
1443
						 I915_CACHE_LLC, true);
1444
 
1445
	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
1446
		gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
1447
	} else {
6937 serge 1448
		uint64_t pml4e;
6084 serge 1449
		struct i915_pml4 *pml4 = &ppgtt->pml4;
1450
		struct i915_page_directory_pointer *pdp;
1451
 
6937 serge 1452
		gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
6084 serge 1453
			if (!test_bit(pml4e, pml4->used_pml4es))
1454
				continue;
1455
 
1456
			seq_printf(m, "    PML4E #%llu\n", pml4e);
1457
			gen8_dump_pdp(pdp, start, length, scratch_pte, m);
1458
		}
1459
	}
1460
}
1461
 
1462
static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
1463
{
1464
	unsigned long *new_page_dirs, *new_page_tables;
1465
	uint32_t pdpes = I915_PDPES_PER_PDP(dev);
5060 serge 1466
	int ret;
1467
 
6084 serge 1468
	/* We allocate a temp bitmap for page tables for no gain
1469
	 * but as this is for init only, let's keep things simple
1470
	 */
1471
	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1472
	if (ret)
1473
		return ret;
5060 serge 1474
 
6084 serge 1475
	/* Allocate for all pdps regardless of how the ppgtt
1476
	 * was defined.
1477
	 */
1478
	ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
1479
						0, 1ULL << 32,
1480
						new_page_dirs);
1481
	if (!ret)
1482
		*ppgtt->pdp.used_pdpes = *new_page_dirs;
5060 serge 1483
 
6084 serge 1484
	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1485
 
1486
	return ret;
5060 serge 1487
}
1488
 
6084 serge 1489
/*
5060 serge 1490
 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
1491
 * with a net effect resembling a 2-level page table in normal x86 terms. Each
1492
 * PDP represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB legacy 32b address
1493
 * space.
4560 Serge 1494
 *
5060 serge 1495
 */
6084 serge 1496
static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
4560 Serge 1497
{
6084 serge 1498
	int ret;
4560 Serge 1499
 
6084 serge 1500
	ret = gen8_init_scratch(&ppgtt->base);
5060 serge 1501
	if (ret)
1502
		return ret;
4560 Serge 1503
 
6084 serge 1504
	ppgtt->base.start = 0;
1505
	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
1506
	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
1507
	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
1508
	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
1509
	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
1510
	ppgtt->base.bind_vma = ppgtt_bind_vma;
1511
	ppgtt->debug_dump = gen8_dump_ppgtt;
1512
 
1513
	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
1514
		ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
5060 serge 1515
		if (ret)
6084 serge 1516
			goto free_scratch;
4560 Serge 1517
 
6084 serge 1518
		gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);
1519
 
1520
		ppgtt->base.total = 1ULL << 48;
1521
		ppgtt->switch_mm = gen8_48b_mm_switch;
1522
	} else {
1523
		ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp);
1524
		if (ret)
1525
			goto free_scratch;
1526
 
1527
		ppgtt->base.total = 1ULL << 32;
1528
		ppgtt->switch_mm = gen8_legacy_mm_switch;
1529
		trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
1530
							      0, 0,
1531
							      GEN8_PML4E_SHIFT);
1532
 
1533
		if (intel_vgpu_active(ppgtt->base.dev)) {
1534
			ret = gen8_preallocate_top_level_pdps(ppgtt);
5060 serge 1535
			if (ret)
6084 serge 1536
				goto free_scratch;
4560 Serge 1537
		}
1538
	}
1539
 
6084 serge 1540
	if (intel_vgpu_active(ppgtt->base.dev))
1541
		gen8_ppgtt_notify_vgt(ppgtt, true);
4560 Serge 1542
 
1543
	return 0;
1544
 
6084 serge 1545
free_scratch:
1546
	gen8_free_scratch(&ppgtt->base);
4560 Serge 1547
	return ret;
1548
}
1549
 
6084 serge 1550
static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
4104 Serge 1551
{
6084 serge 1552
	struct i915_address_space *vm = &ppgtt->base;
1553
	struct i915_page_table *unused;
1554
	gen6_pte_t scratch_pte;
3746 Serge 1555
	uint32_t pd_entry;
6084 serge 1556
	uint32_t  pte, pde, temp;
1557
	uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
3746 Serge 1558
 
6084 serge 1559
	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1560
				     I915_CACHE_LLC, true, 0);
3746 Serge 1561
 
6084 serge 1562
	gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) {
1563
		u32 expected;
1564
		gen6_pte_t *pt_vaddr;
1565
		const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
1566
		pd_entry = readl(ppgtt->pd_addr + pde);
1567
		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
3746 Serge 1568
 
6084 serge 1569
		if (pd_entry != expected)
1570
			seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1571
				   pde,
1572
				   pd_entry,
1573
				   expected);
1574
		seq_printf(m, "\tPDE: %x\n", pd_entry);
1575
 
1576
		pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]);
1577
 
1578
		for (pte = 0; pte < GEN6_PTES; pte+=4) {
1579
			unsigned long va =
1580
				(pde * PAGE_SIZE * GEN6_PTES) +
1581
				(pte * PAGE_SIZE);
1582
			int i;
1583
			bool found = false;
1584
			for (i = 0; i < 4; i++)
1585
				if (pt_vaddr[pte + i] != scratch_pte)
1586
					found = true;
1587
			if (!found)
1588
				continue;
1589
 
1590
			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
1591
			for (i = 0; i < 4; i++) {
1592
				if (pt_vaddr[pte + i] != scratch_pte)
1593
					seq_printf(m, " %08x", pt_vaddr[pte + i]);
1594
				else
1595
					seq_puts(m, "  SCRATCH ");
1596
			}
1597
			seq_puts(m, "\n");
1598
		}
1599
		kunmap_px(ppgtt, pt_vaddr);
3746 Serge 1600
	}
4104 Serge 1601
}
3746 Serge 1602
 
6084 serge 1603
/* Write pde (index) from the page directory @pd to the page table @pt */
1604
static void gen6_write_pde(struct i915_page_directory *pd,
1605
			    const int pde, struct i915_page_table *pt)
1606
{
1607
	/* Caller needs to make sure the write completes if necessary */
1608
	struct i915_hw_ppgtt *ppgtt =
1609
		container_of(pd, struct i915_hw_ppgtt, pd);
1610
	u32 pd_entry;
1611
 
1612
	pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt));
1613
	pd_entry |= GEN6_PDE_VALID;
1614
 
1615
	writel(pd_entry, ppgtt->pd_addr + pde);
1616
}
1617
 
1618
/* Write all the page tables found in the ppgtt structure to incrementing page
1619
 * directories. */
1620
static void gen6_write_page_range(struct drm_i915_private *dev_priv,
1621
				  struct i915_page_directory *pd,
1622
				  uint32_t start, uint32_t length)
1623
{
1624
	struct i915_page_table *pt;
1625
	uint32_t pde, temp;
1626
 
1627
	gen6_for_each_pde(pt, pd, start, length, temp, pde)
1628
		gen6_write_pde(pd, pde, pt);
1629
 
1630
	/* Make sure write is complete before other code can use this page
1631
	 * table. Also required for WC mapped PTEs */
1632
	readl(dev_priv->gtt.gsm);
1633
}
1634
 
5060 serge 1635
static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
4104 Serge 1636
{
6084 serge 1637
	BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
4104 Serge 1638
 
6084 serge 1639
	return (ppgtt->pd.base.ggtt_offset / 64) << 16;
5060 serge 1640
}
4104 Serge 1641
 
5060 serge 1642
static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
6084 serge 1643
			 struct drm_i915_gem_request *req)
5060 serge 1644
{
6084 serge 1645
	struct intel_engine_cs *ring = req->ring;
5060 serge 1646
	int ret;
3746 Serge 1647
 
5060 serge 1648
	/* NB: TLBs must be flushed and invalidated before a switch */
6084 serge 1649
	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
5060 serge 1650
	if (ret)
1651
		return ret;
3746 Serge 1652
 
6084 serge 1653
	ret = intel_ring_begin(req, 6);
5060 serge 1654
	if (ret)
1655
		return ret;
3746 Serge 1656
 
5060 serge 1657
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
6937 serge 1658
	intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(ring));
5060 serge 1659
	intel_ring_emit(ring, PP_DIR_DCLV_2G);
6937 serge 1660
	intel_ring_emit_reg(ring, RING_PP_DIR_BASE(ring));
5060 serge 1661
	intel_ring_emit(ring, get_pd_offset(ppgtt));
1662
	intel_ring_emit(ring, MI_NOOP);
1663
	intel_ring_advance(ring);
1664
 
1665
	return 0;
1666
}
1667
 
6084 serge 1668
static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt,
1669
			  struct drm_i915_gem_request *req)
1670
{
1671
	struct intel_engine_cs *ring = req->ring;
1672
	struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
1673
 
1674
	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1675
	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1676
	return 0;
1677
}
1678
 
5060 serge 1679
static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
6084 serge 1680
			  struct drm_i915_gem_request *req)
5060 serge 1681
{
6084 serge 1682
	struct intel_engine_cs *ring = req->ring;
5060 serge 1683
	int ret;
1684
 
1685
	/* NB: TLBs must be flushed and invalidated before a switch */
6084 serge 1686
	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
5060 serge 1687
	if (ret)
1688
		return ret;
1689
 
6084 serge 1690
	ret = intel_ring_begin(req, 6);
5060 serge 1691
	if (ret)
1692
		return ret;
1693
 
1694
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
6937 serge 1695
	intel_ring_emit_reg(ring, RING_PP_DIR_DCLV(ring));
5060 serge 1696
	intel_ring_emit(ring, PP_DIR_DCLV_2G);
6937 serge 1697
	intel_ring_emit_reg(ring, RING_PP_DIR_BASE(ring));
5060 serge 1698
	intel_ring_emit(ring, get_pd_offset(ppgtt));
1699
	intel_ring_emit(ring, MI_NOOP);
1700
	intel_ring_advance(ring);
1701
 
1702
	/* XXX: RCS is the only one to auto invalidate the TLBs? */
1703
	if (ring->id != RCS) {
6084 serge 1704
		ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
5060 serge 1705
		if (ret)
1706
			return ret;
1707
	}
1708
 
1709
	return 0;
1710
}
1711
 
1712
static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
6084 serge 1713
			  struct drm_i915_gem_request *req)
5060 serge 1714
{
6084 serge 1715
	struct intel_engine_cs *ring = req->ring;
5060 serge 1716
	struct drm_device *dev = ppgtt->base.dev;
1717
	struct drm_i915_private *dev_priv = dev->dev_private;
1718
 
1719
 
1720
	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1721
	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1722
 
1723
	POSTING_READ(RING_PP_DIR_DCLV(ring));
1724
 
1725
	return 0;
1726
}
1727
 
5354 serge 1728
static void gen8_ppgtt_enable(struct drm_device *dev)
5060 serge 1729
{
1730
	struct drm_i915_private *dev_priv = dev->dev_private;
1731
	struct intel_engine_cs *ring;
5354 serge 1732
	int j;
5060 serge 1733
 
1734
	for_each_ring(ring, dev_priv, j) {
6084 serge 1735
		u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0;
5060 serge 1736
		I915_WRITE(RING_MODE_GEN7(ring),
6084 serge 1737
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
5060 serge 1738
	}
1739
}
1740
 
5354 serge 1741
static void gen7_ppgtt_enable(struct drm_device *dev)
5060 serge 1742
{
1743
	struct drm_i915_private *dev_priv = dev->dev_private;
1744
	struct intel_engine_cs *ring;
6084 serge 1745
	uint32_t ecochk, ecobits;
5060 serge 1746
	int i;
3746 Serge 1747
 
6084 serge 1748
	ecobits = I915_READ(GAC_ECO_BITS);
1749
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
3746 Serge 1750
 
6084 serge 1751
	ecochk = I915_READ(GAM_ECOCHK);
1752
	if (IS_HASWELL(dev)) {
1753
		ecochk |= ECOCHK_PPGTT_WB_HSW;
1754
	} else {
1755
		ecochk |= ECOCHK_PPGTT_LLC_IVB;
1756
		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1757
	}
1758
	I915_WRITE(GAM_ECOCHK, ecochk);
3746 Serge 1759
 
1760
	for_each_ring(ring, dev_priv, i) {
5060 serge 1761
		/* GFX_MODE is per-ring on gen7+ */
6084 serge 1762
		I915_WRITE(RING_MODE_GEN7(ring),
1763
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
3746 Serge 1764
	}
1765
}
1766
 
5354 serge 1767
static void gen6_ppgtt_enable(struct drm_device *dev)
5060 serge 1768
{
1769
	struct drm_i915_private *dev_priv = dev->dev_private;
1770
	uint32_t ecochk, gab_ctl, ecobits;
1771
 
1772
	ecobits = I915_READ(GAC_ECO_BITS);
1773
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1774
		   ECOBITS_PPGTT_CACHE64B);
1775
 
1776
	gab_ctl = I915_READ(GAB_CTL);
1777
	I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1778
 
1779
	ecochk = I915_READ(GAM_ECOCHK);
1780
	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1781
 
1782
	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1783
}
1784
 
3031 serge 1785
/* PPGTT support for Sandybridge/Gen6 and later */
4104 Serge 1786
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
5060 serge 1787
				   uint64_t start,
1788
				   uint64_t length,
4280 Serge 1789
				   bool use_scratch)
3031 serge 1790
{
4104 Serge 1791
	struct i915_hw_ppgtt *ppgtt =
1792
		container_of(vm, struct i915_hw_ppgtt, base);
6084 serge 1793
	gen6_pte_t *pt_vaddr, scratch_pte;
5060 serge 1794
	unsigned first_entry = start >> PAGE_SHIFT;
1795
	unsigned num_entries = length >> PAGE_SHIFT;
6084 serge 1796
	unsigned act_pt = first_entry / GEN6_PTES;
1797
	unsigned first_pte = first_entry % GEN6_PTES;
3031 serge 1798
	unsigned last_pte, i;
1799
 
6084 serge 1800
	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1801
				     I915_CACHE_LLC, true, 0);
3031 serge 1802
 
3480 Serge 1803
	while (num_entries) {
6084 serge 1804
		last_pte = first_pte + num_entries;
1805
		if (last_pte > GEN6_PTES)
1806
			last_pte = GEN6_PTES;
3031 serge 1807
 
6084 serge 1808
		pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
3031 serge 1809
 
6084 serge 1810
		for (i = first_pte; i < last_pte; i++)
1811
			pt_vaddr[i] = scratch_pte;
3031 serge 1812
 
6084 serge 1813
		kunmap_px(ppgtt, pt_vaddr);
5354 serge 1814
 
6084 serge 1815
		num_entries -= last_pte - first_pte;
1816
		first_pte = 0;
1817
		act_pt++;
5354 serge 1818
	}
3480 Serge 1819
}
1820
 
4104 Serge 1821
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
3480 Serge 1822
				      struct sg_table *pages,
5060 serge 1823
				      uint64_t start,
1824
				      enum i915_cache_level cache_level, u32 flags)
3480 Serge 1825
{
4104 Serge 1826
	struct i915_hw_ppgtt *ppgtt =
1827
		container_of(vm, struct i915_hw_ppgtt, base);
6084 serge 1828
	gen6_pte_t *pt_vaddr;
5060 serge 1829
	unsigned first_entry = start >> PAGE_SHIFT;
6084 serge 1830
	unsigned act_pt = first_entry / GEN6_PTES;
1831
	unsigned act_pte = first_entry % GEN6_PTES;
3746 Serge 1832
	struct sg_page_iter sg_iter;
3480 Serge 1833
 
5354 serge 1834
	pt_vaddr = NULL;
3746 Serge 1835
	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
5354 serge 1836
		if (pt_vaddr == NULL)
6084 serge 1837
			pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
3480 Serge 1838
 
4560 Serge 1839
		pt_vaddr[act_pte] =
1840
			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
5060 serge 1841
				       cache_level, true, flags);
1842
 
6084 serge 1843
		if (++act_pte == GEN6_PTES) {
1844
			kunmap_px(ppgtt, pt_vaddr);
5354 serge 1845
			pt_vaddr = NULL;
3746 Serge 1846
			act_pt++;
1847
			act_pte = 0;
3480 Serge 1848
		}
6084 serge 1849
	}
5354 serge 1850
	if (pt_vaddr)
6084 serge 1851
		kunmap_px(ppgtt, pt_vaddr);
3031 serge 1852
}
1853
 
6084 serge 1854
static int gen6_alloc_va_range(struct i915_address_space *vm,
1855
			       uint64_t start_in, uint64_t length_in)
3031 serge 1856
{
6084 serge 1857
	DECLARE_BITMAP(new_page_tables, I915_PDES);
1858
	struct drm_device *dev = vm->dev;
1859
	struct drm_i915_private *dev_priv = dev->dev_private;
1860
	struct i915_hw_ppgtt *ppgtt =
1861
				container_of(vm, struct i915_hw_ppgtt, base);
1862
	struct i915_page_table *pt;
1863
	uint32_t start, length, start_save, length_save;
1864
	uint32_t pde, temp;
1865
	int ret;
3480 Serge 1866
 
6084 serge 1867
	if (WARN_ON(start_in + length_in > ppgtt->base.total))
1868
		return -ENODEV;
1869
 
1870
	start = start_save = start_in;
1871
	length = length_save = length_in;
1872
 
1873
	bitmap_zero(new_page_tables, I915_PDES);
1874
 
1875
	/* The allocation is done in two stages so that we can bail out with
1876
	 * minimal amount of pain. The first stage finds new page tables that
1877
	 * need allocation. The second stage marks the ptes in use within the page
1878
	 * tables.
1879
	 */
1880
	gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
1881
		if (pt != vm->scratch_pt) {
1882
			WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
1883
			continue;
1884
		}
1885
 
1886
		/* We've already allocated a page table */
1887
		WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
1888
 
1889
		pt = alloc_pt(dev);
1890
		if (IS_ERR(pt)) {
1891
			ret = PTR_ERR(pt);
1892
			goto unwind_out;
1893
		}
1894
 
1895
		gen6_initialize_pt(vm, pt);
1896
 
1897
		ppgtt->pd.page_table[pde] = pt;
1898
		__set_bit(pde, new_page_tables);
1899
		trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
3480 Serge 1900
	}
6084 serge 1901
 
1902
	start = start_save;
1903
	length = length_save;
1904
 
1905
	gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
1906
		DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);
1907
 
1908
		bitmap_zero(tmp_bitmap, GEN6_PTES);
1909
		bitmap_set(tmp_bitmap, gen6_pte_index(start),
1910
			   gen6_pte_count(start, length));
1911
 
1912
		if (__test_and_clear_bit(pde, new_page_tables))
1913
			gen6_write_pde(&ppgtt->pd, pde, pt);
1914
 
1915
		trace_i915_page_table_entry_map(vm, pde, pt,
1916
					 gen6_pte_index(start),
1917
					 gen6_pte_count(start, length),
1918
					 GEN6_PTES);
1919
		bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
1920
				GEN6_PTES);
1921
	}
1922
 
1923
	WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));
1924
 
1925
	/* Make sure write is complete before other code can use this page
1926
	 * table. Also required for WC mapped PTEs */
1927
	readl(dev_priv->gtt.gsm);
1928
 
1929
	mark_tlbs_dirty(ppgtt);
1930
	return 0;
1931
 
1932
unwind_out:
1933
	for_each_set_bit(pde, new_page_tables, I915_PDES) {
1934
		struct i915_page_table *pt = ppgtt->pd.page_table[pde];
1935
 
1936
		ppgtt->pd.page_table[pde] = vm->scratch_pt;
1937
		free_pt(vm->dev, pt);
1938
	}
1939
 
1940
	mark_tlbs_dirty(ppgtt);
1941
	return ret;
5060 serge 1942
}
3480 Serge 1943
 
6084 serge 1944
static int gen6_init_scratch(struct i915_address_space *vm)
5060 serge 1945
{
6084 serge 1946
	struct drm_device *dev = vm->dev;
5060 serge 1947
 
6084 serge 1948
	vm->scratch_page = alloc_scratch_page(dev);
1949
	if (IS_ERR(vm->scratch_page))
1950
		return PTR_ERR(vm->scratch_page);
1951
 
1952
	vm->scratch_pt = alloc_pt(dev);
1953
	if (IS_ERR(vm->scratch_pt)) {
1954
		free_scratch_page(dev, vm->scratch_page);
1955
		return PTR_ERR(vm->scratch_pt);
1956
	}
1957
 
1958
	gen6_initialize_pt(vm, vm->scratch_pt);
1959
 
1960
	return 0;
3480 Serge 1961
}
1962
 
6084 serge 1963
static void gen6_free_scratch(struct i915_address_space *vm)
1964
{
1965
	struct drm_device *dev = vm->dev;
1966
 
1967
	free_pt(dev, vm->scratch_pt);
1968
	free_scratch_page(dev, vm->scratch_page);
1969
}
1970
 
5060 serge 1971
static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
3480 Serge 1972
{
5060 serge 1973
	struct i915_hw_ppgtt *ppgtt =
1974
		container_of(vm, struct i915_hw_ppgtt, base);
6084 serge 1975
	struct i915_page_table *pt;
1976
	uint32_t pde;
5060 serge 1977
 
1978
	drm_mm_remove_node(&ppgtt->node);
1979
 
6084 serge 1980
	gen6_for_all_pdes(pt, ppgtt, pde) {
1981
		if (pt != vm->scratch_pt)
1982
			free_pt(ppgtt->base.dev, pt);
1983
	}
1984
 
1985
	gen6_free_scratch(vm);
5060 serge 1986
}
1987
 
1988
static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
1989
{
6084 serge 1990
	struct i915_address_space *vm = &ppgtt->base;
4104 Serge 1991
	struct drm_device *dev = ppgtt->base.dev;
3031 serge 1992
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 1993
	bool retried = false;
1994
	int ret;
3031 serge 1995
 
5060 serge 1996
	/* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
1997
	 * allocator works in address space sizes, so it's multiplied by page
1998
	 * size. We allocate at the top of the GTT to avoid fragmentation.
1999
	 */
2000
	BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
6084 serge 2001
 
2002
	ret = gen6_init_scratch(vm);
2003
	if (ret)
2004
		return ret;
2005
 
5060 serge 2006
alloc:
2007
	ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
2008
						  &ppgtt->node, GEN6_PD_SIZE,
2009
						  GEN6_PD_ALIGN, 0,
2010
						  0, dev_priv->gtt.base.total,
2011
						  DRM_MM_TOPDOWN);
2012
	if (ret == -ENOSPC && !retried) {
2013
		ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
2014
					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
2015
					       I915_CACHE_NONE,
2016
					       0, dev_priv->gtt.base.total,
2017
					       0);
2018
		if (ret)
6084 serge 2019
			goto err_out;
3031 serge 2020
 
5060 serge 2021
		retried = true;
2022
		goto alloc;
2023
	}
2024
 
6084 serge 2025
	if (ret)
2026
		goto err_out;
2027
 
2028
 
5060 serge 2029
	if (ppgtt->node.start < dev_priv->gtt.mappable_end)
2030
		DRM_DEBUG("Forced to use aperture for PDEs\n");
2031
 
6084 serge 2032
	return 0;
2033
 
2034
err_out:
2035
	gen6_free_scratch(vm);
5060 serge 2036
	return ret;
2037
}
2038
 
2039
static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
2040
{
6084 serge 2041
	return gen6_ppgtt_allocate_page_directories(ppgtt);
5060 serge 2042
}
3031 serge 2043
 
6084 serge 2044
static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
2045
				  uint64_t start, uint64_t length)
5060 serge 2046
{
6084 serge 2047
	struct i915_page_table *unused;
2048
	uint32_t pde, temp;
5060 serge 2049
 
6084 serge 2050
	gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde)
2051
		ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
3031 serge 2052
}
2053
 
5060 serge 2054
static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
3031 serge 2055
{
5060 serge 2056
	struct drm_device *dev = ppgtt->base.dev;
3031 serge 2057
	struct drm_i915_private *dev_priv = dev->dev_private;
3480 Serge 2058
	int ret;
3031 serge 2059
 
5060 serge 2060
	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
2061
	if (IS_GEN6(dev)) {
2062
		ppgtt->switch_mm = gen6_mm_switch;
2063
	} else if (IS_HASWELL(dev)) {
2064
		ppgtt->switch_mm = hsw_mm_switch;
2065
	} else if (IS_GEN7(dev)) {
2066
		ppgtt->switch_mm = gen7_mm_switch;
2067
	} else
2068
		BUG();
3031 serge 2069
 
6084 serge 2070
	if (intel_vgpu_active(dev))
2071
		ppgtt->switch_mm = vgpu_mm_switch;
2072
 
5060 serge 2073
	ret = gen6_ppgtt_alloc(ppgtt);
2074
	if (ret)
2075
		return ret;
2076
 
6084 serge 2077
	ppgtt->base.allocate_va_range = gen6_alloc_va_range;
5060 serge 2078
	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
2079
	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
6084 serge 2080
	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
2081
	ppgtt->base.bind_vma = ppgtt_bind_vma;
5060 serge 2082
	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
2083
	ppgtt->base.start = 0;
6084 serge 2084
	ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
2085
	ppgtt->debug_dump = gen6_dump_ppgtt;
5060 serge 2086
 
6084 serge 2087
	ppgtt->pd.base.ggtt_offset =
2088
		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
5060 serge 2089
 
6084 serge 2090
	ppgtt->pd_addr = (gen6_pte_t __iomem *)dev_priv->gtt.gsm +
2091
		ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
5060 serge 2092
 
6084 serge 2093
	gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
2094
 
2095
	gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
2096
 
2097
	DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
5060 serge 2098
			 ppgtt->node.size >> 20,
2099
			 ppgtt->node.start / PAGE_SIZE);
2100
 
5354 serge 2101
	DRM_DEBUG("Adding PPGTT at offset %x\n",
6084 serge 2102
		  ppgtt->pd.base.ggtt_offset << 10);
5354 serge 2103
 
5060 serge 2104
	return 0;
2105
}
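/* With I915_PDES page-directory entries of GEN6_PTES entries each (512 and
 * 1024 respectively for gen6), ppgtt->base.total above works out to
 * 512 * 1024 * 4KiB = 2GiB of per-process address space.
 */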
2106
 
5354 serge 2107
static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
5060 serge 2108
{
4104 Serge 2109
	ppgtt->base.dev = dev;
3031 serge 2110
 
3746 Serge 2111
	if (INTEL_INFO(dev)->gen < 8)
5354 serge 2112
		return gen6_ppgtt_init(ppgtt);
3746 Serge 2113
	else
6084 serge 2114
		return gen8_ppgtt_init(ppgtt);
5354 serge 2115
}
6084 serge 2116
 
2117
static void i915_address_space_init(struct i915_address_space *vm,
2118
				    struct drm_i915_private *dev_priv)
2119
{
2120
	drm_mm_init(&vm->mm, vm->start, vm->total);
2121
	vm->dev = dev_priv->dev;
2122
	INIT_LIST_HEAD(&vm->active_list);
2123
	INIT_LIST_HEAD(&vm->inactive_list);
2124
	list_add_tail(&vm->global_link, &dev_priv->vm_list);
2125
}
2126
 
7144 serge 2127
static void gtt_write_workarounds(struct drm_device *dev)
2128
{
2129
	struct drm_i915_private *dev_priv = dev->dev_private;
2130
 
2131
	/* This function is for gtt related workarounds. This function is
2132
	 * called on driver load and after a GPU reset, so you can place
2133
	 * workarounds here even if they get overwritten by GPU reset.
2134
	 */
2135
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt */
2136
	if (IS_BROADWELL(dev))
2137
		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
2138
	else if (IS_CHERRYVIEW(dev))
2139
		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
2140
	else if (IS_SKYLAKE(dev))
2141
		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
2142
	else if (IS_BROXTON(dev))
2143
		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
2144
}
2145
 
5354 serge 2146
int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
2147
{
2148
	struct drm_i915_private *dev_priv = dev->dev_private;
2149
	int ret = 0;
3746 Serge 2150
 
5354 serge 2151
	ret = __hw_ppgtt_init(dev, ppgtt);
2152
	if (ret == 0) {
5060 serge 2153
		kref_init(&ppgtt->ref);
6084 serge 2154
		i915_address_space_init(&ppgtt->base, dev_priv);
5354 serge 2155
	}
2156
 
2157
	return ret;
2158
}
2159
 
2160
int i915_ppgtt_init_hw(struct drm_device *dev)
2161
{
7144 serge 2162
	gtt_write_workarounds(dev);
2163
 
5354 serge 2164
	/* In the case of execlists, PPGTT is enabled by the context descriptor
2165
	 * and the PDPs are contained within the context itself.  We don't
2166
	 * need to do anything here. */
2167
	if (i915.enable_execlists)
2168
		return 0;
2169
 
2170
	if (!USES_PPGTT(dev))
2171
		return 0;
2172
 
2173
	if (IS_GEN6(dev))
2174
		gen6_ppgtt_enable(dev);
2175
	else if (IS_GEN7(dev))
2176
		gen7_ppgtt_enable(dev);
2177
	else if (INTEL_INFO(dev)->gen >= 8)
2178
		gen8_ppgtt_enable(dev);
2179
	else
6084 serge 2180
		MISSING_CASE(INTEL_INFO(dev)->gen);
5354 serge 2181
 
6084 serge 2182
	return 0;
2183
}
3480 Serge 2184
 
6084 serge 2185
int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
2186
{
2187
	struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
2188
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2189
 
2190
	if (i915.enable_execlists)
2191
		return 0;
2192
 
2193
	if (!ppgtt)
2194
		return 0;
2195
 
2196
	return ppgtt->switch_mm(ppgtt, req);
3031 serge 2197
}
6084 serge 2198
 
5354 serge 2199
struct i915_hw_ppgtt *
2200
i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
2201
{
2202
	struct i915_hw_ppgtt *ppgtt;
2203
	int ret;
3031 serge 2204
 
5354 serge 2205
	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2206
	if (!ppgtt)
2207
		return ERR_PTR(-ENOMEM);
2208
 
2209
	ret = i915_ppgtt_init(dev, ppgtt);
2210
	if (ret) {
2211
		kfree(ppgtt);
2212
		return ERR_PTR(ret);
2213
	}
2214
 
2215
	ppgtt->file_priv = fpriv;
2216
 
2217
	trace_i915_ppgtt_create(&ppgtt->base);
2218
 
2219
	return ppgtt;
2220
}
2221
 
2222
void  i915_ppgtt_release(struct kref *kref)
2223
{
2224
	struct i915_hw_ppgtt *ppgtt =
2225
		container_of(kref, struct i915_hw_ppgtt, ref);
2226
 
2227
	trace_i915_ppgtt_release(&ppgtt->base);
2228
 
2229
	/* vmas should already be unbound */
2230
	WARN_ON(!list_empty(&ppgtt->base.active_list));
2231
	WARN_ON(!list_empty(&ppgtt->base.inactive_list));
2232
 
2233
	list_del(&ppgtt->base.global_link);
2234
	drm_mm_takedown(&ppgtt->base.mm);
2235
 
2236
	ppgtt->base.cleanup(&ppgtt->base);
2237
	kfree(ppgtt);
2238
}
2239
 
3480 Serge 2240
extern int intel_iommu_gfx_mapped;
2241
/* Certain Gen5 chipsets require idling the GPU before
2242
 * unmapping anything from the GTT when VT-d is enabled.
2243
 */
6084 serge 2244
static bool needs_idle_maps(struct drm_device *dev)
3480 Serge 2245
{
2246
#ifdef CONFIG_INTEL_IOMMU
2247
	/* Query intel_iommu to see if we need the workaround. Presumably that
2248
	 * was loaded first.
2249
	 */
2250
	if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
2251
		return true;
2252
#endif
2253
	return false;
2254
}
2255
 
2344 Serge 2256
static bool do_idling(struct drm_i915_private *dev_priv)
2257
{
2258
	bool ret = dev_priv->mm.interruptible;
2259
 
3480 Serge 2260
	if (unlikely(dev_priv->gtt.do_idle_maps)) {
2344 Serge 2261
		dev_priv->mm.interruptible = false;
2262
		if (i915_gpu_idle(dev_priv->dev)) {
2263
			DRM_ERROR("Couldn't idle GPU\n");
2264
			/* Wait a bit, in hopes it avoids the hang */
2265
			udelay(10);
2266
		}
2267
	}
2268
 
2269
	return ret;
2270
}
2271
 
2272
static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
2273
{
3480 Serge 2274
	if (unlikely(dev_priv->gtt.do_idle_maps))
2344 Serge 2275
		dev_priv->mm.interruptible = interruptible;
2276
}
2277
 
4280 Serge 2278
void i915_check_and_clear_faults(struct drm_device *dev)
2279
{
2280
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 2281
	struct intel_engine_cs *ring;
4280 Serge 2282
	int i;
2283
 
2284
	if (INTEL_INFO(dev)->gen < 6)
2285
		return;
2286
 
2287
	for_each_ring(ring, dev_priv, i) {
2288
		u32 fault_reg;
2289
		fault_reg = I915_READ(RING_FAULT_REG(ring));
2290
		if (fault_reg & RING_FAULT_VALID) {
2291
			DRM_DEBUG_DRIVER("Unexpected fault\n"
5354 serge 2292
					 "\tAddr: 0x%08lx\n"
4280 Serge 2293
					 "\tAddress space: %s\n"
2294
					 "\tSource ID: %d\n"
2295
					 "\tType: %d\n",
2296
					 fault_reg & PAGE_MASK,
2297
					 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
2298
					 RING_FAULT_SRCID(fault_reg),
2299
					 RING_FAULT_FAULT_TYPE(fault_reg));
2300
			I915_WRITE(RING_FAULT_REG(ring),
2301
				   fault_reg & ~RING_FAULT_VALID);
2302
		}
2303
	}
2304
	POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
2305
}
2306
 
5354 serge 2307
static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
2308
{
2309
	if (INTEL_INFO(dev_priv->dev)->gen < 6) {
2310
		intel_gtt_chipset_flush();
2311
	} else {
2312
		I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2313
		POSTING_READ(GFX_FLSH_CNTL_GEN6);
2314
	}
2315
}
2316
 
4280 Serge 2317
void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
2318
{
2319
	struct drm_i915_private *dev_priv = dev->dev_private;
2320
 
2321
	/* Don't bother messing with faults pre GEN6 as we have little
2322
	 * documentation supporting that it's a good idea.
2323
	 */
2324
	if (INTEL_INFO(dev)->gen < 6)
2325
		return;
2326
 
2327
	i915_check_and_clear_faults(dev);
2328
 
2329
	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
5060 serge 2330
				       dev_priv->gtt.base.start,
2331
				       dev_priv->gtt.base.total,
2332
				       true);
5354 serge 2333
 
2334
	i915_ggtt_flush(dev_priv);
4280 Serge 2335
}
2336
 
3031 serge 2337
int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
2332 Serge 2338
{
3480 Serge 2339
	if (!dma_map_sg(&obj->base.dev->pdev->dev,
2340
			obj->pages->sgl, obj->pages->nents,
2341
			PCI_DMA_BIDIRECTIONAL))
2342
		return -ENOSPC;
3243 Serge 2343
 
2332 Serge 2344
	return 0;
2345
}
2346
 
6084 serge 2347
static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
4560 Serge 2348
{
2349
#ifdef writeq
2350
	writeq(pte, addr);
2351
#else
2352
	iowrite32((u32)pte, addr);
2353
	iowrite32(pte >> 32, addr + 4);
2354
#endif
2355
}
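/* When writeq() is not available, the 64-bit PTE is emitted as two 32-bit
 * writes: the low dword first, then the high dword at addr + 4.
 */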
2356
 
2357
static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2358
				     struct sg_table *st,
5060 serge 2359
				     uint64_t start,
2360
				     enum i915_cache_level level, u32 unused)
4560 Serge 2361
{
2362
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
5060 serge 2363
	unsigned first_entry = start >> PAGE_SHIFT;
6084 serge 2364
	gen8_pte_t __iomem *gtt_entries =
2365
		(gen8_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
4560 Serge 2366
	int i = 0;
2367
	struct sg_page_iter sg_iter;
5060 serge 2368
	dma_addr_t addr = 0; /* shut up gcc */
6937 serge 2369
	int rpm_atomic_seq;
4560 Serge 2370
 
6937 serge 2371
	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2372
 
4560 Serge 2373
	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2374
		addr = sg_dma_address(sg_iter.sg) +
2375
			(sg_iter.sg_pgoffset << PAGE_SHIFT);
2376
		gen8_set_pte(&gtt_entries[i],
2377
			     gen8_pte_encode(addr, level, true));
2378
		i++;
2379
	}
2380
 
2381
	/*
2382
	 * XXX: This serves as a posting read to make sure that the PTE has
2383
	 * actually been updated. There is some concern that even though
2384
	 * registers and PTEs are within the same BAR, they are potentially
2385
	 * subject to NUMA access patterns. Therefore, even with the way we assume
2386
	 * hardware should work, we must keep this posting read for paranoia.
2387
	 */
2388
	if (i != 0)
2389
		WARN_ON(readq(&gtt_entries[i-1])
2390
			!= gen8_pte_encode(addr, level, true));
2391
 
2392
	/* This next bit makes the above posting read even more important. We
2393
	 * want to flush the TLBs only after we're certain all the PTE updates
2394
	 * have finished.
2395
	 */
2396
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2397
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
6937 serge 2398
 
2399
	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
4560 Serge 2400
}
2401
 
6937 serge 2402
struct insert_entries {
2403
	struct i915_address_space *vm;
2404
	struct sg_table *st;
2405
	uint64_t start;
2406
	enum i915_cache_level level;
2407
	u32 flags;
2408
};
2409
 
2410
static int gen8_ggtt_insert_entries__cb(void *_arg)
2411
{
2412
	struct insert_entries *arg = _arg;
2413
	gen8_ggtt_insert_entries(arg->vm, arg->st,
2414
				 arg->start, arg->level, arg->flags);
2415
	return 0;
2416
}
2417
 
2418
static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
2419
					  struct sg_table *st,
2420
					  uint64_t start,
2421
					  enum i915_cache_level level,
2422
					  u32 flags)
2423
{
2424
	struct insert_entries arg = { vm, st, start, level, flags };
2425
	/* Upstream Linux wraps this callback in stop_machine() to serialise
	 * GGTT updates on Cherryview; here the callback is simply invoked
	 * directly. */
	gen8_ggtt_insert_entries__cb(&arg);
2426
}
2427
 
3243 Serge 2428
/*
2429
 * Binds an object into the global gtt with the specified cache level. The object
2430
 * will be accessible to the GPU via commands whose operands reference offsets
2431
 * within the global GTT as well as accessible by the GPU through the GMADR
2432
 * mapped BAR (dev_priv->mm.gtt->gtt).
2433
 */
4104 Serge 2434
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
3480 Serge 2435
				     struct sg_table *st,
5060 serge 2436
				     uint64_t start,
2437
				     enum i915_cache_level level, u32 flags)
3243 Serge 2438
{
4104 Serge 2439
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
5060 serge 2440
	unsigned first_entry = start >> PAGE_SHIFT;
6084 serge 2441
	gen6_pte_t __iomem *gtt_entries =
2442
		(gen6_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
3746 Serge 2443
	int i = 0;
2444
	struct sg_page_iter sg_iter;
5060 serge 2445
	dma_addr_t addr = 0;
6937 serge 2446
	int rpm_atomic_seq;
3243 Serge 2447
 
6937 serge 2448
	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2449
 
3746 Serge 2450
	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2451
		addr = sg_page_iter_dma_address(&sg_iter);
5060 serge 2452
		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
6084 serge 2453
		i++;
2454
	}
3243 Serge 2455
 
2456
	/* XXX: This serves as a posting read to make sure that the PTE has
2457
	 * actually been updated. There is some concern that even though
2458
	 * registers and PTEs are within the same BAR, they are potentially
2459
	 * subject to NUMA access patterns. Therefore, even with the way we assume
2460
	 * hardware should work, we must keep this posting read for paranoia.
2461
	 */
5060 serge 2462
	if (i != 0) {
2463
		unsigned long gtt = readl(&gtt_entries[i-1]);
2464
		WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
2465
	}
3243 Serge 2466
 
2467
	/* This next bit makes the above posting read even more important. We
2468
	 * want to flush the TLBs only after we're certain all the PTE updates
2469
	 * have finished.
2470
	 */
2471
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2472
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
6937 serge 2473
 
2474
	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
3243 Serge 2475
}
2476
 
4560 Serge 2477
static void gen8_ggtt_clear_range(struct i915_address_space *vm,
5060 serge 2478
				  uint64_t start,
2479
				  uint64_t length,
4560 Serge 2480
				  bool use_scratch)
2481
{
2482
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
5060 serge 2483
	unsigned first_entry = start >> PAGE_SHIFT;
2484
	unsigned num_entries = length >> PAGE_SHIFT;
6084 serge 2485
	gen8_pte_t scratch_pte, __iomem *gtt_base =
2486
		(gen8_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
4560 Serge 2487
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
2488
	int i;
6937 serge 2489
	int rpm_atomic_seq;
4560 Serge 2490
 
6937 serge 2491
	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2492
 
4560 Serge 2493
	if (WARN(num_entries > max_entries,
2494
		 "First entry = %d; Num entries = %d (max=%d)\n",
2495
		 first_entry, num_entries, max_entries))
2496
		num_entries = max_entries;
2497
 
6084 serge 2498
	scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
4560 Serge 2499
				      I915_CACHE_LLC,
2500
				      use_scratch);
2501
	for (i = 0; i < num_entries; i++)
2502
		gen8_set_pte(&gtt_base[i], scratch_pte);
2503
	readl(gtt_base);
6937 serge 2504
 
2505
	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
4560 Serge 2506
}
2507
 
4104 Serge 2508
static void gen6_ggtt_clear_range(struct i915_address_space *vm,
5060 serge 2509
				  uint64_t start,
2510
				  uint64_t length,
4280 Serge 2511
				  bool use_scratch)
3480 Serge 2512
{
4104 Serge 2513
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
5060 serge 2514
	unsigned first_entry = start >> PAGE_SHIFT;
2515
	unsigned num_entries = length >> PAGE_SHIFT;
6084 serge 2516
	gen6_pte_t scratch_pte, __iomem *gtt_base =
2517
		(gen6_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
3480 Serge 2518
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
2519
	int i;
6937 serge 2520
	int rpm_atomic_seq;
3480 Serge 2521
 
6937 serge 2522
	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2523
 
4126 Serge 2524
	if (WARN(num_entries > max_entries,
2525
		 "First entry = %d; Num entries = %d (max=%d)\n",
2526
		 first_entry, num_entries, max_entries))
6084 serge 2527
		num_entries = max_entries;
3480 Serge 2528
 
6084 serge 2529
	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
2530
				     I915_CACHE_LLC, use_scratch, 0);
4280 Serge 2531
 
3480 Serge 2532
	for (i = 0; i < num_entries; i++)
2533
		iowrite32(scratch_pte, &gtt_base[i]);
2534
	readl(gtt_base);
6937 serge 2535
 
2536
	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
3480 Serge 2537
}
2538
 
6084 serge 2539
static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2540
				     struct sg_table *pages,
2541
				     uint64_t start,
2542
				     enum i915_cache_level cache_level, u32 unused)
3480 Serge 2543
{
6937 serge 2544
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
3480 Serge 2545
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2546
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
6937 serge 2547
	int rpm_atomic_seq;
3480 Serge 2548
 
6937 serge 2549
	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2550
 
6084 serge 2551
	intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
2552
 
6937 serge 2553
	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
2554
 
3480 Serge 2555
}
2556
 
4104 Serge 2557
static void i915_ggtt_clear_range(struct i915_address_space *vm,
5060 serge 2558
				  uint64_t start,
2559
				  uint64_t length,
4280 Serge 2560
				  bool unused)
3480 Serge 2561
{
6937 serge 2562
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
5060 serge 2563
	unsigned first_entry = start >> PAGE_SHIFT;
2564
	unsigned num_entries = length >> PAGE_SHIFT;
6937 serge 2565
	int rpm_atomic_seq;
2566
 
2567
	rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv);
2568
 
3480 Serge 2569
	intel_gtt_clear_range(first_entry, num_entries);
6937 serge 2570
 
2571
	assert_rpm_atomic_end(dev_priv, rpm_atomic_seq);
3480 Serge 2572
}
2573
 
6084 serge 2574
static int ggtt_bind_vma(struct i915_vma *vma,
2575
			 enum i915_cache_level cache_level,
2576
			 u32 flags)
5060 serge 2577
{
6084 serge 2578
	struct drm_i915_gem_object *obj = vma->obj;
2579
	u32 pte_flags = 0;
2580
	int ret;
3480 Serge 2581
 
6084 serge 2582
	ret = i915_get_ggtt_vma_pages(vma);
2583
	if (ret)
2584
		return ret;
2585
 
2586
	/* Currently applicable only to VLV */
2587
	if (obj->gt_ro)
2588
		pte_flags |= PTE_READ_ONLY;
2589
 
2590
	vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages,
2591
				vma->node.start,
2592
				cache_level, pte_flags);
2593
 
2594
	/*
2595
	 * Without aliasing PPGTT there's no difference between
2596
	 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
2597
	 * upgrade to both bound if we bind either to avoid double-binding.
2598
	 */
2599
	vma->bound |= GLOBAL_BIND | LOCAL_BIND;
2600
 
2601
	return 0;
5060 serge 2602
}
2603
 
6084 serge 2604
static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2605
				 enum i915_cache_level cache_level,
2606
				 u32 flags)
2332 Serge 2607
{
5060 serge 2608
	struct drm_device *dev = vma->vm->dev;
3480 Serge 2609
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 2610
	struct drm_i915_gem_object *obj = vma->obj;
6084 serge 2611
	struct sg_table *pages = obj->pages;
2612
	u32 pte_flags = 0;
2613
	int ret;
3480 Serge 2614
 
6084 serge 2615
	ret = i915_get_ggtt_vma_pages(vma);
2616
	if (ret)
2617
		return ret;
2618
	pages = vma->ggtt_view.pages;
2619
 
5060 serge 2620
	/* Currently applicable only to VLV */
2621
	if (obj->gt_ro)
6084 serge 2622
		pte_flags |= PTE_READ_ONLY;
2332 Serge 2623
 
6084 serge 2624
 
2625
	if (flags & GLOBAL_BIND) {
2626
		vma->vm->insert_entries(vma->vm, pages,
2627
					vma->node.start,
2628
					cache_level, pte_flags);
5060 serge 2629
	}
2630
 
6084 serge 2631
	if (flags & LOCAL_BIND) {
5060 serge 2632
		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
6084 serge 2633
		appgtt->base.insert_entries(&appgtt->base, pages,
5060 serge 2634
					    vma->node.start,
6084 serge 2635
					    cache_level, pte_flags);
5060 serge 2636
	}
6084 serge 2637
 
2638
	return 0;
2332 Serge 2639
}
2640
 
5060 serge 2641
static void ggtt_unbind_vma(struct i915_vma *vma)
2332 Serge 2642
{
5060 serge 2643
	struct drm_device *dev = vma->vm->dev;
3480 Serge 2644
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 2645
	struct drm_i915_gem_object *obj = vma->obj;
6084 serge 2646
	const uint64_t size = min_t(uint64_t,
2647
				    obj->base.size,
2648
				    vma->node.size);
3480 Serge 2649
 
5354 serge 2650
	if (vma->bound & GLOBAL_BIND) {
5060 serge 2651
		vma->vm->clear_range(vma->vm,
2652
				     vma->node.start,
6084 serge 2653
				     size,
2654
				     true);
5060 serge 2655
	}
3031 serge 2656
 
6084 serge 2657
	if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
5060 serge 2658
		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
6084 serge 2659
 
5060 serge 2660
		appgtt->base.clear_range(&appgtt->base,
2661
					 vma->node.start,
6084 serge 2662
					 size,
5060 serge 2663
					 true);
2664
	}
3031 serge 2665
}
2666
 
2667
void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
2668
{
2344 Serge 2669
	struct drm_device *dev = obj->base.dev;
2670
	struct drm_i915_private *dev_priv = dev->dev_private;
2671
	bool interruptible;
2672
 
2673
	interruptible = do_idling(dev_priv);
2674
 
6084 serge 2675
	dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents,
2676
		     PCI_DMA_BIDIRECTIONAL);
2332 Serge 2677
 
3031 serge 2678
	undo_idling(dev_priv, interruptible);
2679
}
2680
 
2681
static void i915_gtt_color_adjust(struct drm_mm_node *node,
2682
				  unsigned long color,
6084 serge 2683
				  u64 *start,
2684
				  u64 *end)
3031 serge 2685
{
2686
	if (node->color != color)
2687
		*start += 4096;
2688
 
2689
	if (!list_empty(&node->node_list)) {
2690
		node = list_entry(node->node_list.next,
2691
				  struct drm_mm_node,
2692
				  node_list);
2693
		if (node->allocated && node->color != color)
2694
			*end -= 4096;
2332 Serge 2695
	}
3031 serge 2696
}
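/* The colour adjustment above keeps a 4KiB guard page between neighbouring
 * nodes of different cache-level colours: the start is pushed up by a page
 * when the node before it differs, and the end pulled back by a page when
 * the allocated node after it differs.
 */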
4560 Serge 2697
 
5354 serge 2698
static int i915_gem_setup_global_gtt(struct drm_device *dev,
6084 serge 2699
				     u64 start,
2700
				     u64 mappable_end,
2701
				     u64 end)
3031 serge 2702
{
3480 Serge 2703
	/* Let GEM Manage all of the aperture.
2704
	 *
2705
	 * However, leave one page at the end still bound to the scratch page.
2706
	 * There are a number of places where the hardware apparently prefetches
2707
	 * past the end of the object, and we've seen multiple hangs with the
2708
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
2709
	 * aperture.  One page should be enough to keep any prefetching inside
2710
	 * of the aperture.
2711
	 */
4104 Serge 2712
	struct drm_i915_private *dev_priv = dev->dev_private;
2713
	struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
3480 Serge 2714
	struct drm_mm_node *entry;
2715
	struct drm_i915_gem_object *obj;
2716
	unsigned long hole_start, hole_end;
5354 serge 2717
	int ret;
3031 serge 2718
 
3480 Serge 2719
	BUG_ON(mappable_end > end);
2720
 
6084 serge 2721
	ggtt_vm->start = start;
2722
 
2723
	/* Subtract the guard page before address space initialization to
2724
	 * shrink the range used by drm_mm */
2725
	ggtt_vm->total = end - start - PAGE_SIZE;
2726
	i915_address_space_init(ggtt_vm, dev_priv);
2727
	ggtt_vm->total += PAGE_SIZE;
2728
 
2729
	if (intel_vgpu_active(dev)) {
2730
		ret = intel_vgt_balloon(dev);
2731
		if (ret)
2732
			return ret;
2733
	}
2734
 
3031 serge 2735
	if (!HAS_LLC(dev))
6084 serge 2736
		ggtt_vm->mm.color_adjust = i915_gtt_color_adjust;
3031 serge 2737
 
3480 Serge 2738
	/* Mark any preallocated objects as occupied */
4104 Serge 2739
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
2740
		struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
5354 serge 2741
 
6084 serge 2742
		DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n",
4104 Serge 2743
			      i915_gem_obj_ggtt_offset(obj), obj->base.size);
3031 serge 2744
 
4104 Serge 2745
		WARN_ON(i915_gem_obj_ggtt_bound(obj));
2746
		ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
5354 serge 2747
		if (ret) {
2748
			DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
2749
			return ret;
2750
		}
2751
		vma->bound |= GLOBAL_BIND;
6084 serge 2752
		__i915_vma_set_map_and_fenceable(vma);
7144 serge 2753
		list_add_tail(&vma->vm_link, &ggtt_vm->inactive_list);
3480 Serge 2754
	}
2755
 
2756
	/* Clear any non-preallocated blocks */
4104 Serge 2757
	drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
3480 Serge 2758
		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2759
			      hole_start, hole_end);
5060 serge 2760
		ggtt_vm->clear_range(ggtt_vm, hole_start,
2761
				     hole_end - hole_start, true);
3480 Serge 2762
	}
2763
 
2764
	/* And finally clear the reserved guard page */
5060 serge 2765
	ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
5354 serge 2766
 
2767
	if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) {
2768
		struct i915_hw_ppgtt *ppgtt;
2769
 
2770
		ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2771
		if (!ppgtt)
2772
			return -ENOMEM;
2773
 
2774
		ret = __hw_ppgtt_init(dev, ppgtt);
6084 serge 2775
		if (ret) {
2776
			ppgtt->base.cleanup(&ppgtt->base);
2777
			kfree(ppgtt);
5354 serge 2778
			return ret;
6084 serge 2779
		}
5354 serge 2780
 
6084 serge 2781
		if (ppgtt->base.allocate_va_range)
2782
			ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
2783
							    ppgtt->base.total);
2784
		if (ret) {
2785
			ppgtt->base.cleanup(&ppgtt->base);
2786
			kfree(ppgtt);
2787
			return ret;
2788
		}
2789
 
2790
		ppgtt->base.clear_range(&ppgtt->base,
2791
					ppgtt->base.start,
2792
					ppgtt->base.total,
2793
					true);
2794
 
5354 serge 2795
		dev_priv->mm.aliasing_ppgtt = ppgtt;
6084 serge 2796
		WARN_ON(dev_priv->gtt.base.bind_vma != ggtt_bind_vma);
2797
		dev_priv->gtt.base.bind_vma = aliasing_gtt_bind_vma;
5354 serge 2798
	}
2799
 
2800
	return 0;
2332 Serge 2801
}
3243 Serge 2802
 
3480 Serge 2803
void i915_gem_init_global_gtt(struct drm_device *dev)
2804
{
2805
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 2806
	u64 gtt_size, mappable_size;
3480 Serge 2807
 
4104 Serge 2808
	gtt_size = dev_priv->gtt.base.total;
3480 Serge 2809
	mappable_size = dev_priv->gtt.mappable_end;
2810
 
4280 Serge 2811
	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
3480 Serge 2812
}
2813
 
5354 serge 2814
void i915_global_gtt_cleanup(struct drm_device *dev)
2815
{
2816
	struct drm_i915_private *dev_priv = dev->dev_private;
2817
	struct i915_address_space *vm = &dev_priv->gtt.base;
2818
 
2819
	if (dev_priv->mm.aliasing_ppgtt) {
2820
		struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2821
 
2822
		ppgtt->base.cleanup(&ppgtt->base);
2823
	}
2824
 
7144 serge 2825
	i915_gem_cleanup_stolen(dev);
2826
 
5354 serge 2827
	if (drm_mm_initialized(&vm->mm)) {
6084 serge 2828
		if (intel_vgpu_active(dev))
2829
			intel_vgt_deballoon();
2830
 
5354 serge 2831
		drm_mm_takedown(&vm->mm);
2832
		list_del(&vm->global_link);
2833
	}
2834
 
2835
	vm->cleanup(vm);
2836
}
2837
 
6084 serge 2838
static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
3243 Serge 2839
{
2840
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2841
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
2842
	return snb_gmch_ctl << 20;
2843
}
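/* Example of the decode above: a GGMS field of 2 yields 2MiB of PTE space;
 * at 4 bytes per gen6 PTE that is 512K entries, i.e. a 2GiB global GTT once
 * gen6_gmch_probe() multiplies by the 4KiB page size.
 */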
2844
 
6084 serge 2845
static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
4560 Serge 2846
{
2847
	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2848
	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2849
	if (bdw_gmch_ctl)
2850
		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
2851
 
5060 serge 2852
#ifdef CONFIG_X86_32
2853
	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
2854
	if (bdw_gmch_ctl > 4)
2855
		bdw_gmch_ctl = 4;
2856
#endif
2857
 
4560 Serge 2858
	return bdw_gmch_ctl << 20;
2859
}
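/* Example: a BDW GGMS field of 3 yields 1 << 3 = 8MiB of PTE space; at 8
 * bytes per gen8 PTE that is 1M entries, i.e. a 4GiB global GTT after the
 * PAGE_SHIFT conversion in gen8_gmch_probe().
 */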
2860
 
6084 serge 2861
static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
5060 serge 2862
{
2863
	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
2864
	gmch_ctrl &= SNB_GMCH_GGMS_MASK;
2865
 
2866
	if (gmch_ctrl)
2867
		return 1 << (20 + gmch_ctrl);
2868
 
2869
	return 0;
2870
}
2871
 
6084 serge 2872
static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
3243 Serge 2873
{
2874
	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
2875
	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
2876
	return snb_gmch_ctl << 25; /* 32 MB units */
2877
}
2878
 
6084 serge 2879
static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
4560 Serge 2880
{
2881
	bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2882
	bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
2883
	return bdw_gmch_ctl << 25; /* 32 MB units */
2884
}
2885
 
5060 serge 2886
static size_t chv_get_stolen_size(u16 gmch_ctrl)
2887
{
2888
	gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
2889
	gmch_ctrl &= SNB_GMCH_GMS_MASK;
2890
 
2891
	/*
2892
	 * 0x0  to 0x10: 32MB increments starting at 0MB
2893
	 * 0x11 to 0x16: 4MB increments starting at 8MB
2894
	 * 0x17 to 0x1d: 4MB increments start at 36MB
2895
	 */
2896
	if (gmch_ctrl < 0x11)
2897
		return gmch_ctrl << 25;
2898
	else if (gmch_ctrl < 0x17)
2899
		return (gmch_ctrl - 0x11 + 2) << 22;
2900
	else
2901
		return (gmch_ctrl - 0x17 + 9) << 22;
2902
}
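/* Worked examples of the decode above: 0x10 -> 0x10 << 25 = 512MB,
 * 0x11 -> (0x11 - 0x11 + 2) << 22 = 8MB and 0x17 -> (0x17 - 0x17 + 9) << 22
 * = 36MB, matching the ranges listed in the comment.
 */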
2903
 
5354 serge 2904
static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
2905
{
2906
	gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2907
	gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
2908
 
2909
	if (gen9_gmch_ctl < 0xf0)
2910
		return gen9_gmch_ctl << 25; /* 32 MB units */
2911
	else
2912
		/* 4MB increments starting at 0xf0 for 4MB */
2913
		return (gen9_gmch_ctl - 0xf0 + 1) << 22;
2914
}
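/* Example: values below 0xf0 decode as 32MB units (0x02 -> 64MB), while
 * 0xf0 and above switch to 4MB units starting at 4MB (0xf0 -> 4MB,
 * 0xf1 -> 8MB).
 */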
2915
 
4560 Serge 2916
static int ggtt_probe_common(struct drm_device *dev,
2917
			     size_t gtt_size)
2918
{
2919
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 2920
	struct i915_page_scratch *scratch_page;
4560 Serge 2921
	phys_addr_t gtt_phys_addr;
2922
 
2923
	/* For Modern GENs the PTEs and register space are split in the BAR */
2924
	gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
2925
		(pci_resource_len(dev->pdev, 0) / 2);
2926
 
6084 serge 2927
	/*
2928
	 * On BXT writes larger than 64 bit to the GTT pagetable range will be
2929
	 * dropped. For WC mappings in general we have 64 byte burst writes
2930
	 * when the WC buffer is flushed, so we can't use it, but have to
2931
	 * resort to an uncached mapping. The WC issue is easily caught by the
2932
	 * readback check when writing GTT PTE entries.
2933
	 */
2934
	if (IS_BROXTON(dev))
2935
		dev_priv->gtt.gsm = ioremap_nocache(gtt_phys_addr, gtt_size);
2936
	else
2937
		dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
4560 Serge 2938
	if (!dev_priv->gtt.gsm) {
2939
		DRM_ERROR("Failed to map the gtt page table\n");
2940
		return -ENOMEM;
2941
	}
2942
 
6084 serge 2943
	scratch_page = alloc_scratch_page(dev);
2944
	if (IS_ERR(scratch_page)) {
4560 Serge 2945
		DRM_ERROR("Scratch setup failed\n");
2946
		/* iounmap will also get called at remove, but meh */
2947
		iounmap(dev_priv->gtt.gsm);
6084 serge 2948
		return PTR_ERR(scratch_page);
4560 Serge 2949
	}
2950
 
6084 serge 2951
	dev_priv->gtt.base.scratch_page = scratch_page;
2952
 
2953
	return 0;
4560 Serge 2954
}
2955
 
2956
/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
2957
 * bits. When using advanced contexts each context stores its own PAT, but
2958
 * writing this data shouldn't be harmful even in those cases. */
5060 serge 2959
static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
4560 Serge 2960
{
2961
	uint64_t pat;
2962
 
2963
	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
2964
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
2965
	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
2966
	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
2967
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
2968
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
2969
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
2970
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
2971
 
5354 serge 2972
	if (!USES_PPGTT(dev_priv->dev))
2973
		/* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
2974
		 * so RTL will always use the value corresponding to
2975
		 * pat_sel = 000".
2976
		 * So let's disable cache for GGTT to avoid screen corruptions.
2977
		 * MOCS still can be used though.
2978
		 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
2979
		 * before this patch, i.e. the same uncached + snooping access
2980
		 * like on gen6/7 seems to be in effect.
2981
		 * - So this just fixes blitter/render access. Again it looks
2982
		 * like it's not just uncached access, but uncached + snooping.
2983
		 * So we can still hold onto all our assumptions wrt cpu
2984
		 * clflushing on LLC machines.
2985
		 */
2986
		pat = GEN8_PPAT(0, GEN8_PPAT_UC);
2987
 
4560 Serge 2988
	/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
2989
	 * write would work. */
6084 serge 2990
	I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
2991
	I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
4560 Serge 2992
}
2993
 
5060 serge 2994
static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
2995
{
2996
	uint64_t pat;
2997
 
2998
	/*
2999
	 * Map WB on BDW to snooped on CHV.
3000
	 *
3001
	 * Only the snoop bit has meaning for CHV, the rest is
3002
	 * ignored.
3003
	 *
5354 serge 3004
	 * The hardware will never snoop for certain types of accesses:
3005
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
3006
	 * - PPGTT page tables
3007
	 * - some other special cycles
3008
	 *
3009
	 * As with BDW, we also need to consider the following for GT accesses:
3010
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
3011
	 * so RTL will always use the value corresponding to
3012
	 * pat_sel = 000".
3013
	 * Which means we must set the snoop bit in PAT entry 0
3014
	 * in order to keep the global status page working.
5060 serge 3015
	 */
3016
	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
3017
	      GEN8_PPAT(1, 0) |
3018
	      GEN8_PPAT(2, 0) |
3019
	      GEN8_PPAT(3, 0) |
3020
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
3021
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
3022
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
3023
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);
3024
 
6084 serge 3025
	I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
3026
	I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
5060 serge 3027
}
3028
 
4560 Serge 3029
static int gen8_gmch_probe(struct drm_device *dev,
6084 serge 3030
			   u64 *gtt_total,
4560 Serge 3031
			   size_t *stolen,
3032
			   phys_addr_t *mappable_base,
6084 serge 3033
			   u64 *mappable_end)
4560 Serge 3034
{
3035
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 3036
	u64 gtt_size;
4560 Serge 3037
	u16 snb_gmch_ctl;
3038
	int ret;
3039
 
3040
	/* TODO: We're not aware of mappable constraints on gen8 yet */
3041
	*mappable_base = pci_resource_start(dev->pdev, 2);
3042
	*mappable_end = pci_resource_len(dev->pdev, 2);
3043
 
3044
	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3045
 
5354 serge 3046
	if (INTEL_INFO(dev)->gen >= 9) {
3047
		*stolen = gen9_get_stolen_size(snb_gmch_ctl);
3048
		gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
3049
	} else if (IS_CHERRYVIEW(dev)) {
5060 serge 3050
		*stolen = chv_get_stolen_size(snb_gmch_ctl);
3051
		gtt_size = chv_get_total_gtt_size(snb_gmch_ctl);
3052
	} else {
6084 serge 3053
		*stolen = gen8_get_stolen_size(snb_gmch_ctl);
5060 serge 3054
		gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
3055
	}
4560 Serge 3056
 
6084 serge 3057
	*gtt_total = (gtt_size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
4560 Serge 3058
 
6084 serge 3059
	if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
5060 serge 3060
		chv_setup_private_ppat(dev_priv);
3061
	else
3062
		bdw_setup_private_ppat(dev_priv);
4560 Serge 3063
 
3064
	ret = ggtt_probe_common(dev, gtt_size);
3065
 
3066
	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
3067
	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
6084 serge 3068
	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
3069
	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
4560 Serge 3070
 
6937 serge 3071
	if (IS_CHERRYVIEW(dev_priv))
3072
		dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries__BKL;
3073
 
4560 Serge 3074
	return ret;
3075
}
3076
 
3480 Serge 3077
static int gen6_gmch_probe(struct drm_device *dev,
6084 serge 3078
			   u64 *gtt_total,
3480 Serge 3079
			   size_t *stolen,
3080
			   phys_addr_t *mappable_base,
6084 serge 3081
			   u64 *mappable_end)
3243 Serge 3082
{
3083
	struct drm_i915_private *dev_priv = dev->dev_private;
3480 Serge 3084
	unsigned int gtt_size;
3243 Serge 3085
	u16 snb_gmch_ctl;
3086
	int ret;
3087
 
3480 Serge 3088
	*mappable_base = pci_resource_start(dev->pdev, 2);
3089
	*mappable_end = pci_resource_len(dev->pdev, 2);
3090
 
3091
	/* 64/512MB is the current min/max we actually know of, but this is just
3092
	 * a coarse sanity check.
3243 Serge 3093
	 */
3480 Serge 3094
	if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
6084 serge 3095
		DRM_ERROR("Unknown GMADR size (%llx)\n",
3480 Serge 3096
			  dev_priv->gtt.mappable_end);
3097
		return -ENXIO;
6084 serge 3098
	}
3243 Serge 3099
 
3480 Serge 3100
	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3243 Serge 3101
 
4104 Serge 3102
	*stolen = gen6_get_stolen_size(snb_gmch_ctl);
4560 Serge 3103
 
3104
	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
6084 serge 3105
	*gtt_total = (gtt_size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
3243 Serge 3106
 
4560 Serge 3107
	ret = ggtt_probe_common(dev, gtt_size);
3243 Serge 3108
 
4104 Serge 3109
	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
3110
	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
6084 serge 3111
	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
3112
	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
3480 Serge 3113
 
3114
	return ret;
3115
}
3116
 
4104 Serge 3117
static void gen6_gmch_remove(struct i915_address_space *vm)
3480 Serge 3118
{
4104 Serge 3119
 
3120
	struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
5060 serge 3121
 
4104 Serge 3122
	iounmap(gtt->gsm);
6084 serge 3123
	free_scratch_page(vm->dev, vm->scratch_page);
3480 Serge 3124
}
3125
 
3126
static int i915_gmch_probe(struct drm_device *dev,
6084 serge 3127
			   u64 *gtt_total,
3480 Serge 3128
			   size_t *stolen,
3129
			   phys_addr_t *mappable_base,
6084 serge 3130
			   u64 *mappable_end)
3480 Serge 3131
{
3132
	struct drm_i915_private *dev_priv = dev->dev_private;
3133
	int ret;
3134
 
3135
	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
3136
	if (!ret) {
3137
		DRM_ERROR("failed to set up gmch\n");
3138
		return -EIO;
3243 Serge 3139
	}
3140
 
3480 Serge 3141
	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
3243 Serge 3142
 
3480 Serge 3143
	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
6084 serge 3144
	dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
4104 Serge 3145
	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
6084 serge 3146
	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
3147
	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
3480 Serge 3148
 
4560 Serge 3149
	if (unlikely(dev_priv->gtt.do_idle_maps))
3150
		DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3151
 
3243 Serge 3152
	return 0;
3480 Serge 3153
}
3243 Serge 3154
 
4104 Serge 3155
static void i915_gmch_remove(struct i915_address_space *vm)
3480 Serge 3156
{
4560 Serge 3157
//	intel_gmch_remove();
3480 Serge 3158
}
3159
 
3160
int i915_gem_gtt_init(struct drm_device *dev)
3161
{
3162
	struct drm_i915_private *dev_priv = dev->dev_private;
3163
	struct i915_gtt *gtt = &dev_priv->gtt;
3164
	int ret;
3165
 
3166
	if (INTEL_INFO(dev)->gen <= 5) {
4104 Serge 3167
		gtt->gtt_probe = i915_gmch_probe;
3168
		gtt->base.cleanup = i915_gmch_remove;
4560 Serge 3169
	} else if (INTEL_INFO(dev)->gen < 8) {
4104 Serge 3170
		gtt->gtt_probe = gen6_gmch_probe;
3171
		gtt->base.cleanup = gen6_gmch_remove;
3172
		if (IS_HASWELL(dev) && dev_priv->ellc_size)
3173
			gtt->base.pte_encode = iris_pte_encode;
3174
		else if (IS_HASWELL(dev))
3175
			gtt->base.pte_encode = hsw_pte_encode;
3176
		else if (IS_VALLEYVIEW(dev))
3177
			gtt->base.pte_encode = byt_pte_encode;
3178
		else if (INTEL_INFO(dev)->gen >= 7)
3179
			gtt->base.pte_encode = ivb_pte_encode;
3180
		else
3181
			gtt->base.pte_encode = snb_pte_encode;
4560 Serge 3182
	} else {
3183
		dev_priv->gtt.gtt_probe = gen8_gmch_probe;
3184
		dev_priv->gtt.base.cleanup = gen6_gmch_remove;
3480 Serge 3185
	}
3186
 
6084 serge 3187
	gtt->base.dev = dev;
7144 serge 3188
	gtt->base.is_ggtt = true;
6084 serge 3189
 
4104 Serge 3190
	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
3191
			     &gtt->mappable_base, &gtt->mappable_end);
3480 Serge 3192
	if (ret)
4104 Serge 3193
		return ret;
3480 Serge 3194
 
7144 serge 3195
	/*
3196
	 * Initialise stolen early so that we may reserve preallocated
3197
	 * objects for the BIOS to KMS transition.
3198
	 */
3199
	ret = i915_gem_init_stolen(dev);
3200
	if (ret)
3201
		goto out_gtt_cleanup;
3202
 
3480 Serge 3203
	/* GMADR is the PCI mmio aperture into the global GTT. */
6084 serge 3204
	DRM_INFO("Memory usable by graphics device = %lluM\n",
4104 Serge 3205
		 gtt->base.total >> 20);
6084 serge 3206
	DRM_DEBUG_DRIVER("GMADR size = %lldM\n", gtt->mappable_end >> 20);
4104 Serge 3207
	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
5060 serge 3208
#ifdef CONFIG_INTEL_IOMMU
3209
	if (intel_iommu_gfx_mapped)
3210
		DRM_INFO("VT-d active for gfx access\n");
3211
#endif
3212
	/*
3213
	 * i915.enable_ppgtt is read-only, so do an early pass to validate the
3214
	 * user's requested state against the hardware/driver capabilities.  We
3215
	 * do this now so that we can print out any log messages once rather
3216
	 * than every time we check intel_enable_ppgtt().
3217
	 */
3218
	i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
3219
	DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
3480 Serge 3220
 
3221
	return 0;
7144 serge 3222
 
3223
out_gtt_cleanup:
3224
	gtt->base.cleanup(&dev_priv->gtt.base);
3225
 
3226
	return ret;
3243 Serge 3227
}
3228
 
6084 serge 3229
void i915_gem_restore_gtt_mappings(struct drm_device *dev)
5060 serge 3230
{
6084 serge 3231
	struct drm_i915_private *dev_priv = dev->dev_private;
3232
	struct drm_i915_gem_object *obj;
3233
	struct i915_address_space *vm;
3234
	struct i915_vma *vma;
3235
	bool flush;
3236
 
3237
	i915_check_and_clear_faults(dev);
3238
 
3239
	/* First fill our portion of the GTT with scratch pages */
3240
	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
3241
				       dev_priv->gtt.base.start,
3242
				       dev_priv->gtt.base.total,
3243
				       true);
3244
 
3245
	/* Cache flush objects bound into GGTT and rebind them. */
3246
	vm = &dev_priv->gtt.base;
3247
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
3248
		flush = false;
7144 serge 3249
		list_for_each_entry(vma, &obj->vma_list, obj_link) {
6084 serge 3250
			if (vma->vm != vm)
3251
				continue;
3252
 
3253
			WARN_ON(i915_vma_bind(vma, obj->cache_level,
3254
					      PIN_UPDATE));
3255
 
3256
			flush = true;
3257
		}
3258
 
3259
		if (flush)
3260
			i915_gem_clflush_object(obj, obj->pin_display);
3261
	}
3262
 
3263
	if (INTEL_INFO(dev)->gen >= 8) {
3264
		if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
3265
			chv_setup_private_ppat(dev_priv);
3266
		else
3267
			bdw_setup_private_ppat(dev_priv);
3268
 
3269
		return;
3270
	}
3271
 
3272
	if (USES_PPGTT(dev)) {
3273
		list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
3274
			/* TODO: Perhaps it shouldn't be gen6 specific */
3275
 
3276
			struct i915_hw_ppgtt *ppgtt =
3277
					container_of(vm, struct i915_hw_ppgtt,
3278
						     base);
3279
 
3280
			if (i915_is_ggtt(vm))
3281
				ppgtt = dev_priv->mm.aliasing_ppgtt;
3282
 
3283
			gen6_write_page_range(dev_priv, &ppgtt->pd,
3284
					      0, ppgtt->base.total);
3285
		}
3286
	}
3287
 
3288
	i915_ggtt_flush(dev_priv);
3289
}
3290
 
3291
static struct i915_vma *
3292
__i915_gem_vma_create(struct drm_i915_gem_object *obj,
3293
		      struct i915_address_space *vm,
3294
		      const struct i915_ggtt_view *ggtt_view)
3295
{
3296
	struct i915_vma *vma;
3297
 
3298
	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
3299
		return ERR_PTR(-EINVAL);
3300
 
3301
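	/* This port allocates VMAs with kzalloc() rather than the dedicated
	 * kmem_cache used by the upstream driver.
	 */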
//	vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
3302
	vma = kzalloc(sizeof(*vma), GFP_KERNEL);
5060 serge 3303
	if (vma == NULL)
3304
		return ERR_PTR(-ENOMEM);
3305
 
7144 serge 3306
	INIT_LIST_HEAD(&vma->vm_link);
3307
	INIT_LIST_HEAD(&vma->obj_link);
5060 serge 3308
	INIT_LIST_HEAD(&vma->exec_list);
3309
	vma->vm = vm;
3310
	vma->obj = obj;
7144 serge 3311
	vma->is_ggtt = i915_is_ggtt(vm);
5060 serge 3312
 
6084 serge 3313
	if (i915_is_ggtt(vm))
3314
		vma->ggtt_view = *ggtt_view;
7144 serge 3315
	else
5354 serge 3316
		i915_ppgtt_get(i915_vm_to_ppgtt(vm));
5060 serge 3317
 
7144 serge 3318
	list_add_tail(&vma->obj_link, &obj->vma_list);
3319
 
5060 serge 3320
	return vma;
3321
}
3322
 
3323
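/*
 * Look up the VMA of @obj in @vm, creating it (with the normal view when
 * @vm is the GGTT) if it does not exist yet.
 */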
struct i915_vma *
3324
i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
3325
				  struct i915_address_space *vm)
3326
{
3327
	struct i915_vma *vma;
3328
 
3329
	vma = i915_gem_obj_to_vma(obj, vm);
3330
	if (!vma)
6084 serge 3331
		vma = __i915_gem_vma_create(obj, vm,
3332
					    i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL);
5060 serge 3333
 
3334
	return vma;
3335
}
3336
 
6084 serge 3337
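/*
 * As i915_gem_obj_lookup_or_create_vma(), but for an explicit GGTT view.
 */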
struct i915_vma *
3338
i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
3339
				       const struct i915_ggtt_view *view)
3243 Serge 3340
{
6084 serge 3341
	struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
3342
	struct i915_vma *vma;
3243 Serge 3343
 
6084 serge 3344
	if (WARN_ON(!view))
3345
		return ERR_PTR(-EINVAL);
3243 Serge 3346
 
6084 serge 3347
	vma = i915_gem_obj_to_ggtt_view(obj, view);
3243 Serge 3348
 
6084 serge 3349
	if (IS_ERR(vma))
3350
		return vma;
3243 Serge 3351
 
6084 serge 3352
	if (!vma)
3353
		vma = __i915_gem_vma_create(obj, ggtt, view);
3243 Serge 3354
 
6084 serge 3355
	return vma;
3243 Serge 3356
 
6084 serge 3357
}
3243 Serge 3358
 
6084 serge 3359
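/*
 * Emit one page-sized sg entry per tile, walking the source page array
 * column by column from the bottom row up.  Only the DMA addresses matter;
 * the result describes the page layout used by rotated GGTT views.
 */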
static struct scatterlist *
7144 serge 3360
rotate_pages(const dma_addr_t *in, unsigned int offset,
6084 serge 3361
	     unsigned int width, unsigned int height,
7144 serge 3362
	     unsigned int stride,
6084 serge 3363
	     struct sg_table *st, struct scatterlist *sg)
3364
{
3365
	unsigned int column, row;
3366
	unsigned int src_idx;
3243 Serge 3367
 
6084 serge 3368
	if (!sg) {
3369
		st->nents = 0;
3370
		sg = st->sgl;
3371
	}
3243 Serge 3372
 
6084 serge 3373
	for (column = 0; column < width; column++) {
7144 serge 3374
		src_idx = stride * (height - 1) + column;
6084 serge 3375
		for (row = 0; row < height; row++) {
3376
			st->nents++;
3377
			/* We don't need the pages, but need to initialize
3378
			 * the entries so the sg list can be happily traversed.
3379
			 * All we need are the DMA addresses.
3380
			 */
3381
			sg_set_page(sg, NULL, PAGE_SIZE, 0);
3382
			sg_dma_address(sg) = in[offset + src_idx];
3383
			sg_dma_len(sg) = PAGE_SIZE;
3384
			sg = sg_next(sg);
7144 serge 3385
			src_idx -= stride;
6084 serge 3386
		}
3387
	}
3388
 
3389
	return sg;
3243 Serge 3390
}
3391
 
6084 serge 3392
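/*
 * Build an sg_table whose entries reference the object's backing pages in
 * rotated order.  For NV12 framebuffers the UV plane is rotated separately
 * and appended after the Y plane pages.
 */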
static struct sg_table *
7144 serge 3393
intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info,
6084 serge 3394
			  struct drm_i915_gem_object *obj)
3243 Serge 3395
{
6084 serge 3396
	unsigned int size_pages = rot_info->size >> PAGE_SHIFT;
3397
	unsigned int size_pages_uv;
3398
	struct sg_page_iter sg_iter;
3399
	unsigned long i;
3400
	dma_addr_t *page_addr_list;
3401
	struct sg_table *st;
3402
	unsigned int uv_start_page;
3403
	struct scatterlist *sg;
3404
	int ret = -ENOMEM;
3243 Serge 3405
 
6084 serge 3406
	/* Allocate a temporary list of source pages for random access. */
3407
	page_addr_list = drm_malloc_ab(obj->base.size / PAGE_SIZE,
3408
				       sizeof(dma_addr_t));
3409
	if (!page_addr_list)
3410
		return ERR_PTR(ret);
3243 Serge 3411
 
6084 serge 3412
	/* Account for UV plane with NV12. */
3413
	if (rot_info->pixel_format == DRM_FORMAT_NV12)
3414
		size_pages_uv = rot_info->size_uv >> PAGE_SHIFT;
3415
	else
3416
		size_pages_uv = 0;
3243 Serge 3417
 
6084 serge 3418
	/* Allocate target SG list. */
3419
	st = kmalloc(sizeof(*st), GFP_KERNEL);
3420
	if (!st)
3421
		goto err_st_alloc;
3243 Serge 3422
 
6084 serge 3423
	ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL);
3424
	if (ret)
3425
		goto err_sg_alloc;
3243 Serge 3426
 
6084 serge 3427
	/* Populate source page list from the object. */
3428
	i = 0;
3429
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
3430
		page_addr_list[i] = sg_page_iter_dma_address(&sg_iter);
3431
		i++;
3432
	}
3243 Serge 3433
 
6084 serge 3434
	/* Rotate the pages. */
3435
	sg = rotate_pages(page_addr_list, 0,
3436
		     rot_info->width_pages, rot_info->height_pages,
7144 serge 3437
		     rot_info->width_pages,
6084 serge 3438
		     st, NULL);
3243 Serge 3439
 
6084 serge 3440
	/* Append the UV plane if NV12. */
3441
	if (rot_info->pixel_format == DRM_FORMAT_NV12) {
3442
		uv_start_page = size_pages;
3243 Serge 3443
 
6084 serge 3444
		/* If the UV plane is not page (tile-row) aligned, start one page earlier. */
3445
		if (offset_in_page(rot_info->uv_offset))
3446
			uv_start_page--;
3243 Serge 3447
 
6084 serge 3448
		rot_info->uv_start_page = uv_start_page;
3243 Serge 3449
 
6084 serge 3450
		rotate_pages(page_addr_list, uv_start_page,
3451
			     rot_info->width_pages_uv,
3452
			     rot_info->height_pages_uv,
7144 serge 3453
			     rot_info->width_pages_uv,
6084 serge 3454
			     st, sg);
3455
	}
3243 Serge 3456
 
6084 serge 3457
	DRM_DEBUG_KMS(
3458
		      "Created rotated page mapping for object size %zu (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0)).\n",
3459
		      obj->base.size, rot_info->pitch, rot_info->height,
3460
		      rot_info->pixel_format, rot_info->width_pages,
3461
		      rot_info->height_pages, size_pages + size_pages_uv,
3462
		      size_pages);
3243 Serge 3463
 
6084 serge 3464
	drm_free_large(page_addr_list);
3243 Serge 3465
 
6084 serge 3466
	return st;
3243 Serge 3467
 
6084 serge 3468
err_sg_alloc:
3469
	kfree(st);
3470
err_st_alloc:
3471
	drm_free_large(page_addr_list);
3472
 
3473
	DRM_DEBUG_KMS(
3474
		      "Failed to create rotated mapping for object size %zu! (%d) (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0))\n",
3475
		      obj->base.size, ret, rot_info->pitch, rot_info->height,
3476
		      rot_info->pixel_format, rot_info->width_pages,
3477
		      rot_info->height_pages, size_pages + size_pages_uv,
3478
		      size_pages);
3479
	return ERR_PTR(ret);
3243 Serge 3480
}
3481
 
6084 serge 3482
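/*
 * Build an sg_table covering only the page range described by a partial
 * GGTT view: params.partial.size pages starting at params.partial.offset.
 */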
static struct sg_table *
3483
intel_partial_pages(const struct i915_ggtt_view *view,
3484
		    struct drm_i915_gem_object *obj)
3243 Serge 3485
{
6084 serge 3486
	struct sg_table *st;
3487
	struct scatterlist *sg;
3488
	struct sg_page_iter obj_sg_iter;
3489
	int ret = -ENOMEM;
3243 Serge 3490
 
6084 serge 3491
	st = kmalloc(sizeof(*st), GFP_KERNEL);
3492
	if (!st)
3493
		goto err_st_alloc;
3746 Serge 3494
 
6084 serge 3495
	ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
3496
	if (ret)
3497
		goto err_sg_alloc;
3746 Serge 3498
 
6084 serge 3499
	sg = st->sgl;
3500
	st->nents = 0;
3501
	for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
3502
		view->params.partial.offset)
3503
	{
3504
		if (st->nents >= view->params.partial.size)
3505
			break;
3506
 
3507
		sg_set_page(sg, NULL, PAGE_SIZE, 0);
3508
		sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
3509
		sg_dma_len(sg) = PAGE_SIZE;
3510
 
3511
		sg = sg_next(sg);
3512
		st->nents++;
3513
	}
3514
 
3515
	return st;
3516
 
3517
err_sg_alloc:
3518
	kfree(st);
3519
err_st_alloc:
3520
	return ERR_PTR(ret);
3746 Serge 3521
}
3522
 
6084 serge 3523
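/*
 * Select the pages backing a GGTT VMA: the object's own page list for the
 * normal view, or a specially built sg_table for rotated and partial views.
 */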
static int
3524
i915_get_ggtt_vma_pages(struct i915_vma *vma)
3746 Serge 3525
{
6084 serge 3526
	int ret = 0;
3527
 
3528
	if (vma->ggtt_view.pages)
3529
		return 0;
3530
 
3531
	if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
3532
		vma->ggtt_view.pages = vma->obj->pages;
3533
	else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
3534
		vma->ggtt_view.pages =
7144 serge 3535
			intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj);
6084 serge 3536
	else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
3537
		vma->ggtt_view.pages =
3538
			intel_partial_pages(&vma->ggtt_view, vma->obj);
3539
	else
3540
		WARN_ONCE(1, "GGTT view %u not implemented!\n",
3541
			  vma->ggtt_view.type);
3542
 
3543
	if (!vma->ggtt_view.pages) {
3544
		DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
3545
			  vma->ggtt_view.type);
3546
		ret = -EINVAL;
3547
	} else if (IS_ERR(vma->ggtt_view.pages)) {
3548
		ret = PTR_ERR(vma->ggtt_view.pages);
3549
		vma->ggtt_view.pages = NULL;
3550
		DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3551
			  vma->ggtt_view.type, ret);
3552
	}
3553
 
3554
	return ret;
3746 Serge 3555
}
3556
 
6084 serge 3557
/**
3558
 * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
3559
 * @vma: VMA to map
3560
 * @cache_level: mapping cache level
3561
 * @flags: flags like global or local mapping
3562
 *
3563
 * DMA addresses are taken from the scatter-gather table of this object (or of
3564
 * this VMA in case of non-default GGTT views) and PTE entries set up.
3565
 * Note that DMA addresses are also the only part of the SG table we care about.
3566
 */
3567
int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
3568
		  u32 flags)
3746 Serge 3569
{
6084 serge 3570
	int ret;
3571
	u32 bind_flags;
3746 Serge 3572
 
6084 serge 3573
	if (WARN_ON(flags == 0))
3574
		return -EINVAL;
3746 Serge 3575
 
6084 serge 3576
	bind_flags = 0;
3577
	if (flags & PIN_GLOBAL)
3578
		bind_flags |= GLOBAL_BIND;
3579
	if (flags & PIN_USER)
3580
		bind_flags |= LOCAL_BIND;
3746 Serge 3581
 
6084 serge 3582
	if (flags & PIN_UPDATE)
3583
		bind_flags |= vma->bound;
3584
	else
3585
		bind_flags &= ~vma->bound;
3586
 
3587
	if (bind_flags == 0)
3588
		return 0;
3589
 
3590
	if (vma->bound == 0 && vma->vm->allocate_va_range) {
3591
		/* XXX: i915_vma_pin() will fix this +- hack */
3592
		vma->pin_count++;
3593
		ret = vma->vm->allocate_va_range(vma->vm,
3594
						 vma->node.start,
3595
						 vma->node.size);
3596
		vma->pin_count--;
3597
		if (ret)
3598
			return ret;
3599
	}
3600
 
3601
	ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
3602
	if (ret)
3603
		return ret;
3604
 
3605
	vma->bound |= bind_flags;
3606
 
3607
	return 0;
3746 Serge 3608
}
3609
 
6084 serge 3610
/**
3611
 * i915_ggtt_view_size - Get the size of a GGTT view.
3612
 * @obj: Object the view is of.
3613
 * @view: The view in question.
3614
 *
3615
 * @return The size of the GGTT view in bytes.
3616
 */
3617
size_t
3618
i915_ggtt_view_size(struct drm_i915_gem_object *obj,
3619
		    const struct i915_ggtt_view *view)
3620
{
3621
	if (view->type == I915_GGTT_VIEW_NORMAL) {
3622
		return obj->base.size;
3623
	} else if (view->type == I915_GGTT_VIEW_ROTATED) {
7144 serge 3624
		return view->params.rotated.size;
6084 serge 3625
	} else if (view->type == I915_GGTT_VIEW_PARTIAL) {
3626
		return view->params.partial.size << PAGE_SHIFT;
3627
	} else {
3628
		WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type);
3629
		return obj->base.size;
3630
	}
3631
}