Subversion Repositories Kolibri OS

Rev Author Line No. Line
2332 Serge 1
/*
2
 * Copyright © 2010 Daniel Vetter
5060 serge 3
 * Copyright © 2011-2014 Intel Corporation
2332 Serge 4
 *
5
 * Permission is hereby granted, free of charge, to any person obtaining a
6
 * copy of this software and associated documentation files (the "Software"),
7
 * to deal in the Software without restriction, including without limitation
8
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
 * and/or sell copies of the Software, and to permit persons to whom the
10
 * Software is furnished to do so, subject to the following conditions:
11
 *
12
 * The above copyright notice and this permission notice (including the next
13
 * paragraph) shall be included in all copies or substantial portions of the
14
 * Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
 * IN THE SOFTWARE.
23
 *
24
 */
25
 
5354 serge 26
#include <linux/seq_file.h>
3031 serge 27
#include <drm/drmP.h>
28
#include <drm/i915_drm.h>
2332 Serge 29
#include "i915_drv.h"
6084 serge 30
#include "i915_vgpu.h"
2351 Serge 31
#include "i915_trace.h"
2332 Serge 32
#include "intel_drv.h"
33
 
6084 serge 34
/**
35
 * DOC: Global GTT views
36
 *
37
 * Background and previous state
38
 *
39
 * Historically objects could exist (be bound) in global GTT space only as
40
 * singular instances with a view representing all of the object's backing pages
41
 * in a linear fashion. This view will be called a normal view.
42
 *
43
 * To support multiple views of the same object, where the number of mapped
44
 * pages is not equal to the backing store, or where the layout of the pages
45
 * is not linear, the concept of a GGTT view was added.
46
 *
47
 * One example of an alternative view is a stereo display driven by a single
48
 * image. In this case we would have a framebuffer looking like this
49
 * (2x2 pages):
50
 *
51
 *    12
52
 *    34
53
 *
54
 * Above would represent a normal GGTT view as normally mapped for GPU or CPU
55
 * rendering. In contrast, fed to the display engine would be an alternative
56
 * view which could look something like this:
57
 *
58
 *   1212
59
 *   3434
60
 *
61
 * In this example both the size and layout of pages in the alternative view are
62
 * different from the normal view.
63
 *
64
 * Implementation and usage
65
 *
66
 * GGTT views are implemented using VMAs and are distinguished via enum
67
 * i915_ggtt_view_type and struct i915_ggtt_view.
68
 *
69
 * A new flavour of core GEM functions which work with GGTT bound objects was
70
 * added with the _ggtt_ infix, and sometimes with _view postfix to avoid
71
 * renaming in large amounts of code. They take the struct i915_ggtt_view
72
 * parameter encapsulating all metadata required to implement a view.
73
 *
74
 * As a helper for callers which are only interested in the normal view,
75
 * a globally const i915_ggtt_view_normal singleton instance exists. All old core
76
 * GEM API functions, the ones not taking the view parameter, operate on,
77
 * or with the normal GGTT view.
78
 *
79
 * Code wanting to add or use a new GGTT view needs to:
80
 *
81
 * 1. Add a new enum with a suitable name.
82
 * 2. Extend the metadata in the i915_ggtt_view structure if required.
83
 * 3. Add support to i915_get_ggtt_vma_pages().
84
 *
85
 * New views are required to build a scatter-gather table from within the
86
 * i915_get_ggtt_vma_pages function. This table is stored in the vma.ggtt_view and
87
 * exists for the lifetime of a VMA.
88
 *
89
 * The core API is designed to have copy semantics, which means that a passed-in
90
 * struct i915_ggtt_view does not need to be persistent (left around after
91
 * calling the core API functions).
92
 *
93
 */
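
/*
 * Illustrative recap of the example above: the normal view maps the four
 * backing pages 1,2,3,4 once, linearly, while the alternative (display)
 * view maps the same pages a second time, in a different order and with a
 * different total size (1,2,1,2 / 3,4,3,4).  Each such mapping is a
 * separate VMA in the global GTT; the mappings share the backing store and
 * differ only in the scatter-gather table built for the view.
 */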
3243 Serge 94
 
6084 serge 95
static int
96
i915_get_ggtt_vma_pages(struct i915_vma *vma);
97
 
98
const struct i915_ggtt_view i915_ggtt_view_normal;
99
const struct i915_ggtt_view i915_ggtt_view_rotated = {
100
        .type = I915_GGTT_VIEW_ROTATED
101
};
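
/*
 * Summary of the sanitization below: the i915.enable_ppgtt module parameter
 * is reconciled with what the platform supports and collapsed to the mode
 * actually used: 0 = PPGTT disabled, 1 = aliasing PPGTT, 2 = full PPGTT.
 * Gen9+ never disables PPGTT (execlists require it), vGPU drops full PPGTT,
 * and SNB with VT-d enabled or pre-B3 VLV steppings fall back to 0.
 */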
102
 
5354 serge 103
static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
5060 serge 104
{
5354 serge 105
	bool has_aliasing_ppgtt;
106
	bool has_full_ppgtt;
3243 Serge 107
 
5354 serge 108
	has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
109
	has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;
3243 Serge 110
 
6084 serge 111
	if (intel_vgpu_active(dev))
112
		has_full_ppgtt = false; /* emulation is too hard */
113
 
5354 serge 114
	/*
115
	 * We don't allow disabling PPGTT for gen9+ as it's a requirement for
116
	 * execlists, the sole mechanism available to submit work.
117
	 */
118
	if (INTEL_INFO(dev)->gen < 9 &&
119
	    (enable_ppgtt == 0 || !has_aliasing_ppgtt))
5060 serge 120
		return 0;
4104 Serge 121
 
5060 serge 122
	if (enable_ppgtt == 1)
123
		return 1;
4560 Serge 124
 
5354 serge 125
	if (enable_ppgtt == 2 && has_full_ppgtt)
5060 serge 126
		return 2;
4560 Serge 127
 
5060 serge 128
#ifdef CONFIG_INTEL_IOMMU
129
	/* Disable ppgtt on SNB if VT-d is on. */
130
	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
131
		DRM_INFO("Disabling PPGTT because VT-d is on\n");
132
		return 0;
133
	}
134
#endif
135
 
136
	/* Early VLV doesn't have this */
137
	if (IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) &&
138
	    dev->pdev->revision < 0xb) {
139
		DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
140
		return 0;
141
	}
142
 
6084 serge 143
	if (INTEL_INFO(dev)->gen >= 8 && i915.enable_execlists)
144
		return 2;
145
	else
146
		return has_aliasing_ppgtt ? 1 : 0;
5060 serge 147
}
148
 
6084 serge 149
static int ppgtt_bind_vma(struct i915_vma *vma,
150
			  enum i915_cache_level cache_level,
151
			  u32 unused)
152
{
153
	u32 pte_flags = 0;
5060 serge 154
 
6084 serge 155
	/* Currently applicable only to VLV */
156
	if (vma->obj->gt_ro)
157
		pte_flags |= PTE_READ_ONLY;
5060 serge 158
 
6084 serge 159
	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
160
				cache_level, pte_flags);
161
 
162
	return 0;
163
}
164
 
165
static void ppgtt_unbind_vma(struct i915_vma *vma)
4560 Serge 166
{
6084 serge 167
	vma->vm->clear_range(vma->vm,
168
			     vma->node.start,
169
			     vma->obj->base.size,
170
			     true);
171
}
172
 
173
static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
174
				  enum i915_cache_level level,
175
				  bool valid)
176
{
177
	gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
4560 Serge 178
	pte |= addr;
5060 serge 179
 
180
	switch (level) {
181
	case I915_CACHE_NONE:
182
		pte |= PPAT_UNCACHED_INDEX;
183
		break;
184
	case I915_CACHE_WT:
185
		pte |= PPAT_DISPLAY_ELLC_INDEX;
186
		break;
187
	default:
4560 Serge 188
		pte |= PPAT_CACHED_INDEX;
5060 serge 189
		break;
190
	}
191
 
4560 Serge 192
	return pte;
193
}
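
/*
 * Worked example for the encoder above (illustrative only): a valid,
 * LLC-cached page at physical address 0x1000 is encoded as
 *   _PAGE_PRESENT | _PAGE_RW | 0x1000 | PPAT_CACHED_INDEX
 * i.e. the PPAT index bits pick the cacheability, while the rest of the
 * PTE is just the page address plus the present/write flags.
 */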
194
 
6084 serge 195
static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
196
				  const enum i915_cache_level level)
4560 Serge 197
{
6084 serge 198
	gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
4560 Serge 199
	pde |= addr;
200
	if (level != I915_CACHE_NONE)
201
		pde |= PPAT_CACHED_PDE_INDEX;
202
	else
203
		pde |= PPAT_UNCACHED_INDEX;
204
	return pde;
205
}
206
 
6084 serge 207
#define gen8_pdpe_encode gen8_pde_encode
208
#define gen8_pml4e_encode gen8_pde_encode
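
/*
 * Note on the aliases above: gen8 PDP and PML4 entries use the same bit
 * layout as a PDE (present/RW, address, PPAT index), so the higher-level
 * encoders simply reuse gen8_pde_encode() instead of duplicating it.
 */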
209
 
210
static gen6_pte_t snb_pte_encode(dma_addr_t addr,
211
				 enum i915_cache_level level,
212
				 bool valid, u32 unused)
4104 Serge 213
{
6084 serge 214
	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
4104 Serge 215
	pte |= GEN6_PTE_ADDR_ENCODE(addr);
216
 
217
	switch (level) {
218
	case I915_CACHE_L3_LLC:
219
	case I915_CACHE_LLC:
220
		pte |= GEN6_PTE_CACHE_LLC;
221
		break;
222
	case I915_CACHE_NONE:
223
		pte |= GEN6_PTE_UNCACHED;
224
		break;
225
	default:
6084 serge 226
		MISSING_CASE(level);
4104 Serge 227
	}
228
 
229
	return pte;
230
}
231
 
6084 serge 232
static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
233
				 enum i915_cache_level level,
234
				 bool valid, u32 unused)
3243 Serge 235
{
6084 serge 236
	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
3243 Serge 237
	pte |= GEN6_PTE_ADDR_ENCODE(addr);
238
 
239
	switch (level) {
4104 Serge 240
	case I915_CACHE_L3_LLC:
241
		pte |= GEN7_PTE_CACHE_L3_LLC;
3243 Serge 242
		break;
243
	case I915_CACHE_LLC:
244
		pte |= GEN6_PTE_CACHE_LLC;
245
		break;
246
	case I915_CACHE_NONE:
6084 serge 247
		pte |= GEN6_PTE_UNCACHED;
3243 Serge 248
		break;
249
	default:
6084 serge 250
		MISSING_CASE(level);
3243 Serge 251
	}
252
 
253
	return pte;
254
}
255
 
6084 serge 256
static gen6_pte_t byt_pte_encode(dma_addr_t addr,
257
				 enum i915_cache_level level,
258
				 bool valid, u32 flags)
3746 Serge 259
{
6084 serge 260
	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
4104 Serge 261
	pte |= GEN6_PTE_ADDR_ENCODE(addr);
262
 
5060 serge 263
	if (!(flags & PTE_READ_ONLY))
6084 serge 264
		pte |= BYT_PTE_WRITEABLE;
4104 Serge 265
 
266
	if (level != I915_CACHE_NONE)
267
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;
268
 
269
	return pte;
270
}
271
 
6084 serge 272
static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
273
				 enum i915_cache_level level,
274
				 bool valid, u32 unused)
4104 Serge 275
{
6084 serge 276
	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
4104 Serge 277
	pte |= HSW_PTE_ADDR_ENCODE(addr);
278
 
279
	if (level != I915_CACHE_NONE)
280
		pte |= HSW_WB_LLC_AGE3;
281
 
282
	return pte;
283
}
284
 
6084 serge 285
static gen6_pte_t iris_pte_encode(dma_addr_t addr,
286
				  enum i915_cache_level level,
287
				  bool valid, u32 unused)
4104 Serge 288
{
6084 serge 289
	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
4104 Serge 290
	pte |= HSW_PTE_ADDR_ENCODE(addr);
291
 
292
	switch (level) {
293
	case I915_CACHE_NONE:
294
		break;
295
	case I915_CACHE_WT:
4560 Serge 296
		pte |= HSW_WT_ELLC_LLC_AGE3;
4104 Serge 297
		break;
298
	default:
4560 Serge 299
		pte |= HSW_WB_ELLC_LLC_AGE3;
4104 Serge 300
		break;
301
	}
302
 
303
	return pte;
304
}
305
 
6084 serge 306
static int __setup_page_dma(struct drm_device *dev,
307
			    struct i915_page_dma *p, gfp_t flags)
308
{
309
	struct device *device = &dev->pdev->dev;
310
 
311
	p->page = alloc_page(flags);
312
	if (!p->page)
313
		return -ENOMEM;
314
 
315
	p->daddr = page_to_phys(p->page);
316
 
317
	return 0;
318
}
319
 
320
static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
321
{
322
	return __setup_page_dma(dev, p, GFP_KERNEL);
323
}
324
 
325
static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
326
{
327
	if (WARN_ON(!p->page))
328
		return;
329
 
330
	__free_page(p->page);
331
	memset(p, 0, sizeof(*p));
332
}
333
 
334
static void *kmap_page_dma(struct i915_page_dma *p)
335
{
336
	return kmap_atomic(p->page);
337
}
338
 
339
/* We use the flushing unmap only with ppgtt structures:
340
 * page directories, page tables and scratch pages.
341
 */
342
static void kunmap_page_dma(struct drm_device *dev, void *vaddr)
343
{
344
	/* There are only a few exceptions for gen >= 6: chv and bxt.
345
	 * And we are not sure about the latter so play safe for now.
346
	 */
347
	if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
348
		drm_clflush_virt_range(vaddr, PAGE_SIZE);
349
 
350
	kunmap_atomic(vaddr);
351
}
352
 
353
#define kmap_px(px) kmap_page_dma(px_base(px))
354
#define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr))
355
 
356
#define setup_px(dev, px) setup_page_dma((dev), px_base(px))
357
#define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
358
#define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v))
359
#define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v))
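
/*
 * Note on the px_*() helpers used above: "px" stands for any paging
 * structure (page table, page directory, pdp or pml4).  Each of them
 * embeds a struct i915_page_dma, and px_base()/px_page()/px_dma()
 * (defined alongside these structures in the GTT header) extract it, so
 * one set of setup/cleanup/fill helpers serves every level of the
 * hierarchy.
 */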
360
 
361
static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
362
			  const uint64_t val)
363
{
364
	int i;
365
	uint64_t * const vaddr = kmap_page_dma(p);
366
 
367
	for (i = 0; i < 512; i++)
368
		vaddr[i] = val;
369
 
370
	kunmap_page_dma(dev, vaddr);
371
}
372
 
373
static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
374
			     const uint32_t val32)
375
{
376
	uint64_t v = val32;
377
 
378
	v = v << 32 | val32;
379
 
380
	fill_page_dma(dev, p, v);
381
}
382
 
383
static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev)
384
{
385
	struct i915_page_scratch *sp;
386
	int ret;
387
 
388
	sp = kzalloc(sizeof(*sp), GFP_KERNEL);
389
	if (sp == NULL)
390
		return ERR_PTR(-ENOMEM);
391
 
392
	ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO);
393
	if (ret) {
394
		kfree(sp);
395
		return ERR_PTR(ret);
396
	}
397
 
398
//   set_pages_uc(px_page(sp), 1);
399
 
400
	return sp;
401
}
402
 
403
static void free_scratch_page(struct drm_device *dev,
404
			      struct i915_page_scratch *sp)
405
{
406
//   set_pages_wb(px_page(sp), 1);
407
 
408
	cleanup_px(dev, sp);
409
	kfree(sp);
410
}
411
 
412
static struct i915_page_table *alloc_pt(struct drm_device *dev)
413
{
414
	struct i915_page_table *pt;
415
	const size_t count = INTEL_INFO(dev)->gen >= 8 ?
416
		GEN8_PTES : GEN6_PTES;
417
	int ret = -ENOMEM;
418
 
419
	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
420
	if (!pt)
421
		return ERR_PTR(-ENOMEM);
422
 
423
	pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
424
				GFP_KERNEL);
425
 
426
	if (!pt->used_ptes)
427
		goto fail_bitmap;
428
 
429
	ret = setup_px(dev, pt);
430
	if (ret)
431
		goto fail_page_m;
432
 
433
	return pt;
434
 
435
fail_page_m:
436
	kfree(pt->used_ptes);
437
fail_bitmap:
438
	kfree(pt);
439
 
440
	return ERR_PTR(ret);
441
}
442
 
443
static void free_pt(struct drm_device *dev, struct i915_page_table *pt)
444
{
445
	cleanup_px(dev, pt);
446
	kfree(pt->used_ptes);
447
	kfree(pt);
448
}
449
 
450
static void gen8_initialize_pt(struct i915_address_space *vm,
451
			       struct i915_page_table *pt)
452
{
453
	gen8_pte_t scratch_pte;
454
 
455
	scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
456
				      I915_CACHE_LLC, true);
457
 
458
	fill_px(vm->dev, pt, scratch_pte);
459
}
460
 
461
static void gen6_initialize_pt(struct i915_address_space *vm,
462
			       struct i915_page_table *pt)
463
{
464
	gen6_pte_t scratch_pte;
465
 
466
	WARN_ON(px_dma(vm->scratch_page) == 0);
467
 
468
	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
469
				     I915_CACHE_LLC, true, 0);
470
 
471
	fill32_px(vm->dev, pt, scratch_pte);
472
}
473
 
474
static struct i915_page_directory *alloc_pd(struct drm_device *dev)
475
{
476
	struct i915_page_directory *pd;
477
	int ret = -ENOMEM;
478
 
479
	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
480
	if (!pd)
481
		return ERR_PTR(-ENOMEM);
482
 
483
	pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
484
				sizeof(*pd->used_pdes), GFP_KERNEL);
485
	if (!pd->used_pdes)
486
		goto fail_bitmap;
487
 
488
	ret = setup_px(dev, pd);
489
	if (ret)
490
		goto fail_page_m;
491
 
492
	return pd;
493
 
494
fail_page_m:
495
	kfree(pd->used_pdes);
496
fail_bitmap:
497
	kfree(pd);
498
 
499
	return ERR_PTR(ret);
500
}
501
 
502
static void free_pd(struct drm_device *dev, struct i915_page_directory *pd)
503
{
504
	if (px_page(pd)) {
505
		cleanup_px(dev, pd);
506
		kfree(pd->used_pdes);
507
		kfree(pd);
508
	}
509
}
510
 
511
static void gen8_initialize_pd(struct i915_address_space *vm,
512
			       struct i915_page_directory *pd)
513
{
514
	gen8_pde_t scratch_pde;
515
 
516
	scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
517
 
518
	fill_px(vm->dev, pd, scratch_pde);
519
}
520
 
521
static int __pdp_init(struct drm_device *dev,
522
		      struct i915_page_directory_pointer *pdp)
523
{
524
	size_t pdpes = I915_PDPES_PER_PDP(dev);
525
 
526
	pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
527
				  sizeof(unsigned long),
528
				  GFP_KERNEL);
529
	if (!pdp->used_pdpes)
530
		return -ENOMEM;
531
 
532
	pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
533
				      GFP_KERNEL);
534
	if (!pdp->page_directory) {
535
		kfree(pdp->used_pdpes);
536
		/* the PDP might be the statically allocated top level. Keep it
537
		 * as clean as possible */
538
		pdp->used_pdpes = NULL;
539
		return -ENOMEM;
540
	}
541
 
542
	return 0;
543
}
544
 
545
static void __pdp_fini(struct i915_page_directory_pointer *pdp)
546
{
547
	kfree(pdp->used_pdpes);
548
	kfree(pdp->page_directory);
549
	pdp->page_directory = NULL;
550
}
551
 
552
static struct
553
i915_page_directory_pointer *alloc_pdp(struct drm_device *dev)
554
{
555
	struct i915_page_directory_pointer *pdp;
556
	int ret = -ENOMEM;
557
 
558
	WARN_ON(!USES_FULL_48BIT_PPGTT(dev));
559
 
560
	pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
561
	if (!pdp)
562
		return ERR_PTR(-ENOMEM);
563
 
564
	ret = __pdp_init(dev, pdp);
565
	if (ret)
566
		goto fail_bitmap;
567
 
568
	ret = setup_px(dev, pdp);
569
	if (ret)
570
		goto fail_page_m;
571
 
572
	return pdp;
573
 
574
fail_page_m:
575
	__pdp_fini(pdp);
576
fail_bitmap:
577
	kfree(pdp);
578
 
579
	return ERR_PTR(ret);
580
}
581
 
582
static void free_pdp(struct drm_device *dev,
583
		     struct i915_page_directory_pointer *pdp)
584
{
585
	__pdp_fini(pdp);
586
	if (USES_FULL_48BIT_PPGTT(dev)) {
587
		cleanup_px(dev, pdp);
588
		kfree(pdp);
589
	}
590
}
591
 
592
static void gen8_initialize_pdp(struct i915_address_space *vm,
593
				struct i915_page_directory_pointer *pdp)
594
{
595
	gen8_ppgtt_pdpe_t scratch_pdpe;
596
 
597
	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
598
 
599
	fill_px(vm->dev, pdp, scratch_pdpe);
600
}
601
 
602
static void gen8_initialize_pml4(struct i915_address_space *vm,
603
				 struct i915_pml4 *pml4)
604
{
605
	gen8_ppgtt_pml4e_t scratch_pml4e;
606
 
607
	scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
608
					  I915_CACHE_LLC);
609
 
610
	fill_px(vm->dev, pml4, scratch_pml4e);
611
}
612
 
613
static void
614
gen8_setup_page_directory(struct i915_hw_ppgtt *ppgtt,
615
			  struct i915_page_directory_pointer *pdp,
616
			  struct i915_page_directory *pd,
617
			  int index)
618
{
619
	gen8_ppgtt_pdpe_t *page_directorypo;
620
 
621
	if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
622
		return;
623
 
624
	page_directorypo = kmap_px(pdp);
625
	page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
626
	kunmap_px(ppgtt, page_directorypo);
627
}
628
 
629
static void
630
gen8_setup_page_directory_pointer(struct i915_hw_ppgtt *ppgtt,
631
				  struct i915_pml4 *pml4,
632
				  struct i915_page_directory_pointer *pdp,
633
				  int index)
634
{
635
	gen8_ppgtt_pml4e_t *pagemap = kmap_px(pml4);
636
 
637
	WARN_ON(!USES_FULL_48BIT_PPGTT(ppgtt->base.dev));
638
	pagemap[index] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
639
	kunmap_px(ppgtt, pagemap);
640
}
641
 
4560 Serge 642
/* Broadwell Page Directory Pointer Descriptors */
6084 serge 643
static int gen8_write_pdp(struct drm_i915_gem_request *req,
644
			  unsigned entry,
645
			  dma_addr_t addr)
4560 Serge 646
{
6084 serge 647
	struct intel_engine_cs *ring = req->ring;
4560 Serge 648
	int ret;
649
 
650
	BUG_ON(entry >= 4);
651
 
6084 serge 652
	ret = intel_ring_begin(req, 6);
4560 Serge 653
	if (ret)
654
		return ret;
655
 
656
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
657
	intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
6084 serge 658
	intel_ring_emit(ring, upper_32_bits(addr));
4560 Serge 659
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
660
	intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
6084 serge 661
	intel_ring_emit(ring, lower_32_bits(addr));
4560 Serge 662
	intel_ring_advance(ring);
663
 
664
	return 0;
665
}
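
/*
 * Note: gen8_write_pdp() emits two MI_LOAD_REGISTER_IMM commands per call,
 * loading first the upper and then the lower 32 bits of the page directory
 * address into the ring's PDP registers for @entry.  The legacy (32b)
 * switch below repeats this for all four PDP slots, whereas the 48b switch
 * only loads slot 0 with the PML4 address.
 */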
666
 
6084 serge 667
static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
668
				 struct drm_i915_gem_request *req)
4560 Serge 669
{
5060 serge 670
	int i, ret;
4560 Serge 671
 
6084 serge 672
	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
673
		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);
4560 Serge 674
 
6084 serge 675
		ret = gen8_write_pdp(req, i, pd_daddr);
676
		if (ret)
5060 serge 677
			return ret;
4560 Serge 678
	}
5060 serge 679
 
4560 Serge 680
	return 0;
681
}
682
 
6084 serge 683
static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
684
			      struct drm_i915_gem_request *req)
4560 Serge 685
{
6084 serge 686
	return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
687
}
688
 
689
static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
690
				       struct i915_page_directory_pointer *pdp,
691
				       uint64_t start,
692
				       uint64_t length,
693
				       gen8_pte_t scratch_pte)
694
{
4560 Serge 695
	struct i915_hw_ppgtt *ppgtt =
696
		container_of(vm, struct i915_hw_ppgtt, base);
6084 serge 697
	gen8_pte_t *pt_vaddr;
698
	unsigned pdpe = gen8_pdpe_index(start);
699
	unsigned pde = gen8_pde_index(start);
700
	unsigned pte = gen8_pte_index(start);
5060 serge 701
	unsigned num_entries = length >> PAGE_SHIFT;
4560 Serge 702
	unsigned last_pte, i;
703
 
6084 serge 704
	if (WARN_ON(!pdp))
705
		return;
4560 Serge 706
 
707
	while (num_entries) {
6084 serge 708
		struct i915_page_directory *pd;
709
		struct i915_page_table *pt;
4560 Serge 710
 
6084 serge 711
		if (WARN_ON(!pdp->page_directory[pdpe]))
712
			break;
713
 
714
		pd = pdp->page_directory[pdpe];
715
 
716
		if (WARN_ON(!pd->page_table[pde]))
717
			break;
718
 
719
		pt = pd->page_table[pde];
720
 
721
		if (WARN_ON(!px_page(pt)))
722
			break;
723
 
5060 serge 724
		last_pte = pte + num_entries;
6084 serge 725
		if (last_pte > GEN8_PTES)
726
			last_pte = GEN8_PTES;
4560 Serge 727
 
6084 serge 728
		pt_vaddr = kmap_px(pt);
4560 Serge 729
 
5060 serge 730
		for (i = pte; i < last_pte; i++) {
4560 Serge 731
			pt_vaddr[i] = scratch_pte;
5060 serge 732
			num_entries--;
733
		}
4560 Serge 734
 
6084 serge 735
		kunmap_px(ppgtt, pt);
5060 serge 736
 
737
		pte = 0;
6084 serge 738
		if (++pde == I915_PDES) {
739
			if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
740
				break;
5060 serge 741
			pde = 0;
742
		}
4560 Serge 743
	}
744
}
745
 
6084 serge 746
static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
747
				   uint64_t start,
748
				   uint64_t length,
749
				   bool use_scratch)
4560 Serge 750
{
751
	struct i915_hw_ppgtt *ppgtt =
752
		container_of(vm, struct i915_hw_ppgtt, base);
6084 serge 753
	gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
754
						 I915_CACHE_LLC, use_scratch);
4560 Serge 755
 
6084 serge 756
	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
757
		gen8_ppgtt_clear_pte_range(vm, &ppgtt->pdp, start, length,
758
					   scratch_pte);
759
	} else {
760
		uint64_t templ4, pml4e;
761
		struct i915_page_directory_pointer *pdp;
762
 
763
		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) {
764
			gen8_ppgtt_clear_pte_range(vm, pdp, start, length,
765
						   scratch_pte);
766
		}
767
	}
768
}
769
 
770
static void
771
gen8_ppgtt_insert_pte_entries(struct i915_address_space *vm,
772
			      struct i915_page_directory_pointer *pdp,
773
			      struct sg_page_iter *sg_iter,
774
			      uint64_t start,
775
			      enum i915_cache_level cache_level)
776
{
777
	struct i915_hw_ppgtt *ppgtt =
778
		container_of(vm, struct i915_hw_ppgtt, base);
779
	gen8_pte_t *pt_vaddr;
780
	unsigned pdpe = gen8_pdpe_index(start);
781
	unsigned pde = gen8_pde_index(start);
782
	unsigned pte = gen8_pte_index(start);
783
 
5354 serge 784
	pt_vaddr = NULL;
4560 Serge 785
 
6084 serge 786
	while (__sg_page_iter_next(sg_iter)) {
787
		if (pt_vaddr == NULL) {
788
			struct i915_page_directory *pd = pdp->page_directory[pdpe];
789
			struct i915_page_table *pt = pd->page_table[pde];
790
			pt_vaddr = kmap_px(pt);
791
		}
4560 Serge 792
 
5060 serge 793
		pt_vaddr[pte] =
6084 serge 794
			gen8_pte_encode(sg_page_iter_dma_address(sg_iter),
4560 Serge 795
					cache_level, true);
6084 serge 796
		if (++pte == GEN8_PTES) {
797
			kunmap_px(ppgtt, pt_vaddr);
5354 serge 798
			pt_vaddr = NULL;
6084 serge 799
			if (++pde == I915_PDES) {
800
				if (++pdpe == I915_PDPES_PER_PDP(vm->dev))
801
					break;
5060 serge 802
				pde = 0;
803
			}
804
			pte = 0;
4560 Serge 805
		}
806
	}
6084 serge 807
 
808
	if (pt_vaddr)
809
		kunmap_px(ppgtt, pt_vaddr);
810
}
811
 
812
static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
813
				      struct sg_table *pages,
814
				      uint64_t start,
815
				      enum i915_cache_level cache_level,
816
				      u32 unused)
817
{
818
	struct i915_hw_ppgtt *ppgtt =
819
		container_of(vm, struct i915_hw_ppgtt, base);
820
	struct sg_page_iter sg_iter;
821
 
822
	__sg_page_iter_start(&sg_iter, pages->sgl, sg_nents(pages->sgl), 0);
823
 
824
	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
825
		gen8_ppgtt_insert_pte_entries(vm, &ppgtt->pdp, &sg_iter, start,
826
					      cache_level);
827
	} else {
828
		struct i915_page_directory_pointer *pdp;
829
		uint64_t templ4, pml4e;
830
		uint64_t length = (uint64_t)pages->orig_nents << PAGE_SHIFT;
831
 
832
		gen8_for_each_pml4e(pdp, &ppgtt->pml4, start, length, templ4, pml4e) {
833
			gen8_ppgtt_insert_pte_entries(vm, pdp, &sg_iter,
834
						      start, cache_level);
835
		}
5354 serge 836
	}
4560 Serge 837
}
838
 
6084 serge 839
static void gen8_free_page_tables(struct drm_device *dev,
840
				  struct i915_page_directory *pd)
4560 Serge 841
{
5060 serge 842
	int i;
843
 
6084 serge 844
	if (!px_page(pd))
5060 serge 845
		return;
846
 
6084 serge 847
	for_each_set_bit(i, pd->used_pdes, I915_PDES) {
848
		if (WARN_ON(!pd->page_table[i]))
849
			continue;
850
 
851
		free_pt(dev, pd->page_table[i]);
852
		pd->page_table[i] = NULL;
853
	}
5060 serge 854
}
855
 
6084 serge 856
static int gen8_init_scratch(struct i915_address_space *vm)
5060 serge 857
{
6084 serge 858
	struct drm_device *dev = vm->dev;
859
 
860
	vm->scratch_page = alloc_scratch_page(dev);
861
	if (IS_ERR(vm->scratch_page))
862
		return PTR_ERR(vm->scratch_page);
863
 
864
	vm->scratch_pt = alloc_pt(dev);
865
	if (IS_ERR(vm->scratch_pt)) {
866
		free_scratch_page(dev, vm->scratch_page);
867
		return PTR_ERR(vm->scratch_pt);
868
	}
869
 
870
	vm->scratch_pd = alloc_pd(dev);
871
	if (IS_ERR(vm->scratch_pd)) {
872
		free_pt(dev, vm->scratch_pt);
873
		free_scratch_page(dev, vm->scratch_page);
874
		return PTR_ERR(vm->scratch_pd);
875
	}
876
 
877
	if (USES_FULL_48BIT_PPGTT(dev)) {
878
		vm->scratch_pdp = alloc_pdp(dev);
879
		if (IS_ERR(vm->scratch_pdp)) {
880
			free_pd(dev, vm->scratch_pd);
881
			free_pt(dev, vm->scratch_pt);
882
			free_scratch_page(dev, vm->scratch_page);
883
			return PTR_ERR(vm->scratch_pdp);
884
		}
885
	}
886
 
887
	gen8_initialize_pt(vm, vm->scratch_pt);
888
	gen8_initialize_pd(vm, vm->scratch_pd);
889
	if (USES_FULL_48BIT_PPGTT(dev))
890
		gen8_initialize_pdp(vm, vm->scratch_pdp);
891
 
892
	return 0;
893
}
894
 
895
static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
896
{
897
	enum vgt_g2v_type msg;
898
	struct drm_device *dev = ppgtt->base.dev;
899
	struct drm_i915_private *dev_priv = dev->dev_private;
900
	unsigned int offset = vgtif_reg(pdp0_lo);
5060 serge 901
	int i;
902
 
6084 serge 903
	if (USES_FULL_48BIT_PPGTT(dev)) {
904
		u64 daddr = px_dma(&ppgtt->pml4);
905
 
906
		I915_WRITE(offset, lower_32_bits(daddr));
907
		I915_WRITE(offset + 4, upper_32_bits(daddr));
908
 
909
		msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
910
				VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
911
	} else {
912
		for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
913
			u64 daddr = i915_page_dir_dma_addr(ppgtt, i);
914
 
915
			I915_WRITE(offset, lower_32_bits(daddr));
916
			I915_WRITE(offset + 4, upper_32_bits(daddr));
917
 
918
			offset += 8;
919
		}
920
 
921
		msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
922
				VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
5060 serge 923
	}
924
 
6084 serge 925
	I915_WRITE(vgtif_reg(g2v_notify), msg);
926
 
927
	return 0;
5060 serge 928
}
929
 
6084 serge 930
static void gen8_free_scratch(struct i915_address_space *vm)
5060 serge 931
{
6084 serge 932
	struct drm_device *dev = vm->dev;
4560 Serge 933
 
6084 serge 934
	if (USES_FULL_48BIT_PPGTT(dev))
935
		free_pdp(dev, vm->scratch_pdp);
936
	free_pd(dev, vm->scratch_pd);
937
	free_pt(dev, vm->scratch_pt);
938
	free_scratch_page(dev, vm->scratch_page);
939
}
940
 
941
static void gen8_ppgtt_cleanup_3lvl(struct drm_device *dev,
942
				    struct i915_page_directory_pointer *pdp)
943
{
944
	int i;
945
 
946
	for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev)) {
947
		if (WARN_ON(!pdp->page_directory[i]))
5060 serge 948
			continue;
4560 Serge 949
 
6084 serge 950
		gen8_free_page_tables(dev, pdp->page_directory[i]);
951
		free_pd(dev, pdp->page_directory[i]);
952
	}
4560 Serge 953
 
6084 serge 954
	free_pdp(dev, pdp);
955
}
956
 
957
static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
958
{
959
	int i;
960
 
961
	for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
962
		if (WARN_ON(!ppgtt->pml4.pdps[i]))
963
			continue;
964
 
965
		gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, ppgtt->pml4.pdps[i]);
5060 serge 966
	}
6084 serge 967
 
968
	cleanup_px(ppgtt->base.dev, &ppgtt->pml4);
5060 serge 969
}
4560 Serge 970
 
5060 serge 971
static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
972
{
973
	struct i915_hw_ppgtt *ppgtt =
974
		container_of(vm, struct i915_hw_ppgtt, base);
975
 
6084 serge 976
	if (intel_vgpu_active(vm->dev))
977
		gen8_ppgtt_notify_vgt(ppgtt, false);
978
 
979
	if (!USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
980
		gen8_ppgtt_cleanup_3lvl(ppgtt->base.dev, &ppgtt->pdp);
981
	else
982
		gen8_ppgtt_cleanup_4lvl(ppgtt);
983
 
984
	gen8_free_scratch(vm);
5060 serge 985
}
986
 
6084 serge 987
/**
988
 * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
989
 * @vm:	Master vm structure.
990
 * @pd:	Page directory for this address range.
991
 * @start:	Starting virtual address to begin allocations.
992
 * @length:	Size of the allocations.
993
 * @new_pts:	Bitmap set by function with new allocations. Likely used by the
994
 *		caller to free on error.
995
 *
996
 * Allocate the required number of page tables. Extremely similar to
997
 * gen8_ppgtt_alloc_page_directories(). The main difference is that here we are limited by
998
 * the page directory boundary (instead of the page directory pointer). That
999
 * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
1000
 * possible, and likely, that the caller will need to use multiple calls of this
1001
 * function to achieve the appropriate allocation.
1002
 *
1003
 * Return: 0 if success; negative error code otherwise.
1004
 */
1005
static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
1006
				     struct i915_page_directory *pd,
1007
				     uint64_t start,
1008
				     uint64_t length,
1009
				     unsigned long *new_pts)
5060 serge 1010
{
6084 serge 1011
	struct drm_device *dev = vm->dev;
1012
	struct i915_page_table *pt;
1013
	uint64_t temp;
1014
	uint32_t pde;
5060 serge 1015
 
6084 serge 1016
	gen8_for_each_pde(pt, pd, start, length, temp, pde) {
1017
		/* Don't reallocate page tables */
1018
		if (test_bit(pde, pd->used_pdes)) {
1019
			/* Scratch is never allocated this way */
1020
			WARN_ON(pt == vm->scratch_pt);
1021
			continue;
1022
		}
5060 serge 1023
 
6084 serge 1024
		pt = alloc_pt(dev);
1025
		if (IS_ERR(pt))
1026
			goto unwind_out;
5060 serge 1027
 
6084 serge 1028
		gen8_initialize_pt(vm, pt);
1029
		pd->page_table[pde] = pt;
1030
		__set_bit(pde, new_pts);
1031
		trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
1032
	}
5060 serge 1033
 
6084 serge 1034
	return 0;
1035
 
1036
unwind_out:
1037
	for_each_set_bit(pde, new_pts, I915_PDES)
1038
		free_pt(dev, pd->page_table[pde]);
1039
 
1040
	return -ENOMEM;
5060 serge 1041
}
1042
 
6084 serge 1043
/**
1044
 * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
1045
 * @vm:	Master vm structure.
1046
 * @pdp:	Page directory pointer for this address range.
1047
 * @start:	Starting virtual address to begin allocations.
1048
 * @length:	Size of the allocations.
1049
 * @new_pds:	Bitmap set by function with new allocations. Likely used by the
1050
 *		caller to free on error.
1051
 *
1052
 * Allocate the required number of page directories starting at the pdpe index of
1053
 * @start, and ending at the pdpe index @start + @length. This function will skip
1054
 * over already allocated page directories within the range, and only allocate
1055
 * new ones, setting the appropriate pointer within the pdp as well as the
1056
 * correct position in the bitmap @new_pds.
1057
 *
1058
 * The function will only allocate the pages within the range for a given page
1059
 * directory pointer. In other words, if @start + @length straddles a virtually
1060
 * addressed PDP boundary (512GB for 4k pages), there will be more allocations
1061
 * required by the caller. This is not currently possible, and the BUG in the
1062
 * code will prevent it.
1063
 *
1064
 * Return: 0 if success; negative error code otherwise.
1065
 */
1066
static int
1067
gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
1068
				  struct i915_page_directory_pointer *pdp,
1069
				  uint64_t start,
1070
				  uint64_t length,
1071
				  unsigned long *new_pds)
5060 serge 1072
{
6084 serge 1073
	struct drm_device *dev = vm->dev;
1074
	struct i915_page_directory *pd;
1075
	uint64_t temp;
1076
	uint32_t pdpe;
1077
	uint32_t pdpes = I915_PDPES_PER_PDP(dev);
5060 serge 1078
 
6084 serge 1079
	WARN_ON(!bitmap_empty(new_pds, pdpes));
1080
 
1081
	gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
1082
		if (test_bit(pdpe, pdp->used_pdpes))
1083
			continue;
1084
 
1085
		pd = alloc_pd(dev);
1086
		if (IS_ERR(pd))
5060 serge 1087
			goto unwind_out;
6084 serge 1088
 
1089
		gen8_initialize_pd(vm, pd);
1090
		pdp->page_directory[pdpe] = pd;
1091
		__set_bit(pdpe, new_pds);
1092
		trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
4560 Serge 1093
	}
1094
 
5060 serge 1095
	return 0;
1096
 
1097
unwind_out:
6084 serge 1098
	for_each_set_bit(pdpe, new_pds, pdpes)
1099
		free_pd(dev, pdp->page_directory[pdpe]);
5060 serge 1100
 
6084 serge 1101
	return -ENOMEM;
4560 Serge 1102
}
1103
 
6084 serge 1104
/**
1105
 * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
1106
 * @vm:	Master vm structure.
1107
 * @pml4:	Page map level 4 for this address range.
1108
 * @start:	Starting virtual address to begin allocations.
1109
 * @length:	Size of the allocations.
1110
 * @new_pdps:	Bitmap set by function with new allocations. Likely used by the
1111
 *		caller to free on error.
1112
 *
1113
 * Allocate the required number of page directory pointers. Extremely similar to
1114
 * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
1115
 * The main difference is here we are limited by the pml4 boundary (instead of
1116
 * the page directory pointer).
1117
 *
1118
 * Return: 0 if success; negative error code otherwise.
1119
 */
1120
static int
1121
gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
1122
				  struct i915_pml4 *pml4,
1123
				  uint64_t start,
1124
				  uint64_t length,
1125
				  unsigned long *new_pdps)
5060 serge 1126
{
6084 serge 1127
	struct drm_device *dev = vm->dev;
1128
	struct i915_page_directory_pointer *pdp;
1129
	uint64_t temp;
1130
	uint32_t pml4e;
5060 serge 1131
 
6084 serge 1132
	WARN_ON(!bitmap_empty(new_pdps, GEN8_PML4ES_PER_PML4));
1133
 
1134
	gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
1135
		if (!test_bit(pml4e, pml4->used_pml4es)) {
1136
			pdp = alloc_pdp(dev);
1137
			if (IS_ERR(pdp))
1138
				goto unwind_out;
1139
 
1140
			gen8_initialize_pdp(vm, pdp);
1141
			pml4->pdps[pml4e] = pdp;
1142
			__set_bit(pml4e, new_pdps);
1143
			trace_i915_page_directory_pointer_entry_alloc(vm,
1144
								      pml4e,
1145
								      start,
1146
								      GEN8_PML4E_SHIFT);
5060 serge 1147
		}
6084 serge 1148
	}
5060 serge 1149
 
1150
	return 0;
6084 serge 1151
 
1152
unwind_out:
1153
	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1154
		free_pdp(dev, pml4->pdps[pml4e]);
1155
 
1156
	return -ENOMEM;
5060 serge 1157
}
1158
 
6084 serge 1159
static void
1160
free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
5060 serge 1161
{
6084 serge 1162
	kfree(new_pts);
1163
	kfree(new_pds);
1164
}
1165
 
1166
/* Fills in the page directory bitmap, and the array of page table bitmaps. Both
1167
 * of these are based on the number of PDPEs in the system.
1168
 */
1169
static
1170
int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
1171
					 unsigned long **new_pts,
1172
					 uint32_t pdpes)
1173
{
1174
	unsigned long *pds;
1175
	unsigned long *pts;
1176
 
1177
	pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
1178
	if (!pds)
5060 serge 1179
		return -ENOMEM;
1180
 
6084 serge 1181
	pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
1182
		      GFP_TEMPORARY);
1183
	if (!pts)
1184
		goto err_out;
5060 serge 1185
 
6084 serge 1186
	*new_pds = pds;
1187
	*new_pts = pts;
1188
 
5060 serge 1189
	return 0;
6084 serge 1190
 
1191
err_out:
1192
	free_gen8_temp_bitmaps(pds, pts);
1193
	return -ENOMEM;
5060 serge 1194
}
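
/*
 * Note on the temporary bitmaps above: they record what a single
 * allocation pass created so it can be unwound on failure.  new_pds holds
 * one bit per possible PDPE, while new_pts packs one I915_PDES-bit bitmap
 * per PDPE, which is why it is sized pdpes * BITS_TO_LONGS(I915_PDES).
 */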
1195
 
6084 serge 1196
/* PDE TLBs are a pain to invalidate on GEN8+. When we modify
1197
 * the page table structures, we mark them dirty so that
1198
 * context switching/execlist queuing code takes extra steps
1199
 * to ensure that tlbs are flushed.
1200
 */
1201
static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
5060 serge 1202
{
6084 serge 1203
	ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
1204
}
1205
 
1206
static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
1207
				    struct i915_page_directory_pointer *pdp,
1208
				    uint64_t start,
1209
				    uint64_t length)
1210
{
1211
	struct i915_hw_ppgtt *ppgtt =
1212
		container_of(vm, struct i915_hw_ppgtt, base);
1213
	unsigned long *new_page_dirs, *new_page_tables;
1214
	struct drm_device *dev = vm->dev;
1215
	struct i915_page_directory *pd;
1216
	const uint64_t orig_start = start;
1217
	const uint64_t orig_length = length;
1218
	uint64_t temp;
1219
	uint32_t pdpe;
1220
	uint32_t pdpes = I915_PDPES_PER_PDP(dev);
5060 serge 1221
	int ret;
1222
 
6084 serge 1223
	/* Wrap is never okay since we can only represent 48b, and we don't
1224
	 * actually use the other side of the canonical address space.
1225
	 */
1226
	if (WARN_ON(start + length < start))
1227
		return -ENODEV;
1228
 
1229
	if (WARN_ON(start + length > vm->total))
1230
		return -ENODEV;
1231
 
1232
	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
5060 serge 1233
	if (ret)
1234
		return ret;
1235
 
6084 serge 1236
	/* Do the allocations first so we can easily bail out */
1237
	ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
1238
						new_page_dirs);
5060 serge 1239
	if (ret) {
6084 serge 1240
		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
5060 serge 1241
		return ret;
1242
	}
1243
 
6084 serge 1244
	/* For every page directory referenced, allocate page tables */
1245
	gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
1246
		ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
1247
						new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
1248
		if (ret)
1249
			goto err_out;
1250
	}
5060 serge 1251
 
6084 serge 1252
	start = orig_start;
1253
	length = orig_length;
5060 serge 1254
 
6084 serge 1255
	/* Allocations have completed successfully, so set the bitmaps, and do
1256
	 * the mappings. */
1257
	gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
1258
		gen8_pde_t *const page_directory = kmap_px(pd);
1259
		struct i915_page_table *pt;
1260
		uint64_t pd_len = length;
1261
		uint64_t pd_start = start;
1262
		uint32_t pde;
1263
 
1264
		/* Every pd should be allocated, we just did that above. */
1265
		WARN_ON(!pd);
1266
 
1267
		gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
1268
			/* Same reasoning as pd */
1269
			WARN_ON(!pt);
1270
			WARN_ON(!pd_len);
1271
			WARN_ON(!gen8_pte_count(pd_start, pd_len));
1272
 
1273
			/* Set our used ptes within the page table */
1274
			bitmap_set(pt->used_ptes,
1275
				   gen8_pte_index(pd_start),
1276
				   gen8_pte_count(pd_start, pd_len));
1277
 
1278
			/* Our pde is now pointing to the pagetable, pt */
1279
			__set_bit(pde, pd->used_pdes);
1280
 
1281
			/* Map the PDE to the page table */
1282
			page_directory[pde] = gen8_pde_encode(px_dma(pt),
1283
							      I915_CACHE_LLC);
1284
			trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
1285
							gen8_pte_index(start),
1286
							gen8_pte_count(start, length),
1287
							GEN8_PTES);
1288
 
1289
			/* NB: We haven't yet mapped ptes to pages. At this
1290
			 * point we're still relying on insert_entries() */
1291
		}
1292
 
1293
		kunmap_px(ppgtt, page_directory);
1294
		__set_bit(pdpe, pdp->used_pdpes);
1295
		gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
1296
	}
1297
 
1298
	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1299
	mark_tlbs_dirty(ppgtt);
1300
	return 0;
1301
 
1302
err_out:
1303
	while (pdpe--) {
1304
		for_each_set_bit(temp, new_page_tables + pdpe *
1305
				BITS_TO_LONGS(I915_PDES), I915_PDES)
1306
			free_pt(dev, pdp->page_directory[pdpe]->page_table[temp]);
1307
	}
1308
 
1309
	for_each_set_bit(pdpe, new_page_dirs, pdpes)
1310
		free_pd(dev, pdp->page_directory[pdpe]);
1311
 
1312
	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1313
	mark_tlbs_dirty(ppgtt);
5060 serge 1314
	return ret;
1315
}
1316
 
6084 serge 1317
static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
1318
				    struct i915_pml4 *pml4,
1319
				    uint64_t start,
1320
				    uint64_t length)
5060 serge 1321
{
6084 serge 1322
	DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
1323
	struct i915_hw_ppgtt *ppgtt =
1324
			container_of(vm, struct i915_hw_ppgtt, base);
1325
	struct i915_page_directory_pointer *pdp;
1326
	uint64_t temp, pml4e;
1327
	int ret = 0;
5060 serge 1328
 
6084 serge 1329
	/* Do the pml4 allocations first, so we don't need to track the newly
1330
	 * allocated tables below the pdp */
1331
	bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);
5060 serge 1332
 
6084 serge 1333
	/* The page directory and page table allocations are done in the shared 3
1334
	 * and 4 level code. Just allocate the pdps.
1335
	 */
1336
	ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length,
1337
						new_pdps);
1338
	if (ret)
1339
		return ret;
5060 serge 1340
 
6084 serge 1341
	WARN(bitmap_weight(new_pdps, GEN8_PML4ES_PER_PML4) > 2,
1342
	     "The allocation has spanned more than 512GB. "
1343
	     "It is highly likely this is incorrect.");
5060 serge 1344
 
6084 serge 1345
	gen8_for_each_pml4e(pdp, pml4, start, length, temp, pml4e) {
1346
		WARN_ON(!pdp);
1347
 
1348
		ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
1349
		if (ret)
1350
			goto err_out;
1351
 
1352
		gen8_setup_page_directory_pointer(ppgtt, pml4, pdp, pml4e);
1353
	}
1354
 
1355
	bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
1356
		  GEN8_PML4ES_PER_PML4);
1357
 
5060 serge 1358
	return 0;
6084 serge 1359
 
1360
err_out:
1361
	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1362
		gen8_ppgtt_cleanup_3lvl(vm->dev, pml4->pdps[pml4e]);
1363
 
1364
	return ret;
5060 serge 1365
}
1366
 
6084 serge 1367
static int gen8_alloc_va_range(struct i915_address_space *vm,
1368
			       uint64_t start, uint64_t length)
5060 serge 1369
{
6084 serge 1370
	struct i915_hw_ppgtt *ppgtt =
1371
		container_of(vm, struct i915_hw_ppgtt, base);
1372
 
1373
	if (USES_FULL_48BIT_PPGTT(vm->dev))
1374
		return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
1375
	else
1376
		return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
1377
}
1378
 
1379
static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
1380
			  uint64_t start, uint64_t length,
1381
			  gen8_pte_t scratch_pte,
1382
			  struct seq_file *m)
1383
{
1384
	struct i915_page_directory *pd;
1385
	uint64_t temp;
1386
	uint32_t pdpe;
1387
 
1388
	gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
1389
		struct i915_page_table *pt;
1390
		uint64_t pd_len = length;
1391
		uint64_t pd_start = start;
1392
		uint32_t pde;
1393
 
1394
		if (!test_bit(pdpe, pdp->used_pdpes))
1395
			continue;
1396
 
1397
		seq_printf(m, "\tPDPE #%d\n", pdpe);
1398
		gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
1399
			uint32_t  pte;
1400
			gen8_pte_t *pt_vaddr;
1401
 
1402
			if (!test_bit(pde, pd->used_pdes))
1403
				continue;
1404
 
1405
			pt_vaddr = kmap_px(pt);
1406
			for (pte = 0; pte < GEN8_PTES; pte += 4) {
1407
				uint64_t va =
1408
					(pdpe << GEN8_PDPE_SHIFT) |
1409
					(pde << GEN8_PDE_SHIFT) |
1410
					(pte << GEN8_PTE_SHIFT);
1411
				int i;
1412
				bool found = false;
1413
 
1414
				for (i = 0; i < 4; i++)
1415
					if (pt_vaddr[pte + i] != scratch_pte)
1416
						found = true;
1417
				if (!found)
1418
					continue;
1419
 
1420
				seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
1421
				for (i = 0; i < 4; i++) {
1422
					if (pt_vaddr[pte + i] != scratch_pte)
1423
						seq_printf(m, " %llx", pt_vaddr[pte + i]);
1424
					else
1425
						seq_puts(m, "  SCRATCH ");
1426
				}
1427
				seq_puts(m, "\n");
1428
			}
1429
			/* don't use kunmap_px, it could trigger
1430
			 * an unnecessary flush.
1431
			 */
1432
			kunmap_atomic(pt_vaddr);
1433
		}
1434
	}
1435
}
1436
 
1437
static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
1438
{
1439
	struct i915_address_space *vm = &ppgtt->base;
1440
	uint64_t start = ppgtt->base.start;
1441
	uint64_t length = ppgtt->base.total;
1442
	gen8_pte_t scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
1443
						 I915_CACHE_LLC, true);
1444
 
1445
	if (!USES_FULL_48BIT_PPGTT(vm->dev)) {
1446
		gen8_dump_pdp(&ppgtt->pdp, start, length, scratch_pte, m);
1447
	} else {
1448
		uint64_t templ4, pml4e;
1449
		struct i915_pml4 *pml4 = &ppgtt->pml4;
1450
		struct i915_page_directory_pointer *pdp;
1451
 
1452
		gen8_for_each_pml4e(pdp, pml4, start, length, templ4, pml4e) {
1453
			if (!test_bit(pml4e, pml4->used_pml4es))
1454
				continue;
1455
 
1456
			seq_printf(m, "    PML4E #%llu\n", pml4e);
1457
			gen8_dump_pdp(pdp, start, length, scratch_pte, m);
1458
		}
1459
	}
1460
}
1461
 
1462
static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
1463
{
1464
	unsigned long *new_page_dirs, *new_page_tables;
1465
	uint32_t pdpes = I915_PDPES_PER_PDP(dev);
5060 serge 1466
	int ret;
1467
 
6084 serge 1468
	/* We allocate a temp bitmap for page tables for no gain,
1469
	 * but as this is for init only, let's keep things simple
1470
	 */
1471
	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
1472
	if (ret)
1473
		return ret;
5060 serge 1474
 
6084 serge 1475
	/* Allocate for all pdps regardless of how the ppgtt
1476
	 * was defined.
1477
	 */
1478
	ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
1479
						0, 1ULL << 32,
1480
						new_page_dirs);
1481
	if (!ret)
1482
		*ppgtt->pdp.used_pdpes = *new_page_dirs;
5060 serge 1483
 
6084 serge 1484
	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
1485
 
1486
	return ret;
5060 serge 1487
}
1488
 
6084 serge 1489
/*
5060 serge 1490
 * GEN8 legacy ppgtt programming is accomplished through a max of 4 PDP registers
1491
 * with a net effect resembling a 2-level page table in normal x86 terms. Each
1492
 * PDP represents 1GB of memory; 4 * 512 * 512 * 4096 = 4GB legacy 32b address
1493
 * space.
4560 Serge 1494
 *
5060 serge 1495
 */
6084 serge 1496
static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
4560 Serge 1497
{
6084 serge 1498
	int ret;
4560 Serge 1499
 
6084 serge 1500
	ret = gen8_init_scratch(&ppgtt->base);
5060 serge 1501
	if (ret)
1502
		return ret;
4560 Serge 1503
 
6084 serge 1504
	ppgtt->base.start = 0;
1505
	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
1506
	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
1507
	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
1508
	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
1509
	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
1510
	ppgtt->base.bind_vma = ppgtt_bind_vma;
1511
	ppgtt->debug_dump = gen8_dump_ppgtt;
1512
 
1513
	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
1514
		ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
5060 serge 1515
		if (ret)
6084 serge 1516
			goto free_scratch;
4560 Serge 1517
 
6084 serge 1518
		gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);
1519
 
1520
		ppgtt->base.total = 1ULL << 48;
1521
		ppgtt->switch_mm = gen8_48b_mm_switch;
1522
	} else {
1523
		ret = __pdp_init(ppgtt->base.dev, &ppgtt->pdp);
1524
		if (ret)
1525
			goto free_scratch;
1526
 
1527
		ppgtt->base.total = 1ULL << 32;
1528
		ppgtt->switch_mm = gen8_legacy_mm_switch;
1529
		trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
1530
							      0, 0,
1531
							      GEN8_PML4E_SHIFT);
1532
 
1533
		if (intel_vgpu_active(ppgtt->base.dev)) {
1534
			ret = gen8_preallocate_top_level_pdps(ppgtt);
5060 serge 1535
			if (ret)
6084 serge 1536
				goto free_scratch;
4560 Serge 1537
		}
1538
	}
1539
 
6084 serge 1540
	if (intel_vgpu_active(ppgtt->base.dev))
1541
		gen8_ppgtt_notify_vgt(ppgtt, true);
4560 Serge 1542
 
1543
	return 0;
1544
 
6084 serge 1545
free_scratch:
1546
	gen8_free_scratch(&ppgtt->base);
4560 Serge 1547
	return ret;
1548
}
1549
 
6084 serge 1550
static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
4104 Serge 1551
{
6084 serge 1552
	struct i915_address_space *vm = &ppgtt->base;
1553
	struct i915_page_table *unused;
1554
	gen6_pte_t scratch_pte;
3746 Serge 1555
	uint32_t pd_entry;
6084 serge 1556
	uint32_t  pte, pde, temp;
1557
	uint32_t start = ppgtt->base.start, length = ppgtt->base.total;
3746 Serge 1558
 
6084 serge 1559
	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1560
				     I915_CACHE_LLC, true, 0);
3746 Serge 1561
 
6084 serge 1562
	gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) {
1563
		u32 expected;
1564
		gen6_pte_t *pt_vaddr;
1565
		const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
1566
		pd_entry = readl(ppgtt->pd_addr + pde);
1567
		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);
3746 Serge 1568
 
6084 serge 1569
		if (pd_entry != expected)
1570
			seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
1571
				   pde,
1572
				   pd_entry,
1573
				   expected);
1574
		seq_printf(m, "\tPDE: %x\n", pd_entry);
1575
 
1576
		pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]);
1577
 
1578
		for (pte = 0; pte < GEN6_PTES; pte+=4) {
1579
			unsigned long va =
1580
				(pde * PAGE_SIZE * GEN6_PTES) +
1581
				(pte * PAGE_SIZE);
1582
			int i;
1583
			bool found = false;
1584
			for (i = 0; i < 4; i++)
1585
				if (pt_vaddr[pte + i] != scratch_pte)
1586
					found = true;
1587
			if (!found)
1588
				continue;
1589
 
1590
			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
1591
			for (i = 0; i < 4; i++) {
1592
				if (pt_vaddr[pte + i] != scratch_pte)
1593
					seq_printf(m, " %08x", pt_vaddr[pte + i]);
1594
				else
1595
					seq_puts(m, "  SCRATCH ");
1596
			}
1597
			seq_puts(m, "\n");
1598
		}
1599
		kunmap_px(ppgtt, pt_vaddr);
3746 Serge 1600
	}
4104 Serge 1601
}
3746 Serge 1602
 
6084 serge 1603
/* Write pde (index) from the page directory @pd to the page table @pt */
1604
static void gen6_write_pde(struct i915_page_directory *pd,
1605
			    const int pde, struct i915_page_table *pt)
1606
{
1607
	/* Caller needs to make sure the write completes if necessary */
1608
	struct i915_hw_ppgtt *ppgtt =
1609
		container_of(pd, struct i915_hw_ppgtt, pd);
1610
	u32 pd_entry;
1611
 
1612
	pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt));
1613
	pd_entry |= GEN6_PDE_VALID;
1614
 
1615
	writel(pd_entry, ppgtt->pd_addr + pde);
1616
}
1617
 
1618
/* Write all the page tables found in the ppgtt structure to incrementing page
1619
 * directories. */
1620
static void gen6_write_page_range(struct drm_i915_private *dev_priv,
1621
				  struct i915_page_directory *pd,
1622
				  uint32_t start, uint32_t length)
1623
{
1624
	struct i915_page_table *pt;
1625
	uint32_t pde, temp;
1626
 
1627
	gen6_for_each_pde(pt, pd, start, length, temp, pde)
1628
		gen6_write_pde(pd, pde, pt);
1629
 
1630
	/* Make sure write is complete before other code can use this page
1631
	 * table. Also required for WC mapped PTEs */
1632
	readl(dev_priv->gtt.gsm);
1633
}
1634
 
5060 serge 1635
static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
4104 Serge 1636
{
6084 serge 1637
	BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
4104 Serge 1638
 
6084 serge 1639
	return (ppgtt->pd.base.ggtt_offset / 64) << 16;
5060 serge 1640
}
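
/*
 * Note (assumption about the register layout): the gen6/7 page directory
 * lives in the GGTT, and PP_DIR_BASE appears to take its offset in 64-byte
 * cachelines placed in the upper 16 bits of the register, which is what
 * the "/ 64) << 16" above computes; the BUG_ON guards the alignment.
 */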
4104 Serge 1641
 
5060 serge 1642
static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
6084 serge 1643
			 struct drm_i915_gem_request *req)
5060 serge 1644
{
6084 serge 1645
	struct intel_engine_cs *ring = req->ring;
5060 serge 1646
	int ret;
3746 Serge 1647
 
5060 serge 1648
	/* NB: TLBs must be flushed and invalidated before a switch */
6084 serge 1649
	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
5060 serge 1650
	if (ret)
1651
		return ret;
3746 Serge 1652
 
6084 serge 1653
	ret = intel_ring_begin(req, 6);
5060 serge 1654
	if (ret)
1655
		return ret;
3746 Serge 1656
 
5060 serge 1657
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1658
	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
1659
	intel_ring_emit(ring, PP_DIR_DCLV_2G);
1660
	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
1661
	intel_ring_emit(ring, get_pd_offset(ppgtt));
1662
	intel_ring_emit(ring, MI_NOOP);
1663
	intel_ring_advance(ring);
1664
 
1665
	return 0;
1666
}
1667
 
6084 serge 1668
static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt,
1669
			  struct drm_i915_gem_request *req)
1670
{
1671
	struct intel_engine_cs *ring = req->ring;
1672
	struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
1673
 
1674
	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1675
	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1676
	return 0;
1677
}
1678
 
5060 serge 1679
static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
6084 serge 1680
			  struct drm_i915_gem_request *req)
5060 serge 1681
{
6084 serge 1682
	struct intel_engine_cs *ring = req->ring;
5060 serge 1683
	int ret;
1684
 
1685
	/* NB: TLBs must be flushed and invalidated before a switch */
6084 serge 1686
	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
5060 serge 1687
	if (ret)
1688
		return ret;
1689
 
6084 serge 1690
	ret = intel_ring_begin(req, 6);
5060 serge 1691
	if (ret)
1692
		return ret;
1693
 
1694
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
1695
	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
1696
	intel_ring_emit(ring, PP_DIR_DCLV_2G);
1697
	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
1698
	intel_ring_emit(ring, get_pd_offset(ppgtt));
1699
	intel_ring_emit(ring, MI_NOOP);
1700
	intel_ring_advance(ring);
1701
 
1702
	/* XXX: RCS is the only one to auto invalidate the TLBs? */
1703
	if (ring->id != RCS) {
6084 serge 1704
		ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
5060 serge 1705
		if (ret)
1706
			return ret;
1707
	}
1708
 
1709
	return 0;
1710
}
1711
 
1712
static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
6084 serge 1713
			  struct drm_i915_gem_request *req)
5060 serge 1714
{
6084 serge 1715
	struct intel_engine_cs *ring = req->ring;
5060 serge 1716
	struct drm_device *dev = ppgtt->base.dev;
1717
	struct drm_i915_private *dev_priv = dev->dev_private;
1718
 
1719
 
1720
	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
1721
	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
1722
 
1723
	POSTING_READ(RING_PP_DIR_DCLV(ring));
1724
 
1725
	return 0;
1726
}
1727
 
5354 serge 1728
static void gen8_ppgtt_enable(struct drm_device *dev)
5060 serge 1729
{
1730
	struct drm_i915_private *dev_priv = dev->dev_private;
1731
	struct intel_engine_cs *ring;
5354 serge 1732
	int j;
5060 serge 1733
 
1734
	for_each_ring(ring, dev_priv, j) {
6084 serge 1735
		u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0;
5060 serge 1736
		I915_WRITE(RING_MODE_GEN7(ring),
6084 serge 1737
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
5060 serge 1738
	}
1739
}
1740
 
5354 serge 1741
static void gen7_ppgtt_enable(struct drm_device *dev)
5060 serge 1742
{
1743
	struct drm_i915_private *dev_priv = dev->dev_private;
1744
	struct intel_engine_cs *ring;
6084 serge 1745
	uint32_t ecochk, ecobits;
5060 serge 1746
	int i;
3746 Serge 1747
 
6084 serge 1748
	ecobits = I915_READ(GAC_ECO_BITS);
1749
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
3746 Serge 1750
 
6084 serge 1751
	ecochk = I915_READ(GAM_ECOCHK);
1752
	if (IS_HASWELL(dev)) {
1753
		ecochk |= ECOCHK_PPGTT_WB_HSW;
1754
	} else {
1755
		ecochk |= ECOCHK_PPGTT_LLC_IVB;
1756
		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
1757
	}
1758
	I915_WRITE(GAM_ECOCHK, ecochk);
3746 Serge 1759
 
1760
	for_each_ring(ring, dev_priv, i) {
5060 serge 1761
		/* GFX_MODE is per-ring on gen7+ */
6084 serge 1762
		I915_WRITE(RING_MODE_GEN7(ring),
1763
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
3746 Serge 1764
	}
1765
}
1766
 
5354 serge 1767
static void gen6_ppgtt_enable(struct drm_device *dev)
5060 serge 1768
{
1769
	struct drm_i915_private *dev_priv = dev->dev_private;
1770
	uint32_t ecochk, gab_ctl, ecobits;
1771
 
1772
	ecobits = I915_READ(GAC_ECO_BITS);
1773
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
1774
		   ECOBITS_PPGTT_CACHE64B);
1775
 
1776
	gab_ctl = I915_READ(GAB_CTL);
1777
	I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);
1778
 
1779
	ecochk = I915_READ(GAM_ECOCHK);
1780
	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);
1781
 
1782
	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
1783
}
1784
 
3031 serge 1785
/* PPGTT support for Sandybridge/Gen6 and later */
4104 Serge 1786
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
5060 serge 1787
				   uint64_t start,
1788
				   uint64_t length,
4280 Serge 1789
				   bool use_scratch)
3031 serge 1790
{
4104 Serge 1791
	struct i915_hw_ppgtt *ppgtt =
1792
		container_of(vm, struct i915_hw_ppgtt, base);
6084 serge 1793
	gen6_pte_t *pt_vaddr, scratch_pte;
5060 serge 1794
	unsigned first_entry = start >> PAGE_SHIFT;
1795
	unsigned num_entries = length >> PAGE_SHIFT;
6084 serge 1796
	unsigned act_pt = first_entry / GEN6_PTES;
1797
	unsigned first_pte = first_entry % GEN6_PTES;
3031 serge 1798
	unsigned last_pte, i;
1799
 
6084 serge 1800
	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
1801
				     I915_CACHE_LLC, true, 0);
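	/* Walk the range one page table at a time: act_pt selects the table,
	 * first_pte..last_pte the entries within it, and every entry in the
	 * range is pointed back at the scratch page.
	 */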
3031 serge 1802
 
3480 Serge 1803
	while (num_entries) {
6084 serge 1804
		last_pte = first_pte + num_entries;
1805
		if (last_pte > GEN6_PTES)
1806
			last_pte = GEN6_PTES;
3031 serge 1807
 
6084 serge 1808
		pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
3031 serge 1809
 
6084 serge 1810
		for (i = first_pte; i < last_pte; i++)
1811
			pt_vaddr[i] = scratch_pte;
3031 serge 1812
 
6084 serge 1813
		kunmap_px(ppgtt, pt_vaddr);
5354 serge 1814
 
6084 serge 1815
		num_entries -= last_pte - first_pte;
1816
		first_pte = 0;
1817
		act_pt++;
5354 serge 1818
	}
3480 Serge 1819
}
1820
 
4104 Serge 1821
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
3480 Serge 1822
				      struct sg_table *pages,
5060 serge 1823
				      uint64_t start,
1824
				      enum i915_cache_level cache_level, u32 flags)
3480 Serge 1825
{
4104 Serge 1826
	struct i915_hw_ppgtt *ppgtt =
1827
		container_of(vm, struct i915_hw_ppgtt, base);
6084 serge 1828
	gen6_pte_t *pt_vaddr;
5060 serge 1829
	unsigned first_entry = start >> PAGE_SHIFT;
6084 serge 1830
	unsigned act_pt = first_entry / GEN6_PTES;
1831
	unsigned act_pte = first_entry % GEN6_PTES;
3746 Serge 1832
	struct sg_page_iter sg_iter;
3480 Serge 1833
 
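	/* Map each backing page in turn; the page table is kmapped lazily and
	 * swapped for the next one whenever act_pte wraps past GEN6_PTES.
	 */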
5354 serge 1834
	pt_vaddr = NULL;
3746 Serge 1835
	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
5354 serge 1836
		if (pt_vaddr == NULL)
6084 serge 1837
			pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
3480 Serge 1838
 
4560 Serge 1839
		pt_vaddr[act_pte] =
1840
			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
5060 serge 1841
				       cache_level, true, flags);
1842
 
6084 serge 1843
		if (++act_pte == GEN6_PTES) {
1844
			kunmap_px(ppgtt, pt_vaddr);
5354 serge 1845
			pt_vaddr = NULL;
3746 Serge 1846
			act_pt++;
1847
			act_pte = 0;
3480 Serge 1848
		}
6084 serge 1849
	}
5354 serge 1850
	if (pt_vaddr)
6084 serge 1851
		kunmap_px(ppgtt, pt_vaddr);
3031 serge 1852
}
1853
 
6084 serge 1854
static int gen6_alloc_va_range(struct i915_address_space *vm,
1855
			       uint64_t start_in, uint64_t length_in)
3031 serge 1856
{
6084 serge 1857
	DECLARE_BITMAP(new_page_tables, I915_PDES);
1858
	struct drm_device *dev = vm->dev;
1859
	struct drm_i915_private *dev_priv = dev->dev_private;
1860
	struct i915_hw_ppgtt *ppgtt =
1861
				container_of(vm, struct i915_hw_ppgtt, base);
1862
	struct i915_page_table *pt;
1863
	uint32_t start, length, start_save, length_save;
1864
	uint32_t pde, temp;
1865
	int ret;
3480 Serge 1866
 
6084 serge 1867
	if (WARN_ON(start_in + length_in > ppgtt->base.total))
1868
		return -ENODEV;
1869
 
1870
	start = start_save = start_in;
1871
	length = length_save = length_in;
1872
 
1873
	bitmap_zero(new_page_tables, I915_PDES);
1874
 
1875
	/* The allocation is done in two stages so that we can bail out with
1876
	 * minimal amount of pain. The first stage finds new page tables that
1877
	 * need allocation. The second stage marks the ptes in use within the page
1878
	 * tables.
1879
	 */
1880
	gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
1881
		if (pt != vm->scratch_pt) {
1882
			WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
1883
			continue;
1884
		}
1885
 
1886
		/* We've already allocated a page table */
1887
		WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
1888
 
1889
		pt = alloc_pt(dev);
1890
		if (IS_ERR(pt)) {
1891
			ret = PTR_ERR(pt);
1892
			goto unwind_out;
1893
		}
1894
 
1895
		gen6_initialize_pt(vm, pt);
1896
 
1897
		ppgtt->pd.page_table[pde] = pt;
1898
		__set_bit(pde, new_page_tables);
1899
		trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
3480 Serge 1900
	}
6084 serge 1901
 
1902
	start = start_save;
1903
	length = length_save;
1904
 
1905
	gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
1906
		DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);
1907
 
1908
		bitmap_zero(tmp_bitmap, GEN6_PTES);
1909
		bitmap_set(tmp_bitmap, gen6_pte_index(start),
1910
			   gen6_pte_count(start, length));
1911
 
1912
		if (__test_and_clear_bit(pde, new_page_tables))
1913
			gen6_write_pde(&ppgtt->pd, pde, pt);
1914
 
1915
		trace_i915_page_table_entry_map(vm, pde, pt,
1916
					 gen6_pte_index(start),
1917
					 gen6_pte_count(start, length),
1918
					 GEN6_PTES);
1919
		bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
1920
				GEN6_PTES);
1921
	}
1922
 
1923
	WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));
1924
 
1925
	/* Make sure write is complete before other code can use this page
1926
	 * table. Also required for WC mapped PTEs. */
1927
	readl(dev_priv->gtt.gsm);
1928
 
1929
	mark_tlbs_dirty(ppgtt);
1930
	return 0;
1931
 
1932
unwind_out:
1933
	for_each_set_bit(pde, new_page_tables, I915_PDES) {
1934
		struct i915_page_table *pt = ppgtt->pd.page_table[pde];
1935
 
1936
		ppgtt->pd.page_table[pde] = vm->scratch_pt;
1937
		free_pt(vm->dev, pt);
1938
	}
1939
 
1940
	mark_tlbs_dirty(ppgtt);
1941
	return ret;
5060 serge 1942
}
3480 Serge 1943
 
6084 serge 1944
static int gen6_init_scratch(struct i915_address_space *vm)
5060 serge 1945
{
6084 serge 1946
	struct drm_device *dev = vm->dev;
5060 serge 1947
 
6084 serge 1948
	vm->scratch_page = alloc_scratch_page(dev);
1949
	if (IS_ERR(vm->scratch_page))
1950
		return PTR_ERR(vm->scratch_page);
1951
 
1952
	vm->scratch_pt = alloc_pt(dev);
1953
	if (IS_ERR(vm->scratch_pt)) {
1954
		free_scratch_page(dev, vm->scratch_page);
1955
		return PTR_ERR(vm->scratch_pt);
1956
	}
1957
 
1958
	gen6_initialize_pt(vm, vm->scratch_pt);
1959
 
1960
	return 0;
3480 Serge 1961
}
1962
 
6084 serge 1963
static void gen6_free_scratch(struct i915_address_space *vm)
1964
{
1965
	struct drm_device *dev = vm->dev;
1966
 
1967
	free_pt(dev, vm->scratch_pt);
1968
	free_scratch_page(dev, vm->scratch_page);
1969
}
1970
 
5060 serge 1971
static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
3480 Serge 1972
{
5060 serge 1973
	struct i915_hw_ppgtt *ppgtt =
1974
		container_of(vm, struct i915_hw_ppgtt, base);
6084 serge 1975
	struct i915_page_table *pt;
1976
	uint32_t pde;
5060 serge 1977
 
1978
	drm_mm_remove_node(&ppgtt->node);
1979
 
6084 serge 1980
	gen6_for_all_pdes(pt, ppgtt, pde) {
1981
		if (pt != vm->scratch_pt)
1982
			free_pt(ppgtt->base.dev, pt);
1983
	}
1984
 
1985
	gen6_free_scratch(vm);
5060 serge 1986
}
1987
 
1988
static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
1989
{
6084 serge 1990
	struct i915_address_space *vm = &ppgtt->base;
4104 Serge 1991
	struct drm_device *dev = ppgtt->base.dev;
3031 serge 1992
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 1993
	bool retried = false;
1994
	int ret;
3031 serge 1995
 
5060 serge 1996
	/* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
1997
	 * allocator works in address space sizes, so it's multiplied by page
1998
	 * size. We allocate at the top of the GTT to avoid fragmentation.
1999
	 */
2000
	BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
6084 serge 2001
 
2002
	ret = gen6_init_scratch(vm);
2003
	if (ret)
2004
		return ret;
2005
 
5060 serge 2006
alloc:
2007
	ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
2008
						  &ppgtt->node, GEN6_PD_SIZE,
2009
						  GEN6_PD_ALIGN, 0,
2010
						  0, dev_priv->gtt.base.total,
2011
						  DRM_MM_TOPDOWN);
2012
	if (ret == -ENOSPC && !retried) {
2013
		ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
2014
					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
2015
					       I915_CACHE_NONE,
2016
					       0, dev_priv->gtt.base.total,
2017
					       0);
2018
		if (ret)
6084 serge 2019
			goto err_out;
3031 serge 2020
 
5060 serge 2021
		retried = true;
2022
		goto alloc;
2023
	}
2024
 
6084 serge 2025
	if (ret)
2026
		goto err_out;
2027
 
2028
 
5060 serge 2029
	if (ppgtt->node.start < dev_priv->gtt.mappable_end)
2030
		DRM_DEBUG("Forced to use aperture for PDEs\n");
2031
 
6084 serge 2032
	return 0;
2033
 
2034
err_out:
2035
	gen6_free_scratch(vm);
5060 serge 2036
	return ret;
2037
}
2038
 
2039
static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
2040
{
6084 serge 2041
	return gen6_ppgtt_allocate_page_directories(ppgtt);
5060 serge 2042
}
3031 serge 2043
 
6084 serge 2044
static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
2045
				  uint64_t start, uint64_t length)
5060 serge 2046
{
6084 serge 2047
	struct i915_page_table *unused;
2048
	uint32_t pde, temp;
5060 serge 2049
 
6084 serge 2050
	gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde)
2051
		ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
3031 serge 2052
}
2053
 
5060 serge 2054
static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
3031 serge 2055
{
5060 serge 2056
	struct drm_device *dev = ppgtt->base.dev;
3031 serge 2057
	struct drm_i915_private *dev_priv = dev->dev_private;
3480 Serge 2058
	int ret;
3031 serge 2059
 
5060 serge 2060
	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
2061
	if (IS_GEN6(dev)) {
2062
		ppgtt->switch_mm = gen6_mm_switch;
2063
	} else if (IS_HASWELL(dev)) {
2064
		ppgtt->switch_mm = hsw_mm_switch;
2065
	} else if (IS_GEN7(dev)) {
2066
		ppgtt->switch_mm = gen7_mm_switch;
2067
	} else
2068
		BUG();
3031 serge 2069
 
6084 serge 2070
	if (intel_vgpu_active(dev))
2071
		ppgtt->switch_mm = vgpu_mm_switch;
2072
 
5060 serge 2073
	ret = gen6_ppgtt_alloc(ppgtt);
2074
	if (ret)
2075
		return ret;
2076
 
6084 serge 2077
	ppgtt->base.allocate_va_range = gen6_alloc_va_range;
5060 serge 2078
	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
2079
	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
6084 serge 2080
	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
2081
	ppgtt->base.bind_vma = ppgtt_bind_vma;
5060 serge 2082
	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
2083
	ppgtt->base.start = 0;
6084 serge 2084
	ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
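	/* With the usual values (I915_PDES == 512, GEN6_PTES == 1024, 4K
	 * pages) this works out to a 2GB per-process address space.
	 */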
2085
	ppgtt->debug_dump = gen6_dump_ppgtt;
5060 serge 2086
 
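	/* The PDEs live in the global GTT itself (see the comment in
	 * gen6_ppgtt_allocate_page_directories), so the directory is reached
	 * through the same gsm mapping: ggtt_offset below is the byte offset
	 * of the reserved node's entries within the GSM.
	 */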
6084 serge 2087
	ppgtt->pd.base.ggtt_offset =
2088
		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);
5060 serge 2089
 
6084 serge 2090
	ppgtt->pd_addr = (gen6_pte_t __iomem *)dev_priv->gtt.gsm +
2091
		ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);
5060 serge 2092
 
6084 serge 2093
	gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
2094
 
2095
	gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
2096
 
2097
	DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
5060 serge 2098
			 ppgtt->node.size >> 20,
2099
			 ppgtt->node.start / PAGE_SIZE);
2100
 
5354 serge 2101
	DRM_DEBUG("Adding PPGTT at offset %x\n",
6084 serge 2102
		  ppgtt->pd.base.ggtt_offset << 10);
5354 serge 2103
 
5060 serge 2104
	return 0;
2105
}
2106
 
5354 serge 2107
static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
5060 serge 2108
{
4104 Serge 2109
	ppgtt->base.dev = dev;
3031 serge 2110
 
3746 Serge 2111
	if (INTEL_INFO(dev)->gen < 8)
5354 serge 2112
		return gen6_ppgtt_init(ppgtt);
3746 Serge 2113
	else
6084 serge 2114
		return gen8_ppgtt_init(ppgtt);
5354 serge 2115
}
6084 serge 2116
 
2117
static void i915_address_space_init(struct i915_address_space *vm,
2118
				    struct drm_i915_private *dev_priv)
2119
{
2120
	drm_mm_init(&vm->mm, vm->start, vm->total);
2121
	vm->dev = dev_priv->dev;
2122
	INIT_LIST_HEAD(&vm->active_list);
2123
	INIT_LIST_HEAD(&vm->inactive_list);
2124
	list_add_tail(&vm->global_link, &dev_priv->vm_list);
2125
}
2126
 
5354 serge 2127
int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
2128
{
2129
	struct drm_i915_private *dev_priv = dev->dev_private;
2130
	int ret = 0;
3746 Serge 2131
 
5354 serge 2132
	ret = __hw_ppgtt_init(dev, ppgtt);
2133
	if (ret == 0) {
5060 serge 2134
		kref_init(&ppgtt->ref);
6084 serge 2135
		i915_address_space_init(&ppgtt->base, dev_priv);
5354 serge 2136
	}
2137
 
2138
	return ret;
2139
}
2140
 
2141
int i915_ppgtt_init_hw(struct drm_device *dev)
2142
{
2143
	/* In the case of execlists, PPGTT is enabled by the context descriptor
2144
	 * and the PDPs are contained within the context itself.  We don't
2145
	 * need to do anything here. */
2146
	if (i915.enable_execlists)
2147
		return 0;
2148
 
2149
	if (!USES_PPGTT(dev))
2150
		return 0;
2151
 
2152
	if (IS_GEN6(dev))
2153
		gen6_ppgtt_enable(dev);
2154
	else if (IS_GEN7(dev))
2155
		gen7_ppgtt_enable(dev);
2156
	else if (INTEL_INFO(dev)->gen >= 8)
2157
		gen8_ppgtt_enable(dev);
2158
	else
6084 serge 2159
		MISSING_CASE(INTEL_INFO(dev)->gen);
5354 serge 2160
 
6084 serge 2161
	return 0;
2162
}
3480 Serge 2163
 
6084 serge 2164
int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
2165
{
2166
	struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
2167
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2168
 
2169
	if (i915.enable_execlists)
2170
		return 0;
2171
 
2172
	if (!ppgtt)
2173
		return 0;
2174
 
2175
	return ppgtt->switch_mm(ppgtt, req);
3031 serge 2176
}
6084 serge 2177
 
5354 serge 2178
struct i915_hw_ppgtt *
2179
i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
2180
{
2181
	struct i915_hw_ppgtt *ppgtt;
2182
	int ret;
3031 serge 2183
 
5354 serge 2184
	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2185
	if (!ppgtt)
2186
		return ERR_PTR(-ENOMEM);
2187
 
2188
	ret = i915_ppgtt_init(dev, ppgtt);
2189
	if (ret) {
2190
		kfree(ppgtt);
2191
		return ERR_PTR(ret);
2192
	}
2193
 
2194
	ppgtt->file_priv = fpriv;
2195
 
2196
	trace_i915_ppgtt_create(&ppgtt->base);
2197
 
2198
	return ppgtt;
2199
}
2200
 
2201
void  i915_ppgtt_release(struct kref *kref)
2202
{
2203
	struct i915_hw_ppgtt *ppgtt =
2204
		container_of(kref, struct i915_hw_ppgtt, ref);
2205
 
2206
	trace_i915_ppgtt_release(&ppgtt->base);
2207
 
2208
	/* vmas should already be unbound */
2209
	WARN_ON(!list_empty(&ppgtt->base.active_list));
2210
	WARN_ON(!list_empty(&ppgtt->base.inactive_list));
2211
 
2212
	list_del(&ppgtt->base.global_link);
2213
	drm_mm_takedown(&ppgtt->base.mm);
2214
 
2215
	ppgtt->base.cleanup(&ppgtt->base);
2216
	kfree(ppgtt);
2217
}
2218
 
3480 Serge 2219
extern int intel_iommu_gfx_mapped;
2220
/* Certain Gen5 chipsets require idling the GPU before
2221
 * unmapping anything from the GTT when VT-d is enabled.
2222
 */
6084 serge 2223
static bool needs_idle_maps(struct drm_device *dev)
3480 Serge 2224
{
2225
#ifdef CONFIG_INTEL_IOMMU
2226
	/* Query intel_iommu to see if we need the workaround. Presumably that
2227
	 * was loaded first.
2228
	 */
2229
	if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
2230
		return true;
2231
#endif
2232
	return false;
2233
}
2234
 
2344 Serge 2235
static bool do_idling(struct drm_i915_private *dev_priv)
2236
{
2237
	bool ret = dev_priv->mm.interruptible;
2238
 
3480 Serge 2239
	if (unlikely(dev_priv->gtt.do_idle_maps)) {
2344 Serge 2240
		dev_priv->mm.interruptible = false;
2241
		if (i915_gpu_idle(dev_priv->dev)) {
2242
			DRM_ERROR("Couldn't idle GPU\n");
2243
			/* Wait a bit, in hopes it avoids the hang */
2244
			udelay(10);
2245
		}
2246
	}
2247
 
2248
	return ret;
2249
}
2250
 
2251
static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
2252
{
3480 Serge 2253
	if (unlikely(dev_priv->gtt.do_idle_maps))
2344 Serge 2254
		dev_priv->mm.interruptible = interruptible;
2255
}
2256
 
4280 Serge 2257
void i915_check_and_clear_faults(struct drm_device *dev)
2258
{
2259
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 2260
	struct intel_engine_cs *ring;
4280 Serge 2261
	int i;
2262
 
2263
	if (INTEL_INFO(dev)->gen < 6)
2264
		return;
2265
 
2266
	for_each_ring(ring, dev_priv, i) {
2267
		u32 fault_reg;
2268
		fault_reg = I915_READ(RING_FAULT_REG(ring));
2269
		if (fault_reg & RING_FAULT_VALID) {
2270
			DRM_DEBUG_DRIVER("Unexpected fault\n"
5354 serge 2271
					 "\tAddr: 0x%08lx\n"
4280 Serge 2272
					 "\tAddress space: %s\n"
2273
					 "\tSource ID: %d\n"
2274
					 "\tType: %d\n",
2275
					 fault_reg & PAGE_MASK,
2276
					 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
2277
					 RING_FAULT_SRCID(fault_reg),
2278
					 RING_FAULT_FAULT_TYPE(fault_reg));
2279
			I915_WRITE(RING_FAULT_REG(ring),
2280
				   fault_reg & ~RING_FAULT_VALID);
2281
		}
2282
	}
2283
	POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
2284
}
2285
 
5354 serge 2286
static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
2287
{
2288
	if (INTEL_INFO(dev_priv->dev)->gen < 6) {
2289
		intel_gtt_chipset_flush();
2290
	} else {
2291
		I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2292
		POSTING_READ(GFX_FLSH_CNTL_GEN6);
2293
	}
2294
}
2295
 
4280 Serge 2296
void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
2297
{
2298
	struct drm_i915_private *dev_priv = dev->dev_private;
2299
 
2300
	/* Don't bother messing with faults pre GEN6 as we have little
2301
	 * documentation supporting that it's a good idea.
2302
	 */
2303
	if (INTEL_INFO(dev)->gen < 6)
2304
		return;
2305
 
2306
	i915_check_and_clear_faults(dev);
2307
 
2308
	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
5060 serge 2309
				       dev_priv->gtt.base.start,
2310
				       dev_priv->gtt.base.total,
2311
				       true);
5354 serge 2312
 
2313
	i915_ggtt_flush(dev_priv);
4280 Serge 2314
}
2315
 
3031 serge 2316
int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
2332 Serge 2317
{
3480 Serge 2318
	if (!dma_map_sg(&obj->base.dev->pdev->dev,
2319
			obj->pages->sgl, obj->pages->nents,
2320
			PCI_DMA_BIDIRECTIONAL))
2321
		return -ENOSPC;
3243 Serge 2322
 
2332 Serge 2323
	return 0;
2324
}
2325
 
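/* gen8 PTEs are 64 bits wide: use a single writeq where the platform
 * provides one, otherwise fall back to two 32-bit writes (low dword first).
 */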
6084 serge 2326
static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
4560 Serge 2327
{
2328
#ifdef writeq
2329
	writeq(pte, addr);
2330
#else
2331
	iowrite32((u32)pte, addr);
2332
	iowrite32(pte >> 32, addr + 4);
2333
#endif
2334
}
2335
 
2336
static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
2337
				     struct sg_table *st,
5060 serge 2338
				     uint64_t start,
2339
				     enum i915_cache_level level, u32 unused)
4560 Serge 2340
{
2341
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
5060 serge 2342
	unsigned first_entry = start >> PAGE_SHIFT;
6084 serge 2343
	gen8_pte_t __iomem *gtt_entries =
2344
		(gen8_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
4560 Serge 2345
	int i = 0;
2346
	struct sg_page_iter sg_iter;
5060 serge 2347
	dma_addr_t addr = 0; /* shut up gcc */
4560 Serge 2348
 
2349
	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2350
		addr = sg_dma_address(sg_iter.sg) +
2351
			(sg_iter.sg_pgoffset << PAGE_SHIFT);
2352
		gen8_set_pte(&gtt_entries[i],
2353
			     gen8_pte_encode(addr, level, true));
2354
		i++;
2355
	}
2356
 
2357
	/*
2358
	 * XXX: This serves as a posting read to make sure that the PTE has
2359
	 * actually been updated. There is some concern that even though
2360
	 * registers and PTEs are within the same BAR that they are potentially
2361
	 * of NUMA access patterns. Therefore, even with the way we assume
2362
	 * hardware should work, we must keep this posting read for paranoia.
2363
	 */
2364
	if (i != 0)
2365
		WARN_ON(readq(&gtt_entries[i-1])
2366
			!= gen8_pte_encode(addr, level, true));
2367
 
2368
	/* This next bit makes the above posting read even more important. We
2369
	 * want to flush the TLBs only after we're certain all the PTE updates
2370
	 * have finished.
2371
	 */
2372
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2373
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
2374
}
2375
 
3243 Serge 2376
/*
2377
 * Binds an object into the global gtt with the specified cache level. The object
2378
 * will be accessible to the GPU via commands whose operands reference offsets
2379
 * within the global GTT as well as accessible by the GPU through the GMADR
2380
 * mapped BAR (dev_priv->mm.gtt->gtt).
2381
 */
4104 Serge 2382
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
3480 Serge 2383
				     struct sg_table *st,
5060 serge 2384
				     uint64_t start,
2385
				     enum i915_cache_level level, u32 flags)
3243 Serge 2386
{
4104 Serge 2387
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
5060 serge 2388
	unsigned first_entry = start >> PAGE_SHIFT;
6084 serge 2389
	gen6_pte_t __iomem *gtt_entries =
2390
		(gen6_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
3746 Serge 2391
	int i = 0;
2392
	struct sg_page_iter sg_iter;
5060 serge 2393
	dma_addr_t addr = 0;
3243 Serge 2394
 
3746 Serge 2395
	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
2396
		addr = sg_page_iter_dma_address(&sg_iter);
5060 serge 2397
		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
6084 serge 2398
		i++;
2399
	}
3243 Serge 2400
 
2401
	/* XXX: This serves as a posting read to make sure that the PTE has
2402
	 * actually been updated. There is some concern that even though
2403
	 * registers and PTEs are within the same BAR that they are potentially
2404
	 * of NUMA access patterns. Therefore, even with the way we assume
2405
	 * hardware should work, we must keep this posting read for paranoia.
2406
	 */
5060 serge 2407
	if (i != 0) {
2408
		unsigned long gtt = readl(&gtt_entries[i-1]);
2409
		WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
2410
	}
3243 Serge 2411
 
2412
	/* This next bit makes the above posting read even more important. We
2413
	 * want to flush the TLBs only after we're certain all the PTE updates
2414
	 * have finished.
2415
	 */
2416
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
2417
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
2418
}
2419
 
4560 Serge 2420
static void gen8_ggtt_clear_range(struct i915_address_space *vm,
5060 serge 2421
				  uint64_t start,
2422
				  uint64_t length,
4560 Serge 2423
				  bool use_scratch)
2424
{
2425
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
5060 serge 2426
	unsigned first_entry = start >> PAGE_SHIFT;
2427
	unsigned num_entries = length >> PAGE_SHIFT;
6084 serge 2428
	gen8_pte_t scratch_pte, __iomem *gtt_base =
2429
		(gen8_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
4560 Serge 2430
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
2431
	int i;
2432
 
2433
	if (WARN(num_entries > max_entries,
2434
		 "First entry = %d; Num entries = %d (max=%d)\n",
2435
		 first_entry, num_entries, max_entries))
2436
		num_entries = max_entries;
2437
 
6084 serge 2438
	scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
4560 Serge 2439
				      I915_CACHE_LLC,
2440
				      use_scratch);
2441
	for (i = 0; i < num_entries; i++)
2442
		gen8_set_pte(&gtt_base[i], scratch_pte);
2443
	readl(gtt_base);
2444
}
2445
 
4104 Serge 2446
static void gen6_ggtt_clear_range(struct i915_address_space *vm,
5060 serge 2447
				  uint64_t start,
2448
				  uint64_t length,
4280 Serge 2449
				  bool use_scratch)
3480 Serge 2450
{
4104 Serge 2451
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
5060 serge 2452
	unsigned first_entry = start >> PAGE_SHIFT;
2453
	unsigned num_entries = length >> PAGE_SHIFT;
6084 serge 2454
	gen6_pte_t scratch_pte, __iomem *gtt_base =
2455
		(gen6_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
3480 Serge 2456
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
2457
	int i;
2458
 
4126 Serge 2459
	if (WARN(num_entries > max_entries,
2460
		 "First entry = %d; Num entries = %d (max=%d)\n",
2461
		 first_entry, num_entries, max_entries))
6084 serge 2462
		num_entries = max_entries;
3480 Serge 2463
 
6084 serge 2464
	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
2465
				     I915_CACHE_LLC, use_scratch, 0);
4280 Serge 2466
 
3480 Serge 2467
	for (i = 0; i < num_entries; i++)
2468
		iowrite32(scratch_pte, &gtt_base[i]);
2469
	readl(gtt_base);
2470
}
2471
 
6084 serge 2472
static void i915_ggtt_insert_entries(struct i915_address_space *vm,
2473
				     struct sg_table *pages,
2474
				     uint64_t start,
2475
				     enum i915_cache_level cache_level, u32 unused)
3480 Serge 2476
{
2477
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
2478
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;
2479
 
6084 serge 2480
	intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);
2481
 
3480 Serge 2482
}
2483
 
4104 Serge 2484
static void i915_ggtt_clear_range(struct i915_address_space *vm,
5060 serge 2485
				  uint64_t start,
2486
				  uint64_t length,
4280 Serge 2487
				  bool unused)
3480 Serge 2488
{
5060 serge 2489
	unsigned first_entry = start >> PAGE_SHIFT;
2490
	unsigned num_entries = length >> PAGE_SHIFT;
3480 Serge 2491
	intel_gtt_clear_range(first_entry, num_entries);
2492
}
2493
 
6084 serge 2494
static int ggtt_bind_vma(struct i915_vma *vma,
2495
			 enum i915_cache_level cache_level,
2496
			 u32 flags)
5060 serge 2497
{
6084 serge 2498
	struct drm_i915_gem_object *obj = vma->obj;
2499
	u32 pte_flags = 0;
2500
	int ret;
3480 Serge 2501
 
6084 serge 2502
	ret = i915_get_ggtt_vma_pages(vma);
2503
	if (ret)
2504
		return ret;
2505
 
2506
	/* Currently applicable only to VLV */
2507
	if (obj->gt_ro)
2508
		pte_flags |= PTE_READ_ONLY;
2509
 
2510
	vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages,
2511
				vma->node.start,
2512
				cache_level, pte_flags);
2513
 
2514
	/*
2515
	 * Without aliasing PPGTT there's no difference between
2516
	 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
2517
	 * upgrade to both bound if we bind either to avoid double-binding.
2518
	 */
2519
	vma->bound |= GLOBAL_BIND | LOCAL_BIND;
2520
 
2521
	return 0;
5060 serge 2522
}
2523
 
6084 serge 2524
static int aliasing_gtt_bind_vma(struct i915_vma *vma,
2525
				 enum i915_cache_level cache_level,
2526
				 u32 flags)
2332 Serge 2527
{
5060 serge 2528
	struct drm_device *dev = vma->vm->dev;
3480 Serge 2529
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 2530
	struct drm_i915_gem_object *obj = vma->obj;
6084 serge 2531
	struct sg_table *pages = obj->pages;
2532
	u32 pte_flags = 0;
2533
	int ret;
3480 Serge 2534
 
6084 serge 2535
	ret = i915_get_ggtt_vma_pages(vma);
2536
	if (ret)
2537
		return ret;
2538
	pages = vma->ggtt_view.pages;
2539
 
5060 serge 2540
	/* Currently applicable only to VLV */
2541
	if (obj->gt_ro)
6084 serge 2542
		pte_flags |= PTE_READ_ONLY;
2332 Serge 2543
 
6084 serge 2544
 
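	/* GLOBAL_BIND fills the GGTT PTEs; LOCAL_BIND mirrors the same pages
	 * into the aliasing PPGTT, so either mapping resolves to the same
	 * storage at the same offset.
	 */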
2545
	if (flags & GLOBAL_BIND) {
2546
		vma->vm->insert_entries(vma->vm, pages,
2547
					vma->node.start,
2548
					cache_level, pte_flags);
5060 serge 2549
	}
2550
 
6084 serge 2551
	if (flags & LOCAL_BIND) {
5060 serge 2552
		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
6084 serge 2553
		appgtt->base.insert_entries(&appgtt->base, pages,
5060 serge 2554
					    vma->node.start,
6084 serge 2555
					    cache_level, pte_flags);
5060 serge 2556
	}
6084 serge 2557
 
2558
	return 0;
2332 Serge 2559
}
2560
 
5060 serge 2561
static void ggtt_unbind_vma(struct i915_vma *vma)
2332 Serge 2562
{
5060 serge 2563
	struct drm_device *dev = vma->vm->dev;
3480 Serge 2564
	struct drm_i915_private *dev_priv = dev->dev_private;
5060 serge 2565
	struct drm_i915_gem_object *obj = vma->obj;
6084 serge 2566
	const uint64_t size = min_t(uint64_t,
2567
				    obj->base.size,
2568
				    vma->node.size);
3480 Serge 2569
 
5354 serge 2570
	if (vma->bound & GLOBAL_BIND) {
5060 serge 2571
		vma->vm->clear_range(vma->vm,
2572
				     vma->node.start,
6084 serge 2573
				     size,
2574
				     true);
5060 serge 2575
	}
3031 serge 2576
 
6084 serge 2577
	if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
5060 serge 2578
		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
6084 serge 2579
 
5060 serge 2580
		appgtt->base.clear_range(&appgtt->base,
2581
					 vma->node.start,
6084 serge 2582
					 size,
5060 serge 2583
					 true);
2584
	}
3031 serge 2585
}
2586
 
2587
void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
2588
{
2344 Serge 2589
	struct drm_device *dev = obj->base.dev;
2590
	struct drm_i915_private *dev_priv = dev->dev_private;
2591
	bool interruptible;
2592
 
2593
	interruptible = do_idling(dev_priv);
2594
 
6084 serge 2595
	dma_unmap_sg(&dev->pdev->dev, obj->pages->sgl, obj->pages->nents,
2596
		     PCI_DMA_BIDIRECTIONAL);
2332 Serge 2597
 
3031 serge 2598
	undo_idling(dev_priv, interruptible);
2599
}
2600
 
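/* The drm_mm "color" here is the object's cache level: on machines without
 * LLC a one-page gap is kept between neighbouring nodes of different colors,
 * presumably so prefetching past an object never hits memory with a
 * conflicting cacheability setting.
 */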
2601
static void i915_gtt_color_adjust(struct drm_mm_node *node,
2602
				  unsigned long color,
6084 serge 2603
				  u64 *start,
2604
				  u64 *end)
3031 serge 2605
{
2606
	if (node->color != color)
2607
		*start += 4096;
2608
 
2609
	if (!list_empty(&node->node_list)) {
2610
		node = list_entry(node->node_list.next,
2611
				  struct drm_mm_node,
2612
				  node_list);
2613
		if (node->allocated && node->color != color)
2614
			*end -= 4096;
2332 Serge 2615
	}
3031 serge 2616
}
4560 Serge 2617
 
5354 serge 2618
static int i915_gem_setup_global_gtt(struct drm_device *dev,
6084 serge 2619
				     u64 start,
2620
				     u64 mappable_end,
2621
				     u64 end)
3031 serge 2622
{
3480 Serge 2623
	/* Let GEM manage all of the aperture.
2624
	 *
2625
	 * However, leave one page at the end still bound to the scratch page.
2626
	 * There are a number of places where the hardware apparently prefetches
2627
	 * past the end of the object, and we've seen multiple hangs with the
2628
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
2629
	 * aperture.  One page should be enough to keep any prefetching inside
2630
	 * of the aperture.
2631
	 */
4104 Serge 2632
	struct drm_i915_private *dev_priv = dev->dev_private;
2633
	struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
3480 Serge 2634
	struct drm_mm_node *entry;
2635
	struct drm_i915_gem_object *obj;
2636
	unsigned long hole_start, hole_end;
5354 serge 2637
	int ret;
3031 serge 2638
 
3480 Serge 2639
	BUG_ON(mappable_end > end);
2640
 
6084 serge 2641
	ggtt_vm->start = start;
2642
 
2643
	/* Subtract the guard page before address space initialization to
2644
	 * shrink the range used by drm_mm */
2645
	ggtt_vm->total = end - start - PAGE_SIZE;
2646
	i915_address_space_init(ggtt_vm, dev_priv);
2647
	ggtt_vm->total += PAGE_SIZE;
2648
 
2649
	if (intel_vgpu_active(dev)) {
2650
		ret = intel_vgt_balloon(dev);
2651
		if (ret)
2652
			return ret;
2653
	}
2654
 
3031 serge 2655
	if (!HAS_LLC(dev))
6084 serge 2656
		ggtt_vm->mm.color_adjust = i915_gtt_color_adjust;
3031 serge 2657
 
3480 Serge 2658
	/* Mark any preallocated objects as occupied */
4104 Serge 2659
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
2660
		struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);
5354 serge 2661
 
6084 serge 2662
		DRM_DEBUG_KMS("reserving preallocated space: %llx + %zx\n",
4104 Serge 2663
			      i915_gem_obj_ggtt_offset(obj), obj->base.size);
3031 serge 2664
 
4104 Serge 2665
		WARN_ON(i915_gem_obj_ggtt_bound(obj));
2666
		ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
5354 serge 2667
		if (ret) {
2668
			DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
2669
			return ret;
2670
		}
2671
		vma->bound |= GLOBAL_BIND;
6084 serge 2672
		__i915_vma_set_map_and_fenceable(vma);
2673
		list_add_tail(&vma->mm_list, &ggtt_vm->inactive_list);
3480 Serge 2674
	}
2675
 
2676
	/* Clear any non-preallocated blocks */
4104 Serge 2677
	drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
3480 Serge 2678
		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
2679
			      hole_start, hole_end);
5060 serge 2680
		ggtt_vm->clear_range(ggtt_vm, hole_start,
2681
				     hole_end - hole_start, true);
3480 Serge 2682
	}
2683
 
2684
	/* And finally clear the reserved guard page */
5060 serge 2685
	ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);
5354 serge 2686
 
2687
	if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) {
2688
		struct i915_hw_ppgtt *ppgtt;
2689
 
2690
		ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
2691
		if (!ppgtt)
2692
			return -ENOMEM;
2693
 
2694
		ret = __hw_ppgtt_init(dev, ppgtt);
6084 serge 2695
		if (ret) {
2696
			ppgtt->base.cleanup(&ppgtt->base);
2697
			kfree(ppgtt);
5354 serge 2698
			return ret;
6084 serge 2699
		}
5354 serge 2700
 
6084 serge 2701
		if (ppgtt->base.allocate_va_range)
2702
			ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
2703
							    ppgtt->base.total);
2704
		if (ret) {
2705
			ppgtt->base.cleanup(&ppgtt->base);
2706
			kfree(ppgtt);
2707
			return ret;
2708
		}
2709
 
2710
		ppgtt->base.clear_range(&ppgtt->base,
2711
					ppgtt->base.start,
2712
					ppgtt->base.total,
2713
					true);
2714
 
5354 serge 2715
		dev_priv->mm.aliasing_ppgtt = ppgtt;
6084 serge 2716
		WARN_ON(dev_priv->gtt.base.bind_vma != ggtt_bind_vma);
2717
		dev_priv->gtt.base.bind_vma = aliasing_gtt_bind_vma;
5354 serge 2718
	}
2719
 
2720
	return 0;
2332 Serge 2721
}
3243 Serge 2722
 
3480 Serge 2723
void i915_gem_init_global_gtt(struct drm_device *dev)
2724
{
2725
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 2726
	u64 gtt_size, mappable_size;
3480 Serge 2727
 
4104 Serge 2728
	gtt_size = dev_priv->gtt.base.total;
3480 Serge 2729
	mappable_size = dev_priv->gtt.mappable_end;
2730
 
4280 Serge 2731
	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
3480 Serge 2732
}
2733
 
5354 serge 2734
void i915_global_gtt_cleanup(struct drm_device *dev)
2735
{
2736
	struct drm_i915_private *dev_priv = dev->dev_private;
2737
	struct i915_address_space *vm = &dev_priv->gtt.base;
2738
 
2739
	if (dev_priv->mm.aliasing_ppgtt) {
2740
		struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
2741
 
2742
		ppgtt->base.cleanup(&ppgtt->base);
6660 serge 2743
		kfree(ppgtt);
5354 serge 2744
	}
2745
 
2746
	if (drm_mm_initialized(&vm->mm)) {
6084 serge 2747
		if (intel_vgpu_active(dev))
2748
			intel_vgt_deballoon();
2749
 
5354 serge 2750
		drm_mm_takedown(&vm->mm);
2751
		list_del(&vm->global_link);
2752
	}
2753
 
2754
	vm->cleanup(vm);
2755
}
2756
 
6084 serge 2757
static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
3243 Serge 2758
{
2759
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
2760
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
2761
	return snb_gmch_ctl << 20;
2762
}
2763
 
6084 serge 2764
static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
4560 Serge 2765
{
2766
	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
2767
	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
2768
	if (bdw_gmch_ctl)
2769
		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
2770
 
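	/* The field is a power-of-two encoding: 1 -> 2MB of GTT, 2 -> 4MB,
	 * 3 -> 8MB, and so on, before the 32-bit cap below.
	 */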
5060 serge 2771
#ifdef CONFIG_X86_32
2772
	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
2773
	if (bdw_gmch_ctl > 4)
2774
		bdw_gmch_ctl = 4;
2775
#endif
2776
 
4560 Serge 2777
	return bdw_gmch_ctl << 20;
2778
}
2779
 
6084 serge 2780
static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
5060 serge 2781
{
2782
	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
2783
	gmch_ctrl &= SNB_GMCH_GGMS_MASK;
2784
 
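	/* e.g. a GGMS field of 1 selects 2MB of GTT (1 << 21), 2 selects 4MB;
	 * a field of 0 yields no GTT at all.
	 */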
2785
	if (gmch_ctrl)
2786
		return 1 << (20 + gmch_ctrl);
2787
 
2788
	return 0;
2789
}
2790
 
6084 serge 2791
static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
3243 Serge 2792
{
2793
	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
2794
	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
2795
	return snb_gmch_ctl << 25; /* 32 MB units */
2796
}
2797
 
6084 serge 2798
static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
4560 Serge 2799
{
2800
	bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2801
	bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
2802
	return bdw_gmch_ctl << 25; /* 32 MB units */
2803
}
2804
 
5060 serge 2805
static size_t chv_get_stolen_size(u16 gmch_ctrl)
2806
{
2807
	gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
2808
	gmch_ctrl &= SNB_GMCH_GMS_MASK;
2809
 
2810
	/*
2811
	 * 0x0  to 0x10: 32MB increments starting at 0MB
2812
	 * 0x11 to 0x16: 4MB increments starting at 8MB
2813
	 * 0x17 to 0x1d: 4MB increments starting at 36MB
2814
	 */
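	/* e.g. 0x10 -> 0x10 << 25 = 512MB, 0x11 -> (0x11 - 0x11 + 2) << 22 = 8MB,
	 * 0x17 -> (0x17 - 0x17 + 9) << 22 = 36MB, matching the table above.
	 */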
2815
	if (gmch_ctrl < 0x11)
2816
		return gmch_ctrl << 25;
2817
	else if (gmch_ctrl < 0x17)
2818
		return (gmch_ctrl - 0x11 + 2) << 22;
2819
	else
2820
		return (gmch_ctrl - 0x17 + 9) << 22;
2821
}
2822
 
5354 serge 2823
static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
2824
{
2825
	gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
2826
	gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;
2827
 
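	/* e.g. 0x02 -> 64MB (32MB units); 0xf0 -> 4MB, 0xf1 -> 8MB (4MB units). */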
2828
	if (gen9_gmch_ctl < 0xf0)
2829
		return gen9_gmch_ctl << 25; /* 32 MB units */
2830
	else
2831
		/* values >= 0xf0 encode the size in 4MB increments, starting at 4MB */
2832
		return (gen9_gmch_ctl - 0xf0 + 1) << 22;
2833
}
2834
 
4560 Serge 2835
static int ggtt_probe_common(struct drm_device *dev,
2836
			     size_t gtt_size)
2837
{
2838
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 2839
	struct i915_page_scratch *scratch_page;
4560 Serge 2840
	phys_addr_t gtt_phys_addr;
2841
 
2842
	/* For Modern GENs the PTEs and register space are split in the BAR */
2843
	gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
2844
		(pci_resource_len(dev->pdev, 0) / 2);
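	/* i.e. registers occupy the lower half of BAR 0 and the GTT (GSM) the
	 * upper half. */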
2845
 
6084 serge 2846
	/*
2847
	 * On BXT writes larger than 64 bit to the GTT pagetable range will be
2848
	 * dropped. For WC mappings in general we have 64 byte burst writes
2849
	 * when the WC buffer is flushed, so we can't use it, but have to
2850
	 * resort to an uncached mapping. The WC issue is easily caught by the
2851
	 * readback check when writing GTT PTE entries.
2852
	 */
2853
	if (IS_BROXTON(dev))
2854
		dev_priv->gtt.gsm = ioremap_nocache(gtt_phys_addr, gtt_size);
2855
	else
2856
		dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
4560 Serge 2857
	if (!dev_priv->gtt.gsm) {
2858
		DRM_ERROR("Failed to map the gtt page table\n");
2859
		return -ENOMEM;
2860
	}
2861
 
6084 serge 2862
	scratch_page = alloc_scratch_page(dev);
2863
	if (IS_ERR(scratch_page)) {
4560 Serge 2864
		DRM_ERROR("Scratch setup failed\n");
2865
		/* iounmap will also get called at remove, but meh */
2866
		iounmap(dev_priv->gtt.gsm);
6084 serge 2867
		return PTR_ERR(scratch_page);
4560 Serge 2868
	}
2869
 
6084 serge 2870
	dev_priv->gtt.base.scratch_page = scratch_page;
2871
 
2872
	return 0;
4560 Serge 2873
}
2874
 
2875
/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
2876
 * bits. When using advanced contexts each context stores its own PAT, but
2877
 * writing this data shouldn't be harmful even in those cases. */
5060 serge 2878
static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
4560 Serge 2879
{
2880
	uint64_t pat;
2881
 
2882
	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
2883
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
2884
	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
2885
	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
2886
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
2887
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
2888
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
2889
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
2890
 
5354 serge 2891
	if (!USES_PPGTT(dev_priv->dev))
2892
		/* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
2893
		 * so RTL will always use the value corresponding to
2894
		 * pat_sel = 000".
2895
		 * So let's disable cache for GGTT to avoid screen corruptions.
2896
		 * MOCS still can be used though.
2897
		 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
2898
		 * before this patch, i.e. the same uncached + snooping access
2899
		 * like on gen6/7 seems to be in effect.
2900
		 * - So this just fixes blitter/render access. Again it looks
2901
		 * like it's not just uncached access, but uncached + snooping.
2902
		 * So we can still hold onto all our assumptions wrt cpu
2903
		 * clflushing on LLC machines.
2904
		 */
2905
		pat = GEN8_PPAT(0, GEN8_PPAT_UC);
2906
 
4560 Serge 2907
	/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
2908
	 * write would work. */
6084 serge 2909
	I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
2910
	I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
4560 Serge 2911
}
2912
 
5060 serge 2913
static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
2914
{
2915
	uint64_t pat;
2916
 
2917
	/*
2918
	 * Map WB on BDW to snooped on CHV.
2919
	 *
2920
	 * Only the snoop bit has meaning for CHV, the rest is
2921
	 * ignored.
2922
	 *
5354 serge 2923
	 * The hardware will never snoop for certain types of accesses:
2924
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
2925
	 * - PPGTT page tables
2926
	 * - some other special cycles
2927
	 *
2928
	 * As with BDW, we also need to consider the following for GT accesses:
2929
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
2930
	 * so RTL will always use the value corresponding to
2931
	 * pat_sel = 000".
2932
	 * Which means we must set the snoop bit in PAT entry 0
2933
	 * in order to keep the global status page working.
5060 serge 2934
	 */
2935
	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
2936
	      GEN8_PPAT(1, 0) |
2937
	      GEN8_PPAT(2, 0) |
2938
	      GEN8_PPAT(3, 0) |
2939
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
2940
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
2941
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
2942
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);
2943
 
6084 serge 2944
	I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
2945
	I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
5060 serge 2946
}
2947
 
4560 Serge 2948
static int gen8_gmch_probe(struct drm_device *dev,
6084 serge 2949
			   u64 *gtt_total,
4560 Serge 2950
			   size_t *stolen,
2951
			   phys_addr_t *mappable_base,
6084 serge 2952
			   u64 *mappable_end)
4560 Serge 2953
{
2954
	struct drm_i915_private *dev_priv = dev->dev_private;
6084 serge 2955
	u64 gtt_size;
4560 Serge 2956
	u16 snb_gmch_ctl;
2957
	int ret;
2958
 
2959
	/* TODO: We're not aware of mappable constraints on gen8 yet */
2960
	*mappable_base = pci_resource_start(dev->pdev, 2);
2961
	*mappable_end = pci_resource_len(dev->pdev, 2);
2962
 
2963
	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
2964
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
2965
 
2966
	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
2967
 
5354 serge 2968
	if (INTEL_INFO(dev)->gen >= 9) {
2969
		*stolen = gen9_get_stolen_size(snb_gmch_ctl);
2970
		gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
2971
	} else if (IS_CHERRYVIEW(dev)) {
5060 serge 2972
		*stolen = chv_get_stolen_size(snb_gmch_ctl);
2973
		gtt_size = chv_get_total_gtt_size(snb_gmch_ctl);
2974
	} else {
6084 serge 2975
		*stolen = gen8_get_stolen_size(snb_gmch_ctl);
5060 serge 2976
		gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
2977
	}
4560 Serge 2978
 
6084 serge 2979
	*gtt_total = (gtt_size / sizeof(gen8_pte_t)) << PAGE_SHIFT;
4560 Serge 2980
 
6084 serge 2981
	if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
5060 serge 2982
		chv_setup_private_ppat(dev_priv);
2983
	else
2984
		bdw_setup_private_ppat(dev_priv);
4560 Serge 2985
 
2986
	ret = ggtt_probe_common(dev, gtt_size);
2987
 
2988
	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
2989
	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
6084 serge 2990
	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
2991
	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
4560 Serge 2992
 
2993
	return ret;
2994
}
2995
 
3480 Serge 2996
static int gen6_gmch_probe(struct drm_device *dev,
6084 serge 2997
			   u64 *gtt_total,
3480 Serge 2998
			   size_t *stolen,
2999
			   phys_addr_t *mappable_base,
6084 serge 3000
			   u64 *mappable_end)
3243 Serge 3001
{
3002
	struct drm_i915_private *dev_priv = dev->dev_private;
3480 Serge 3003
	unsigned int gtt_size;
3243 Serge 3004
	u16 snb_gmch_ctl;
3005
	int ret;
3006
 
3480 Serge 3007
	*mappable_base = pci_resource_start(dev->pdev, 2);
3008
	*mappable_end = pci_resource_len(dev->pdev, 2);
3009
 
3010
	/* 64/512MB is the current min/max we actually know of, but this is just
3011
	 * a coarse sanity check.
3243 Serge 3012
	 */
3480 Serge 3013
	if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
6084 serge 3014
		DRM_ERROR("Unknown GMADR size (%llx)\n",
3480 Serge 3015
			  dev_priv->gtt.mappable_end);
3016
		return -ENXIO;
6084 serge 3017
	}
3243 Serge 3018
 
3480 Serge 3019
	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
3020
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
3021
	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
3243 Serge 3022
 
4104 Serge 3023
	*stolen = gen6_get_stolen_size(snb_gmch_ctl);
4560 Serge 3024
 
3025
	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
6084 serge 3026
	*gtt_total = (gtt_size / sizeof(gen6_pte_t)) << PAGE_SHIFT;
3243 Serge 3027
 
4560 Serge 3028
	ret = ggtt_probe_common(dev, gtt_size);
3243 Serge 3029
 
4104 Serge 3030
	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
3031
	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
6084 serge 3032
	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
3033
	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
3480 Serge 3034
 
3035
	return ret;
3036
}
3037
 
4104 Serge 3038
static void gen6_gmch_remove(struct i915_address_space *vm)
3480 Serge 3039
{
4104 Serge 3040
 
3041
	struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);
5060 serge 3042
 
4104 Serge 3043
	iounmap(gtt->gsm);
6084 serge 3044
	free_scratch_page(vm->dev, vm->scratch_page);
3480 Serge 3045
}
3046
 
3047
static int i915_gmch_probe(struct drm_device *dev,
6084 serge 3048
			   u64 *gtt_total,
3480 Serge 3049
			   size_t *stolen,
3050
			   phys_addr_t *mappable_base,
6084 serge 3051
			   u64 *mappable_end)
3480 Serge 3052
{
3053
	struct drm_i915_private *dev_priv = dev->dev_private;
3054
	int ret;
3055
 
3056
	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
3057
	if (!ret) {
3058
		DRM_ERROR("failed to set up gmch\n");
3059
		return -EIO;
3243 Serge 3060
	}
3061
 
3480 Serge 3062
	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);
3243 Serge 3063
 
3480 Serge 3064
	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
6084 serge 3065
	dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
4104 Serge 3066
	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
6084 serge 3067
	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
3068
	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;
3480 Serge 3069
 
4560 Serge 3070
	if (unlikely(dev_priv->gtt.do_idle_maps))
3071
		DRM_INFO("applying Ironlake quirks for intel_iommu\n");
3072
 
3243 Serge 3073
	return 0;
3480 Serge 3074
}
3243 Serge 3075
 
4104 Serge 3076
static void i915_gmch_remove(struct i915_address_space *vm)
3480 Serge 3077
{
4560 Serge 3078
//	intel_gmch_remove();
3480 Serge 3079
}
3080
 
3081
int i915_gem_gtt_init(struct drm_device *dev)
3082
{
3083
	struct drm_i915_private *dev_priv = dev->dev_private;
3084
	struct i915_gtt *gtt = &dev_priv->gtt;
3085
	int ret;
3086
 
3087
	if (INTEL_INFO(dev)->gen <= 5) {
4104 Serge 3088
		gtt->gtt_probe = i915_gmch_probe;
3089
		gtt->base.cleanup = i915_gmch_remove;
4560 Serge 3090
	} else if (INTEL_INFO(dev)->gen < 8) {
4104 Serge 3091
		gtt->gtt_probe = gen6_gmch_probe;
3092
		gtt->base.cleanup = gen6_gmch_remove;
3093
		if (IS_HASWELL(dev) && dev_priv->ellc_size)
3094
			gtt->base.pte_encode = iris_pte_encode;
3095
		else if (IS_HASWELL(dev))
3096
			gtt->base.pte_encode = hsw_pte_encode;
3097
		else if (IS_VALLEYVIEW(dev))
3098
			gtt->base.pte_encode = byt_pte_encode;
3099
		else if (INTEL_INFO(dev)->gen >= 7)
3100
			gtt->base.pte_encode = ivb_pte_encode;
3101
		else
3102
			gtt->base.pte_encode = snb_pte_encode;
4560 Serge 3103
	} else {
3104
		dev_priv->gtt.gtt_probe = gen8_gmch_probe;
3105
		dev_priv->gtt.base.cleanup = gen6_gmch_remove;
3480 Serge 3106
	}
3107
 
6084 serge 3108
	gtt->base.dev = dev;
3109
 
4104 Serge 3110
	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
3111
			     &gtt->mappable_base, &gtt->mappable_end);
3480 Serge 3112
	if (ret)
4104 Serge 3113
		return ret;
3480 Serge 3114
 
3115
	/* GMADR is the PCI mmio aperture into the global GTT. */
6084 serge 3116
	DRM_INFO("Memory usable by graphics device = %lluM\n",
4104 Serge 3117
		 gtt->base.total >> 20);
6084 serge 3118
	DRM_DEBUG_DRIVER("GMADR size = %lldM\n", gtt->mappable_end >> 20);
4104 Serge 3119
	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
5060 serge 3120
#ifdef CONFIG_INTEL_IOMMU
3121
	if (intel_iommu_gfx_mapped)
3122
		DRM_INFO("VT-d active for gfx access\n");
3123
#endif
3124
	/*
3125
	 * i915.enable_ppgtt is read-only, so do an early pass to validate the
3126
	 * user's requested state against the hardware/driver capabilities.  We
3127
	 * do this now so that we can print out any log messages once rather
3128
	 * than every time we check intel_enable_ppgtt().
3129
	 */
3130
	i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
3131
	DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);
3480 Serge 3132
 
3133
	return 0;
3243 Serge 3134
}
3135
 
6084 serge 3136
void i915_gem_restore_gtt_mappings(struct drm_device *dev)
5060 serge 3137
{
6084 serge 3138
	struct drm_i915_private *dev_priv = dev->dev_private;
3139
	struct drm_i915_gem_object *obj;
3140
	struct i915_address_space *vm;
3141
	struct i915_vma *vma;
3142
	bool flush;
3143
 
3144
	i915_check_and_clear_faults(dev);
3145
 
3146
	/* First fill our portion of the GTT with scratch pages */
3147
	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
3148
				       dev_priv->gtt.base.start,
3149
				       dev_priv->gtt.base.total,
3150
				       true);
3151
 
3152
	/* Cache flush objects bound into GGTT and rebind them. */
3153
	vm = &dev_priv->gtt.base;
3154
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
3155
		flush = false;
3156
		list_for_each_entry(vma, &obj->vma_list, vma_link) {
3157
			if (vma->vm != vm)
3158
				continue;
3159
 
3160
			WARN_ON(i915_vma_bind(vma, obj->cache_level,
3161
					      PIN_UPDATE));
3162
 
3163
			flush = true;
3164
		}
3165
 
3166
		if (flush)
3167
			i915_gem_clflush_object(obj, obj->pin_display);
3168
	}
3169
 
3170
	if (INTEL_INFO(dev)->gen >= 8) {
3171
		if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
3172
			chv_setup_private_ppat(dev_priv);
3173
		else
3174
			bdw_setup_private_ppat(dev_priv);
3175
 
3176
		return;
3177
	}
3178
 
3179
	if (USES_PPGTT(dev)) {
3180
		list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
3181
			/* TODO: Perhaps it shouldn't be gen6 specific */
3182
 
3183
			struct i915_hw_ppgtt *ppgtt =
3184
					container_of(vm, struct i915_hw_ppgtt,
3185
						     base);
3186
 
3187
			if (i915_is_ggtt(vm))
3188
				ppgtt = dev_priv->mm.aliasing_ppgtt;
3189
 
3190
			gen6_write_page_range(dev_priv, &ppgtt->pd,
3191
					      0, ppgtt->base.total);
3192
		}
3193
	}
3194
 
3195
	i915_ggtt_flush(dev_priv);
3196
}
3197
 
3198
static struct i915_vma *
3199
__i915_gem_vma_create(struct drm_i915_gem_object *obj,
3200
		      struct i915_address_space *vm,
3201
		      const struct i915_ggtt_view *ggtt_view)
3202
{
3203
	struct i915_vma *vma;
3204
 
3205
	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
3206
		return ERR_PTR(-EINVAL);
3207
 
3208
//	vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
3209
	vma = kzalloc(sizeof(*vma), GFP_KERNEL);
5060 serge 3210
	if (vma == NULL)
3211
		return ERR_PTR(-ENOMEM);
3212
 
3213
	INIT_LIST_HEAD(&vma->vma_link);
3214
	INIT_LIST_HEAD(&vma->mm_list);
3215
	INIT_LIST_HEAD(&vma->exec_list);
3216
	vma->vm = vm;
3217
	vma->obj = obj;
3218
 
6084 serge 3219
	if (i915_is_ggtt(vm))
3220
		vma->ggtt_view = *ggtt_view;
5060 serge 3221
 
6084 serge 3222
	list_add_tail(&vma->vma_link, &obj->vma_list);
3223
	if (!i915_is_ggtt(vm))
5354 serge 3224
		i915_ppgtt_get(i915_vm_to_ppgtt(vm));
5060 serge 3225
 
3226
	return vma;
3227
}
3228
 
3229
struct i915_vma *
3230
i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
3231
				  struct i915_address_space *vm)
3232
{
3233
	struct i915_vma *vma;
3234
 
3235
	vma = i915_gem_obj_to_vma(obj, vm);
3236
	if (!vma)
6084 serge 3237
		vma = __i915_gem_vma_create(obj, vm,
3238
					    i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL);
5060 serge 3239
 
3240
	return vma;
3241
}
3242
 
6084 serge 3243
struct i915_vma *
3244
i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
3245
				       const struct i915_ggtt_view *view)
3243 Serge 3246
{
6084 serge 3247
	struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
3248
	struct i915_vma *vma;
3243 Serge 3249
 
6084 serge 3250
	if (WARN_ON(!view))
3251
		return ERR_PTR(-EINVAL);
3243 Serge 3252
 
6084 serge 3253
	vma = i915_gem_obj_to_ggtt_view(obj, view);
3243 Serge 3254
 
6084 serge 3255
	if (IS_ERR(vma))
3256
		return vma;
3243 Serge 3257
 
6084 serge 3258
	if (!vma)
3259
		vma = __i915_gem_vma_create(obj, ggtt, view);
3243 Serge 3260
 
6084 serge 3261
	return vma;
3243 Serge 3262
 
6084 serge 3263
}
3243 Serge 3264
 
6084 serge 3265
static struct scatterlist *
3266
rotate_pages(dma_addr_t *in, unsigned int offset,
3267
	     unsigned int width, unsigned int height,
3268
	     struct sg_table *st, struct scatterlist *sg)
3269
{
3270
	unsigned int column, row;
3271
	unsigned int src_idx;
3243 Serge 3272
 
6084 serge 3273
	if (!sg) {
3274
		st->nents = 0;
3275
		sg = st->sgl;
3276
	}
3243 Serge 3277
 
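	/* Emit the destination list column by column, taking source pages from
	 * the bottom row of each column upwards (src_idx starts at the last
	 * row and steps back by one row width per iteration); the result is
	 * effectively a 90 degree rotation of the width x height page grid.
	 */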
6084 serge 3278
	for (column = 0; column < width; column++) {
3279
		src_idx = width * (height - 1) + column;
3280
		for (row = 0; row < height; row++) {
3281
			st->nents++;
3282
			/* We don't need the pages, but need to initialize
3283
			 * the entries so the sg list can be happily traversed.
3284
			 * The only thing we need are DMA addresses.
3285
			 */
3286
			sg_set_page(sg, NULL, PAGE_SIZE, 0);
3287
			sg_dma_address(sg) = in[offset + src_idx];
3288
			sg_dma_len(sg) = PAGE_SIZE;
3289
			sg = sg_next(sg);
3290
			src_idx -= width;
3291
		}
3292
	}
3293
 
3294
	return sg;
3243 Serge 3295
}
3296
 
6084 serge 3297
static struct sg_table *
3298
intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view,
3299
			  struct drm_i915_gem_object *obj)
3243 Serge 3300
{
6084 serge 3301
	struct intel_rotation_info *rot_info = &ggtt_view->rotation_info;
3302
	unsigned int size_pages = rot_info->size >> PAGE_SHIFT;
3303
	unsigned int size_pages_uv;
3304
	struct sg_page_iter sg_iter;
3305
	unsigned long i;
3306
	dma_addr_t *page_addr_list;
3307
	struct sg_table *st;
3308
	unsigned int uv_start_page;
3309
	struct scatterlist *sg;
3310
	int ret = -ENOMEM;
3243 Serge 3311
 
6084 serge 3312
	/* Allocate a temporary list of source pages for random access. */
3313
	page_addr_list = drm_malloc_ab(obj->base.size / PAGE_SIZE,
3314
				       sizeof(dma_addr_t));
3315
	if (!page_addr_list)
3316
		return ERR_PTR(ret);
3243 Serge 3317
 
6084 serge 3318
	/* Account for UV plane with NV12. */
3319
	if (rot_info->pixel_format == DRM_FORMAT_NV12)
3320
		size_pages_uv = rot_info->size_uv >> PAGE_SHIFT;
3321
	else
3322
		size_pages_uv = 0;
3243 Serge 3323
 
6084 serge 3324
	/* Allocate target SG list. */
3325
	st = kmalloc(sizeof(*st), GFP_KERNEL);
3326
	if (!st)
3327
		goto err_st_alloc;
3243 Serge 3328
 
6084 serge 3329
	ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL);
3330
	if (ret)
3331
		goto err_sg_alloc;
3243 Serge 3332
 
6084 serge 3333
	/* Populate source page list from the object. */
3334
	i = 0;
3335
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
3336
		page_addr_list[i] = sg_page_iter_dma_address(&sg_iter);
3337
		i++;
3338
	}
3243 Serge 3339
 
6084 serge 3340
	/* Rotate the pages. */
3341
	sg = rotate_pages(page_addr_list, 0,
3342
		     rot_info->width_pages, rot_info->height_pages,
3343
		     st, NULL);
3243 Serge 3344
 
6084 serge 3345
	/* Append the UV plane if NV12. */
3346
	if (rot_info->pixel_format == DRM_FORMAT_NV12) {
3347
		uv_start_page = size_pages;
3243 Serge 3348
 
6084 serge 3349
		/* Check for tile-row misalignment. */
3350
		if (offset_in_page(rot_info->uv_offset))
3351
			uv_start_page--;
3243 Serge 3352
 
6084 serge 3353
		rot_info->uv_start_page = uv_start_page;
3243 Serge 3354
 
6084 serge 3355
		rotate_pages(page_addr_list, uv_start_page,
3356
			     rot_info->width_pages_uv,
3357
			     rot_info->height_pages_uv,
3358
			     st, sg);
3359
	}
3243 Serge 3360
 
6084 serge 3361
	DRM_DEBUG_KMS(
3362
		      "Created rotated page mapping for object size %zu (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0)).\n",
3363
		      obj->base.size, rot_info->pitch, rot_info->height,
3364
		      rot_info->pixel_format, rot_info->width_pages,
3365
		      rot_info->height_pages, size_pages + size_pages_uv,
3366
		      size_pages);
3243 Serge 3367
 
6084 serge 3368
	drm_free_large(page_addr_list);
3243 Serge 3369
 
6084 serge 3370
	return st;
3243 Serge 3371
 
6084 serge 3372
err_sg_alloc:
3373
	kfree(st);
3374
err_st_alloc:
3375
	drm_free_large(page_addr_list);
3376
 
3377
	DRM_DEBUG_KMS(
3378
		      "Failed to create rotated mapping for object size %zu! (%d) (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages (%u plane 0))\n",
3379
		      obj->base.size, ret, rot_info->pitch, rot_info->height,
3380
		      rot_info->pixel_format, rot_info->width_pages,
3381
		      rot_info->height_pages, size_pages + size_pages_uv,
3382
		      size_pages);
3383
	return ERR_PTR(ret);
3243 Serge 3384
}
3385
 
6084 serge 3386
static struct sg_table *
3387
intel_partial_pages(const struct i915_ggtt_view *view,
3388
		    struct drm_i915_gem_object *obj)
3243 Serge 3389
{
6084 serge 3390
	struct sg_table *st;
3391
	struct scatterlist *sg;
3392
	struct sg_page_iter obj_sg_iter;
3393
	int ret = -ENOMEM;
3243 Serge 3394
 
6084 serge 3395
	st = kmalloc(sizeof(*st), GFP_KERNEL);
3396
	if (!st)
3397
		goto err_st_alloc;
3746 Serge 3398
 
6084 serge 3399
	ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
3400
	if (ret)
3401
		goto err_sg_alloc;
3746 Serge 3402
 
6084 serge 3403
	sg = st->sgl;
3404
	st->nents = 0;
3405
	for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
3406
		view->params.partial.offset)
3407
	{
3408
		if (st->nents >= view->params.partial.size)
3409
			break;
3410
 
3411
		sg_set_page(sg, NULL, PAGE_SIZE, 0);
3412
		sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
3413
		sg_dma_len(sg) = PAGE_SIZE;
3414
 
3415
		sg = sg_next(sg);
3416
		st->nents++;
3417
	}
3418
 
3419
	return st;
3420
 
3421
err_sg_alloc:
3422
	kfree(st);
3423
err_st_alloc:
3424
	return ERR_PTR(ret);
3746 Serge 3425
}
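/*
 * Minimal sketch (hypothetical helper, not part of this file; exact field
 * types are assumed): a partial view is described by a page offset into the
 * object and a size in pages, which are exactly the two parameters consumed
 * by intel_partial_pages() above.
 */
#if 0
static void example_make_partial_view(struct i915_ggtt_view *view,
				      unsigned long first_page,
				      unsigned int num_pages)
{
	memset(view, 0, sizeof(*view));
	view->type = I915_GGTT_VIEW_PARTIAL;
	view->params.partial.offset = first_page;	/* in pages */
	view->params.partial.size = num_pages;		/* in pages */
}
#endif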
3426
 
6084 serge 3427
static int
3428
i915_get_ggtt_vma_pages(struct i915_vma *vma)
3746 Serge 3429
{
6084 serge 3430
	int ret = 0;
3431
 
3432
	if (vma->ggtt_view.pages)
3433
		return 0;
3434
 
3435
	if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
3436
		vma->ggtt_view.pages = vma->obj->pages;
3437
	else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
3438
		vma->ggtt_view.pages =
3439
			intel_rotate_fb_obj_pages(&vma->ggtt_view, vma->obj);
3440
	else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
3441
		vma->ggtt_view.pages =
3442
			intel_partial_pages(&vma->ggtt_view, vma->obj);
3443
	else
3444
		WARN_ONCE(1, "GGTT view %u not implemented!\n",
3445
			  vma->ggtt_view.type);
3446
 
3447
	if (!vma->ggtt_view.pages) {
3448
		DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
3449
			  vma->ggtt_view.type);
3450
		ret = -EINVAL;
3451
	} else if (IS_ERR(vma->ggtt_view.pages)) {
3452
		ret = PTR_ERR(vma->ggtt_view.pages);
3453
		vma->ggtt_view.pages = NULL;
3454
		DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
3455
			  vma->ggtt_view.type, ret);
3456
	}
3457
 
3458
	return ret;
3746 Serge 3459
}
3460
 
6084 serge 3461
/**
3462
 * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
3463
 * @vma: VMA to map
3464
 * @cache_level: mapping cache level
3465
 * @flags: flags like global or local mapping
3466
 *
3467
 * DMA addresses are taken from the scatter-gather table of this object (or of
3468
 * this VMA in the case of non-default GGTT views), and the PTE entries are set up.
3469
 * Note that DMA addresses are also the only part of the SG table we care about.
3470
 */
3471
int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
3472
		  u32 flags)
3746 Serge 3473
{
6084 serge 3474
	int ret;
3475
	u32 bind_flags;
3746 Serge 3476
 
6084 serge 3477
	if (WARN_ON(flags == 0))
3478
		return -EINVAL;
3746 Serge 3479
 
6084 serge 3480
	bind_flags = 0;
3481
	if (flags & PIN_GLOBAL)
3482
		bind_flags |= GLOBAL_BIND;
3483
	if (flags & PIN_USER)
3484
		bind_flags |= LOCAL_BIND;
3746 Serge 3485
 
6084 serge 3486
	if (flags & PIN_UPDATE)
3487
		bind_flags |= vma->bound;
3488
	else
3489
		bind_flags &= ~vma->bound;
3490
 
3491
	if (bind_flags == 0)
3492
		return 0;
3493
 
3494
	if (vma->bound == 0 && vma->vm->allocate_va_range) {
3495
		trace_i915_va_alloc(vma->vm,
3496
				    vma->node.start,
3497
				    vma->node.size,
3498
				    VM_TO_TRACE_NAME(vma->vm));
3499
 
3500
		/* XXX: i915_vma_pin() will fix this +- hack */
3501
		vma->pin_count++;
3502
		ret = vma->vm->allocate_va_range(vma->vm,
3503
						 vma->node.start,
3504
						 vma->node.size);
3505
		vma->pin_count--;
3506
		if (ret)
3507
			return ret;
3508
	}
3509
 
3510
	ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
3511
	if (ret)
3512
		return ret;
3513
 
3514
	vma->bound |= bind_flags;
3515
 
3516
	return 0;
3746 Serge 3517
}
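/*
 * Illustrative sketch (hypothetical helper, not used by the driver) of the
 * bind_flags arithmetic above: with flags = PIN_GLOBAL | PIN_USER and
 * vma->bound == GLOBAL_BIND, omitting PIN_UPDATE masks out the existing
 * GLOBAL_BIND so only LOCAL_BIND is bound, while PIN_UPDATE keeps it and
 * rewrites the global binding as well.
 */
#if 0
static u32 example_compute_bind_flags(u32 flags, u32 bound)
{
	u32 bind_flags = 0;

	if (flags & PIN_GLOBAL)
		bind_flags |= GLOBAL_BIND;
	if (flags & PIN_USER)
		bind_flags |= LOCAL_BIND;

	if (flags & PIN_UPDATE)
		bind_flags |= bound;	/* also rewrite existing bindings */
	else
		bind_flags &= ~bound;	/* skip already-present bindings */

	return bind_flags;
}
#endif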
3518
 
6084 serge 3519
/**
3520
 * i915_ggtt_view_size - Get the size of a GGTT view.
3521
 * @obj: Object the view is of.
3522
 * @view: The view in question.
3523
 *
3524
 * @return The size of the GGTT view in bytes.
3525
 */
3526
size_t
3527
i915_ggtt_view_size(struct drm_i915_gem_object *obj,
3528
		    const struct i915_ggtt_view *view)
3529
{
3530
	if (view->type == I915_GGTT_VIEW_NORMAL) {
3531
		return obj->base.size;
3532
	} else if (view->type == I915_GGTT_VIEW_ROTATED) {
3533
		return view->rotation_info.size;
3534
	} else if (view->type == I915_GGTT_VIEW_PARTIAL) {
3535
		return view->params.partial.size << PAGE_SHIFT;
3536
	} else {
3537
		WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type);
3538
		return obj->base.size;
3539
	}
3540
}
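/*
 * Hypothetical caller sketch (illustrative only): i915_ggtt_view_size()
 * reports how much GGTT address space a view needs before binding, e.g. a
 * 16-page partial view is reported as 16 << PAGE_SHIFT bytes.
 */
#if 0
static size_t example_view_footprint(struct drm_i915_gem_object *obj,
				     const struct i915_ggtt_view *view)
{
	return i915_ggtt_view_size(obj, view);
}
#endif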