Subversion Repositories: KolibriOS


Rev 4539 → Rev 4560 (unified diff: lines marked '-' exist only in Rev 4539, lines marked '+' only in Rev 4560, unmarked lines are common context; each "Line X... Line Y..." header gives the old/new line numbers where a hunk starts)
Line 35... Line 35...
 #include "i915_trace.h"
 #include "intel_drv.h"
 
 #define GEN6_PPGTT_PD_ENTRIES 512
 #define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t))
+typedef uint64_t gen8_gtt_pte_t;
+typedef gen8_gtt_pte_t gen8_ppgtt_pde_t;
 
 /* PPGTT stuff */
Line 60... Line 62...
 #define HSW_CACHEABILITY_CONTROL(bits)	((((bits) & 0x7) << 1) | \
 					 (((bits) & 0x8) << (11 - 3)))
 #define HSW_WB_LLC_AGE3			HSW_CACHEABILITY_CONTROL(0x2)
 #define HSW_WB_LLC_AGE0			HSW_CACHEABILITY_CONTROL(0x3)
 #define HSW_WB_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0xb)
+#define HSW_WB_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x8)
 #define HSW_WT_ELLC_LLC_AGE0		HSW_CACHEABILITY_CONTROL(0x6)
+#define HSW_WT_ELLC_LLC_AGE3		HSW_CACHEABILITY_CONTROL(0x7)
+
+#define GEN8_PTES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_gtt_pte_t))
+#define GEN8_PDES_PER_PAGE		(PAGE_SIZE / sizeof(gen8_ppgtt_pde_t))
+#define GEN8_LEGACY_PDPS		4
+
+#define PPAT_UNCACHED_INDEX		(_PAGE_PWT | _PAGE_PCD)
+#define PPAT_CACHED_PDE_INDEX		0 /* WB LLC */
+#define PPAT_CACHED_INDEX		_PAGE_PAT /* WB LLCeLLC */
+#define PPAT_DISPLAY_ELLC_INDEX		_PAGE_PCD /* WT eLLC */
+
+static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
+					     enum i915_cache_level level,
+					     bool valid)
+{
+    gen8_gtt_pte_t pte = valid ? 1 | 2 : 0;
+	pte |= addr;
+	if (level != I915_CACHE_NONE)
+		pte |= PPAT_CACHED_INDEX;
+	else
+		pte |= PPAT_UNCACHED_INDEX;
+	return pte;
+}
+
+static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev,
+					     dma_addr_t addr,
+					     enum i915_cache_level level)
+{
+	gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
+	pde |= addr;
+	if (level != I915_CACHE_NONE)
+		pde |= PPAT_CACHED_PDE_INDEX;
+	else
+		pde |= PPAT_UNCACHED_INDEX;
+	return pde;
+}
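Note: the following worked example is my own illustration, not part of the diff. It shows the bit layout gen8_pte_encode() produces, using flag values that mirror the x86 page-table bits the macros above reference (_PAGE_PRESENT = 0x1, _PAGE_RW = 0x2, _PAGE_PAT = 0x80):

#include <stdint.h>
#include <stdio.h>

#define EX_PAGE_PRESENT 0x01ull
#define EX_PAGE_RW      0x02ull
#define EX_PAGE_PAT     0x80ull	/* PPAT_CACHED_INDEX in the code above */

int main(void)
{
	uint64_t addr = 0x12345000ull;	/* page-aligned DMA address */
	/* the "1 | 2" in gen8_pte_encode() is present + writable */
	uint64_t pte = (EX_PAGE_PRESENT | EX_PAGE_RW) | addr | EX_PAGE_PAT;
	printf("pte = %#llx\n", (unsigned long long)pte);	/* 0x12345083 */
	return 0;
}

The page address occupies bits 12 and up; the low twelve bits carry the valid/RW flags and the PPAT index.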
Line 66... Line 105...
 
 static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
 				     enum i915_cache_level level,
 				     bool valid)
Line 153... Line 192...
 
 	switch (level) {
 	case I915_CACHE_NONE:
 		break;
 	case I915_CACHE_WT:
-		pte |= HSW_WT_ELLC_LLC_AGE0;
+		pte |= HSW_WT_ELLC_LLC_AGE3;
 		break;
 	default:
-		pte |= HSW_WB_ELLC_LLC_AGE0;
+		pte |= HSW_WB_ELLC_LLC_AGE3;
 		break;
 	}
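Note: the hunk above moves Haswell's write-through and write-back PTEs from age-0 to age-3 cacheability indices. As a quick check (mine, not from the diff), HSW_CACHEABILITY_CONTROL() spreads the 4-bit index so its low three bits land in PTE bits 3:1 and its high bit in PTE bit 11:

#include <assert.h>

#define HSW_CACHEABILITY_CONTROL(bits) ((((bits) & 0x7) << 1) | \
					(((bits) & 0x8) << (11 - 3)))

int main(void)
{
	assert(HSW_CACHEABILITY_CONTROL(0x7) == 0x00e);	/* WT eLLC/LLC, age 3 */
	assert(HSW_CACHEABILITY_CONTROL(0x8) == 0x800);	/* WB eLLC/LLC, age 3 */
	assert(HSW_CACHEABILITY_CONTROL(0xb) == 0x806);	/* WB eLLC/LLC, age 0 */
	return 0;
}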
 
 	return pte;
 }
+
+/* Broadwell Page Directory Pointer Descriptors */
+static int gen8_write_pdp(struct intel_ring_buffer *ring, unsigned entry,
+			   uint64_t val)
+{
+	int ret;
+
+	BUG_ON(entry >= 4);
+
+	ret = intel_ring_begin(ring, 6);
+	if (ret)
+		return ret;
+
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
+	intel_ring_emit(ring, (u32)(val >> 32));
+	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+	intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
+	intel_ring_emit(ring, (u32)(val));
+	intel_ring_advance(ring);
+
+	return 0;
+}
+
+static int gen8_ppgtt_enable(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_ring_buffer *ring;
+	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;
+	int i, j, ret;
+
+	/* bit of a hack to find the actual last used pd */
+	int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE;
+
+	for_each_ring(ring, dev_priv, j) {
+		I915_WRITE(RING_MODE_GEN7(ring),
+			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
+	}
+
+	for (i = used_pd - 1; i >= 0; i--) {
+		dma_addr_t addr = ppgtt->pd_dma_addr[i];
+		for_each_ring(ring, dev_priv, j) {
+			ret = gen8_write_pdp(ring, i, addr);
+			if (ret)
+				goto err_out;
+		}
+	}
+	return 0;
+
+err_out:
+	for_each_ring(ring, dev_priv, j)
+		I915_WRITE(RING_MODE_GEN7(ring),
+			   _MASKED_BIT_DISABLE(GFX_PPGTT_ENABLE));
+	return ret;
+}
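Note (annotation, not part of the diff): gen8_ppgtt_enable() walks the page directories last-to-first and programs each ring's PDP registers through gen8_write_pdp(), which reserves six dwords and emits the 64-bit directory address as two MI_LOAD_REGISTER_IMM writes, upper half first:

/* Sketch of the six dwords gen8_write_pdp() places on the ring
 * for PDP entry n and 64-bit value val:
 *   [0] MI_LOAD_REGISTER_IMM(1)       command header
 *   [1] GEN8_RING_PDP_UDW(ring, n)    register offset, upper dword
 *   [2] (u32)(val >> 32)              high 32 bits of the address
 *   [3] MI_LOAD_REGISTER_IMM(1)       command header
 *   [4] GEN8_RING_PDP_LDW(ring, n)    register offset, lower dword
 *   [5] (u32)val                      low 32 bits of the address
 */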
+
+static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
+				   unsigned first_entry,
+				   unsigned num_entries,
+				   bool use_scratch)
+{
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(vm, struct i915_hw_ppgtt, base);
+	gen8_gtt_pte_t *pt_vaddr, scratch_pte;
+	unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
+	unsigned first_pte = first_entry % GEN8_PTES_PER_PAGE;
+	unsigned last_pte, i;
+
+    pt_vaddr = (gen8_gtt_pte_t*)AllocKernelSpace(4096);
+    if(pt_vaddr == NULL)
+        return;
+
+    scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr,
+                      I915_CACHE_LLC, use_scratch);
+
+	while (num_entries) {
+		struct page *page_table = &ppgtt->gen8_pt_pages[act_pt];
+
+		last_pte = first_pte + num_entries;
+		if (last_pte > GEN8_PTES_PER_PAGE)
+			last_pte = GEN8_PTES_PER_PAGE;
+
+        MapPage(pt_vaddr,(addr_t)(ppgtt->pt_pages[act_pt]), 3);
+
+		for (i = first_pte; i < last_pte; i++)
+			pt_vaddr[i] = scratch_pte;
+
+		num_entries -= last_pte - first_pte;
+		first_pte = 0;
+		act_pt++;
+	}
+    FreeKernelSpace(pt_vaddr);
+}
+
+static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
+				      struct sg_table *pages,
+				      unsigned first_entry,
+				      enum i915_cache_level cache_level)
+{
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(vm, struct i915_hw_ppgtt, base);
+	gen8_gtt_pte_t *pt_vaddr;
+	unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE;
+	unsigned act_pte = first_entry % GEN8_PTES_PER_PAGE;
+	struct sg_page_iter sg_iter;
+
+    pt_vaddr = AllocKernelSpace(4096);
+    if(pt_vaddr == NULL)
+        return;
+
+    MapPage(pt_vaddr,(addr_t)(ppgtt->pt_pages[act_pt]), 3);
+
+	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
+
+		pt_vaddr[act_pte] =
+			gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
+					cache_level, true);
+		if (++act_pte == GEN8_PTES_PER_PAGE) {
+			act_pt++;
+            MapPage(pt_vaddr,(addr_t)(ppgtt->pt_pages[act_pt]), 3);
+			act_pte = 0;
+		}
+	}
+    FreeKernelSpace(pt_vaddr);
+}
+
+static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
+{
+	struct i915_hw_ppgtt *ppgtt =
+		container_of(vm, struct i915_hw_ppgtt, base);
+	int i, j;
+
+	drm_mm_takedown(&vm->mm);
+
+	for (i = 0; i < ppgtt->num_pd_pages ; i++) {
+		if (ppgtt->pd_dma_addr[i]) {
+			pci_unmap_page(ppgtt->base.dev->pdev,
+				       ppgtt->pd_dma_addr[i],
+				       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+
+			for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
+				dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
+				if (addr)
+					pci_unmap_page(ppgtt->base.dev->pdev,
+						       addr,
+						       PAGE_SIZE,
+						       PCI_DMA_BIDIRECTIONAL);
+
+			}
+		}
+		kfree(ppgtt->gen8_pt_dma_addr[i]);
+	}
+
+//   __free_pages(ppgtt->gen8_pt_pages, get_order(ppgtt->num_pt_pages << PAGE_SHIFT));
+//   __free_pages(ppgtt->pd_pages, get_order(ppgtt->num_pd_pages << PAGE_SHIFT));
+}
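Note (illustration, mine): both gen8_ppgtt_clear_range() and gen8_ppgtt_insert_entries() above locate a PTE by splitting the linear entry index with GEN8_PTES_PER_PAGE, i.e. 4096 / 8 = 512 entries per page-table page:

#include <assert.h>
#include <stdint.h>

#define EX_PAGE_SIZE 4096u
#define EX_GEN8_PTES_PER_PAGE (EX_PAGE_SIZE / sizeof(uint64_t))	/* 512 */

int main(void)
{
	unsigned first_entry = 1000;
	unsigned act_pt  = first_entry / EX_GEN8_PTES_PER_PAGE;	/* table page 1 */
	unsigned act_pte = first_entry % EX_GEN8_PTES_PER_PAGE;	/* slot 488 */
	assert(act_pt == 1 && act_pte == 488);
	return 0;
}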
+
+/**
+ * GEN8 legacy ppgtt programming is accomplished through 4 PDP registers with a
+ * net effect resembling a 2-level page table in normal x86 terms. Each PDP
+ * represents 1GB of memory
+ * 4 * 512 * 512 * 4096 = 4GB legacy 32b address space.
+ *
+ * TODO: Do something with the size parameter
+ **/
+static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size)
+{
+	struct page *pt_pages;
+	int i, j, ret = -ENOMEM;
+	const int max_pdp = DIV_ROUND_UP(size, 1 << 30);
+	const int num_pt_pages = GEN8_PDES_PER_PAGE * max_pdp;
+
+	if (size % (1<<30))
+		DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size);
+
+	/* FIXME: split allocation into smaller pieces. For now we only ever do
+	 * this once, but with full PPGTT, the multiple contiguous allocations
+	 * will be bad.
+	 */
+    ppgtt->pd_pages = AllocPages(max_pdp);
+	if (!ppgtt->pd_pages)
+		return -ENOMEM;
+
+    pt_pages = AllocPages(num_pt_pages);
+	if (!pt_pages) {
+//       __free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT));
+		return -ENOMEM;
+	}
+
+	ppgtt->gen8_pt_pages = pt_pages;
+    ppgtt->num_pd_pages = max_pdp;
+    ppgtt->num_pt_pages = num_pt_pages;
+	ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE;
+	ppgtt->enable = gen8_ppgtt_enable;
+	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
+	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
+	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
+	ppgtt->base.start = 0;
+	ppgtt->base.total = ppgtt->num_pt_pages * GEN8_PTES_PER_PAGE * PAGE_SIZE;
+
+	BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS);
+
+	/*
+	 * - Create a mapping for the page directories.
+	 * - For each page directory:
+	 *      allocate space for page table mappings.
+	 *      map each page table
+	 */
+	for (i = 0; i < max_pdp; i++) {
+		dma_addr_t temp;
+		temp = pci_map_page(ppgtt->base.dev->pdev,
+				    &ppgtt->pd_pages[i], 0,
+				    PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+
+		ppgtt->pd_dma_addr[i] = temp;
+
+		ppgtt->gen8_pt_dma_addr[i] = kmalloc(sizeof(dma_addr_t) * GEN8_PDES_PER_PAGE, GFP_KERNEL);
+		if (!ppgtt->gen8_pt_dma_addr[i])
+			goto err_out;
+
+		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
+			struct page *p = &pt_pages[i * GEN8_PDES_PER_PAGE + j];
+			temp = pci_map_page(ppgtt->base.dev->pdev,
+					    p, 0, PAGE_SIZE,
+					    PCI_DMA_BIDIRECTIONAL);
+
+            ppgtt->gen8_pt_dma_addr[i][j] = temp;
+		}
+	}
+
+	/* For now, the PPGTT helper functions all require that the PDEs are
+	 * plugged in correctly. So we do that now/here. For aliasing PPGTT, we
+	 * will never need to touch the PDEs again */
+
+    gen8_ppgtt_pde_t *pd_vaddr;
+    pd_vaddr = AllocKernelSpace(4096);
+
+    for (i = 0; i < max_pdp; i++) {
+        MapPage(pd_vaddr,(addr_t)(ppgtt->pd_pages[i]), 3);
+		for (j = 0; j < GEN8_PDES_PER_PAGE; j++) {
+			dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j];
+			pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr,
+						      I915_CACHE_LLC);
+		}
+	}
+    FreeKernelSpace(pd_vaddr);
+
+	ppgtt->base.clear_range(&ppgtt->base, 0,
+				ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE,
+				true);
+
+	DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n",
+			 ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp);
+	DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n",
+			 ppgtt->num_pt_pages,
+			 (ppgtt->num_pt_pages - num_pt_pages) +
+			 size % (1<<30));
+	return 0;
+
+err_out:
+	ppgtt->base.cleanup(&ppgtt->base);
+	return ret;
+}
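Note: the "4 * 512 * 512 * 4096 = 4GB" claim in the comment above follows directly from the table geometry (worked arithmetic, mine):

4 PDPs x 512 PDEs/PDP x 512 PTEs/PDE x 4096 B/page
  = 2^2 x 2^9 x 2^9 x 2^12 B = 2^32 B = 4 GiB

and it matches ppgtt->base.total for size = 4 GiB: max_pdp = 4, num_pt_pages = 4 * 512 = 2048, and 2048 * GEN8_PTES_PER_PAGE * PAGE_SIZE = 2048 * 512 * 4096 = 2^32.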
 
 static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
 {
Line 302... Line 604...
     if(pt_vaddr == NULL)
         return;
 
     MapPage(pt_vaddr,(addr_t)(ppgtt->pt_pages[act_pt]), 3);
 	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
-		dma_addr_t page_addr;
-
-		page_addr = sg_page_iter_dma_address(&sg_iter);
-		pt_vaddr[act_pte] = vm->pte_encode(page_addr, cache_level, true);
+
+		pt_vaddr[act_pte] =
+			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
+				       cache_level, true);
 		if (++act_pte == I915_PPGTT_PT_ENTRIES) {
 			act_pt++;
Line 358... Line 660...
 	ppgtt->enable = gen6_ppgtt_enable;
 	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
 	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
 	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
 	ppgtt->base.scratch = dev_priv->gtt.base.scratch;
+	ppgtt->base.start = 0;
+	ppgtt->base.total = GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * PAGE_SIZE;
-	ppgtt->pt_pages = kzalloc(sizeof(struct page *)*ppgtt->num_pd_entries,
+	ppgtt->pt_pages = kcalloc(ppgtt->num_pd_entries, sizeof(struct page *),
 				  GFP_KERNEL);
 	if (!ppgtt->pt_pages)
 		return -ENOMEM;
 
 	for (i = 0; i < ppgtt->num_pd_entries; i++) {
 		ppgtt->pt_pages[i] = alloc_page(GFP_KERNEL);
 		if (!ppgtt->pt_pages[i])
 			goto err_pt_alloc;
 	}
 
-	ppgtt->pt_dma_addr = kzalloc(sizeof(dma_addr_t) *ppgtt->num_pd_entries,
+	ppgtt->pt_dma_addr = kcalloc(ppgtt->num_pd_entries, sizeof(dma_addr_t),
 					     GFP_KERNEL);
     if (!ppgtt->pt_dma_addr)
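Note (arithmetic mine, not from the diff): the newly added ppgtt->base.total works out to 2 GiB for the gen6 aliasing PPGTT, assuming the 4-byte gen6_gtt_pte_t so that I915_PPGTT_PT_ENTRIES = 4096 / 4 = 1024:

GEN6_PPGTT_PD_ENTRIES * I915_PPGTT_PT_ENTRIES * PAGE_SIZE
  = 512 * 1024 * 4096 B = 2^9 * 2^10 * 2^12 B = 2^31 B = 2 GiB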
Line 421... Line 725...
 
 	ppgtt->base.dev = dev;
 
 	if (INTEL_INFO(dev)->gen < 8)
 	ret = gen6_ppgtt_init(ppgtt);
+	else if (IS_GEN8(dev))
+		ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total);
 	else
 		BUG();
Line 584... Line 890...
 		return -ENOSPC;
 
 	return 0;
 }
+
+static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
+{
+#ifdef writeq
+	writeq(pte, addr);
+#else
+	iowrite32((u32)pte, addr);
+	iowrite32(pte >> 32, addr + 4);
+#endif
+}
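Note (annotation, mine): when no native 64-bit MMIO write is available, gen8_set_pte() above falls back to two 32-bit writes, low dword at the PTE's address and high dword at offset 4 (little-endian layout). For example:

/* For pte = 0x0000000112345083 the #else path performs:
 *   iowrite32(0x12345083, addr);      low 32 bits
 *   iowrite32(0x00000001, addr + 4);  high 32 bits
 */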
+
+static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
+				     struct sg_table *st,
+				     unsigned int first_entry,
+				     enum i915_cache_level level)
+{
+	struct drm_i915_private *dev_priv = vm->dev->dev_private;
+	gen8_gtt_pte_t __iomem *gtt_entries =
+		(gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
+	int i = 0;
+	struct sg_page_iter sg_iter;
+	dma_addr_t addr;
+
+	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
+		addr = sg_dma_address(sg_iter.sg) +
+			(sg_iter.sg_pgoffset << PAGE_SHIFT);
+		gen8_set_pte(&gtt_entries[i],
+			     gen8_pte_encode(addr, level, true));
+		i++;
+	}
+
+	/*
+	 * XXX: This serves as a posting read to make sure that the PTE has
+	 * actually been updated. There is some concern that even though
+	 * registers and PTEs are within the same BAR that they are potentially
+	 * of NUMA access patterns. Therefore, even with the way we assume
+	 * hardware should work, we must keep this posting read for paranoia.
+	 */
+	if (i != 0)
+		WARN_ON(readq(&gtt_entries[i-1])
+			!= gen8_pte_encode(addr, level, true));
+
+	/* This next bit makes the above posting read even more important. We
+	 * want to flush the TLBs only after we're certain all the PTE updates
+	 * have finished.
+	 */
+	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+	POSTING_READ(GFX_FLSH_CNTL_GEN6);
+}
 
 /*
  * Binds an object into the global gtt with the specified cache level. The object
  * will be accessible to the GPU via commands whose operands reference offsets
Line 626... Line 981...
 	 */
 	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
 	POSTING_READ(GFX_FLSH_CNTL_GEN6);
 }
+
+static void gen8_ggtt_clear_range(struct i915_address_space *vm,
+				  unsigned int first_entry,
+				  unsigned int num_entries,
+				  bool use_scratch)
+{
+	struct drm_i915_private *dev_priv = vm->dev->dev_private;
+	gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
+		(gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
+	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
+	int i;
+
+	if (WARN(num_entries > max_entries,
+		 "First entry = %d; Num entries = %d (max=%d)\n",
+		 first_entry, num_entries, max_entries))
+		num_entries = max_entries;
+
+	scratch_pte = gen8_pte_encode(vm->scratch.addr,
+				      I915_CACHE_LLC,
+				      use_scratch);
+	for (i = 0; i < num_entries; i++)
+		gen8_set_pte(&gtt_base[i], scratch_pte);
+	readl(gtt_base);
+}
 
 static void gen6_ggtt_clear_range(struct i915_address_space *vm,
 				  unsigned int first_entry,
 				  unsigned int num_entries,
 				  bool use_scratch)
Line 649... Line 1028...
 	for (i = 0; i < num_entries; i++)
 		iowrite32(scratch_pte, &gtt_base[i]);
 	readl(gtt_base);
 }
-
 
 static void i915_ggtt_insert_entries(struct i915_address_space *vm,
 				     struct sg_table *st,
 				     unsigned int pg_start,
 				     enum i915_cache_level cache_level)
Line 731... Line 1109...
 				  node_list);
 		if (node->allocated && node->color != color)
 			*end -= 4096;
 	}
 }
+
 void i915_gem_setup_global_gtt(struct drm_device *dev,
 			      unsigned long start,
 			      unsigned long mappable_end,
 			      unsigned long end)
 {
Line 770... Line 1149...
 		WARN_ON(i915_gem_obj_ggtt_bound(obj));
 		ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
 		if (ret)
 			DRM_DEBUG_KMS("Reservation failed\n");
 		obj->has_global_gtt_mapping = 1;
-		list_add(&vma->vma_link, &obj->vma_list);
 	}
 
 	dev_priv->gtt.base.start = start;
Line 828... Line 1206...
 		if (!ret)
 			return;
 
 		DRM_ERROR("Aliased PPGTT setup failed %d\n", ret);
 		drm_mm_takedown(&dev_priv->gtt.base.mm);
-		gtt_size += GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE;
+		if (INTEL_INFO(dev)->gen < 8)
+			gtt_size += GEN6_PPGTT_PD_ENTRIES*PAGE_SIZE;
 	}
 	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
Line 878... Line 1257...
 	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
 	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
 	return snb_gmch_ctl << 20;
 }
+
+static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
+{
+	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
+	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
+	if (bdw_gmch_ctl)
+		bdw_gmch_ctl = 1 << bdw_gmch_ctl;
+	if (bdw_gmch_ctl > 4) {
+		WARN_ON(!i915_preliminary_hw_support);
+		return 4<<20;
+	}
+
+	return bdw_gmch_ctl << 20;
+}
 
 static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
 {
 	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
 	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
 	return snb_gmch_ctl << 25; /* 32 MB units */
 }
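Note (worked example, mine): unlike the linear gen6 decode above it, gen8_get_total_gtt_size() treats the GGMS field as a power of two in MiB. For a field value of 3:

/* GGMS = 3  ->  1 << 3 = 8  ->  gtt_size = 8 << 20 = 8 MiB of PTEs.
 * 8 MiB / 8 bytes per gen8_gtt_pte_t = 1M entries, and
 * (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT = 1M * 4 KiB = 4 GiB
 * of mappable GGTT space, as computed in gen8_gmch_probe() below.
 */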
+
+static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
+{
+	bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
+	bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
+	return bdw_gmch_ctl << 25; /* 32 MB units */
+}
+
+static int ggtt_probe_common(struct drm_device *dev,
+			     size_t gtt_size)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	phys_addr_t gtt_phys_addr;
+	int ret;
+
+	/* For Modern GENs the PTEs and register space are split in the BAR */
+	gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
+		(pci_resource_len(dev->pdev, 0) / 2);
+
+	dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
+	if (!dev_priv->gtt.gsm) {
+		DRM_ERROR("Failed to map the gtt page table\n");
+		return -ENOMEM;
+	}
+
+	ret = setup_scratch_page(dev);
+	if (ret) {
+		DRM_ERROR("Scratch setup failed\n");
+		/* iounmap will also get called at remove, but meh */
+		iounmap(dev_priv->gtt.gsm);
+	}
+
+	return ret;
+}
+
+/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
+ * bits. When using advanced contexts each context stores its own PAT, but
+ * writing this data shouldn't be harmful even in those cases. */
+static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv)
+{
+#define GEN8_PPAT_UC		(0<<0)
+#define GEN8_PPAT_WC		(1<<0)
+#define GEN8_PPAT_WT		(2<<0)
+#define GEN8_PPAT_WB		(3<<0)
+#define GEN8_PPAT_ELLC_OVERRIDE	(0<<2)
+/* FIXME(BDW): Bspec is completely confused about cache control bits. */
+#define GEN8_PPAT_LLC		(1<<2)
+#define GEN8_PPAT_LLCELLC	(2<<2)
+#define GEN8_PPAT_LLCeLLC	(3<<2)
+#define GEN8_PPAT_AGE(x)	(x<<4)
+#define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8))
+	uint64_t pat;
+
+	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
+	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
+	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
+	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
+	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
+	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
+	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
+	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
+
+	/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
+	 * write would work. */
+	I915_WRITE(GEN8_PRIVATE_PAT, pat);
+	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
+}
+
+static int gen8_gmch_probe(struct drm_device *dev,
+			   size_t *gtt_total,
+			   size_t *stolen,
+			   phys_addr_t *mappable_base,
+			   unsigned long *mappable_end)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	unsigned int gtt_size;
+	u16 snb_gmch_ctl;
+	int ret;
+
+	/* TODO: We're not aware of mappable constraints on gen8 yet */
+	*mappable_base = pci_resource_start(dev->pdev, 2);
+	*mappable_end = pci_resource_len(dev->pdev, 2);
+
+	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
+		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));
+
+	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
+
+	*stolen = gen8_get_stolen_size(snb_gmch_ctl);
+
+	gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
+	*gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT;
+
+	gen8_setup_private_ppat(dev_priv);
+
+	ret = ggtt_probe_common(dev, gtt_size);
+
+	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
+	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
+
+	return ret;
+}
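Note (illustration, mine): GEN8_PPAT(i, x) in gen8_setup_private_ppat() above packs one 8-bit attribute per PAT entry into a single 64-bit word, which is then written as two 32-bit halves:

/* Entry 0 (normal objects): GEN8_PPAT_WB | GEN8_PPAT_LLC = 0x3 | (1 << 2) = 0x07,
 * contributing 0x07ULL << (0 * 8) to the PAT word; entry 3 (uncached,
 * mostly scanout) contributes 0x00 at byte 3. The low dword goes to
 * GEN8_PRIVATE_PAT and the high dword to GEN8_PRIVATE_PAT + 4.
 */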
 
 static int gen6_gmch_probe(struct drm_device *dev,
 			   size_t *gtt_total,
 			   size_t *stolen,
 			   phys_addr_t *mappable_base,
 			   unsigned long *mappable_end)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	phys_addr_t gtt_bus_addr;
 	unsigned int gtt_size;
 	u16 snb_gmch_ctl;
Line 912... Line 1406...
 		}
 
 	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
 		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
 	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
-	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
 
 	*stolen = gen6_get_stolen_size(snb_gmch_ctl);
-	*gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;
-
-	/* For Modern GENs the PTEs and register space are split in the BAR */
-	gtt_bus_addr = pci_resource_start(dev->pdev, 0) +
-		(pci_resource_len(dev->pdev, 0) / 2);
-
-	dev_priv->gtt.gsm = ioremap_wc(gtt_bus_addr, gtt_size);
-	if (!dev_priv->gtt.gsm) {
-		DRM_ERROR("Failed to map the gtt page table\n");
-		return -ENOMEM;
-	}
-
-	ret = setup_scratch_page(dev);
-	if (ret)
+
+	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
+	*gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT;
+
Line 966... Line 1449...
 
 	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
 	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
 	dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
+
+	if (unlikely(dev_priv->gtt.do_idle_maps))
+		DRM_INFO("applying Ironlake quirks for intel_iommu\n");
 
 	return 0;
 }
Line 983... Line 1469...
 	int ret;
 
 	if (INTEL_INFO(dev)->gen <= 5) {
 		gtt->gtt_probe = i915_gmch_probe;
 		gtt->base.cleanup = i915_gmch_remove;
-	} else {
+	} else if (INTEL_INFO(dev)->gen < 8) {
 		gtt->gtt_probe = gen6_gmch_probe;
 		gtt->base.cleanup = gen6_gmch_remove;
 		if (IS_HASWELL(dev) && dev_priv->ellc_size)
 			gtt->base.pte_encode = iris_pte_encode;
Line 996... Line 1482...
 			gtt->base.pte_encode = byt_pte_encode;
 		else if (INTEL_INFO(dev)->gen >= 7)
 			gtt->base.pte_encode = ivb_pte_encode;
 		else
 			gtt->base.pte_encode = snb_pte_encode;
+	} else {
+		dev_priv->gtt.gtt_probe = gen8_gmch_probe;
+		dev_priv->gtt.base.cleanup = gen6_gmch_remove;
 	}
 
 	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
 			     &gtt->mappable_base, &gtt->mappable_end);