Subversion Repositories Kolibri OS


Diff of i915_gem_tiling.c between Rev 5354 (lines marked "-") and Rev 6084 (lines marked "+"); unmarked lines are common to both revisions.
Line 29... Line 29...
 #include
 #include
 #include
 #include "i915_drv.h"
Line 33... Line 33...
 
-/** @file i915_gem_tiling.c
- *
- * Support for managing tiling state of buffer objects.
- *
- * The idea behind tiling is to increase cache hit rates by rearranging
- * pixel data so that a group of pixel accesses are in the same cacheline.
- * Performance improvements from doing this on the back/depth buffer are on
- * the order of 30%.
- *
- * Intel architectures make this somewhat more complicated, though, by
- * adjustments made to addressing of data when the memory is in interleaved
- * mode (matched pairs of DIMMS) to improve memory bandwidth.
- * For interleaved memory, the CPU sends every sequential 64 bytes
- * to an alternate memory channel so it can get the bandwidth from both.
- *
- * The GPU also rearranges its accesses for increased bandwidth to interleaved
- * memory, and it matches what the CPU does for non-tiled.  However, when tiled
- * it does it a little differently, since one walks addresses not just in the
- * X direction but also Y.  So, along with alternating channels when bit
- * 6 of the address flips, it also alternates when other bits flip -- bits 9
- * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines)
- * are common to both the 915 and 965-class hardware.
- *
- * The CPU also sometimes XORs in higher bits as well, to improve
- * bandwidth doing strided access like we do so frequently in graphics.  This
- * is called "Channel XOR Randomization" in the MCH documentation.  The result
- * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address
- * decode.
- *
- * All of this bit 6 XORing has an effect on our memory management,
- * as we need to make sure that the 3d driver can correctly address object
- * contents.
- *
- * If we don't have interleaved memory, all tiling is safe and no swizzling is
- * required.
- *
- * When bit 17 is XORed in, we simply refuse to tile at all.  Bit
- * 17 is not just a page offset, so as we page an object out and back in,
- * individual pages in it will have different bit 17 addresses, resulting in
- * each 64 bytes being swapped with its neighbor!
- *
- * Otherwise, if interleaved, we have to tell the 3d driver what the address
- * swizzling it needs to do is, since it's writing with the CPU to the pages
- * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the
- * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling
- * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order
- * to match what the GPU expects.
- */
-
-/**
- * Detects bit 6 swizzling of address lookup between IGD access and CPU
- * access through main memory.
- */
-void
-i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
-{
-	struct drm_i915_private *dev_priv = dev->dev_private;
-	uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
-	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
-
-	if (INTEL_INFO(dev)->gen >= 8 || IS_VALLEYVIEW(dev)) {
-		/*
-		 * On BDW+, swizzling is not used. We leave the CPU memory
-		 * controller in charge of optimizing memory accesses without
-		 * the extra address manipulation GPU side.
-		 *
-		 * VLV and CHV don't have GPU swizzling.
-		 */
-		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-	} else if (INTEL_INFO(dev)->gen >= 6) {
-		if (dev_priv->preserve_bios_swizzle) {
-			if (I915_READ(DISP_ARB_CTL) &
-			    DISP_TILE_SURFACE_SWIZZLING) {
-				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
-				swizzle_y = I915_BIT_6_SWIZZLE_9;
-			} else {
-				swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-				swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-			}
-		} else {
-			uint32_t dimm_c0, dimm_c1;
-
-			dimm_c0 = I915_READ(MAD_DIMM_C0);
-			dimm_c1 = I915_READ(MAD_DIMM_C1);
-			dimm_c0 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
-			dimm_c1 &= MAD_DIMM_A_SIZE_MASK | MAD_DIMM_B_SIZE_MASK;
-			/* Enable swizzling when the channels are populated
-			 * with identically sized dimms. We don't need to check
-			 * the 3rd channel because no cpu with gpu attached
-			 * ships in that configuration. Also, swizzling only
-			 * makes sense for 2 channels anyway. */
-			if (dimm_c0 == dimm_c1) {
-				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
-				swizzle_y = I915_BIT_6_SWIZZLE_9;
-			} else {
-				swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-				swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-			}
-		}
-	} else if (IS_GEN5(dev)) {
-		/* On Ironlake, whatever the DRAM config, the GPU always does
-		 * the same swizzling setup.
-		 */
-		swizzle_x = I915_BIT_6_SWIZZLE_9_10;
-		swizzle_y = I915_BIT_6_SWIZZLE_9;
-	} else if (IS_GEN2(dev)) {
-		/* As far as we know, the 865 doesn't have these bit 6
-		 * swizzling issues.
-		 */
-		swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-		swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-	} else if (IS_MOBILE(dev) || (IS_GEN3(dev) && !IS_G33(dev))) {
-		uint32_t dcc;
-
-		/* On 9xx chipsets, channel interleave by the CPU is
-		 * determined by DCC.  For single-channel, neither the CPU
-		 * nor the GPU do swizzling.  For dual channel interleaved,
-		 * the GPU's interleave is bit 9 and 10 for X tiled, and bit
-		 * 9 for Y tiled.  The CPU's interleave is independent, and
-		 * can be based on either bit 11 (haven't seen this yet) or
-		 * bit 17 (common).
-		 */
-		dcc = I915_READ(DCC);
-		switch (dcc & DCC_ADDRESSING_MODE_MASK) {
-		case DCC_ADDRESSING_MODE_SINGLE_CHANNEL:
-		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC:
-			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-			break;
-		case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED:
-			if (dcc & DCC_CHANNEL_XOR_DISABLE) {
-				/* This is the base swizzling by the GPU for
-				 * tiled buffers.
-				 */
-				swizzle_x = I915_BIT_6_SWIZZLE_9_10;
-				swizzle_y = I915_BIT_6_SWIZZLE_9;
-			} else if ((dcc & DCC_CHANNEL_XOR_BIT_17) == 0) {
-				/* Bit 11 swizzling by the CPU in addition. */
-				swizzle_x = I915_BIT_6_SWIZZLE_9_10_11;
-				swizzle_y = I915_BIT_6_SWIZZLE_9_11;
-			} else {
-				/* Bit 17 swizzling by the CPU in addition. */
-				swizzle_x = I915_BIT_6_SWIZZLE_9_10_17;
-				swizzle_y = I915_BIT_6_SWIZZLE_9_17;
-			}
-			break;
-		}
-
-		/* check for L-shaped memory aka modified enhanced addressing */
-		if (IS_GEN4(dev)) {
-			uint32_t ddc2 = I915_READ(DCC2);
-
-			if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE))
-				dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES;
-		}
-
-		if (dcc == 0xffffffff) {
-			DRM_ERROR("Couldn't read from MCHBAR.  "
-				  "Disabling tiling.\n");
-			swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
-			swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
-		}
-	} else {
-		/* The 965, G33, and newer, have a very flexible memory
-		 * configuration.  It will enable dual-channel mode
-		 * (interleaving) on as much memory as it can, and the GPU
-		 * will additionally sometimes enable different bit 6
-		 * swizzling for tiled objects from the CPU.
-		 *
-		 * Here's what I found on the G965:
-		 *    slot fill         memory size  swizzling
-		 * 0A   0B   1A   1B    1-ch   2-ch
-		 * 512  0    0    0     512    0     O
-		 * 512  0    512  0     16     1008  X
-		 * 512  0    0    512   16     1008  X
-		 * 0    512  0    512   16     1008  X
-		 * 1024 1024 1024 0     2048   1024  O
-		 *
-		 * We could probably detect this based on either the DRB
-		 * matching, which was the case for the swizzling required in
-		 * the table above, or from the 1-ch value being less than
-		 * the minimum size of a rank.
-		 */
-		if (I915_READ16(C0DRB3) != I915_READ16(C1DRB3)) {
-			swizzle_x = I915_BIT_6_SWIZZLE_NONE;
-			swizzle_y = I915_BIT_6_SWIZZLE_NONE;
-		} else {
-			swizzle_x = I915_BIT_6_SWIZZLE_9_10;
-			swizzle_y = I915_BIT_6_SWIZZLE_9;
-		}
-	}
-
-	dev_priv->mm.bit_6_swizzle_x = swizzle_x;
-	dev_priv->mm.bit_6_swizzle_y = swizzle_y;
-}
+/**
+ * DOC: buffer object tiling
+ *
+ * i915_gem_set_tiling() and i915_gem_get_tiling() are the userspace interface
+ * to declare fence register requirements.
+ *
+ * In principle GEM doesn't care at all about the internal data layout of an
+ * object, and hence it also doesn't care about tiling or swizzling. There are
+ * two exceptions:
+ *
+ * - For X and Y tiling the hardware provides detilers for CPU access, so-called
+ *   fences. Since there's only a limited amount of them the kernel must manage
+ *   these, and therefore userspace must tell the kernel the object tiling if it
+ *   wants to use fences for detiling.
+ * - Gen3 and gen4 platforms have a swizzling pattern for tiled objects which
+ *   depends upon the physical page frame number. When swapping such objects the
+ *   page frame number might change and the kernel must be able to fix this up,
+ *   and hence it must know the tiling. Note that on a subset of platforms with
+ *   asymmetric memory channel population the swizzling pattern changes in an
+ *   unknown way, and for those the kernel simply forbids swapping completely.
+ *
+ * Since neither of these applies to new tiling layouts on modern platforms,
+ * like W, Ys and Yf tiling, GEM only allows object tiling to be set to X or Y
+ * tiled. Anything else can be handled in userspace entirely without the
+ * kernel's involvement.
+ */
 
 /* Check pitch constraints for all chips & tiling formats */
 static bool
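For readers of the comment removed above: the swizzle modes the driver reports translate into a simple address transformation that CPU-side code applies per 64-byte chunk. Below is a minimal sketch for the common 9_10 and 9-only cases; the standalone helpers are written purely for illustration and are not part of this file.

#include <stdint.h>

/* Illustration only: toggle bit 6 of a byte offset according to the
 * I915_BIT_6_SWIZZLE_9_10 rule described above -- bit 6 is XORed with
 * bits 9 and 10 of the same offset. */
static uint32_t swizzle_9_10(uint32_t offset)
{
	return offset ^ (((offset >> 3) ^ (offset >> 4)) & 64);
}

/* Same idea for I915_BIT_6_SWIZZLE_9: only bit 9 participates. */
static uint32_t swizzle_9(uint32_t offset)
{
	return offset ^ ((offset >> 3) & 64);
}

A CPU copy loop would run every 64-byte-aligned offset through one of these before indexing into its linear mapping. The *_17 variants additionally depend on bit 17 of the physical page address, which is exactly why the detection code above either records them as a quirk or refuses tiling.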
Line 311... Line 142...
 
 	return true;
 }
 
-/**
- * Sets the tiling mode of an object, returning the required swizzling of
- * bit 6 of addresses in the object.
- */
+/**
+ * i915_gem_set_tiling - IOCTL handler to set tiling mode
+ * @dev: DRM device
+ * @data: data pointer for the ioctl
+ * @file: DRM file for the ioctl call
+ *
+ * Sets the tiling mode of an object, returning the required swizzling of
+ * bit 6 of addresses in the object.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
 int
 i915_gem_set_tiling(struct drm_device *dev, void *data,
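The kerneldoc added in Rev 6084 documents the userspace side of this interface. For context, a caller drives it through the DRM_IOCTL_I915_GEM_SET_TILING ioctl roughly as sketched below. This is a minimal illustration against the drm_i915_gem_set_tiling uapi struct; the fd and GEM handle are assumed to come from an earlier open()/GEM_CREATE, and real callers such as libdrm also retry on EINTR.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Sketch: declare an X-tiled layout for a GEM object so the kernel can
 * manage a fence register for it.  Returns the swizzle mode the kernel
 * reports, or -1 on error. */
static int set_x_tiling(int fd, uint32_t handle, uint32_t stride)
{
	struct drm_i915_gem_set_tiling arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.tiling_mode = I915_TILING_X;
	arg.stride = stride;	/* must pass the pitch checks above */

	if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &arg) != 0)
		return -1;

	return arg.swizzle_mode;
}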
Line 333... Line 174...
 			    args->stride, obj->base.size, args->tiling_mode)) {
 		drm_gem_object_unreference_unlocked(&obj->base);
 		return -EINVAL;
 	}
 
-	if (i915_gem_obj_is_pinned(obj) || obj->framebuffer_references) {
-		drm_gem_object_unreference_unlocked(&obj->base);
-		return -EBUSY;
+	mutex_lock(&dev->struct_mutex);
+	if (obj->pin_display || obj->framebuffer_references) {
+		ret = -EBUSY;
+		goto err;
 	}
 
 	if (args->tiling_mode == I915_TILING_NONE) {
Line 367... Line 209...
 			args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
 			args->stride = 0;
 		}
 	}
 
-	mutex_lock(&dev->struct_mutex);
 	if (args->tiling_mode != obj->tiling_mode ||
 	    args->stride != obj->stride) {
 		/* We need to rebind the object if its current allocation
 		 * no longer meets the alignment restrictions for its new
Line 397... Line 238...
 				if (obj->tiling_mode == I915_TILING_NONE)
 					i915_gem_object_pin_pages(obj);
-		}
+			}
 
 			obj->fence_dirty =
-				obj->last_fenced_seqno ||
+				obj->last_fenced_req ||
 				obj->fence_reg != I915_FENCE_REG_NONE;
 
Line 422... Line 263...
 	} else {
 		kfree(obj->bit_17);
 		obj->bit_17 = NULL;
 	}
 
+err:
 	drm_gem_object_unreference(&obj->base);
 	mutex_unlock(&dev->struct_mutex);
 
Line 430... Line 272...
 	return ret;
 }
 
-/**
- * Returns the current tiling mode and required bit 6 swizzling for the object.
- */
+/**
+ * i915_gem_get_tiling - IOCTL handler to get tiling mode
+ * @dev: DRM device
+ * @data: data pointer for the ioctl
+ * @file: DRM file for the ioctl call
+ *
+ * Returns the current tiling mode and required bit 6 swizzling for the object.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
 int
Line 461... Line 313...
 	default:
 		DRM_ERROR("unknown tiling mode\n");
 	}
 
 	/* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */
-	args->phys_swizzle_mode = args->swizzle_mode;
+	if (dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES)
+		args->phys_swizzle_mode = I915_BIT_6_SWIZZLE_UNKNOWN;
+	else
+		args->phys_swizzle_mode = args->swizzle_mode;
 	if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17)
 		args->swizzle_mode = I915_BIT_6_SWIZZLE_9;
 	if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
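The phys_swizzle_mode field introduced in Rev 6084 lets userspace detect the bit-17 case that swizzle_mode deliberately hides. A hedged sketch of how a client might consume it follows; the field names come from the drm_i915_gem_get_tiling uapi struct, while the helper and its policy are illustrative only.

#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Sketch: query an object's tiling and report whether plain bit-6
 * swizzling is enough for CPU access, i.e. whether no per-page bit-17
 * fixups (the QUIRK_PIN_SWIZZLED_PAGES case) are involved. */
static bool cpu_detiling_is_safe(int fd, uint32_t handle)
{
	struct drm_i915_gem_get_tiling arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;

	if (ioctl(fd, DRM_IOCTL_I915_GEM_GET_TILING, &arg) != 0)
		return false;

	return arg.phys_swizzle_mode == arg.swizzle_mode;
}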
Line 472... Line 327...
 	drm_gem_object_unreference(&obj->base);
 	mutex_unlock(&dev->struct_mutex);
 
 	return 0;
 }
-
-#if 0
-/**
- * Swap every 64 bytes of this page around, to account for it having a new
- * bit 17 of its physical address and therefore being interpreted differently
- * by the GPU.
- */
-static void
-i915_gem_swizzle_page(struct page *page)
-{
-	char temp[64];
-	char *vaddr;
-	int i;
-
-	vaddr = kmap(page);
-
-	for (i = 0; i < PAGE_SIZE; i += 128) {
-		memcpy(temp, &vaddr[i], 64);
-		memcpy(&vaddr[i], &vaddr[i + 64], 64);
-		memcpy(&vaddr[i + 64], temp, 64);
-	}
-
-	kunmap(page);
-}
-
-void
-i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
-{
-	struct sg_page_iter sg_iter;
-	int i;
-
-	if (obj->bit_17 == NULL)
-		return;
-
-	i = 0;
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
-		struct page *page = sg_page_iter_page(&sg_iter);
-		char new_bit_17 = page_to_phys(page) >> 17;
-		if ((new_bit_17 & 0x1) !=
-		    (test_bit(i, obj->bit_17) != 0)) {
-			i915_gem_swizzle_page(page);
-			set_page_dirty(page);
-		}
-		i++;
-	}
-}
-
-void
-i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
-{
-	struct sg_page_iter sg_iter;
-	int page_count = obj->base.size >> PAGE_SHIFT;
-	int i;
-
-	if (obj->bit_17 == NULL) {
-		obj->bit_17 = kcalloc(BITS_TO_LONGS(page_count),
-				      sizeof(long), GFP_KERNEL);
-		if (obj->bit_17 == NULL) {
-			DRM_ERROR("Failed to allocate memory for bit 17 "
-				  "record\n");
-			return;
-		}
-	}
-
-	i = 0;
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
-		if (page_to_phys(sg_page_iter_page(&sg_iter)) & (1 << 17))
-			__set_bit(i, obj->bit_17);
-		else
-			__clear_bit(i, obj->bit_17);
-		i++;
-	}
-}