Subversion Repositories Kolibri OS

Rev 3769

Rev Author Line No. Line
3280 Serge 1
/*
2
 * Copyright © 2006,2008,2011 Intel Corporation
3
 * Copyright © 2007 Red Hat, Inc.
4
 *
5
 * Permission is hereby granted, free of charge, to any person obtaining a
6
 * copy of this software and associated documentation files (the "Software"),
7
 * to deal in the Software without restriction, including without limitation
8
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
 * and/or sell copies of the Software, and to permit persons to whom the
10
 * Software is furnished to do so, subject to the following conditions:
11
 *
12
 * The above copyright notice and this permission notice (including the next
13
 * paragraph) shall be included in all copies or substantial portions of the
14
 * Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
 * SOFTWARE.
23
 *
24
 * Authors:
25
 *    Wang Zhenyu 
26
 *    Eric Anholt 
27
 *    Carl Worth 
28
 *    Keith Packard 
29
 *    Chris Wilson 
30
 *
31
 */
32
 
33
#ifdef HAVE_CONFIG_H
34
#include "config.h"
35
#endif
36
 
37
#include "sna.h"
38
#include "sna_reg.h"
39
#include "sna_render.h"
40
#include "sna_render_inline.h"
41
//#include "sna_video.h"
42
 
43
#include "brw/brw.h"
44
#include "gen7_render.h"
45
#include "gen4_source.h"
46
#include "gen4_vertex.h"
47
 
4251 Serge 48
#define ALWAYS_FLUSH 0
49
 
3280 Serge 50
#define NO_COMPOSITE 0
51
#define NO_COMPOSITE_SPANS 0
52
#define NO_COPY 0
53
#define NO_COPY_BOXES 0
54
#define NO_FILL 0
55
#define NO_FILL_BOXES 0
56
#define NO_FILL_ONE 0
57
#define NO_FILL_CLEAR 0
58
 
59
#define NO_RING_SWITCH 0
60
 
61
#define USE_8_PIXEL_DISPATCH 1
62
#define USE_16_PIXEL_DISPATCH 1
63
#define USE_32_PIXEL_DISPATCH 0
64
 
65
#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
66
#error "Must select at least one of 8, 16 or 32 pixel dispatch"
67
#endif
68
 
69
#define GEN7_MAX_SIZE 16384
70
 
71
/* XXX Todo
72
 *
73
 * STR (software tiled rendering) mode. No, really.
74
 * 64x32 pixel blocks align with the rendering cache. Worth considering.
75
 */
76
 
77
#define is_aligned(x, y) (((x) & ((y) - 1)) == 0)
78
 
79
struct gt_info {
4251 Serge 80
	const char *name;
3280 Serge 81
	uint32_t max_vs_threads;
82
	uint32_t max_gs_threads;
83
	uint32_t max_wm_threads;
84
	struct {
85
		int size;
86
		int max_vs_entries;
87
		int max_gs_entries;
4251 Serge 88
		int push_ps_size; /* in 1KBs */
3280 Serge 89
	} urb;
4251 Serge 90
	int gt;
3280 Serge 91
};
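/* Note (added for reference): the per-GT tables below use positional
 * .urb initializers that follow the member order above, i.e.
 * { size, max_vs_entries, max_gs_entries, push_ps_size }.  Of these,
 * gen7_emit_urb() programs push_ps_size and max_vs_entries, while
 * gen7_emit_wm() uses max_wm_threads (already shifted into its
 * 3DSTATE_PS bitfield by the initializers).
 */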
92
 
93
static const struct gt_info ivb_gt_info = {
4251 Serge 94
	.name = "Ivybridge (gen7)",
3280 Serge 95
	.max_vs_threads = 16,
96
	.max_gs_threads = 16,
97
	.max_wm_threads = (16-1) << IVB_PS_MAX_THREADS_SHIFT,
4251 Serge 98
	.urb = { 128, 64, 64, 8 },
99
	.gt = 0,
3280 Serge 100
};
101
 
102
static const struct gt_info ivb_gt1_info = {
4251 Serge 103
	.name = "Ivybridge (gen7, gt1)",
3280 Serge 104
	.max_vs_threads = 36,
105
	.max_gs_threads = 36,
106
	.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
4251 Serge 107
	.urb = { 128, 512, 192, 8 },
108
	.gt = 1,
3280 Serge 109
};
110
 
111
static const struct gt_info ivb_gt2_info = {
4251 Serge 112
	.name = "Ivybridge (gen7, gt2)",
3280 Serge 113
	.max_vs_threads = 128,
114
	.max_gs_threads = 128,
115
	.max_wm_threads = (172-1) << IVB_PS_MAX_THREADS_SHIFT,
4251 Serge 116
	.urb = { 256, 704, 320, 8 },
117
	.gt = 2,
3280 Serge 118
};
119
 
4251 Serge 120
static const struct gt_info byt_gt_info = {
121
	.name = "Baytrail (gen7)",
123
	.max_vs_threads = 36,
124
	.max_gs_threads = 36,
125
	.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
126
	.urb = { 128, 512, 192, 8 },
127
	.gt = 1,
128
};
129
 
3280 Serge 130
static const struct gt_info hsw_gt_info = {
4251 Serge 131
	.name = "Haswell (gen7.5)",
3280 Serge 132
	.max_vs_threads = 8,
133
	.max_gs_threads = 8,
134
	.max_wm_threads =
135
		(8 - 1) << HSW_PS_MAX_THREADS_SHIFT |
136
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
4251 Serge 137
	.urb = { 128, 64, 64, 8 },
138
	.gt = 0,
3280 Serge 139
};
140
 
141
static const struct gt_info hsw_gt1_info = {
4251 Serge 142
	.name = "Haswell (gen7.5, gt1)",
3280 Serge 143
	.max_vs_threads = 70,
144
	.max_gs_threads = 70,
145
	.max_wm_threads =
146
		(102 - 1) << HSW_PS_MAX_THREADS_SHIFT |
147
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
4251 Serge 148
	.urb = { 128, 640, 256, 8 },
149
	.gt = 1,
3280 Serge 150
};
151
 
152
static const struct gt_info hsw_gt2_info = {
4251 Serge 153
	.name = "Haswell (gen7.5, gt2)",
154
	.max_vs_threads = 140,
155
	.max_gs_threads = 140,
156
	.max_wm_threads =
157
		(140 - 1) << HSW_PS_MAX_THREADS_SHIFT |
158
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
159
	.urb = { 256, 1664, 640, 8 },
160
	.gt = 2,
161
};
162
 
163
static const struct gt_info hsw_gt3_info = {
164
	.name = "Haswell (gen7.5, gt3)",
3280 Serge 165
	.max_vs_threads = 280,
166
	.max_gs_threads = 280,
167
	.max_wm_threads =
4251 Serge 168
		(280 - 1) << HSW_PS_MAX_THREADS_SHIFT |
3280 Serge 169
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
4251 Serge 170
	.urb = { 512, 3328, 1280, 16 },
171
	.gt = 3,
3280 Serge 172
};
173
 
4251 Serge 174
inline static bool is_ivb(struct sna *sna)
175
{
176
	return sna->kgem.gen == 070;
177
}
178
 
179
inline static bool is_byt(struct sna *sna)
180
{
181
	return sna->kgem.gen == 071;
182
}
183
 
184
inline static bool is_hsw(struct sna *sna)
185
{
186
	return sna->kgem.gen == 075;
187
}
188
 
3280 Serge 189
static const uint32_t ps_kernel_packed[][4] = {
190
#include "exa_wm_src_affine.g7b"
191
#include "exa_wm_src_sample_argb.g7b"
192
#include "exa_wm_yuv_rgb.g7b"
193
#include "exa_wm_write.g7b"
194
};
195
 
196
static const uint32_t ps_kernel_planar[][4] = {
197
#include "exa_wm_src_affine.g7b"
198
#include "exa_wm_src_sample_planar.g7b"
199
#include "exa_wm_yuv_rgb.g7b"
200
#include "exa_wm_write.g7b"
201
};
202
 
203
#define KERNEL(kernel_enum, kernel, num_surfaces) \
204
    [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces}
205
#define NOKERNEL(kernel_enum, func, num_surfaces) \
206
    [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces}
207
static const struct wm_kernel_info {
208
	const char *name;
209
	const void *data;
210
	unsigned int size;
211
	int num_surfaces;
212
} wm_kernels[] = {
213
	NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
214
	NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),
215
 
216
	NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
217
	NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),
218
 
219
	NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
220
	NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),
221
 
222
	NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
223
	NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),
224
 
225
	NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
226
	NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),
227
 
228
	KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
229
	KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
230
};
231
#undef KERNEL
232
 
233
static const struct blendinfo {
234
	bool src_alpha;
235
	uint32_t src_blend;
236
	uint32_t dst_blend;
237
} gen7_blend_op[] = {
238
	/* Clear */	{0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO},
239
	/* Src */	{0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO},
240
	/* Dst */	{0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ONE},
241
	/* Over */	{1, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
242
	/* OverReverse */ {0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ONE},
243
	/* In */	{0, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_ZERO},
244
	/* InReverse */	{1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_SRC_ALPHA},
245
	/* Out */	{0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ZERO},
246
	/* OutReverse */ {1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
247
	/* Atop */	{1, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
248
	/* AtopReverse */ {1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_SRC_ALPHA},
249
	/* Xor */	{1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
250
	/* Add */	{0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ONE},
251
};
252
 
253
/**
254
 * Highest-valued BLENDFACTOR used in gen7_blend_op.
255
 *
256
 * This leaves out GEN7_BLENDFACTOR_INV_DST_COLOR,
257
 * GEN7_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
258
 * GEN7_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
259
 */
260
#define GEN7_BLENDFACTOR_COUNT (GEN7_BLENDFACTOR_INV_DST_ALPHA + 1)
261
 
262
#define GEN7_BLEND_STATE_PADDED_SIZE	ALIGN(sizeof(struct gen7_blend_state), 64)
263
 
264
#define BLEND_OFFSET(s, d) \
265
	((d != GEN7_BLENDFACTOR_ZERO) << 15 | \
266
	 (((s) * GEN7_BLENDFACTOR_COUNT + (d)) * GEN7_BLEND_STATE_PADDED_SIZE))
267
 
268
#define NO_BLEND BLEND_OFFSET(GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO)
269
#define CLEAR BLEND_OFFSET(GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO)
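/* Illustration (derived from the macros above, not present in the
 * original source): BLEND_OFFSET() packs two things into one value.
 * Bit 15 is set whenever the destination factor is not ZERO, i.e. the
 * blend reads the render target (later tested via GEN7_READS_DST()),
 * and the remaining bits are the byte offset of the pre-baked {src,dst}
 * blend state inside the array written by
 * gen7_composite_create_blend_state().  For example, PictOpOver uses
 * {ONE, INV_SRC_ALPHA} from gen7_blend_op[], so its offset has bit 15
 * set, whereas NO_BLEND ({ONE, ZERO}) does not.
 */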
270
 
271
#define SAMPLER_OFFSET(sf, se, mf, me) \
272
	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2) * 2 * sizeof(struct gen7_sampler_state))
273
 
274
#define VERTEX_2s2s 0
275
 
276
#define COPY_SAMPLER 0
277
#define COPY_VERTEX VERTEX_2s2s
278
#define COPY_FLAGS(a) GEN7_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN7_WM_KERNEL_NOMASK, COPY_VERTEX)
279
 
280
#define FILL_SAMPLER (2 * sizeof(struct gen7_sampler_state))
281
#define FILL_VERTEX VERTEX_2s2s
282
#define FILL_FLAGS(op, format) GEN7_SET_FLAGS(FILL_SAMPLER, gen7_get_blend((op), false, (format)), GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)
283
#define FILL_FLAGS_NOBLEND GEN7_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)
284
 
285
#define GEN7_SAMPLER(f) (((f) >> 16) & 0xfff0)
286
#define GEN7_BLEND(f) (((f) >> 0) & 0x7ff0)
287
#define GEN7_READS_DST(f) (((f) >> 15) & 1)
288
#define GEN7_KERNEL(f) (((f) >> 16) & 0xf)
289
#define GEN7_VERTEX(f) (((f) >> 0) & 0xf)
290
#define GEN7_SET_FLAGS(S, B, K, V)  (((S) | (K)) << 16 | ((B) | (V)))
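/* Sketch of the flag-word layout implied by the accessors above: the
 * high 16 bits carry the sampler-state offset OR'ed with the WM kernel
 * index (disjoint bit ranges, 0xfff0 vs 0xf), and the low 16 bits carry
 * the blend-state offset OR'ed with the vertex-element id (0x7ff0 plus
 * bit 15 vs 0xf).  E.g. COPY_FLAGS(GXcopy) above expands to
 * GEN7_SET_FLAGS(COPY_SAMPLER, NO_BLEND, GEN7_WM_KERNEL_NOMASK,
 * COPY_VERTEX).
 */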
291
 
292
#define OUT_BATCH(v) batch_emit(sna, v)
293
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
294
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
295
 
296
static inline bool too_large(int width, int height)
297
{
298
	return width > GEN7_MAX_SIZE || height > GEN7_MAX_SIZE;
299
}
300
 
301
static uint32_t gen7_get_blend(int op,
302
			       bool has_component_alpha,
303
			       uint32_t dst_format)
304
{
305
	uint32_t src, dst;
306
 
307
 
308
    src = GEN7_BLENDFACTOR_ONE; //gen7_blend_op[op].src_blend;
309
    dst = GEN7_BLENDFACTOR_INV_SRC_ALPHA; //gen7_blend_op[op].dst_blend;
310
 
3291 Serge 311
 
3280 Serge 312
#if 0
313
	/* If there's no dst alpha channel, adjust the blend op so that
314
	 * we'll treat it always as 1.
315
	 */
316
	if (PICT_FORMAT_A(dst_format) == 0) {
317
		if (src == GEN7_BLENDFACTOR_DST_ALPHA)
318
			src = GEN7_BLENDFACTOR_ONE;
319
		else if (src == GEN7_BLENDFACTOR_INV_DST_ALPHA)
320
			src = GEN7_BLENDFACTOR_ZERO;
321
	}
322
 
323
	/* If the source alpha is being used, then we should only be in a
324
	 * case where the source blend factor is 0, and the source blend
325
	 * value is the mask channels multiplied by the source picture's alpha.
326
	 */
327
	if (has_component_alpha && gen7_blend_op[op].src_alpha) {
328
		if (dst == GEN7_BLENDFACTOR_SRC_ALPHA)
329
			dst = GEN7_BLENDFACTOR_SRC_COLOR;
330
		else if (dst == GEN7_BLENDFACTOR_INV_SRC_ALPHA)
331
			dst = GEN7_BLENDFACTOR_INV_SRC_COLOR;
332
	}
333
#endif
334
 
335
	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
336
	     op, dst_format, PICT_FORMAT_A(dst_format),
337
	     src, dst, (int)BLEND_OFFSET(src, dst)));
338
	return BLEND_OFFSET(src, dst);
339
}
340
 
341
static uint32_t gen7_get_card_format(PictFormat format)
342
{
343
	switch (format) {
344
	default:
345
		return -1;
346
	case PICT_a8r8g8b8:
347
		return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
348
	case PICT_x8r8g8b8:
349
		return GEN7_SURFACEFORMAT_B8G8R8X8_UNORM;
4251 Serge 350
	case PICT_a8b8g8r8:
351
		return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;
352
	case PICT_x8b8g8r8:
353
		return GEN7_SURFACEFORMAT_R8G8B8X8_UNORM;
354
	case PICT_a2r10g10b10:
355
		return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
356
	case PICT_x2r10g10b10:
357
		return GEN7_SURFACEFORMAT_B10G10R10X2_UNORM;
358
	case PICT_r8g8b8:
359
		return GEN7_SURFACEFORMAT_R8G8B8_UNORM;
360
	case PICT_r5g6b5:
361
		return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
362
	case PICT_a1r5g5b5:
363
		return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;
3280 Serge 364
	case PICT_a8:
365
		return GEN7_SURFACEFORMAT_A8_UNORM;
4251 Serge 366
	case PICT_a4r4g4b4:
367
		return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;
3280 Serge 368
	}
369
}
370
 
371
static uint32_t gen7_get_dest_format(PictFormat format)
372
{
373
	switch (format) {
374
	default:
375
		return -1;
376
	case PICT_a8r8g8b8:
377
	case PICT_x8r8g8b8:
378
		return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
4251 Serge 379
	case PICT_a8b8g8r8:
380
	case PICT_x8b8g8r8:
381
		return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;
382
	case PICT_a2r10g10b10:
383
	case PICT_x2r10g10b10:
384
		return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
385
	case PICT_r5g6b5:
386
		return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
387
	case PICT_x1r5g5b5:
388
	case PICT_a1r5g5b5:
389
		return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;
3280 Serge 390
	case PICT_a8:
391
		return GEN7_SURFACEFORMAT_A8_UNORM;
4251 Serge 392
	case PICT_a4r4g4b4:
393
	case PICT_x4r4g4b4:
394
		return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;
3280 Serge 395
	}
396
}
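/* Pick the WM kernel for a composite: a component-alpha mask whose blend
 * op consumes source alpha (gen7_blend_op[op].src_alpha) selects the
 * MASKSA variant, a plain component-alpha mask selects MASKCA, any other
 * mask selects MASK, and no mask selects NOMASK; a non-affine transform
 * then picks the projective (_P) sibling via the "+ !is_affine" step.
 */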
397
 
398
static int
399
gen7_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
400
{
401
	int base;
402
 
403
	if (has_mask) {
404
		if (is_ca) {
405
			if (gen7_blend_op[op].src_alpha)
406
				base = GEN7_WM_KERNEL_MASKSA;
407
			else
408
				base = GEN7_WM_KERNEL_MASKCA;
409
		} else
410
			base = GEN7_WM_KERNEL_MASK;
411
	} else
412
		base = GEN7_WM_KERNEL_NOMASK;
413
 
414
	return base + !is_affine;
415
}
416
 
417
static void
418
gen7_emit_urb(struct sna *sna)
419
{
420
	OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
4251 Serge 421
	OUT_BATCH(sna->render_state.gen7.info->urb.push_ps_size);
3280 Serge 422
 
423
	/* num of VS entries must be divisible by 8 if size < 9 */
424
	OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2));
425
	OUT_BATCH((sna->render_state.gen7.info->urb.max_vs_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
426
		  (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
427
		  (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
428
 
429
	OUT_BATCH(GEN7_3DSTATE_URB_HS | (2 - 2));
430
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
431
		  (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
432
 
433
	OUT_BATCH(GEN7_3DSTATE_URB_DS | (2 - 2));
434
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
435
		  (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
436
 
437
	OUT_BATCH(GEN7_3DSTATE_URB_GS | (2 - 2));
438
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
439
		  (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
440
}
441
 
442
static void
443
gen7_emit_state_base_address(struct sna *sna)
444
{
4251 Serge 445
	uint32_t mocs;
446
 
447
	mocs = is_hsw(sna) ? 5 << 8 : 3 << 8;
448
 
3280 Serge 449
	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
450
	OUT_BATCH(0); /* general */
451
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
452
				 sna->kgem.nbatch,
453
				 NULL,
454
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
455
				 BASE_ADDRESS_MODIFY));
4251 Serge 456
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* dynamic */
3280 Serge 457
				 sna->kgem.nbatch,
458
				 sna->render_state.gen7.general_bo,
459
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
4251 Serge 460
				 mocs | BASE_ADDRESS_MODIFY));
3280 Serge 461
	OUT_BATCH(0); /* indirect */
4251 Serge 462
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
3280 Serge 463
				 sna->kgem.nbatch,
464
				 sna->render_state.gen7.general_bo,
465
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
4251 Serge 466
				 mocs | BASE_ADDRESS_MODIFY));
3280 Serge 467
 
468
	/* upper bounds, disable */
469
	OUT_BATCH(0);
470
	OUT_BATCH(BASE_ADDRESS_MODIFY);
471
	OUT_BATCH(0);
472
	OUT_BATCH(BASE_ADDRESS_MODIFY);
473
}
474
 
475
static void
476
gen7_disable_vs(struct sna *sna)
477
{
478
	/* For future reference:
479
	 * A PIPE_CONTROL with post-sync op set to 1 and a depth stall needs
480
	 * to be emitted just prior to change VS state, i.e. 3DSTATE_VS,
481
	 * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS,
482
	 * 3DSTATE_BINDING_TABLE_POINTER_VS, 3DSTATE_SAMPLER_STATE_POINTER_VS.
483
	 *
484
	 * Here we are saved by the full-flush incurred when emitting
485
	 * the batchbuffer.
486
	 */
487
	OUT_BATCH(GEN7_3DSTATE_VS | (6 - 2));
488
	OUT_BATCH(0); /* no VS kernel */
489
	OUT_BATCH(0);
490
	OUT_BATCH(0);
491
	OUT_BATCH(0);
492
	OUT_BATCH(0); /* pass-through */
493
 
494
#if 0
495
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_VS | (7 - 2));
496
	OUT_BATCH(0);
497
	OUT_BATCH(0);
498
	OUT_BATCH(0);
499
	OUT_BATCH(0);
500
	OUT_BATCH(0);
501
	OUT_BATCH(0);
502
 
503
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
504
	OUT_BATCH(0);
505
 
506
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
507
	OUT_BATCH(0);
508
#endif
509
}
510
 
511
static void
512
gen7_disable_hs(struct sna *sna)
513
{
514
	OUT_BATCH(GEN7_3DSTATE_HS | (7 - 2));
515
	OUT_BATCH(0); /* no HS kernel */
516
	OUT_BATCH(0);
517
	OUT_BATCH(0);
518
	OUT_BATCH(0);
519
	OUT_BATCH(0);
520
	OUT_BATCH(0); /* pass-through */
521
 
522
#if 0
523
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
524
	OUT_BATCH(0);
525
	OUT_BATCH(0);
526
	OUT_BATCH(0);
527
	OUT_BATCH(0);
528
	OUT_BATCH(0);
529
	OUT_BATCH(0);
530
 
531
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
532
	OUT_BATCH(0);
533
 
534
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
535
	OUT_BATCH(0);
536
#endif
537
}
538
 
539
static void
540
gen7_disable_te(struct sna *sna)
541
{
542
	OUT_BATCH(GEN7_3DSTATE_TE | (4 - 2));
543
	OUT_BATCH(0);
544
	OUT_BATCH(0);
545
	OUT_BATCH(0);
546
}
547
 
548
static void
549
gen7_disable_ds(struct sna *sna)
550
{
551
	OUT_BATCH(GEN7_3DSTATE_DS | (6 - 2));
552
	OUT_BATCH(0);
553
	OUT_BATCH(0);
554
	OUT_BATCH(0);
555
	OUT_BATCH(0);
556
	OUT_BATCH(0);
557
 
558
#if 0
559
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
560
	OUT_BATCH(0);
561
	OUT_BATCH(0);
562
	OUT_BATCH(0);
563
	OUT_BATCH(0);
564
	OUT_BATCH(0);
565
	OUT_BATCH(0);
566
 
567
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
568
	OUT_BATCH(0);
569
 
570
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
571
	OUT_BATCH(0);
572
#endif
573
}
574
 
575
static void
576
gen7_disable_gs(struct sna *sna)
577
{
578
	OUT_BATCH(GEN7_3DSTATE_GS | (7 - 2));
579
	OUT_BATCH(0); /* no GS kernel */
580
	OUT_BATCH(0);
581
	OUT_BATCH(0);
582
	OUT_BATCH(0);
583
	OUT_BATCH(0);
584
	OUT_BATCH(0); /* pass-through */
585
 
586
#if 0
587
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_GS | (7 - 2));
588
	OUT_BATCH(0);
589
	OUT_BATCH(0);
590
	OUT_BATCH(0);
591
	OUT_BATCH(0);
592
	OUT_BATCH(0);
593
	OUT_BATCH(0);
594
 
595
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
596
	OUT_BATCH(0);
597
 
598
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
599
	OUT_BATCH(0);
600
#endif
601
}
602
 
603
static void
604
gen7_disable_streamout(struct sna *sna)
605
{
606
	OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (3 - 2));
607
	OUT_BATCH(0);
608
	OUT_BATCH(0);
609
}
610
 
611
static void
612
gen7_emit_sf_invariant(struct sna *sna)
613
{
614
	OUT_BATCH(GEN7_3DSTATE_SF | (7 - 2));
615
	OUT_BATCH(0);
616
	OUT_BATCH(GEN7_3DSTATE_SF_CULL_NONE);
617
	OUT_BATCH(2 << GEN7_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
618
	OUT_BATCH(0);
619
	OUT_BATCH(0);
620
	OUT_BATCH(0);
621
}
622
 
623
static void
624
gen7_emit_cc_invariant(struct sna *sna)
625
{
626
#if 0 /* unused, no change */
627
	OUT_BATCH(GEN7_3DSTATE_CC_STATE_POINTERS | (2 - 2));
628
	OUT_BATCH(0);
629
 
630
	OUT_BATCH(GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
631
	OUT_BATCH(0);
632
#endif
633
 
634
	/* XXX clear to be safe */
635
	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
636
	OUT_BATCH(0);
637
}
638
 
639
static void
640
gen7_disable_clip(struct sna *sna)
641
{
642
	OUT_BATCH(GEN7_3DSTATE_CLIP | (4 - 2));
643
	OUT_BATCH(0);
644
	OUT_BATCH(0); /* pass-through */
645
	OUT_BATCH(0);
646
 
647
	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
648
	OUT_BATCH(0);
649
}
650
 
651
static void
652
gen7_emit_wm_invariant(struct sna *sna)
653
{
654
	OUT_BATCH(GEN7_3DSTATE_WM | (3 - 2));
655
	OUT_BATCH(GEN7_WM_DISPATCH_ENABLE |
656
		  GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
657
	OUT_BATCH(0);
658
 
659
#if 0
660
	/* XXX length bias of 7 in old spec? */
661
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_PS | (7 - 2));
662
	OUT_BATCH(0);
663
	OUT_BATCH(0);
664
	OUT_BATCH(0);
665
	OUT_BATCH(0);
666
	OUT_BATCH(0);
667
	OUT_BATCH(0);
668
#endif
669
}
670
 
671
static void
672
gen7_emit_null_depth_buffer(struct sna *sna)
673
{
674
	OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
675
	OUT_BATCH(GEN7_SURFACE_NULL << GEN7_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
676
		  GEN7_DEPTHFORMAT_D32_FLOAT << GEN7_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
677
	OUT_BATCH(0); /* disable depth, stencil and hiz */
678
	OUT_BATCH(0);
679
	OUT_BATCH(0);
680
	OUT_BATCH(0);
681
	OUT_BATCH(0);
682
 
683
#if 0
684
	OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
685
	OUT_BATCH(0);
686
	OUT_BATCH(0);
687
#endif
688
}
689
 
690
static void
691
gen7_emit_invariant(struct sna *sna)
692
{
693
	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_3D);
694
 
695
	OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE | (4 - 2));
696
	OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
697
		  GEN7_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
698
	OUT_BATCH(0);
699
	OUT_BATCH(0);
700
 
701
	OUT_BATCH(GEN7_3DSTATE_SAMPLE_MASK | (2 - 2));
702
	OUT_BATCH(1);
703
 
704
	gen7_emit_urb(sna);
705
 
706
	gen7_emit_state_base_address(sna);
707
 
708
	gen7_disable_vs(sna);
709
	gen7_disable_hs(sna);
710
	gen7_disable_te(sna);
711
	gen7_disable_ds(sna);
712
	gen7_disable_gs(sna);
713
	gen7_disable_clip(sna);
714
	gen7_emit_sf_invariant(sna);
715
	gen7_emit_wm_invariant(sna);
716
	gen7_emit_cc_invariant(sna);
717
	gen7_disable_streamout(sna);
718
	gen7_emit_null_depth_buffer(sna);
719
 
720
	sna->render_state.gen7.needs_invariant = false;
721
}
722
 
723
static void
724
gen7_emit_cc(struct sna *sna, uint32_t blend_offset)
725
{
726
	struct gen7_render_state *render = &sna->render_state.gen7;
727
 
728
	if (render->blend == blend_offset)
729
		return;
730
 
731
	DBG(("%s: blend = %x\n", __FUNCTION__, blend_offset));
732
 
733
	/* XXX can have up to 8 blend states preloaded, selectable via
734
	 * Render Target Index. What are the other side-effects of Render Target Index?
735
	 */
736
 
737
	assert (is_aligned(render->cc_blend + blend_offset, 64));
738
	OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
739
	OUT_BATCH((render->cc_blend + blend_offset) | 1);
740
 
741
	render->blend = blend_offset;
742
}
743
 
744
static void
745
gen7_emit_sampler(struct sna *sna, uint32_t state)
746
{
747
	if (sna->render_state.gen7.samplers == state)
748
		return;
749
 
750
	sna->render_state.gen7.samplers = state;
751
 
752
	DBG(("%s: sampler = %x\n", __FUNCTION__, state));
753
 
754
	assert (is_aligned(sna->render_state.gen7.wm_state + state, 32));
755
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
756
	OUT_BATCH(sna->render_state.gen7.wm_state + state);
757
}
758
 
759
static void
760
gen7_emit_sf(struct sna *sna, bool has_mask)
761
{
762
	int num_sf_outputs = has_mask ? 2 : 1;
763
 
764
	if (sna->render_state.gen7.num_sf_outputs == num_sf_outputs)
765
		return;
766
 
767
	DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
768
	     __FUNCTION__, num_sf_outputs, 1, 0));
769
 
770
	sna->render_state.gen7.num_sf_outputs = num_sf_outputs;
771
 
772
	OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2));
773
	OUT_BATCH(num_sf_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT |
774
		  1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
775
		  1 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT);
776
	OUT_BATCH(0);
777
	OUT_BATCH(0); /* dw4 */
778
	OUT_BATCH(0);
779
	OUT_BATCH(0);
780
	OUT_BATCH(0);
781
	OUT_BATCH(0); /* dw8 */
782
	OUT_BATCH(0);
783
	OUT_BATCH(0);
784
	OUT_BATCH(0);
785
	OUT_BATCH(0); /* dw12 */
786
	OUT_BATCH(0);
787
	OUT_BATCH(0);
788
}
789
 
790
static void
791
gen7_emit_wm(struct sna *sna, int kernel)
792
{
793
	const uint32_t *kernels;
794
 
795
	if (sna->render_state.gen7.kernel == kernel)
796
		return;
797
 
798
	sna->render_state.gen7.kernel = kernel;
799
	kernels = sna->render_state.gen7.wm_kernel[kernel];
800
 
801
	DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n",
802
	     __FUNCTION__,
803
	     wm_kernels[kernel].name,
804
	     wm_kernels[kernel].num_surfaces,
805
	     kernels[0], kernels[1], kernels[2]));
806
 
807
	OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2));
808
	OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
809
	OUT_BATCH(1 << GEN7_PS_SAMPLER_COUNT_SHIFT |
810
		  wm_kernels[kernel].num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
811
	OUT_BATCH(0); /* scratch address */
812
	OUT_BATCH(sna->render_state.gen7.info->max_wm_threads |
813
		  (kernels[0] ? GEN7_PS_8_DISPATCH_ENABLE : 0) |
814
		  (kernels[1] ? GEN7_PS_16_DISPATCH_ENABLE : 0) |
815
		  (kernels[2] ? GEN7_PS_32_DISPATCH_ENABLE : 0) |
816
		  GEN7_PS_ATTRIBUTE_ENABLE);
817
	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
818
		  8 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
819
		  6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
820
	OUT_BATCH(kernels[2]);
821
	OUT_BATCH(kernels[1]);
822
}
823
 
824
static bool
825
gen7_emit_binding_table(struct sna *sna, uint16_t offset)
826
{
827
	if (sna->render_state.gen7.surface_table == offset)
828
		return false;
829
 
830
	/* Binding table pointers */
831
	assert(is_aligned(4*offset, 32));
832
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
833
	OUT_BATCH(offset*4);
834
 
835
	sna->render_state.gen7.surface_table = offset;
836
	return true;
837
}
838
 
839
static bool
840
gen7_emit_drawing_rectangle(struct sna *sna,
841
			    const struct sna_composite_op *op)
842
{
843
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
844
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
845
 
846
	assert(!too_large(op->dst.x, op->dst.y));
847
	assert(!too_large(op->dst.width, op->dst.height));
848
 
849
	if (sna->render_state.gen7.drawrect_limit == limit &&
850
	    sna->render_state.gen7.drawrect_offset == offset)
851
		return true;
852
 
853
	sna->render_state.gen7.drawrect_offset = offset;
854
	sna->render_state.gen7.drawrect_limit = limit;
855
 
856
	OUT_BATCH(GEN7_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
857
	OUT_BATCH(0);
858
	OUT_BATCH(limit);
859
	OUT_BATCH(offset);
860
	return false;
861
}
862
 
863
static void
864
gen7_emit_vertex_elements(struct sna *sna,
865
			  const struct sna_composite_op *op)
866
{
867
	/*
868
	 * vertex data in vertex buffer
869
	 *    position: (x, y)
870
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
871
	 *    texture coordinate 1 if (has_mask is true): same as above
872
	 */
873
	struct gen7_render_state *render = &sna->render_state.gen7;
874
	uint32_t src_format, dw;
875
	int id = GEN7_VERTEX(op->u.gen7.flags);
876
	bool has_mask;
877
 
878
	DBG(("%s: setup id=%d\n", __FUNCTION__, id));
879
 
880
	if (render->ve_id == id)
881
		return;
882
	render->ve_id = id;
883
 
884
	/* The VUE layout
885
	 *    dword 0-3: pad (0.0, 0.0, 0.0, 0.0)
886
	 *    dword 4-7: position (x, y, 1.0, 1.0),
887
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
888
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
889
	 *
890
	 * dword 4-15 are fetched from vertex buffer
891
	 */
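	/* The vertex-element id encodes the channel widths: bits 0-1 give the
	 * number of floats per source coordinate (0 means the packed 16-bit
	 * x,y-only layout used by copies and fills, see VERTEX_2s2s), and
	 * bits 2-3 give the same for the optional mask coordinate, hence the
	 * (id >> 2) != 0 test below.
	 */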
892
	has_mask = (id >> 2) != 0;
893
	OUT_BATCH(GEN7_3DSTATE_VERTEX_ELEMENTS |
894
		((2 * (3 + has_mask)) + 1 - 2));
895
 
896
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
897
		  GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT << GEN7_VE0_FORMAT_SHIFT |
898
		  0 << GEN7_VE0_OFFSET_SHIFT);
899
	OUT_BATCH(GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_0_SHIFT |
900
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT |
901
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
902
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_3_SHIFT);
903
 
904
	/* x,y */
905
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
906
		  GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT |
907
		  0 << GEN7_VE0_OFFSET_SHIFT);
908
	OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT |
909
		  GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT |
910
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
911
		  GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT);
912
 
913
	/* u0, v0, w0 */
914
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
915
	dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
916
	switch (id & 3) {
917
	default:
918
		assert(0);
919
	case 0:
920
		src_format = GEN7_SURFACEFORMAT_R16G16_SSCALED;
921
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
922
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
923
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
924
		break;
925
	case 1:
926
		src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
927
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
928
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
929
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
930
		break;
931
	case 2:
932
		src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
933
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
934
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
935
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
936
		break;
937
	case 3:
938
		src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
939
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
940
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
941
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
942
		break;
943
	}
944
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
945
		  src_format << GEN7_VE0_FORMAT_SHIFT |
946
		  4 << GEN7_VE0_OFFSET_SHIFT);
947
	OUT_BATCH(dw);
948
 
949
	/* u1, v1, w1 */
950
	if (has_mask) {
951
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
952
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
953
		dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
954
		switch (id >> 2) {
955
		case 1:
956
			src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
957
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
958
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
959
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
960
			break;
961
		default:
962
			assert(0);
963
		case 2:
964
			src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
965
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
966
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
967
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
968
			break;
969
		case 3:
970
			src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
971
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
972
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
973
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
974
			break;
975
		}
976
		OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
977
			  src_format << GEN7_VE0_FORMAT_SHIFT |
978
			  offset << GEN7_VE0_OFFSET_SHIFT);
979
		OUT_BATCH(dw);
980
	}
981
}
982
 
983
inline static void
984
gen7_emit_pipe_invalidate(struct sna *sna)
985
{
986
	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
987
	OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH |
988
		  GEN7_PIPE_CONTROL_TC_FLUSH |
989
		  GEN7_PIPE_CONTROL_CS_STALL);
990
	OUT_BATCH(0);
991
	OUT_BATCH(0);
992
}
993
 
994
inline static void
4251 Serge 995
gen7_emit_pipe_flush(struct sna *sna, bool need_stall)
3280 Serge 996
{
4251 Serge 997
	unsigned stall;
998
 
999
	stall = 0;
1000
	if (need_stall)
1001
		stall = (GEN7_PIPE_CONTROL_CS_STALL |
1002
			 GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
1003
 
3280 Serge 1004
	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
4251 Serge 1005
	OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH | stall);
3280 Serge 1006
	OUT_BATCH(0);
1007
	OUT_BATCH(0);
1008
}
1009
 
1010
inline static void
1011
gen7_emit_pipe_stall(struct sna *sna)
1012
{
1013
	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
1014
	OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL |
1015
		  GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
1016
	OUT_BATCH(0);
1017
	OUT_BATCH(0);
1018
}
1019
 
1020
static void
1021
gen7_emit_state(struct sna *sna,
1022
		const struct sna_composite_op *op,
1023
		uint16_t wm_binding_table)
1024
{
1025
	bool need_stall;
1026
 
4251 Serge 1027
	assert(op->dst.bo->exec);
3280 Serge 1028
 
1029
	gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
1030
	gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags));
1031
	gen7_emit_sf(sna, GEN7_VERTEX(op->u.gen7.flags) >> 2);
1032
	gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
1033
	gen7_emit_vertex_elements(sna, op);
1034
 
1035
	need_stall = gen7_emit_binding_table(sna, wm_binding_table);
1036
	need_stall &= gen7_emit_drawing_rectangle(sna, op);
1037
 
4251 Serge 1038
	if (ALWAYS_FLUSH || kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
3280 Serge 1039
		gen7_emit_pipe_invalidate(sna);
1040
		kgem_clear_dirty(&sna->kgem);
4251 Serge 1041
		assert(op->dst.bo->exec);
3280 Serge 1042
		kgem_bo_mark_dirty(op->dst.bo);
4251 Serge 1043
		sna->render_state.gen7.emit_flush = false;
3280 Serge 1044
		need_stall = false;
1045
	}
4251 Serge 1046
	if (sna->render_state.gen7.emit_flush) {
1047
		gen7_emit_pipe_flush(sna, need_stall);
1048
		need_stall = false;
1049
	}
3280 Serge 1050
	if (need_stall)
1051
		gen7_emit_pipe_stall(sna);
1052
 
1053
	sna->render_state.gen7.emit_flush = GEN7_READS_DST(op->u.gen7.flags);
1054
}
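/* Component-alpha needs a second pass: re-issue the just-queued vertex
 * range (vertex_start..vertex_index) as another 3DPRIMITIVE, but with the
 * blend and kernel switched to the PictOpAdd variants, so that the colour
 * channels are accumulated separately from the first pass.  Returns true
 * if a fixup primitive was emitted.
 */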
1055
 
1056
static bool gen7_magic_ca_pass(struct sna *sna,
1057
			       const struct sna_composite_op *op)
1058
{
1059
	struct gen7_render_state *state = &sna->render_state.gen7;
1060
 
1061
	if (!op->need_magic_ca_pass)
1062
		return false;
1063
 
1064
	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
1065
	     sna->render.vertex_start, sna->render.vertex_index));
1066
 
1067
	gen7_emit_pipe_stall(sna);
1068
 
1069
	gen7_emit_cc(sna,
1070
		     GEN7_BLEND(gen7_get_blend(PictOpAdd, true,
1071
					       op->dst.format)));
1072
	gen7_emit_wm(sna,
1073
		     gen7_choose_composite_kernel(PictOpAdd,
1074
						  true, true,
1075
						  op->is_affine));
1076
 
1077
	OUT_BATCH(GEN7_3DPRIMITIVE | (7 - 2));
1078
	OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
1079
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
1080
	OUT_BATCH(sna->render.vertex_start);
1081
	OUT_BATCH(1);	/* single instance */
1082
	OUT_BATCH(0);	/* start instance location */
1083
	OUT_BATCH(0);	/* index buffer offset, ignored */
1084
 
1085
	state->last_primitive = sna->kgem.nbatch;
1086
	return true;
1087
}
1088
 
1089
static void null_create(struct sna_static_stream *stream)
1090
{
1091
	/* A bunch of zeros useful for legacy border color and depth-stencil */
1092
	sna_static_stream_map(stream, 64, 64);
1093
}
1094
 
1095
static void
1096
sampler_state_init(struct gen7_sampler_state *sampler_state,
1097
		   sampler_filter_t filter,
1098
		   sampler_extend_t extend)
1099
{
1100
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */
1101
 
1102
	/* We use the legacy mode to get the semantics specified by
1103
	 * the Render extension. */
1104
	sampler_state->ss0.default_color_mode = GEN7_BORDER_COLOR_MODE_LEGACY;
1105
 
1106
	switch (filter) {
1107
	default:
1108
	case SAMPLER_FILTER_NEAREST:
1109
		sampler_state->ss0.min_filter = GEN7_MAPFILTER_NEAREST;
1110
		sampler_state->ss0.mag_filter = GEN7_MAPFILTER_NEAREST;
1111
		break;
1112
	case SAMPLER_FILTER_BILINEAR:
1113
		sampler_state->ss0.min_filter = GEN7_MAPFILTER_LINEAR;
1114
		sampler_state->ss0.mag_filter = GEN7_MAPFILTER_LINEAR;
1115
		break;
1116
	}
1117
 
1118
	switch (extend) {
1119
	default:
1120
	case SAMPLER_EXTEND_NONE:
1121
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
1122
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
1123
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
1124
		break;
1125
	case SAMPLER_EXTEND_REPEAT:
1126
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
1127
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
1128
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
1129
		break;
1130
	case SAMPLER_EXTEND_PAD:
1131
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
1132
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
1133
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
1134
		break;
1135
	case SAMPLER_EXTEND_REFLECT:
1136
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
1137
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
1138
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
1139
		break;
1140
	}
1141
}
1142
 
1143
static void
1144
sampler_copy_init(struct gen7_sampler_state *ss)
1145
{
1146
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1147
	ss->ss3.non_normalized_coord = 1;
1148
 
1149
	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1150
}
1151
 
1152
static void
1153
sampler_fill_init(struct gen7_sampler_state *ss)
1154
{
1155
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
1156
	ss->ss3.non_normalized_coord = 1;
1157
 
1158
	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1159
}
1160
 
1161
static uint32_t
1162
gen7_tiling_bits(uint32_t tiling)
1163
{
1164
	switch (tiling) {
1165
	default: assert(0);
1166
	case I915_TILING_NONE: return 0;
1167
	case I915_TILING_X: return GEN7_SURFACE_TILED;
1168
	case I915_TILING_Y: return GEN7_SURFACE_TILED | GEN7_SURFACE_TILED_Y;
1169
	}
1170
}
1171
 
1172
/**
1173
 * Sets up the common fields of a surface state entry for the given
1174
 * bo and returns the offset of that entry within the surface state buffer.
1175
 */
1176
static uint32_t
1177
gen7_bind_bo(struct sna *sna,
1178
	     struct kgem_bo *bo,
1179
	     uint32_t width,
1180
	     uint32_t height,
1181
	     uint32_t format,
1182
	     bool is_dst)
1183
{
1184
	uint32_t *ss;
1185
	uint32_t domains;
1186
	int offset;
1187
	uint32_t is_scanout = is_dst && bo->scanout;
1188
 
1189
	COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32);
1190
 
1191
	/* After the first bind, we manage the cache domains within the batch */
4251 Serge 1192
	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
3280 Serge 1193
	if (offset) {
1194
		if (is_dst)
1195
			kgem_bo_mark_dirty(bo);
1196
		return offset * sizeof(uint32_t);
1197
	}
1198
 
1199
	offset = sna->kgem.surface -=
1200
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
1201
	ss = sna->kgem.batch + offset;
1202
	ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
1203
		 gen7_tiling_bits(bo->tiling) |
1204
		 format << GEN7_SURFACE_FORMAT_SHIFT);
4251 Serge 1205
	if (bo->tiling == I915_TILING_Y)
1206
		ss[0] |= GEN7_SURFACE_VALIGN_4;
1207
	if (is_dst) {
1208
		ss[0] |= GEN7_SURFACE_RC_READ_WRITE;
3280 Serge 1209
		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
4251 Serge 1210
	} else
3280 Serge 1211
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
1212
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
1213
	ss[2] = ((width - 1)  << GEN7_SURFACE_WIDTH_SHIFT |
1214
		 (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
1215
	ss[3] = (bo->pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
1216
	ss[4] = 0;
4251 Serge 1217
	ss[5] = (is_scanout || bo->io) ? 0 : is_hsw(sna) ? 5 << 16 : 3 << 16;
3280 Serge 1218
	ss[6] = 0;
1219
	ss[7] = 0;
4251 Serge 1220
	if (is_hsw(sna))
3280 Serge 1221
		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
1222
 
4251 Serge 1223
	kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);
3280 Serge 1224
 
1225
	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
1226
	     offset, bo->handle, ss[1],
1227
	     format, width, height, bo->pitch, bo->tiling,
1228
	     domains & 0xffff ? "render" : "sampler"));
1229
 
1230
	return offset * sizeof(uint32_t);
1231
}
1232
 
1233
static void gen7_emit_vertex_buffer(struct sna *sna,
1234
				    const struct sna_composite_op *op)
1235
{
1236
	int id = GEN7_VERTEX(op->u.gen7.flags);
1237
 
1238
	OUT_BATCH(GEN7_3DSTATE_VERTEX_BUFFERS | (5 - 2));
1239
	OUT_BATCH(id << GEN7_VB0_BUFFER_INDEX_SHIFT |
1240
		  GEN7_VB0_VERTEXDATA |
1241
		  GEN7_VB0_ADDRESS_MODIFY_ENABLE |
1242
		  4*op->floats_per_vertex << GEN7_VB0_BUFFER_PITCH_SHIFT);
1243
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
1244
	OUT_BATCH(0);
1245
	OUT_BATCH(~0); /* max address: disabled */
1246
	OUT_BATCH(0);
1247
 
1248
	sna->render.vb_id |= 1 << id;
1249
}
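/* Start (or extend) a RECTLIST primitive.  vertex_offset records the
 * batch index of the "vertex count" dword so it can be filled in when the
 * vertices are eventually flushed; if nothing has been emitted since the
 * last 3DPRIMITIVE (nbatch == last_primitive), the previous packet is
 * simply reused by pointing vertex_offset back into it.
 */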
1250
 
1251
static void gen7_emit_primitive(struct sna *sna)
1252
{
1253
	if (sna->kgem.nbatch == sna->render_state.gen7.last_primitive) {
1254
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
1255
		return;
1256
	}
1257
 
1258
	OUT_BATCH(GEN7_3DPRIMITIVE | (7 - 2));
1259
	OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
1260
	sna->render.vertex_offset = sna->kgem.nbatch;
1261
	OUT_BATCH(0);	/* vertex count, to be filled in later */
1262
	OUT_BATCH(sna->render.vertex_index);
1263
	OUT_BATCH(1);	/* single instance */
1264
	OUT_BATCH(0);	/* start instance location */
1265
	OUT_BATCH(0);	/* index buffer offset, ignored */
1266
	sna->render.vertex_start = sna->render.vertex_index;
1267
 
1268
	sna->render_state.gen7.last_primitive = sna->kgem.nbatch;
1269
}
1270
 
1271
static bool gen7_rectangle_begin(struct sna *sna,
1272
				 const struct sna_composite_op *op)
1273
{
1274
	int id = 1 << GEN7_VERTEX(op->u.gen7.flags);
1275
	int ndwords;
1276
 
1277
	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
1278
		return true;
1279
 
1280
	ndwords = op->need_magic_ca_pass ? 60 : 6;
1281
	if ((sna->render.vb_id & id) == 0)
1282
		ndwords += 5;
1283
	if (!kgem_check_batch(&sna->kgem, ndwords))
1284
		return false;
1285
 
1286
	if ((sna->render.vb_id & id) == 0)
1287
		gen7_emit_vertex_buffer(sna, op);
1288
 
1289
	gen7_emit_primitive(sna);
1290
	return true;
1291
}
1292
 
1293
static int gen7_get_rectangles__flush(struct sna *sna,
1294
				      const struct sna_composite_op *op)
1295
{
1296
	/* Prevent discarding the new vbo after lock contention */
1297
	if (sna_vertex_wait__locked(&sna->render)) {
1298
		int rem = vertex_space(sna);
1299
		if (rem > op->floats_per_rect)
1300
			return rem;
1301
	}
1302
 
1303
	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6))
1304
		return 0;
1305
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
1306
		return 0;
1307
 
1308
	if (sna->render.vertex_offset) {
1309
		gen4_vertex_flush(sna);
1310
		if (gen7_magic_ca_pass(sna, op)) {
1311
			gen7_emit_pipe_stall(sna);
1312
			gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
1313
			gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
1314
		}
1315
	}
1316
 
1317
	return gen4_vertex_finish(sna);
1318
}
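/* Reserve vertex space for `want' rectangles (3 vertices each) and make
 * sure a primitive is open, flushing the vbo and/or the batch and retrying
 * from `start' when space runs out; the return value is how many
 * rectangles the caller may actually emit, which can be fewer than
 * requested.
 */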
1319
 
1320
inline static int gen7_get_rectangles(struct sna *sna,
1321
				      const struct sna_composite_op *op,
1322
				      int want,
1323
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
1324
{
1325
	int rem;
1326
 
1327
	assert(want);
1328
 
1329
start:
1330
	rem = vertex_space(sna);
1331
	if (unlikely(rem < op->floats_per_rect)) {
1332
		DBG(("flushing vbo for %s: %d < %d\n",
1333
		     __FUNCTION__, rem, op->floats_per_rect));
1334
		rem = gen7_get_rectangles__flush(sna, op);
1335
		if (unlikely(rem == 0))
1336
			goto flush;
1337
	}
1338
 
1339
	if (unlikely(sna->render.vertex_offset == 0)) {
1340
		if (!gen7_rectangle_begin(sna, op))
1341
			goto flush;
1342
		else
1343
			goto start;
1344
	}
1345
 
1346
	assert(rem <= vertex_space(sna));
4251 Serge 1347
	assert(op->floats_per_rect <= rem);
3280 Serge 1348
	if (want > 1 && want * op->floats_per_rect > rem)
1349
		want = rem / op->floats_per_rect;
1350
 
1351
	assert(want > 0);
1352
	sna->render.vertex_index += 3*want;
1353
	return want;
1354
 
1355
flush:
1356
	if (sna->render.vertex_offset) {
1357
		gen4_vertex_flush(sna);
1358
		gen7_magic_ca_pass(sna, op);
1359
	}
1360
	sna_vertex_wait__locked(&sna->render);
1361
	_kgem_submit(&sna->kgem);
1362
	emit_state(sna, op);
1363
	goto start;
1364
}
1365
 
1366
inline static uint32_t *gen7_composite_get_binding_table(struct sna *sna,
1367
							 uint16_t *offset)
1368
{
1369
	uint32_t *table;
1370
 
1371
	sna->kgem.surface -=
1372
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
1373
	/* Clear all surplus entries to zero in case of prefetch */
1374
	table = memset(sna->kgem.batch + sna->kgem.surface,
1375
		       0, sizeof(struct gen7_surface_state));
1376
 
1377
	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
1378
 
1379
	*offset = sna->kgem.surface;
1380
	return table;
1381
}
1382
 
1383
static void
1384
gen7_get_batch(struct sna *sna, const struct sna_composite_op *op)
1385
{
1386
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
1387
 
1388
	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
1389
		DBG(("%s: flushing batch: %d < %d+%d\n",
1390
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
1391
		     150, 4*8));
1392
		_kgem_submit(&sna->kgem);
1393
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1394
	}
1395
 
1396
	assert(sna->kgem.mode == KGEM_RENDER);
1397
	assert(sna->kgem.ring == KGEM_RENDER);
1398
 
1399
	if (sna->render_state.gen7.needs_invariant)
1400
		gen7_emit_invariant(sna);
1401
}
1402
 
1403
static void gen7_emit_composite_state(struct sna *sna,
1404
				      const struct sna_composite_op *op)
1405
{
1406
	uint32_t *binding_table;
1407
	uint16_t offset;
1408
 
1409
	gen7_get_batch(sna, op);
1410
 
1411
	binding_table = gen7_composite_get_binding_table(sna, &offset);
1412
 
1413
	binding_table[0] =
1414
		gen7_bind_bo(sna,
1415
			    op->dst.bo, op->dst.width, op->dst.height,
1416
			    gen7_get_dest_format(op->dst.format),
1417
			    true);
1418
	binding_table[1] =
1419
		gen7_bind_bo(sna,
1420
			     op->src.bo, op->src.width, op->src.height,
1421
			     op->src.card_format,
1422
			     false);
1423
	if (op->mask.bo) {
1424
		binding_table[2] =
1425
			gen7_bind_bo(sna,
1426
				     op->mask.bo,
1427
				     op->mask.width,
1428
				     op->mask.height,
1429
				     op->mask.card_format,
1430
				     false);
1431
	}
1432
 
1433
	if (sna->kgem.surface == offset &&
1434
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table &&
1435
	    (op->mask.bo == NULL ||
1436
	     sna->kgem.batch[sna->render_state.gen7.surface_table+2] == binding_table[2])) {
1437
		sna->kgem.surface += sizeof(struct gen7_surface_state) / sizeof(uint32_t);
1438
		offset = sna->render_state.gen7.surface_table;
1439
	}
1440
 
1441
	gen7_emit_state(sna, op, offset);
1442
}
1443
 
1444
static void
1445
gen7_align_vertex(struct sna *sna, const struct sna_composite_op *op)
1446
{
1447
	if (op->floats_per_vertex != sna->render_state.gen7.floats_per_vertex) {
1448
		if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
1449
			gen4_vertex_finish(sna);
1450
 
1451
		DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
1452
		     sna->render_state.gen7.floats_per_vertex,
1453
		     op->floats_per_vertex,
1454
		     sna->render.vertex_index,
1455
		     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
1456
		sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
1457
		sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
1458
		sna->render_state.gen7.floats_per_vertex = op->floats_per_vertex;
1459
	}
1460
}
1461
 
3291 Serge 1462
fastcall static void
1463
gen7_render_composite_blt(struct sna *sna,
1464
			  const struct sna_composite_op *op,
1465
			  const struct sna_composite_rectangles *r)
1466
{
1467
	gen7_get_rectangles(sna, op, 1, gen7_emit_composite_state);
1468
	op->prim_emit(sna, op, r);
1469
}
3280 Serge 1470
static uint32_t
1471
gen7_composite_create_blend_state(struct sna_static_stream *stream)
1472
{
1473
	char *base, *ptr;
1474
	int src, dst;
1475
 
1476
	base = sna_static_stream_map(stream,
1477
				     GEN7_BLENDFACTOR_COUNT * GEN7_BLENDFACTOR_COUNT * GEN7_BLEND_STATE_PADDED_SIZE,
1478
				     64);
1479
 
1480
	ptr = base;
1481
	for (src = 0; src < GEN7_BLENDFACTOR_COUNT; src++) {
1482
		for (dst= 0; dst < GEN7_BLENDFACTOR_COUNT; dst++) {
1483
			struct gen7_blend_state *blend =
1484
				(struct gen7_blend_state *)ptr;
1485
 
1486
			blend->blend0.dest_blend_factor = dst;
1487
			blend->blend0.source_blend_factor = src;
1488
			blend->blend0.blend_func = GEN7_BLENDFUNCTION_ADD;
1489
			blend->blend0.blend_enable =
1490
				!(dst == GEN7_BLENDFACTOR_ZERO && src == GEN7_BLENDFACTOR_ONE);
1491
 
1492
			blend->blend1.post_blend_clamp_enable = 1;
1493
			blend->blend1.pre_blend_clamp_enable = 1;
1494
 
1495
			ptr += GEN7_BLEND_STATE_PADDED_SIZE;
1496
		}
1497
	}
1498
 
1499
	return sna_static_stream_offsetof(stream, base);
1500
}
1501
 
4251 Serge 1502
#if 0
1503
static uint32_t gen7_bind_video_source(struct sna *sna,
1504
				       struct kgem_bo *bo,
1505
				       uint32_t offset,
1506
				       int width,
1507
				       int height,
1508
				       int pitch,
1509
				       uint32_t format)
1510
{
1511
	uint32_t *ss, bind;
3280 Serge 1512
 
4251 Serge 1513
	bind = sna->kgem.surface -=
1514
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
3280 Serge 1515
 
4251 Serge 1516
	assert(bo->tiling == I915_TILING_NONE);
1517
 
1518
	ss = sna->kgem.batch + bind;
1519
	ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
1520
		 format << GEN7_SURFACE_FORMAT_SHIFT);
1521
	ss[1] = kgem_add_reloc(&sna->kgem, bind + 1, bo,
1522
			       I915_GEM_DOMAIN_SAMPLER << 16,
1523
			       offset);
1524
	ss[2] = ((width - 1)  << GEN7_SURFACE_WIDTH_SHIFT |
1525
		 (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
1526
	ss[3] = (pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
1527
	ss[4] = 0;
1528
	ss[5] = 0;
1529
	ss[6] = 0;
1530
	ss[7] = 0;
1531
	if (is_hsw(sna))
1532
		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
1533
 
1534
	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, offset=%d\n",
1535
	     bind, bo->handle, ss[1],
1536
	     format, width, height, pitch, offset));
1537
 
1538
	return bind * sizeof(uint32_t);
1539
}
1540
 
1541
static void gen7_emit_video_state(struct sna *sna,
1542
				  const struct sna_composite_op *op)
1543
{
1544
	struct sna_video_frame *frame = op->priv;
1545
	uint32_t src_surf_format;
1546
	uint32_t src_surf_base[6];
1547
	int src_width[6];
1548
	int src_height[6];
1549
	int src_pitch[6];
1550
	uint32_t *binding_table;
1551
	uint16_t offset;
1552
	int n_src, n;
1553
 
1554
	gen7_get_batch(sna, op);
1555
 
1556
	src_surf_base[0] = 0;
1557
	src_surf_base[1] = 0;
1558
	src_surf_base[2] = frame->VBufOffset;
1559
	src_surf_base[3] = frame->VBufOffset;
1560
	src_surf_base[4] = frame->UBufOffset;
1561
	src_surf_base[5] = frame->UBufOffset;
1562
 
1563
	if (is_planar_fourcc(frame->id)) {
1564
		src_surf_format = GEN7_SURFACEFORMAT_R8_UNORM;
1565
		src_width[1]  = src_width[0]  = frame->width;
1566
		src_height[1] = src_height[0] = frame->height;
1567
		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
1568
		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
1569
			frame->width / 2;
1570
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
1571
			frame->height / 2;
1572
		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
1573
			frame->pitch[0];
1574
		n_src = 6;
1575
	} else {
1576
		if (frame->id == FOURCC_UYVY)
1577
			src_surf_format = GEN7_SURFACEFORMAT_YCRCB_SWAPY;
1578
		else
1579
			src_surf_format = GEN7_SURFACEFORMAT_YCRCB_NORMAL;
1580
 
1581
		src_width[0]  = frame->width;
1582
		src_height[0] = frame->height;
1583
		src_pitch[0]  = frame->pitch[0];
1584
		n_src = 1;
1585
	}
1586
 
1587
	binding_table = gen7_composite_get_binding_table(sna, &offset);
1588
 
1589
	binding_table[0] =
1590
		gen7_bind_bo(sna,
1591
			     op->dst.bo, op->dst.width, op->dst.height,
1592
			     gen7_get_dest_format(op->dst.format),
1593
			     true);
1594
	for (n = 0; n < n_src; n++) {
1595
		binding_table[1+n] =
1596
			gen7_bind_video_source(sna,
1597
					       frame->bo,
1598
					       src_surf_base[n],
1599
					       src_width[n],
1600
					       src_height[n],
1601
					       src_pitch[n],
1602
					       src_surf_format);
1603
	}
1604
 
1605
	gen7_emit_state(sna, op, offset);
1606
}
1607
 
1608
static bool
1609
gen7_render_video(struct sna *sna,
1610
		  struct sna_video *video,
1611
		  struct sna_video_frame *frame,
1612
		  RegionPtr dstRegion,
1613
		  PixmapPtr pixmap)
1614
{
1615
	struct sna_composite_op tmp;
1616
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
1617
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
1618
	int src_width = frame->src.x2 - frame->src.x1;
1619
	int src_height = frame->src.y2 - frame->src.y1;
1620
	float src_offset_x, src_offset_y;
1621
	float src_scale_x, src_scale_y;
1622
	int nbox, pix_xoff, pix_yoff;
1623
	struct sna_pixmap *priv;
1624
	unsigned filter;
1625
	BoxPtr box;
1626
 
1627
	DBG(("%s: src=(%d, %d), dst=(%d, %d), %ldx[(%d, %d), (%d, %d)...]\n",
1628
	     __FUNCTION__,
1629
	     src_width, src_height, dst_width, dst_height,
1630
	     (long)REGION_NUM_RECTS(dstRegion),
1631
	     REGION_EXTENTS(NULL, dstRegion)->x1,
1632
	     REGION_EXTENTS(NULL, dstRegion)->y1,
1633
	     REGION_EXTENTS(NULL, dstRegion)->x2,
1634
	     REGION_EXTENTS(NULL, dstRegion)->y2));
1635
 
1636
	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
1637
	if (priv == NULL)
1638
		return false;
1639
 
1640
	memset(&tmp, 0, sizeof(tmp));
1641
 
1642
	tmp.dst.pixmap = pixmap;
1643
	tmp.dst.width  = pixmap->drawable.width;
1644
	tmp.dst.height = pixmap->drawable.height;
1645
	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
1646
	tmp.dst.bo = priv->gpu_bo;
1647
 
1648
	tmp.src.bo = frame->bo;
1649
	tmp.mask.bo = NULL;
1650
 
1651
	tmp.floats_per_vertex = 3;
1652
	tmp.floats_per_rect = 9;

	if (src_width == dst_width && src_height == dst_height)
		filter = SAMPLER_FILTER_NEAREST;
	else
		filter = SAMPLER_FILTER_BILINEAR;

	tmp.u.gen7.flags =
		GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
					      SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
			       NO_BLEND,
			       is_planar_fourcc(frame->id) ?
			       GEN7_WM_KERNEL_VIDEO_PLANAR :
			       GEN7_WM_KERNEL_VIDEO_PACKED,
			       2);
	tmp.priv = frame;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_emit_video_state(sna, &tmp);
	gen7_align_vertex(sna, &tmp);

	/* Set up the offset for translating from the given region (in screen
	 * coordinates) to the backing pixmap.
	 */
#ifdef COMPOSITE
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
	pix_xoff = 0;
	pix_yoff = 0;
#endif

	DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n",
	     __FUNCTION__,
	     frame->src.x1, frame->src.y1,
	     src_width, src_height,
	     dst_width, dst_height,
	     frame->width, frame->height));

	src_scale_x = (float)src_width / dst_width / frame->width;
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;

	src_scale_y = (float)src_height / dst_height / frame->height;
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
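	/* With these factors a destination pixel x maps to the normalised
	 * source coordinate u = x * src_scale_x + src_offset_x (and likewise
	 * for y); e.g. x = dstRegion->extents.x1 lands on
	 * frame->src.x1 / frame->width, the left edge of the source box.
	 */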

	DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n",
	     __FUNCTION__,
	     src_scale_x, src_scale_y,
	     src_offset_x, src_offset_y));

	box = REGION_RECTS(dstRegion);
	nbox = REGION_NUM_RECTS(dstRegion);
	while (nbox--) {
		BoxRec r;

		DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n",
		     __FUNCTION__,
		     box->x1, box->y1,
		     box->x2, box->y2,
		     pix_xoff, pix_yoff,
		     box->x1 * src_scale_x + src_offset_x,
		     box->y1 * src_scale_y + src_offset_y,
		     box->x2 * src_scale_x + src_offset_x,
		     box->y2 * src_scale_y + src_offset_y));

		r.x1 = box->x1 + pix_xoff;
		r.x2 = box->x2 + pix_xoff;
		r.y1 = box->y1 + pix_yoff;
		r.y2 = box->y2 + pix_yoff;

		gen7_get_rectangles(sna, &tmp, 1, gen7_emit_video_state);

		OUT_VERTEX(r.x2, r.y2);
		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(r.x1, r.y2);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);

		OUT_VERTEX(r.x1, r.y1);
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
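
		/* Track what we just drew: the rendered box becomes GPU
		 * damage and any overlapping CPU damage is dropped, unless
		 * the whole pixmap is already marked as GPU-damaged.
		 */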
		if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
			sna_damage_add_box(&priv->gpu_damage, &r);
			sna_damage_subtract_box(&priv->cpu_damage, &r);
		}
		box++;
	}

	gen4_vertex_flush(sna);
	return true;
}
#endif

static void gen7_render_composite_done(struct sna *sna,
				       const struct sna_composite_op *op)
{
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen7_magic_ca_pass(sna, op);
	}
}

#if 0
static bool
gen7_render_fill_boxes(struct sna *sna,
		       CARD8 op,
		       PictFormat format,
		       const xRenderColor *color,
		       PixmapPtr dst, struct kgem_bo *dst_bo,
		       const BoxRec *box, int n)
{
	struct sna_composite_op tmp;
	uint32_t pixel;

	DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
	     __FUNCTION__, op,
	     color->red, color->green, color->blue, color->alpha, (int)format));

	if (op >= ARRAY_SIZE(gen7_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	if (prefer_blt_fill(sna, dst_bo) || !gen7_check_dst_format(format)) {
		uint8_t alu = GXinvalid;

		if (op <= PictOpSrc) {
			pixel = 0;
			if (op == PictOpClear)
				alu = GXclear;
			else if (sna_get_pixel_from_rgba(&pixel,
							 color->red,
							 color->green,
							 color->blue,
							 color->alpha,
							 format))
				alu = GXcopy;
		}

		if (alu != GXinvalid &&
		    sna_blt_fill_boxes(sna, alu,
				       dst_bo, dst->drawable.bitsPerPixel,
				       pixel, box, n))
			return true;

		if (!gen7_check_dst_format(format))
			return false;
	}

	if (op == PictOpClear) {
		pixel = 0;
		op = PictOpSrc;
	} else if (!sna_get_pixel_from_rgba(&pixel,
					    color->red,
					    color->green,
					    color->blue,
					    color->alpha,
					    PICT_a8r8g8b8))
		return false;

	DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
	     __FUNCTION__, pixel, n,
	     box[0].x1, box[0].y1, box[0].x2, box[0].y2));

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = format;
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	sna_render_composite_redirect_init(&tmp);
	if (too_large(dst->drawable.width, dst->drawable.height)) {
		BoxRec extents;

		boxes_extents(box, n, &extents);
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1, extents.y1,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1,
						   n > 1))
			return sna_tiling_fill_boxes(sna, op, format, color,
						     dst, dst_bo, box, n);
	}

	tmp.src.bo = sna_render_get_solid(sna, pixel);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen7.flags = FILL_FLAGS(op, format);

	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
	}

	gen7_emit_fill_state(sna, &tmp);
	gen7_align_vertex(sna, &tmp);

	do {
		int n_this_time;
		int16_t *v;

		n_this_time = gen7_get_rectangles(sna, &tmp, n,
						  gen7_emit_fill_state);
		n -= n_this_time;

		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
		sna->render.vertex_used += 6 * n_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);
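		/* Each rectangle is written as 12 int16s: three vertices of
		 * (x, y, u, v) with u,v in {0,1} (bottom-right, bottom-left,
		 * top-left), matching floats_per_rect = 6 at two "floats"
		 * (four int16s) per vertex.
		 */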
		do {
			DBG(("	(%d, %d), (%d, %d)\n",
			     box->x1, box->y1, box->x2, box->y2));

			v[0] = box->x2;
			v[5] = v[1] = box->y2;
			v[8] = v[4] = box->x1;
			v[9] = box->y1;
			v[2] = v[3]  = v[7]  = 1;
			v[6] = v[10] = v[11] = 0;
			v += 12; box++;
		} while (--n_this_time);
	} while (n);

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	sna_render_composite_redirect_done(sna, &tmp);
	return true;
}
#endif

static void gen7_render_flush(struct sna *sna)
{
	gen4_vertex_close(sna);

	assert(sna->render.vb_id == 0);
	assert(sna->render.vertex_offset == 0);
}
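
/* kgem callbacks: a ring switch submits any batch in flight, retire recycles
 * the shared vertex buffer once it is idle, and expire releases an unused vbo
 * so rendering falls back to the embedded vertex_data array.
 */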
static void
gen7_render_context_switch(struct kgem *kgem,
			   int new_mode)
{
	if (kgem->nbatch) {
		DBG(("%s: switch rings %d -> %d\n",
		     __FUNCTION__, kgem->mode, new_mode));
		_kgem_submit(kgem);
	}

	kgem->ring = new_mode;
}

static void
gen7_render_retire(struct kgem *kgem)
{
	struct sna *sna;

	if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire))
		kgem->ring = kgem->mode;

	sna = container_of(kgem, struct sna, kgem);
	if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
		DBG(("%s: resetting idle vbo\n", __FUNCTION__));
		sna->render.vertex_used = 0;
		sna->render.vertex_index = 0;
	}
}

static void
gen7_render_expire(struct kgem *kgem)
{
	struct sna *sna;

	sna = container_of(kgem, struct sna, kgem);
	if (sna->render.vbo && !sna->render.vertex_used) {
		DBG(("%s: discarding vbo\n", __FUNCTION__));
		kgem_bo_destroy(kgem, sna->render.vbo);
		sna->render.vbo = NULL;
		sna->render.vertices = sna->render.vertex_data;
		sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
		sna->render.vertex_used = 0;
		sna->render.vertex_index = 0;
	}
}

static void gen7_render_reset(struct sna *sna)
{
	sna->render_state.gen7.emit_flush = false;
	sna->render_state.gen7.needs_invariant = true;
	sna->render_state.gen7.ve_id = 3 << 2;
	sna->render_state.gen7.last_primitive = -1;

	sna->render_state.gen7.num_sf_outputs = 0;
	sna->render_state.gen7.samplers = -1;
	sna->render_state.gen7.blend = -1;
	sna->render_state.gen7.kernel = -1;
	sna->render_state.gen7.drawrect_offset = -1;
	sna->render_state.gen7.drawrect_limit = -1;
	sna->render_state.gen7.surface_table = -1;

	sna->render.vertex_offset = 0;
	sna->render.nvertex_reloc = 0;
	sna->render.vb_id = 0;
}

static void gen7_render_fini(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo);
}
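
/* The helpers below infer the GPU flavour from the PCI device id:
 * is_gt3()/is_gt2() test bits 0x20/0x30 and is_mobile() tests the low
 * nibble; gen7_render_setup() uses them to pick a gt_info table.
 */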
static bool is_gt3(struct sna *sna)
{
	assert(sna->kgem.gen == 075);
	return sna->PciInfo->device_id & 0x20;
}

static bool is_gt2(struct sna *sna)
{
	return sna->PciInfo->device_id & (is_hsw(sna) ? 0x30 : 0x20);
}

static bool is_mobile(struct sna *sna)
{
	return (sna->PciInfo->device_id & 0xf) == 0x6;
}

static bool gen7_render_setup(struct sna *sna)
{
	struct gen7_render_state *state = &sna->render_state.gen7;
	struct sna_static_stream general;
	struct gen7_sampler_state *ss;
	int i, j, k, l, m;

	if (is_ivb(sna)) {
		state->info = &ivb_gt_info;
		if (sna->PciInfo->device_id & 0xf) {
			state->info = &ivb_gt1_info;
			if (is_gt2(sna))
				state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */
		}
	} else if (is_byt(sna)) {
		state->info = &byt_gt_info;
	} else if (is_hsw(sna)) {
		state->info = &hsw_gt_info;
		if (sna->PciInfo->device_id & 0xf) {
			if (is_gt3(sna))
				state->info = &hsw_gt3_info;
			else if (is_gt2(sna))
				state->info = &hsw_gt2_info;
			else
				state->info = &hsw_gt1_info;
		}
	} else
		return false;

	sna_static_stream_init(&general);

	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
	 * dumps, you know it points to zero.
	 */
	null_create(&general);
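
	/* Compile (or copy) one pixel-shader kernel per enabled dispatch
	 * width: slot [0] holds the SIMD8 variant, [1] SIMD16 and [2] SIMD32.
	 * Kernels shipped as prebuilt binaries (size != 0) only fill slot [1].
	 */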
	for (m = 0; m < GEN7_WM_KERNEL_COUNT; m++) {
		if (wm_kernels[m].size) {
			state->wm_kernel[m][1] =
				sna_static_stream_add(&general,
						      wm_kernels[m].data,
						      wm_kernels[m].size,
						      64);
		} else {
			if (USE_8_PIXEL_DISPATCH) {
				state->wm_kernel[m][0] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 8);
			}

			if (USE_16_PIXEL_DISPATCH) {
				state->wm_kernel[m][1] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 16);
			}

			if (USE_32_PIXEL_DISPATCH) {
				state->wm_kernel[m][2] =
					sna_static_stream_compile_wm(sna, &general,
								     wm_kernels[m].data, 32);
			}
		}
		assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
	}
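
	/* Sampler table: two fixed pairs (copy and fill) followed by every
	 * (src filter, src extend) x (mask filter, mask extend) combination,
	 * presumably the table that SAMPLER_OFFSET() indexes into.
	 */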
	ss = sna_static_stream_map(&general,
				   2 * sizeof(*ss) *
				   (2 +
				    FILTER_COUNT * EXTEND_COUNT *
				    FILTER_COUNT * EXTEND_COUNT),
				   32);
	state->wm_state = sna_static_stream_offsetof(&general, ss);
	sampler_copy_init(ss); ss += 2;
	sampler_fill_init(ss); ss += 2;
	for (i = 0; i < FILTER_COUNT; i++) {
		for (j = 0; j < EXTEND_COUNT; j++) {
			for (k = 0; k < FILTER_COUNT; k++) {
				for (l = 0; l < EXTEND_COUNT; l++) {
					sampler_state_init(ss++, i, j);
					sampler_state_init(ss++, k, l);
				}
			}
		}
	}

	state->cc_blend = gen7_composite_create_blend_state(&general);

	state->general_bo = sna_static_stream_fini(sna, &general);
	return state->general_bo != NULL;
}

const char *gen7_render_init(struct sna *sna, const char *backend)
{
	if (!gen7_render_setup(sna))
		return backend;

	sna->kgem.context_switch = gen7_render_context_switch;
	sna->kgem.retire = gen7_render_retire;
	sna->kgem.expire = gen7_render_expire;

#if 0
#if !NO_COMPOSITE
	sna->render.composite = gen7_render_composite;
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	sna->render.check_composite_spans = gen7_check_composite_spans;
	sna->render.composite_spans = gen7_render_composite_spans;
	if (is_mobile(sna) || is_gt2(sna) || is_byt(sna))
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
#endif
	sna->render.video = gen7_render_video;

#if !NO_COPY_BOXES
	sna->render.copy_boxes = gen7_render_copy_boxes;
#endif
#if !NO_COPY
	sna->render.copy = gen7_render_copy;
#endif

#if !NO_FILL_BOXES
	sna->render.fill_boxes = gen7_render_fill_boxes;
#endif
#if !NO_FILL
	sna->render.fill = gen7_render_fill;
#endif
#if !NO_FILL_ONE
	sna->render.fill_one = gen7_render_fill_one;
#endif
#if !NO_FILL_CLEAR
	sna->render.clear = gen7_render_clear;
#endif
#endif
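
	/* Only the texture-blit path and the housekeeping hooks are wired up
	 * here; the stock render entry points above are compiled out with
	 * the surrounding #if 0.
	 */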
	sna->render.blit_tex = gen7_blit_tex;
	sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;

	sna->render.flush = gen7_render_flush;
	sna->render.reset = gen7_render_reset;
	sna->render.fini = gen7_render_fini;

	sna->render.max_3d_size = GEN7_MAX_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	return sna->render_state.gen7.info->name;
}

static bool
gen7_blit_tex(struct sna *sna,
	      uint8_t op, bool scale,
	      PixmapPtr src, struct kgem_bo *src_bo,
	      PixmapPtr mask, struct kgem_bo *mask_bo,
	      PixmapPtr dst, struct kgem_bo *dst_bo,
	      int32_t src_x, int32_t src_y,
	      int32_t msk_x, int32_t msk_y,
	      int32_t dst_x, int32_t dst_y,
	      int32_t width, int32_t height,
	      struct sna_composite_op *tmp)
{
	tmp->op = PictOpSrc;

	tmp->dst.pixmap = dst;
	tmp->dst.bo     = dst_bo;
	tmp->dst.width  = dst->drawable.width;
	tmp->dst.height = dst->drawable.height;
	tmp->dst.format = PICT_x8r8g8b8;

	tmp->src.repeat = SAMPLER_EXTEND_NONE;
	tmp->src.filter = SAMPLER_FILTER_NEAREST;
	tmp->src.is_affine = true;

	tmp->src.bo = src_bo;
	tmp->src.pict_format = PICT_x8r8g8b8;
	tmp->src.card_format = gen7_get_card_format(tmp->src.pict_format);
	tmp->src.width  = src->drawable.width;
	tmp->src.height = src->drawable.height;

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
	tmp->mask.is_affine = true;

	tmp->mask.bo = mask_bo;
	tmp->mask.pict_format = PIXMAN_a8;
	tmp->mask.card_format = gen7_get_card_format(tmp->mask.pict_format);
	tmp->mask.width  = mask->drawable.width;
	tmp->mask.height = mask->drawable.height;
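
	/* The scale factors below turn pixel coordinates into normalised
	 * [0,1] texture coordinates: the source uses the caller-supplied
	 * width/height when scale is requested, otherwise the source
	 * drawable size; the mask always uses the mask drawable size.
	 */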
	if (scale) {
		tmp->src.scale[0] = 1.f/width;
		tmp->src.scale[1] = 1.f/height;
	} else {
		tmp->src.scale[0] = 1.f/src->drawable.width;
		tmp->src.scale[1] = 1.f/src->drawable.height;
	}

	tmp->mask.scale[0] = 1.f/mask->drawable.width;
	tmp->mask.scale[1] = 1.f/mask->drawable.height;

	tmp->u.gen7.flags =
		GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
					      tmp->src.repeat,
					      tmp->mask.filter,
					      tmp->mask.repeat),
			       gen7_get_blend(tmp->op,
					      tmp->has_component_alpha,
					      tmp->dst.format),
/*			       gen7_choose_composite_kernel(tmp->op,
							    tmp->mask.bo != NULL,
							    tmp->has_component_alpha,
							    tmp->is_affine), */
			       GEN7_WM_KERNEL_MASK,
			       gen4_choose_composite_emitter(sna, tmp));

	tmp->blt   = gen7_render_composite_blt;
//	tmp->box   = gen7_render_composite_box;
	tmp->done  = gen7_render_composite_done;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen7_emit_composite_state(sna, tmp);
	gen7_align_vertex(sna, tmp);
	return true;
}