Subversion Repositories Kolibri OS

Rev

Rev 4359 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
4304 Serge 1
/*
2
 * Copyright © 2006,2008,2011 Intel Corporation
3
 * Copyright © 2007 Red Hat, Inc.
4
 *
5
 * Permission is hereby granted, free of charge, to any person obtaining a
6
 * copy of this software and associated documentation files (the "Software"),
7
 * to deal in the Software without restriction, including without limitation
8
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
 * and/or sell copies of the Software, and to permit persons to whom the
10
 * Software is furnished to do so, subject to the following conditions:
11
 *
12
 * The above copyright notice and this permission notice (including the next
13
 * paragraph) shall be included in all copies or substantial portions of the
14
 * Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
 * SOFTWARE.
23
 *
24
 * Authors:
25
 *    Wang Zhenyu 
26
 *    Eric Anholt 
27
 *    Carl Worth 
28
 *    Keith Packard 
29
 *    Chris Wilson 
30
 *
31
 */
32
 
33
#ifdef HAVE_CONFIG_H
34
#include "config.h"
35
#endif
36
 
37
#include "sna.h"
38
#include "sna_reg.h"
39
#include "sna_render.h"
40
#include "sna_render_inline.h"
41
//#include "sna_video.h"
42
 
43
#include "brw/brw.h"
44
#include "gen7_render.h"
4501 Serge 45
#include "gen4_common.h"
4304 Serge 46
#include "gen4_source.h"
47
#include "gen4_vertex.h"
4501 Serge 48
#include "gen6_common.h"
4304 Serge 49
 
4501 Serge 50
/* Debug overrides read by gen7_emit_state(): when non-zero the matching
 * PIPE_CONTROL (invalidate / flush / stall) is forced for every operation.
 */
#define ALWAYS_INVALIDATE 0
#define ALWAYS_FLUSH 0
#define ALWAYS_STALL 0

/* Per-path debug switches, all off.
 * NOTE(review): presumably a non-zero value disables that accelerated path;
 * the users live outside this part of the file -- confirm.
 */
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_BOXES 0
#define NO_FILL_ONE 0
#define NO_FILL_CLEAR 0

#define NO_RING_SWITCH 0

/* Pixel dispatch widths that may be used; at least one must be enabled. */
#define USE_8_PIXEL_DISPATCH 1
#define USE_16_PIXEL_DISPATCH 1
#define USE_32_PIXEL_DISPATCH 0

#if !(USE_8_PIXEL_DISPATCH || USE_16_PIXEL_DISPATCH || USE_32_PIXEL_DISPATCH)
#error "Must select at least 8, 16 or 32 pixel dispatch"
#endif
72
 
73
/* Largest width or height (in pixels) accepted by too_large(). */
#define GEN7_MAX_SIZE 16384
74
 
75
/* XXX Todo
76
 *
77
 * STR (software tiled rendering) mode. No, really.
78
 * 64x32 pixel blocks align with the rendering cache. Worth considering.
79
 */
80
 
81
/* Non-zero when x is a multiple of y; y must be a power of two. */
#define is_aligned(x, y) (0 == ((x) & ((y) - 1)))
82
 
83
/* Static description of one gen7 GPU variant. */
struct gt_info {
	const char *name;		/* human-readable chip name */
	uint32_t max_vs_threads;
	uint32_t max_gs_threads;
	/* Stored pre-shifted into its 3DSTATE_PS field position:
	 * gen7_emit_wm() ORs this value straight into the command dword.
	 */
	uint32_t max_wm_threads;
	struct {
		int size;
		int max_vs_entries;	/* programmed by gen7_emit_urb() */
		int max_gs_entries;
		int push_ps_size;	/* in 1KBs */
	} urb;
	int gt;				/* GT level of the part */
};
96
 
97
static const struct gt_info ivb_gt_info = {
98
	.name = "Ivybridge (gen7)",
99
	.max_vs_threads = 16,
100
	.max_gs_threads = 16,
101
	.max_wm_threads = (16-1) << IVB_PS_MAX_THREADS_SHIFT,
102
	.urb = { 128, 64, 64, 8 },
103
	.gt = 0,
104
};
105
 
106
static const struct gt_info ivb_gt1_info = {
107
	.name = "Ivybridge (gen7, gt1)",
108
	.max_vs_threads = 36,
109
	.max_gs_threads = 36,
110
	.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
111
	.urb = { 128, 512, 192, 8 },
112
	.gt = 1,
113
};
114
 
115
static const struct gt_info ivb_gt2_info = {
116
	.name = "Ivybridge (gen7, gt2)",
117
	.max_vs_threads = 128,
118
	.max_gs_threads = 128,
119
	.max_wm_threads = (172-1) << IVB_PS_MAX_THREADS_SHIFT,
120
	.urb = { 256, 704, 320, 8 },
121
	.gt = 2,
122
};
123
 
124
static const struct gt_info byt_gt_info = {
125
	.name = "Baytrail (gen7)",
126
	.urb = { 128, 64, 64 },
127
	.max_vs_threads = 36,
128
	.max_gs_threads = 36,
129
	.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
130
	.urb = { 128, 512, 192, 8 },
131
	.gt = 1,
132
};
133
 
134
static const struct gt_info hsw_gt_info = {
135
	.name = "Haswell (gen7.5)",
136
	.max_vs_threads = 8,
137
	.max_gs_threads = 8,
138
	.max_wm_threads =
139
		(8 - 1) << HSW_PS_MAX_THREADS_SHIFT |
140
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
141
	.urb = { 128, 64, 64, 8 },
142
	.gt = 0,
143
};
144
 
145
static const struct gt_info hsw_gt1_info = {
146
	.name = "Haswell (gen7.5, gt1)",
147
	.max_vs_threads = 70,
148
	.max_gs_threads = 70,
149
	.max_wm_threads =
150
		(102 - 1) << HSW_PS_MAX_THREADS_SHIFT |
151
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
152
	.urb = { 128, 640, 256, 8 },
153
	.gt = 1,
154
};
155
 
156
static const struct gt_info hsw_gt2_info = {
157
	.name = "Haswell (gen7.5, gt2)",
158
	.max_vs_threads = 140,
159
	.max_gs_threads = 140,
160
	.max_wm_threads =
161
		(140 - 1) << HSW_PS_MAX_THREADS_SHIFT |
162
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
163
	.urb = { 256, 1664, 640, 8 },
164
	.gt = 2,
165
};
166
 
167
static const struct gt_info hsw_gt3_info = {
168
	.name = "Haswell (gen7.5, gt3)",
169
	.max_vs_threads = 280,
170
	.max_gs_threads = 280,
171
	.max_wm_threads =
172
		(280 - 1) << HSW_PS_MAX_THREADS_SHIFT |
173
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
174
	.urb = { 512, 3328, 1280, 16 },
175
	.gt = 3,
176
};
177
 
178
inline static bool is_ivb(struct sna *sna)
179
{
180
	return sna->kgem.gen == 070;
181
}
182
 
183
inline static bool is_byt(struct sna *sna)
184
{
185
	return sna->kgem.gen == 071;
186
}
187
 
188
inline static bool is_hsw(struct sna *sna)
189
{
190
	return sna->kgem.gen == 075;
191
}
192
 
193
/* Pixel-shader binary for packed video, concatenated from pre-assembled
 * gen7 fragments in this order: affine source coordinates, ARGB sample,
 * YUV->RGB conversion, write-out.  Each row is four dwords.
 */
static const uint32_t ps_kernel_packed[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_yuv_rgb.g7b"
#include "exa_wm_write.g7b"
};
199
 
200
/* Pixel-shader binary for planar video, concatenated from pre-assembled
 * gen7 fragments in this order: affine source coordinates, planar sample,
 * YUV->RGB conversion, write-out.  Each row is four dwords.
 */
static const uint32_t ps_kernel_planar[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_planar.g7b"
#include "exa_wm_yuv_rgb.g7b"
#include "exa_wm_write.g7b"
};
206
 
207
#define KERNEL(kernel_enum, kernel, num_surfaces) \
208
    [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces}
209
#define NOKERNEL(kernel_enum, func, num_surfaces) \
210
    [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces}
211
static const struct wm_kernel_info {
212
	const char *name;
213
	const void *data;
214
	unsigned int size;
215
	int num_surfaces;
216
} wm_kernels[] = {
217
	NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
218
	NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),
219
 
220
	NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
221
	NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),
222
 
223
	NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
224
	NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),
225
 
226
	NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
227
	NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),
228
 
229
	NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
230
	NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),
231
 
232
	KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
233
	KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
234
};
235
#undef KERNEL
236
 
237
static const struct blendinfo {
238
	bool src_alpha;
239
	uint32_t src_blend;
240
	uint32_t dst_blend;
241
} gen7_blend_op[] = {
242
	/* Clear */	{0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO},
243
	/* Src */	{0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO},
244
	/* Dst */	{0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ONE},
245
	/* Over */	{1, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
246
	/* OverReverse */ {0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ONE},
247
	/* In */	{0, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_ZERO},
248
	/* InReverse */	{1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_SRC_ALPHA},
249
	/* Out */	{0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ZERO},
250
	/* OutReverse */ {1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
251
	/* Atop */	{1, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
252
	/* AtopReverse */ {1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_SRC_ALPHA},
253
	/* Xor */	{1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
254
	/* Add */	{0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ONE},
255
};
256
 
257
/**
 * Number of BLENDFACTOR values that can appear in gen7_blend_op, i.e. one
 * more than the highest-valued factor used there.
 *
 * This leaves out GEN7_BLENDFACTOR_INV_DST_COLOR,
 * GEN7_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN7_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN7_BLENDFACTOR_COUNT (GEN7_BLENDFACTOR_INV_DST_ALPHA + 1)

/* Size of one blend-state slot, padded to a 64-byte boundary. */
#define GEN7_BLEND_STATE_PADDED_SIZE	ALIGN(sizeof(struct gen7_blend_state), 64)

/* Byte offset of the blend state for the (s, d) factor pair, with bit 15
 * additionally set whenever the destination factor is not ZERO (that bit is
 * what GEN7_READS_DST() extracts).  Both arguments are parenthesised so that
 * compound expressions can be passed safely.
 */
#define BLEND_OFFSET(s, d) \
	(((d) != GEN7_BLENDFACTOR_ZERO) << 15 | \
	 (((s) * GEN7_BLENDFACTOR_COUNT + (d)) * GEN7_BLEND_STATE_PADDED_SIZE))

/* src * 1 + dst * 0: plain copy of the source. */
#define NO_BLEND BLEND_OFFSET(GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO)
/* src * 0 + dst * 0: result is zero. */
#define CLEAR BLEND_OFFSET(GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO)
274
 
275
/* Byte offset of the sampler slot for a (src filter, src extend,
 * mask filter, mask extend) combination.  A slot is two
 * gen7_sampler_state entries; slots 0 and 1 are the fixed COPY_SAMPLER and
 * FILL_SAMPLER slots, hence the "+ 2".
 */
#define SAMPLER_OFFSET(sf, se, mf, me) \
	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2) * 2 * sizeof(struct gen7_sampler_state))

/* Vertex layout id: packed 16-bit position and texture coordinate, no mask. */
#define VERTEX_2s2s 0

/* Flag word for copies: sampler slot 0, GXcopy -> NO_BLEND, anything else -> CLEAR. */
#define COPY_SAMPLER 0
#define COPY_VERTEX VERTEX_2s2s
#define COPY_FLAGS(a) GEN7_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN7_WM_KERNEL_NOMASK, COPY_VERTEX)

/* Flag words for fills: sampler slot 1. */
#define FILL_SAMPLER (2 * sizeof(struct gen7_sampler_state))
#define FILL_VERTEX VERTEX_2s2s
#define FILL_FLAGS(op, format) GEN7_SET_FLAGS(FILL_SAMPLER, gen7_get_blend((op), false, (format)), GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)
#define FILL_FLAGS_NOBLEND GEN7_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)

/* Layout of the 32-bit flag word built by GEN7_SET_FLAGS(S, B, K, V):
 *   bits 31..20  sampler offset (S, low four bits of the half-word dropped)
 *   bits 19..16  WM kernel index (K)
 *   bit  15      "reads destination" marker carried inside B
 *   bits 14..4   blend-state offset (B)
 *   bits  3..0   vertex layout id (V)
 */
#define GEN7_SAMPLER(f) (((f) >> 16) & 0xfff0)
#define GEN7_BLEND(f) (((f) >> 0) & 0x7ff0)
#define GEN7_READS_DST(f) (((f) >> 15) & 1)
#define GEN7_KERNEL(f) (((f) >> 16) & 0xf)
#define GEN7_VERTEX(f) (((f) >> 0) & 0xf)
#define GEN7_SET_FLAGS(S, B, K, V)  (((S) | (K)) << 16 | ((B) | (V)))
295
 
296
/* Emission shorthands; each one expects a variable named `sna` in scope. */
#define OUT_BATCH(v) batch_emit(sna, v)			/* one batch dword */
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)	/* packed (x, y) vertex pair */
#define OUT_VERTEX_F(v) vertex_emit(sna, v)		/* one vertex value */
299
 
300
static inline bool too_large(int width, int height)
301
{
302
	return width > GEN7_MAX_SIZE || height > GEN7_MAX_SIZE;
303
}
304
 
305
static uint32_t gen7_get_blend(int op,
306
			       bool has_component_alpha,
307
			       uint32_t dst_format)
308
{
309
	uint32_t src, dst;
310
 
311
 
312
    src = GEN7_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
313
    dst = GEN7_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
314
 
315
 
316
#if 0
317
	/* If there's no dst alpha channel, adjust the blend op so that
318
	 * we'll treat it always as 1.
319
	 */
320
	if (PICT_FORMAT_A(dst_format) == 0) {
321
		if (src == GEN7_BLENDFACTOR_DST_ALPHA)
322
			src = GEN7_BLENDFACTOR_ONE;
323
		else if (src == GEN7_BLENDFACTOR_INV_DST_ALPHA)
324
			src = GEN7_BLENDFACTOR_ZERO;
325
	}
326
 
327
	/* If the source alpha is being used, then we should only be in a
328
	 * case where the source blend factor is 0, and the source blend
329
	 * value is the mask channels multiplied by the source picture's alpha.
330
	 */
331
	if (has_component_alpha && gen7_blend_op[op].src_alpha) {
332
		if (dst == GEN7_BLENDFACTOR_SRC_ALPHA)
333
			dst = GEN7_BLENDFACTOR_SRC_COLOR;
334
		else if (dst == GEN7_BLENDFACTOR_INV_SRC_ALPHA)
335
			dst = GEN7_BLENDFACTOR_INV_SRC_COLOR;
336
	}
337
#endif
338
 
339
	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
340
	     op, dst_format, PICT_FORMAT_A(dst_format),
341
	     src, dst, (int)BLEND_OFFSET(src, dst)));
342
	return BLEND_OFFSET(src, dst);
343
}
344
 
345
/* Maps a Render picture format to the gen7 surface format used when the
 * picture is sampled.  Returns (uint32_t)-1 for formats without a mapping.
 */
static uint32_t gen7_get_card_format(PictFormat format)
{
	switch (format) {
	case PICT_a8r8g8b8:
		return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:
		return GEN7_SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:
		return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:
		return GEN7_SURFACEFORMAT_R8G8B8X8_UNORM;
	case PICT_a2r10g10b10:
		return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:
		return GEN7_SURFACEFORMAT_B10G10R10X2_UNORM;
	case PICT_r8g8b8:
		return GEN7_SURFACEFORMAT_R8G8B8_UNORM;
	case PICT_r5g6b5:
		return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:
		return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN7_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
		return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;
	default:
		return (uint32_t)-1;
	}
}
374
 
375
/* Maps a Render picture format to the gen7 surface format used when the
 * picture is a render target.  The x-channel variants share the format of
 * their alpha counterparts.  Returns (uint32_t)-1 for unsupported formats.
 */
static uint32_t gen7_get_dest_format(PictFormat format)
{
	switch (format) {
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;

	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return GEN7_SURFACEFORMAT_R8G8B8A8_UNORM;

	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;

	case PICT_r5g6b5:
		return GEN7_SURFACEFORMAT_B5G6R5_UNORM;

	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;

	case PICT_a8:
		return GEN7_SURFACEFORMAT_A8_UNORM;

	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return GEN7_SURFACEFORMAT_B4G4R4A4_UNORM;

	default:
		return (uint32_t)-1;
	}
}
401
 
402
static int
403
gen7_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
404
{
405
	int base;
406
 
407
	if (has_mask) {
408
		if (is_ca) {
409
			if (gen7_blend_op[op].src_alpha)
410
				base = GEN7_WM_KERNEL_MASKSA;
411
			else
412
				base = GEN7_WM_KERNEL_MASKCA;
413
		} else
414
			base = GEN7_WM_KERNEL_MASK;
415
	} else
416
		base = GEN7_WM_KERNEL_NOMASK;
417
 
418
	return base + !is_affine;
419
}
420
 
421
static void
422
gen7_emit_urb(struct sna *sna)
423
{
424
	OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
425
	OUT_BATCH(sna->render_state.gen7.info->urb.push_ps_size);
426
 
427
	/* num of VS entries must be divisible by 8 if size < 9 */
428
	OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2));
429
	OUT_BATCH((sna->render_state.gen7.info->urb.max_vs_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
430
		  (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
431
		  (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
432
 
433
	OUT_BATCH(GEN7_3DSTATE_URB_HS | (2 - 2));
434
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
435
		  (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
436
 
437
	OUT_BATCH(GEN7_3DSTATE_URB_DS | (2 - 2));
438
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
439
		  (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
440
 
441
	OUT_BATCH(GEN7_3DSTATE_URB_GS | (2 - 2));
442
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
443
		  (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
444
}
445
 
446
static void
447
gen7_emit_state_base_address(struct sna *sna)
448
{
449
	uint32_t mocs;
450
 
451
	mocs = is_hsw(sna) ? 5 << 8 : 3 << 8;
452
 
453
	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
454
	OUT_BATCH(0); /* general */
455
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
456
				 sna->kgem.nbatch,
457
				 NULL,
458
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
459
				 BASE_ADDRESS_MODIFY));
460
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* dynamic */
461
				 sna->kgem.nbatch,
462
				 sna->render_state.gen7.general_bo,
463
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
464
				 mocs | BASE_ADDRESS_MODIFY));
465
	OUT_BATCH(0); /* indirect */
466
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
467
				 sna->kgem.nbatch,
468
				 sna->render_state.gen7.general_bo,
469
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
470
				 mocs | BASE_ADDRESS_MODIFY));
471
 
472
	/* upper bounds, disable */
473
	OUT_BATCH(0);
474
	OUT_BATCH(BASE_ADDRESS_MODIFY);
475
	OUT_BATCH(0);
476
	OUT_BATCH(BASE_ADDRESS_MODIFY);
477
}
478
 
479
static void
480
gen7_disable_vs(struct sna *sna)
481
{
482
	/* For future reference:
483
	 * A PIPE_CONTROL with post-sync op set to 1 and a depth stall needs
484
	 * to be emitted just prior to change VS state, i.e. 3DSTATE_VS,
485
	 * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS,
486
	 * 3DSTATE_BINDING_TABLE_POINTER_VS, 3DSTATE_SAMPLER_STATE_POINTER_VS.
487
	 *
488
	 * Here we saved by the full-flush incurred when emitting
489
	 * the batchbuffer.
490
	 */
491
	OUT_BATCH(GEN7_3DSTATE_VS | (6 - 2));
492
	OUT_BATCH(0); /* no VS kernel */
493
	OUT_BATCH(0);
494
	OUT_BATCH(0);
495
	OUT_BATCH(0);
496
	OUT_BATCH(0); /* pass-through */
497
 
498
#if 0
499
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_VS | (7 - 2));
500
	OUT_BATCH(0);
501
	OUT_BATCH(0);
502
	OUT_BATCH(0);
503
	OUT_BATCH(0);
504
	OUT_BATCH(0);
505
	OUT_BATCH(0);
506
 
507
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
508
	OUT_BATCH(0);
509
 
510
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
511
	OUT_BATCH(0);
512
#endif
513
}
514
 
515
static void
516
gen7_disable_hs(struct sna *sna)
517
{
518
	OUT_BATCH(GEN7_3DSTATE_HS | (7 - 2));
519
	OUT_BATCH(0); /* no HS kernel */
520
	OUT_BATCH(0);
521
	OUT_BATCH(0);
522
	OUT_BATCH(0);
523
	OUT_BATCH(0);
524
	OUT_BATCH(0); /* pass-through */
525
 
526
#if 0
527
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
528
	OUT_BATCH(0);
529
	OUT_BATCH(0);
530
	OUT_BATCH(0);
531
	OUT_BATCH(0);
532
	OUT_BATCH(0);
533
	OUT_BATCH(0);
534
 
535
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
536
	OUT_BATCH(0);
537
 
538
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
539
	OUT_BATCH(0);
540
#endif
541
}
542
 
543
static void
544
gen7_disable_te(struct sna *sna)
545
{
546
	OUT_BATCH(GEN7_3DSTATE_TE | (4 - 2));
547
	OUT_BATCH(0);
548
	OUT_BATCH(0);
549
	OUT_BATCH(0);
550
}
551
 
552
static void
553
gen7_disable_ds(struct sna *sna)
554
{
555
	OUT_BATCH(GEN7_3DSTATE_DS | (6 - 2));
556
	OUT_BATCH(0);
557
	OUT_BATCH(0);
558
	OUT_BATCH(0);
559
	OUT_BATCH(0);
560
	OUT_BATCH(0);
561
 
562
#if 0
563
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
564
	OUT_BATCH(0);
565
	OUT_BATCH(0);
566
	OUT_BATCH(0);
567
	OUT_BATCH(0);
568
	OUT_BATCH(0);
569
	OUT_BATCH(0);
570
 
571
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
572
	OUT_BATCH(0);
573
 
574
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
575
	OUT_BATCH(0);
576
#endif
577
}
578
 
579
static void
580
gen7_disable_gs(struct sna *sna)
581
{
582
	OUT_BATCH(GEN7_3DSTATE_GS | (7 - 2));
583
	OUT_BATCH(0); /* no GS kernel */
584
	OUT_BATCH(0);
585
	OUT_BATCH(0);
586
	OUT_BATCH(0);
587
	OUT_BATCH(0);
588
	OUT_BATCH(0); /* pass-through */
589
 
590
#if 0
591
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_GS | (7 - 2));
592
	OUT_BATCH(0);
593
	OUT_BATCH(0);
594
	OUT_BATCH(0);
595
	OUT_BATCH(0);
596
	OUT_BATCH(0);
597
	OUT_BATCH(0);
598
 
599
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
600
	OUT_BATCH(0);
601
 
602
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
603
	OUT_BATCH(0);
604
#endif
605
}
606
 
607
static void
608
gen7_disable_streamout(struct sna *sna)
609
{
610
	OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (3 - 2));
611
	OUT_BATCH(0);
612
	OUT_BATCH(0);
613
}
614
 
615
static void
616
gen7_emit_sf_invariant(struct sna *sna)
617
{
618
	OUT_BATCH(GEN7_3DSTATE_SF | (7 - 2));
619
	OUT_BATCH(0);
620
	OUT_BATCH(GEN7_3DSTATE_SF_CULL_NONE);
621
	OUT_BATCH(2 << GEN7_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
622
	OUT_BATCH(0);
623
	OUT_BATCH(0);
624
	OUT_BATCH(0);
625
}
626
 
627
static void
628
gen7_emit_cc_invariant(struct sna *sna)
629
{
630
#if 0 /* unused, no change */
631
	OUT_BATCH(GEN7_3DSTATE_CC_STATE_POINTERS | (2 - 2));
632
	OUT_BATCH(0);
633
 
634
	OUT_BATCH(GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
635
	OUT_BATCH(0);
636
#endif
637
 
638
	/* XXX clear to be safe */
639
	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
640
	OUT_BATCH(0);
641
}
642
 
643
static void
644
gen7_disable_clip(struct sna *sna)
645
{
646
	OUT_BATCH(GEN7_3DSTATE_CLIP | (4 - 2));
647
	OUT_BATCH(0);
648
	OUT_BATCH(0); /* pass-through */
649
	OUT_BATCH(0);
650
 
651
	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
652
	OUT_BATCH(0);
653
}
654
 
655
static void
656
gen7_emit_wm_invariant(struct sna *sna)
657
{
658
	OUT_BATCH(GEN7_3DSTATE_WM | (3 - 2));
659
	OUT_BATCH(GEN7_WM_DISPATCH_ENABLE |
660
		  GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
661
	OUT_BATCH(0);
662
 
663
#if 0
664
	/* XXX length bias of 7 in old spec? */
665
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_PS | (7 - 2));
666
	OUT_BATCH(0);
667
	OUT_BATCH(0);
668
	OUT_BATCH(0);
669
	OUT_BATCH(0);
670
	OUT_BATCH(0);
671
	OUT_BATCH(0);
672
#endif
673
}
674
 
675
static void
676
gen7_emit_null_depth_buffer(struct sna *sna)
677
{
678
	OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
679
	OUT_BATCH(GEN7_SURFACE_NULL << GEN7_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
680
		  GEN7_DEPTHFORMAT_D32_FLOAT << GEN7_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
681
	OUT_BATCH(0); /* disable depth, stencil and hiz */
682
	OUT_BATCH(0);
683
	OUT_BATCH(0);
684
	OUT_BATCH(0);
685
	OUT_BATCH(0);
686
 
687
#if 0
688
	OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
689
	OUT_BATCH(0);
690
	OUT_BATCH(0);
691
#endif
692
}
693
 
694
static void
695
gen7_emit_invariant(struct sna *sna)
696
{
697
	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_3D);
698
 
699
	OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE | (4 - 2));
700
	OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
701
		  GEN7_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
702
	OUT_BATCH(0);
703
	OUT_BATCH(0);
704
 
705
	OUT_BATCH(GEN7_3DSTATE_SAMPLE_MASK | (2 - 2));
706
	OUT_BATCH(1);
707
 
708
	gen7_emit_urb(sna);
709
 
710
	gen7_emit_state_base_address(sna);
711
 
712
	gen7_disable_vs(sna);
713
	gen7_disable_hs(sna);
714
	gen7_disable_te(sna);
715
	gen7_disable_ds(sna);
716
	gen7_disable_gs(sna);
717
	gen7_disable_clip(sna);
718
	gen7_emit_sf_invariant(sna);
719
	gen7_emit_wm_invariant(sna);
720
	gen7_emit_cc_invariant(sna);
721
	gen7_disable_streamout(sna);
722
	gen7_emit_null_depth_buffer(sna);
723
 
724
	sna->render_state.gen7.needs_invariant = false;
725
}
726
 
727
static void
728
gen7_emit_cc(struct sna *sna, uint32_t blend_offset)
729
{
730
	struct gen7_render_state *render = &sna->render_state.gen7;
731
 
732
	if (render->blend == blend_offset)
733
		return;
734
 
735
	DBG(("%s: blend = %x\n", __FUNCTION__, blend_offset));
736
 
737
	/* XXX can have upto 8 blend states preload, selectable via
738
	 * Render Target Index. What other side-effects of Render Target Index?
739
	 */
740
 
741
	assert (is_aligned(render->cc_blend + blend_offset, 64));
742
	OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
743
	OUT_BATCH((render->cc_blend + blend_offset) | 1);
744
 
745
	render->blend = blend_offset;
746
}
747
 
748
static void
749
gen7_emit_sampler(struct sna *sna, uint32_t state)
750
{
751
	if (sna->render_state.gen7.samplers == state)
752
		return;
753
 
754
	sna->render_state.gen7.samplers = state;
755
 
756
	DBG(("%s: sampler = %x\n", __FUNCTION__, state));
757
 
758
	assert (is_aligned(sna->render_state.gen7.wm_state + state, 32));
759
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
760
	OUT_BATCH(sna->render_state.gen7.wm_state + state);
761
}
762
 
763
static void
764
gen7_emit_sf(struct sna *sna, bool has_mask)
765
{
766
	int num_sf_outputs = has_mask ? 2 : 1;
767
 
768
	if (sna->render_state.gen7.num_sf_outputs == num_sf_outputs)
769
		return;
770
 
771
	DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
772
	     __FUNCTION__, num_sf_outputs, 1, 0));
773
 
774
	sna->render_state.gen7.num_sf_outputs = num_sf_outputs;
775
 
776
	OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2));
777
	OUT_BATCH(num_sf_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT |
778
		  1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
779
		  1 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT);
780
	OUT_BATCH(0);
781
	OUT_BATCH(0); /* dw4 */
782
	OUT_BATCH(0);
783
	OUT_BATCH(0);
784
	OUT_BATCH(0);
785
	OUT_BATCH(0); /* dw8 */
786
	OUT_BATCH(0);
787
	OUT_BATCH(0);
788
	OUT_BATCH(0);
789
	OUT_BATCH(0); /* dw12 */
790
	OUT_BATCH(0);
791
	OUT_BATCH(0);
792
}
793
 
794
static void
795
gen7_emit_wm(struct sna *sna, int kernel)
796
{
797
	const uint32_t *kernels;
798
 
799
	if (sna->render_state.gen7.kernel == kernel)
800
		return;
801
 
802
	sna->render_state.gen7.kernel = kernel;
803
	kernels = sna->render_state.gen7.wm_kernel[kernel];
804
 
805
	DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n",
806
	     __FUNCTION__,
807
	     wm_kernels[kernel].name,
808
	     wm_kernels[kernel].num_surfaces,
809
	     kernels[0], kernels[1], kernels[2]));
810
 
811
	OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2));
812
	OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
813
	OUT_BATCH(1 << GEN7_PS_SAMPLER_COUNT_SHIFT |
814
		  wm_kernels[kernel].num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
815
	OUT_BATCH(0); /* scratch address */
816
	OUT_BATCH(sna->render_state.gen7.info->max_wm_threads |
817
		  (kernels[0] ? GEN7_PS_8_DISPATCH_ENABLE : 0) |
818
		  (kernels[1] ? GEN7_PS_16_DISPATCH_ENABLE : 0) |
819
		  (kernels[2] ? GEN7_PS_32_DISPATCH_ENABLE : 0) |
820
		  GEN7_PS_ATTRIBUTE_ENABLE);
821
	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
822
		  8 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
823
		  6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
824
	OUT_BATCH(kernels[2]);
825
	OUT_BATCH(kernels[1]);
826
}
827
 
828
static bool
829
gen7_emit_binding_table(struct sna *sna, uint16_t offset)
830
{
831
	if (sna->render_state.gen7.surface_table == offset)
832
		return false;
833
 
834
	/* Binding table pointers */
835
	assert(is_aligned(4*offset, 32));
836
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
837
	OUT_BATCH(offset*4);
838
 
839
	sna->render_state.gen7.surface_table = offset;
840
	return true;
841
}
842
 
843
static bool
844
gen7_emit_drawing_rectangle(struct sna *sna,
845
			    const struct sna_composite_op *op)
846
{
847
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
848
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
849
 
850
	assert(!too_large(op->dst.x, op->dst.y));
851
	assert(!too_large(op->dst.width, op->dst.height));
852
 
853
	if (sna->render_state.gen7.drawrect_limit == limit &&
854
	    sna->render_state.gen7.drawrect_offset == offset)
855
		return true;
856
 
857
	sna->render_state.gen7.drawrect_offset = offset;
858
	sna->render_state.gen7.drawrect_limit = limit;
859
 
860
	OUT_BATCH(GEN7_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
861
	OUT_BATCH(0);
862
	OUT_BATCH(limit);
863
	OUT_BATCH(offset);
864
	return false;
865
}
866
 
867
static void
868
gen7_emit_vertex_elements(struct sna *sna,
869
			  const struct sna_composite_op *op)
870
{
871
	/*
872
	 * vertex data in vertex buffer
873
	 *    position: (x, y)
874
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
875
	 *    texture coordinate 1 if (has_mask is true): same as above
876
	 */
877
	struct gen7_render_state *render = &sna->render_state.gen7;
878
	uint32_t src_format, dw;
879
	int id = GEN7_VERTEX(op->u.gen7.flags);
880
	bool has_mask;
881
 
882
	DBG(("%s: setup id=%d\n", __FUNCTION__, id));
883
 
884
	if (render->ve_id == id)
885
		return;
886
	render->ve_id = id;
887
 
888
	/* The VUE layout
889
	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
890
	 *    dword 4-7: position (x, y, 1.0, 1.0),
891
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
892
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
893
	 *
894
	 * dword 4-15 are fetched from vertex buffer
895
	 */
896
	has_mask = (id >> 2) != 0;
897
	OUT_BATCH(GEN7_3DSTATE_VERTEX_ELEMENTS |
898
		((2 * (3 + has_mask)) + 1 - 2));
899
 
900
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
901
		  GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT << GEN7_VE0_FORMAT_SHIFT |
902
 
903
	OUT_BATCH(GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_0_SHIFT |
904
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT |
905
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
906
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_3_SHIFT);
907
 
908
	/* x,y */
909
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
910
		  GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT |
911
 
912
	OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT |
913
		  GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT |
914
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
915
		  GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT);
916
 
917
	/* u0, v0, w0 */
918
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
919
	dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
920
	switch (id & 3) {
921
	default:
922
		assert(0);
923
	case 0:
924
		src_format = GEN7_SURFACEFORMAT_R16G16_SSCALED;
925
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
926
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
927
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
928
		break;
929
	case 1:
930
		src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
931
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
932
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
933
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
934
		break;
935
	case 2:
936
		src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
937
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
938
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
939
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
940
		break;
941
	case 3:
942
		src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
943
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
944
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
945
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
946
		break;
947
	}
948
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
949
		  src_format << GEN7_VE0_FORMAT_SHIFT |
950
		  4 << GEN7_VE0_OFFSET_SHIFT);
951
	OUT_BATCH(dw);
952
 
953
	/* u1, v1, w1 */
954
	if (has_mask) {
955
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
956
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
957
		dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
958
		switch (id >> 2) {
959
		case 1:
960
			src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
961
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
962
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
963
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
964
			break;
965
		default:
966
			assert(0);
967
		case 2:
968
			src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
969
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
970
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
971
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
972
			break;
973
		case 3:
974
			src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
975
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
976
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
977
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
978
			break;
979
		}
980
		OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
981
			  src_format << GEN7_VE0_FORMAT_SHIFT |
982
			  offset << GEN7_VE0_OFFSET_SHIFT);
983
		OUT_BATCH(dw);
984
	}
985
}
986
 
987
inline static void
988
gen7_emit_pipe_invalidate(struct sna *sna)
989
{
990
	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
991
	OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH |
992
		  GEN7_PIPE_CONTROL_TC_FLUSH |
993
		  GEN7_PIPE_CONTROL_CS_STALL);
994
	OUT_BATCH(0);
995
	OUT_BATCH(0);
996
}
997
 
998
inline static void
999
gen7_emit_pipe_flush(struct sna *sna, bool need_stall)
1000
{
1001
	unsigned stall;
1002
 
1003
	stall = 0;
1004
	if (need_stall)
1005
		stall = (GEN7_PIPE_CONTROL_CS_STALL |
1006
			 GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
1007
 
1008
	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
1009
	OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH | stall);
1010
	OUT_BATCH(0);
1011
	OUT_BATCH(0);
1012
}
1013
 
1014
inline static void
1015
gen7_emit_pipe_stall(struct sna *sna)
1016
{
1017
	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
1018
	OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL |
1019
		  GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
1020
	OUT_BATCH(0);
1021
	OUT_BATCH(0);
1022
}
1023
 
1024
static void
1025
gen7_emit_state(struct sna *sna,
1026
		const struct sna_composite_op *op,
1027
		uint16_t wm_binding_table)
1028
{
4501 Serge 1029
	bool need_invalidate;
1030
	bool need_flush;
4304 Serge 1031
	bool need_stall;
1032
 
1033
	assert(op->dst.bo->exec);
1034
 
4501 Serge 1035
	need_invalidate = kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo);
1036
	if (ALWAYS_INVALIDATE)
1037
		need_invalidate = true;
4304 Serge 1038
 
4501 Serge 1039
	need_flush =
1040
		sna->render_state.gen7.emit_flush &&
1041
		wm_binding_table & GEN7_READS_DST(op->u.gen7.flags);
1042
	if (ALWAYS_FLUSH)
1043
		need_flush = true;
1044
 
1045
	wm_binding_table &= ~1;
1046
 
1047
	need_stall = sna->render_state.gen7.surface_table != wm_binding_table;
4304 Serge 1048
	need_stall &= gen7_emit_drawing_rectangle(sna, op);
4501 Serge 1049
	if (ALWAYS_STALL)
1050
		need_stall = true;
4304 Serge 1051
 
4501 Serge 1052
	if (need_invalidate) {
4304 Serge 1053
		gen7_emit_pipe_invalidate(sna);
1054
		kgem_clear_dirty(&sna->kgem);
1055
		assert(op->dst.bo->exec);
1056
			kgem_bo_mark_dirty(op->dst.bo);
4501 Serge 1057
 
1058
		need_flush = false;
4304 Serge 1059
		need_stall = false;
1060
	}
4501 Serge 1061
	if (need_flush) {
4304 Serge 1062
		gen7_emit_pipe_flush(sna, need_stall);
1063
		need_stall = false;
1064
	}
1065
	if (need_stall)
1066
		gen7_emit_pipe_stall(sna);
1067
 
4501 Serge 1068
	gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
1069
	gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags));
1070
	gen7_emit_sf(sna, GEN7_VERTEX(op->u.gen7.flags) >> 2);
1071
	gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
1072
	gen7_emit_vertex_elements(sna, op);
1073
	gen7_emit_binding_table(sna, wm_binding_table);
1074
 
4304 Serge 1075
	sna->render_state.gen7.emit_flush = GEN7_READS_DST(op->u.gen7.flags);
1076
}
1077
 
1078
static bool gen7_magic_ca_pass(struct sna *sna,
1079
			       const struct sna_composite_op *op)
1080
{
1081
	struct gen7_render_state *state = &sna->render_state.gen7;
1082
 
1083
	if (!op->need_magic_ca_pass)
1084
		return false;
1085
 
1086
	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
1087
	     sna->render.vertex_start, sna->render.vertex_index));
1088
 
1089
	gen7_emit_pipe_stall(sna);
1090
 
1091
	gen7_emit_cc(sna,
1092
		     GEN7_BLEND(gen7_get_blend(PictOpAdd, true,
1093
					       op->dst.format)));
1094
	gen7_emit_wm(sna,
1095
		     gen7_choose_composite_kernel(PictOpAdd,
1096
						  true, true,
1097
						  op->is_affine));
1098
 
1099
	OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
1100
	OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
1101
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
1102
	OUT_BATCH(sna->render.vertex_start);
1103
	OUT_BATCH(1);	/* single instance */
1104
	OUT_BATCH(0);	/* start instance location */
1105
	OUT_BATCH(0);	/* index buffer offset, ignored */
1106
 
1107
	state->last_primitive = sna->kgem.nbatch;
1108
	return true;
1109
}
1110
 
1111
/*
 * Reserve a 64-byte, 64-byte-aligned region in @stream.
 * The returned mapping is intentionally not written to.
 */
static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}
1116
 
1117
static void
1118
sampler_state_init(struct gen7_sampler_state *sampler_state,
1119
		   sampler_filter_t filter,
1120
		   sampler_extend_t extend)
1121
{
1122
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */
1123
 
1124
	/* We use the legacy mode to get the semantics specified by
1125
	 * the Render extension. */
1126
	sampler_state->ss0.default_color_mode = GEN7_BORDER_COLOR_MODE_LEGACY;
1127
 
1128
	switch (filter) {
1129
	default:
1130
	case SAMPLER_FILTER_NEAREST:
1131
		sampler_state->ss0.min_filter = GEN7_MAPFILTER_NEAREST;
1132
		sampler_state->ss0.mag_filter = GEN7_MAPFILTER_NEAREST;
1133
		break;
1134
	case SAMPLER_FILTER_BILINEAR:
1135
		sampler_state->ss0.min_filter = GEN7_MAPFILTER_LINEAR;
1136
		sampler_state->ss0.mag_filter = GEN7_MAPFILTER_LINEAR;
1137
		break;
1138
	}
1139
 
1140
	switch (extend) {
1141
	default:
1142
	case SAMPLER_EXTEND_NONE:
1143
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
1144
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
1145
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
1146
		break;
1147
	case SAMPLER_EXTEND_REPEAT:
1148
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
1149
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
1150
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
1151
		break;
1152
	case SAMPLER_EXTEND_PAD:
1153
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
1154
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
1155
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
1156
		break;
1157
	case SAMPLER_EXTEND_REFLECT:
1158
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
1159
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
1160
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
1161
		break;
1162
	}
1163
}
1164
 
1165
static void
1166
sampler_copy_init(struct gen7_sampler_state *ss)
1167
{
1168
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1169
	ss->ss3.non_normalized_coord = 1;
1170
 
1171
	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1172
}
1173
 
1174
static void
1175
sampler_fill_init(struct gen7_sampler_state *ss)
1176
{
1177
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
1178
	ss->ss3.non_normalized_coord = 1;
1179
 
1180
	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1181
}
1182
 
1183
static uint32_t
1184
gen7_tiling_bits(uint32_t tiling)
1185
{
1186
	switch (tiling) {
1187
	default: assert(0);
1188
	case I915_TILING_NONE: return 0;
1189
	case I915_TILING_X: return GEN7_SURFACE_TILED;
1190
	case I915_TILING_Y: return GEN7_SURFACE_TILED | GEN7_SURFACE_TILED_Y;
1191
	}
1192
}
1193
 
1194
/**
1195
 * Sets up the common fields for a surface state buffer for the given
1196
 * picture in the given surface state buffer.
1197
 */
1198
static uint32_t
1199
gen7_bind_bo(struct sna *sna,
1200
	     struct kgem_bo *bo,
1201
	     uint32_t width,
1202
	     uint32_t height,
1203
	     uint32_t format,
1204
	     bool is_dst)
1205
{
1206
	uint32_t *ss;
1207
	uint32_t domains;
1208
	int offset;
1209
	uint32_t is_scanout = is_dst && bo->scanout;
1210
 
1211
	COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32);
1212
 
1213
	/* After the first bind, we manage the cache domains within the batch */
1214
	offset = kgem_bo_get_binding(bo, format | is_dst << 30 | is_scanout << 31);
1215
	if (offset) {
1216
		if (is_dst)
1217
			kgem_bo_mark_dirty(bo);
1218
		return offset * sizeof(uint32_t);
1219
	}
1220
 
1221
	offset = sna->kgem.surface -=
1222
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
1223
	ss = sna->kgem.batch + offset;
1224
	ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
1225
		 gen7_tiling_bits(bo->tiling) |
1226
		 format << GEN7_SURFACE_FORMAT_SHIFT);
1227
	if (bo->tiling == I915_TILING_Y)
1228
		ss[0] |= GEN7_SURFACE_VALIGN_4;
1229
	if (is_dst) {
1230
		ss[0] |= GEN7_SURFACE_RC_READ_WRITE;
1231
		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
1232
	} else
1233
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
1234
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
1235
	ss[2] = ((width - 1)  << GEN7_SURFACE_WIDTH_SHIFT |
1236
		 (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
1237
	ss[3] = (bo->pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
1238
	ss[4] = 0;
1239
	ss[5] = (is_scanout || bo->io) ? 0 : is_hsw(sna) ? 5 << 16 : 3 << 16;
1240
	ss[6] = 0;
1241
	ss[7] = 0;
1242
	if (is_hsw(sna))
1243
		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
1244
 
1245
	kgem_bo_set_binding(bo, format | is_dst << 30 | is_scanout << 31, offset);
1246
 
1247
	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
1248
	     offset, bo->handle, ss[1],
1249
	     format, width, height, bo->pitch, bo->tiling,
1250
	     domains & 0xffff ? "render" : "sampler"));
1251
 
1252
	return offset * sizeof(uint32_t);
1253
}
1254
 
1255
static void gen7_emit_vertex_buffer(struct sna *sna,
1256
				    const struct sna_composite_op *op)
1257
{
1258
	int id = GEN7_VERTEX(op->u.gen7.flags);
1259
 
1260
	OUT_BATCH(GEN7_3DSTATE_VERTEX_BUFFERS | (5 - 2));
1261
	OUT_BATCH(id << GEN7_VB0_BUFFER_INDEX_SHIFT |
1262
		  GEN7_VB0_VERTEXDATA |
1263
		  GEN7_VB0_ADDRESS_MODIFY_ENABLE |
1264
		  4*op->floats_per_vertex << GEN7_VB0_BUFFER_PITCH_SHIFT);
1265
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
1266
	OUT_BATCH(0);
1267
	OUT_BATCH(~0); /* max address: disabled */
1268
	OUT_BATCH(0);
1269
 
1270
	sna->render.vb_id |= 1 << id;
1271
}
1272
 
1273
static void gen7_emit_primitive(struct sna *sna)
1274
{
1275
	if (sna->kgem.nbatch == sna->render_state.gen7.last_primitive) {
1276
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
1277
		return;
1278
	}
1279
 
1280
	OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
1281
	OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
1282
	sna->render.vertex_offset = sna->kgem.nbatch;
1283
	OUT_BATCH(0);	/* vertex count, to be filled in later */
1284
	OUT_BATCH(sna->render.vertex_index);
1285
	OUT_BATCH(1);	/* single instance */
1286
	OUT_BATCH(0);	/* start instance location */
1287
	OUT_BATCH(0);	/* index buffer offset, ignored */
1288
	sna->render.vertex_start = sna->render.vertex_index;
1289
 
1290
	sna->render_state.gen7.last_primitive = sna->kgem.nbatch;
1291
}
1292
 
1293
static bool gen7_rectangle_begin(struct sna *sna,
1294
				 const struct sna_composite_op *op)
1295
{
1296
	int id = 1 << GEN7_VERTEX(op->u.gen7.flags);
1297
	int ndwords;
1298
 
1299
	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
1300
		return true;
1301
 
1302
	ndwords = op->need_magic_ca_pass ? 60 : 6;
1303
	if ((sna->render.vb_id & id) == 0)
1304
		ndwords += 5;
1305
	if (!kgem_check_batch(&sna->kgem, ndwords))
1306
		return false;
1307
 
1308
	if ((sna->render.vb_id & id) == 0)
1309
		gen7_emit_vertex_buffer(sna, op);
1310
 
1311
	gen7_emit_primitive(sna);
1312
	return true;
1313
}
1314
 
1315
static int gen7_get_rectangles__flush(struct sna *sna,
1316
				      const struct sna_composite_op *op)
1317
{
1318
	/* Preventing discarding new vbo after lock contention */
1319
	if (sna_vertex_wait__locked(&sna->render)) {
1320
		int rem = vertex_space(sna);
1321
		if (rem > op->floats_per_rect)
1322
			return rem;
1323
	}
1324
 
1325
	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6))
1326
		return 0;
1327
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
1328
		return 0;
1329
 
1330
	if (sna->render.vertex_offset) {
1331
		gen4_vertex_flush(sna);
1332
		if (gen7_magic_ca_pass(sna, op)) {
1333
			gen7_emit_pipe_stall(sna);
1334
			gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
1335
			gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
1336
		}
1337
	}
1338
 
1339
	return gen4_vertex_finish(sna);
1340
}
1341
 
1342
inline static int gen7_get_rectangles(struct sna *sna,
1343
				      const struct sna_composite_op *op,
1344
				      int want,
1345
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
1346
{
1347
	int rem;
1348
 
1349
	assert(want);
1350
 
1351
start:
1352
	rem = vertex_space(sna);
1353
	if (unlikely(rem < op->floats_per_rect)) {
1354
		DBG(("flushing vbo for %s: %d < %d\n",
1355
		     __FUNCTION__, rem, op->floats_per_rect));
1356
		rem = gen7_get_rectangles__flush(sna, op);
1357
		if (unlikely(rem == 0))
1358
			goto flush;
1359
	}
1360
 
1361
	if (unlikely(sna->render.vertex_offset == 0)) {
1362
		if (!gen7_rectangle_begin(sna, op))
1363
			goto flush;
1364
		else
1365
			goto start;
1366
	}
1367
 
1368
	assert(rem <= vertex_space(sna));
1369
	assert(op->floats_per_rect <= rem);
1370
	if (want > 1 && want * op->floats_per_rect > rem)
1371
		want = rem / op->floats_per_rect;
1372
 
1373
	assert(want > 0);
1374
	sna->render.vertex_index += 3*want;
1375
	return want;
1376
 
1377
flush:
1378
	if (sna->render.vertex_offset) {
1379
		gen4_vertex_flush(sna);
1380
		gen7_magic_ca_pass(sna, op);
1381
	}
1382
	sna_vertex_wait__locked(&sna->render);
1383
	_kgem_submit(&sna->kgem);
1384
	emit_state(sna, op);
1385
	goto start;
1386
}
1387
 
1388
inline static uint32_t *gen7_composite_get_binding_table(struct sna *sna,
1389
							 uint16_t *offset)
1390
{
1391
	uint32_t *table;
1392
 
1393
	sna->kgem.surface -=
1394
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
1395
	/* Clear all surplus entries to zero in case of prefetch */
1396
	table = memset(sna->kgem.batch + sna->kgem.surface,
1397
		       0, sizeof(struct gen7_surface_state));
1398
 
1399
	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
1400
 
1401
	*offset = sna->kgem.surface;
1402
	return table;
1403
}
1404
 
1405
static void
1406
gen7_get_batch(struct sna *sna, const struct sna_composite_op *op)
1407
{
1408
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
1409
 
1410
	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
1411
		DBG(("%s: flushing batch: %d < %d+%d\n",
1412
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
1413
		     150, 4*8));
1414
		_kgem_submit(&sna->kgem);
1415
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1416
	}
1417
 
1418
	assert(sna->kgem.mode == KGEM_RENDER);
1419
	assert(sna->kgem.ring == KGEM_RENDER);
1420
 
1421
	if (sna->render_state.gen7.needs_invariant)
1422
		gen7_emit_invariant(sna);
1423
}
1424
 
1425
static void gen7_emit_composite_state(struct sna *sna,
1426
				      const struct sna_composite_op *op)
1427
{
1428
	uint32_t *binding_table;
4501 Serge 1429
	uint16_t offset, dirty;
4304 Serge 1430
 
1431
	gen7_get_batch(sna, op);
1432
 
1433
	binding_table = gen7_composite_get_binding_table(sna, &offset);
1434
 
4501 Serge 1435
	dirty = kgem_bo_is_dirty(op->dst.bo);
1436
 
4304 Serge 1437
	binding_table[0] =
1438
		gen7_bind_bo(sna,
1439
			    op->dst.bo, op->dst.width, op->dst.height,
1440
			    gen7_get_dest_format(op->dst.format),
1441
			    true);
1442
	binding_table[1] =
1443
		gen7_bind_bo(sna,
1444
			     op->src.bo, op->src.width, op->src.height,
1445
			     op->src.card_format,
1446
			     false);
1447
	if (op->mask.bo) {
1448
		binding_table[2] =
1449
			gen7_bind_bo(sna,
1450
				     op->mask.bo,
1451
				     op->mask.width,
1452
				     op->mask.height,
1453
				     op->mask.card_format,
1454
				     false);
1455
	}
1456
 
1457
	if (sna->kgem.surface == offset &&
1458
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table &&
1459
	    (op->mask.bo == NULL ||
1460
	     sna->kgem.batch[sna->render_state.gen7.surface_table+2] == binding_table[2])) {
1461
		sna->kgem.surface += sizeof(struct gen7_surface_state) / sizeof(uint32_t);
1462
		offset = sna->render_state.gen7.surface_table;
1463
	}
1464
 
4501 Serge 1465
	gen7_emit_state(sna, op, offset | dirty);
4304 Serge 1466
}
1467
 
1468
static void
1469
gen7_align_vertex(struct sna *sna, const struct sna_composite_op *op)
1470
{
1471
	if (op->floats_per_vertex != sna->render_state.gen7.floats_per_vertex) {
4501 Serge 1472
		DBG(("aligning vertex: was %d, now %d floats per vertex\n",
1473
		     sna->render_state.gen7.floats_per_vertex, op->floats_per_vertex));
1474
		gen4_vertex_align(sna, op);
4304 Serge 1475
		sna->render_state.gen7.floats_per_vertex = op->floats_per_vertex;
1476
	}
1477
}
1478
 
1479
fastcall static void
1480
gen7_render_composite_blt(struct sna *sna,
1481
			  const struct sna_composite_op *op,
1482
			  const struct sna_composite_rectangles *r)
1483
{
1484
	gen7_get_rectangles(sna, op, 1, gen7_emit_composite_state);
1485
	op->prim_emit(sna, op, r);
1486
}
1487
static uint32_t
1488
gen7_composite_create_blend_state(struct sna_static_stream *stream)
1489
{
1490
	char *base, *ptr;
1491
	int src, dst;
1492
 
1493
	base = sna_static_stream_map(stream,
1494
				     GEN7_BLENDFACTOR_COUNT * GEN7_BLENDFACTOR_COUNT * GEN7_BLEND_STATE_PADDED_SIZE,
1495
				     64);
1496
 
1497
	ptr = base;
1498
	for (src = 0; src < GEN7_BLENDFACTOR_COUNT; src++) {
1499
		for (dst= 0; dst < GEN7_BLENDFACTOR_COUNT; dst++) {
1500
			struct gen7_blend_state *blend =
1501
				(struct gen7_blend_state *)ptr;
1502
 
1503
			blend->blend0.dest_blend_factor = dst;
1504
			blend->blend0.source_blend_factor = src;
1505
			blend->blend0.blend_func = GEN7_BLENDFUNCTION_ADD;
1506
			blend->blend0.blend_enable =
1507
				!(dst == GEN7_BLENDFACTOR_ZERO && src == GEN7_BLENDFACTOR_ONE);
1508
 
1509
			blend->blend1.post_blend_clamp_enable = 1;
1510
			blend->blend1.pre_blend_clamp_enable = 1;
1511
 
1512
			ptr += GEN7_BLEND_STATE_PADDED_SIZE;
1513
		}
1514
	}
1515
 
1516
	return sna_static_stream_offsetof(stream, base);
1517
}
1518
 
1519
#if 0
1520
static uint32_t gen7_bind_video_source(struct sna *sna,
1521
				       struct kgem_bo *bo,
1522
				       uint32_t offset,
1523
				       int width,
1524
				       int height,
1525
				       int pitch,
1526
				       uint32_t format)
1527
{
1528
	uint32_t *ss, bind;
1529
 
1530
	bind = sna->kgem.surface -=
1531
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
1532
 
1533
	assert(bo->tiling == I915_TILING_NONE);
1534
 
1535
	ss = sna->kgem.batch + bind;
1536
	ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
1537
		 format << GEN7_SURFACE_FORMAT_SHIFT);
1538
	ss[1] = kgem_add_reloc(&sna->kgem, bind + 1, bo,
1539
			       I915_GEM_DOMAIN_SAMPLER << 16,
1540
			       offset);
1541
	ss[2] = ((width - 1)  << GEN7_SURFACE_WIDTH_SHIFT |
1542
		 (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
1543
	ss[3] = (pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
1544
	ss[4] = 0;
1545
	ss[5] = 0;
1546
	ss[6] = 0;
1547
	ss[7] = 0;
1548
	if (is_hsw(sna))
1549
		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
1550
 
1551
	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, offset=%d\n",
1552
	     bind, bo->handle, ss[1],
1553
	     format, width, height, pitch, offset));
1554
 
1555
	return bind * sizeof(uint32_t);
1556
}
1557
 
1558
static void gen7_emit_video_state(struct sna *sna,
1559
				  const struct sna_composite_op *op)
1560
{
1561
	struct sna_video_frame *frame = op->priv;
1562
	uint32_t src_surf_format;
1563
	uint32_t src_surf_base[6];
1564
	int src_width[6];
1565
	int src_height[6];
1566
	int src_pitch[6];
1567
	uint32_t *binding_table;
4501 Serge 1568
	uint16_t offset, dirty;
4304 Serge 1569
	int n_src, n;
1570
 
1571
	gen7_get_batch(sna, op);
1572
 
1573
	src_surf_base[0] = 0;
1574
	src_surf_base[1] = 0;
1575
	src_surf_base[2] = frame->VBufOffset;
1576
	src_surf_base[3] = frame->VBufOffset;
1577
	src_surf_base[4] = frame->UBufOffset;
1578
	src_surf_base[5] = frame->UBufOffset;
1579
 
1580
	if (is_planar_fourcc(frame->id)) {
1581
		src_surf_format = GEN7_SURFACEFORMAT_R8_UNORM;
1582
		src_width[1]  = src_width[0]  = frame->width;
1583
		src_height[1] = src_height[0] = frame->height;
1584
		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
1585
		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
1586
			frame->width / 2;
1587
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
1588
			frame->height / 2;
1589
		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
1590
			frame->pitch[0];
1591
		n_src = 6;
1592
	} else {
1593
		if (frame->id == FOURCC_UYVY)
1594
			src_surf_format = GEN7_SURFACEFORMAT_YCRCB_SWAPY;
1595
		else
1596
			src_surf_format = GEN7_SURFACEFORMAT_YCRCB_NORMAL;
1597
 
1598
		src_width[0]  = frame->width;
1599
		src_height[0] = frame->height;
1600
		src_pitch[0]  = frame->pitch[0];
1601
		n_src = 1;
1602
	}
1603
 
1604
	binding_table = gen7_composite_get_binding_table(sna, &offset);
1605
 
4501 Serge 1606
	dirty = kgem_bo_is_dirty(op->dst.bo);
1607
 
4304 Serge 1608
	binding_table[0] =
1609
		gen7_bind_bo(sna,
1610
			     op->dst.bo, op->dst.width, op->dst.height,
1611
			     gen7_get_dest_format(op->dst.format),
1612
			     true);
1613
	for (n = 0; n < n_src; n++) {
1614
		binding_table[1+n] =
1615
			gen7_bind_video_source(sna,
1616
					       frame->bo,
1617
					       src_surf_base[n],
1618
					       src_width[n],
1619
					       src_height[n],
1620
					       src_pitch[n],
1621
					       src_surf_format);
1622
	}
1623
 
4501 Serge 1624
	gen7_emit_state(sna, op, offset | dirty);
4304 Serge 1625
}
1626
 
1627
static bool
1628
gen7_render_video(struct sna *sna,
1629
		  struct sna_video *video,
1630
		  struct sna_video_frame *frame,
1631
		  RegionPtr dstRegion,
1632
		  PixmapPtr pixmap)
1633
{
1634
	struct sna_composite_op tmp;
1635
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
1636
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
1637
	int src_width = frame->src.x2 - frame->src.x1;
1638
	int src_height = frame->src.y2 - frame->src.y1;
1639
	float src_offset_x, src_offset_y;
1640
	float src_scale_x, src_scale_y;
1641
	int nbox, pix_xoff, pix_yoff;
1642
	struct sna_pixmap *priv;
1643
	unsigned filter;
1644
	BoxPtr box;
1645
 
1646
	DBG(("%s: src=(%d, %d), dst=(%d, %d), %ldx[(%d, %d), (%d, %d)...]\n",
1647
	     __FUNCTION__,
1648
	     src_width, src_height, dst_width, dst_height,
1649
	     (long)REGION_NUM_RECTS(dstRegion),
1650
	     REGION_EXTENTS(NULL, dstRegion)->x1,
1651
	     REGION_EXTENTS(NULL, dstRegion)->y1,
1652
	     REGION_EXTENTS(NULL, dstRegion)->x2,
1653
	     REGION_EXTENTS(NULL, dstRegion)->y2));
1654
 
1655
	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
1656
	if (priv == NULL)
1657
		return false;
1658
 
1659
	memset(&tmp, 0, sizeof(tmp));
1660
 
1661
	tmp.dst.pixmap = pixmap;
1662
	tmp.dst.width  = pixmap->drawable.width;
1663
	tmp.dst.height = pixmap->drawable.height;
1664
	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
1665
	tmp.dst.bo = priv->gpu_bo;
1666
 
1667
	tmp.src.bo = frame->bo;
1668
	tmp.mask.bo = NULL;
1669
 
1670
	tmp.floats_per_vertex = 3;
1671
	tmp.floats_per_rect = 9;
1672
 
1673
	if (src_width == dst_width && src_height == dst_height)
1674
		filter = SAMPLER_FILTER_NEAREST;
1675
	else
1676
		filter = SAMPLER_FILTER_BILINEAR;
1677
 
1678
	tmp.u.gen7.flags =
1679
		GEN7_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
1680
					      SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
1681
			       NO_BLEND,
1682
			       is_planar_fourcc(frame->id) ?
1683
			       GEN7_WM_KERNEL_VIDEO_PLANAR :
1684
			       GEN7_WM_KERNEL_VIDEO_PACKED,
1685
			       2);
1686
	tmp.priv = frame;
1687
 
1688
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
1689
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
1690
		kgem_submit(&sna->kgem);
4501 Serge 1691
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL))
1692
			return false;
1693
 
4304 Serge 1694
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1695
	}
1696
 
4501 Serge 1697
	gen7_align_vertex(sna, &tmp);
4304 Serge 1698
	gen7_emit_video_state(sna, &tmp);
1699
 
1700
	/* Set up the offset for translating from the given region (in screen
1701
	 * coordinates) to the backing pixmap.
1702
	 */
1703
#ifdef COMPOSITE
1704
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
1705
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
1706
#else
1707
	pix_xoff = 0;
1708
	pix_yoff = 0;
1709
#endif
1710
 
1711
	DBG(("%s: src=(%d, %d)x(%d, %d); frame=(%dx%d), dst=(%dx%d)\n",
1712
	     __FUNCTION__,
1713
	     frame->src.x1, frame->src.y1,
1714
	     src_width, src_height,
1715
	     dst_width, dst_height,
1716
	     frame->width, frame->height));
1717
 
1718
	src_scale_x = (float)src_width / dst_width / frame->width;
1719
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
1720
 
1721
	src_scale_y = (float)src_height / dst_height / frame->height;
1722
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
1723
 
1724
	DBG(("%s: scale=(%f, %f), offset=(%f, %f)\n",
1725
	     __FUNCTION__,
1726
	     src_scale_x, src_scale_y,
1727
	     src_offset_x, src_offset_y));
1728
 
1729
	box = REGION_RECTS(dstRegion);
1730
	nbox = REGION_NUM_RECTS(dstRegion);
1731
	while (nbox--) {
1732
		BoxRec r;
1733
 
1734
		DBG(("%s: dst=(%d, %d), (%d, %d) + (%d, %d); src=(%f, %f), (%f, %f)\n",
1735
		     __FUNCTION__,
1736
		     box->x1, box->y1,
1737
		     box->x2, box->y2,
1738
		     pix_xoff, pix_yoff,
1739
		     box->x1 * src_scale_x + src_offset_x,
1740
		     box->y1 * src_scale_y + src_offset_y,
1741
		     box->x2 * src_scale_x + src_offset_x,
1742
		     box->y2 * src_scale_y + src_offset_y));
1743
 
1744
		r.x1 = box->x1 + pix_xoff;
1745
		r.x2 = box->x2 + pix_xoff;
1746
		r.y1 = box->y1 + pix_yoff;
1747
		r.y2 = box->y2 + pix_yoff;
1748
 
1749
		gen7_get_rectangles(sna, &tmp, 1, gen7_emit_video_state);
1750
 
1751
		OUT_VERTEX(r.x2, r.y2);
1752
		OUT_VERTEX_F(box->x2 * src_scale_x + src_offset_x);
1753
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
1754
 
1755
		OUT_VERTEX(r.x1, r.y2);
1756
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
1757
		OUT_VERTEX_F(box->y2 * src_scale_y + src_offset_y);
1758
 
1759
		OUT_VERTEX(r.x1, r.y1);
1760
		OUT_VERTEX_F(box->x1 * src_scale_x + src_offset_x);
1761
		OUT_VERTEX_F(box->y1 * src_scale_y + src_offset_y);
1762
 
1763
		if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
1764
			sna_damage_add_box(&priv->gpu_damage, &r);
1765
			sna_damage_subtract_box(&priv->cpu_damage, &r);
1766
		}
1767
		box++;
1768
	}
1769
 
1770
	gen4_vertex_flush(sna);
1771
	return true;
1772
}
1773
#endif
1774
 
1775
static void gen7_render_composite_done(struct sna *sna,
1776
				       const struct sna_composite_op *op)
1777
{
1778
	if (sna->render.vertex_offset) {
1779
		gen4_vertex_flush(sna);
1780
		gen7_magic_ca_pass(sna, op);
1781
	}
1782
}
1783
 
1784
 
1785
 
1786
 
1787
 
1788
 
1789
 
1790
 
1791
 
1792
 
1793
 
1794
 
1795
 
1796
 
1797
 
1798
 
1799
 
1800
 
1801
 
1802
 
1803
 
1804
 
1805
 
1806
 
1807
 
1808
 
1809
 
1810
 
1811
 
1812
 
1813
 
1814
 
1815
 
1816
 
1817
 
1818
 
1819
 
1820
 
1821
 
1822
 
1823
 
1824
 
1825
 
1826
 
1827
 
1828
 
1829
 
1830
 
1831
 
1832
 
1833
 
1834
 
1835
 
1836
 
1837
 
1838
 
1839
 
1840
 
1841
 
1842
 
1843
 
1844
 
1845
 
1846
 
1847
 
1848
 
1849
 
1850
 
1851
 
1852
 
1853
 
1854
 
1855
 
1856
 
1857
 
1858
 
1859
 
1860
 
1861
 
1862
 
1863
 
1864
 
1865
 
1866
 
1867
 
1868
 
1869
 
1870
 
1871
 
1872
 
1873
 
1874
 
1875
 
1876
#if 0
1877
static bool
1878
gen7_render_fill_boxes(struct sna *sna,
1879
		       CARD8 op,
1880
		       PictFormat format,
1881
		       const xRenderColor *color,
1882
		       PixmapPtr dst, struct kgem_bo *dst_bo,
1883
		       const BoxRec *box, int n)
1884
{
1885
	struct sna_composite_op tmp;
1886
	uint32_t pixel;
1887
 
1888
	DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
1889
	     __FUNCTION__, op,
1890
	     color->red, color->green, color->blue, color->alpha, (int)format));
1891
 
1892
	if (op >= ARRAY_SIZE(gen7_blend_op)) {
1893
		DBG(("%s: fallback due to unhandled blend op: %d\n",
1894
		     __FUNCTION__, op));
1895
		return false;
1896
	}
1897
 
4501 Serge 1898
	if (prefer_blt_fill(sna, dst_bo, FILL_BOXES) ||
1899
	    !gen7_check_dst_format(format)) {
4304 Serge 1900
		uint8_t alu = GXinvalid;
1901
 
1902
		if (op <= PictOpSrc) {
1903
			pixel = 0;
1904
			if (op == PictOpClear)
1905
				alu = GXclear;
1906
			else if (sna_get_pixel_from_rgba(&pixel,
1907
							 color->red,
1908
							 color->green,
1909
							 color->blue,
1910
							 color->alpha,
1911
							 format))
1912
				alu = GXcopy;
1913
		}
1914
 
1915
		if (alu != GXinvalid &&
1916
		    sna_blt_fill_boxes(sna, alu,
1917
				       dst_bo, dst->drawable.bitsPerPixel,
1918
				       pixel, box, n))
1919
			return true;
1920
 
1921
		if (!gen7_check_dst_format(format))
1922
			return false;
1923
	}
1924
 
1925
	if (op == PictOpClear) {
1926
		pixel = 0;
1927
		op = PictOpSrc;
1928
	} else if (!sna_get_pixel_from_rgba(&pixel,
1929
					    color->red,
1930
					    color->green,
1931
					    color->blue,
1932
					    color->alpha,
1933
					    PICT_a8r8g8b8))
1934
		return false;
1935
 
1936
	DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
1937
	     __FUNCTION__, pixel, n,
1938
	     box[0].x1, box[0].y1, box[0].x2, box[0].y2));
1939
 
1940
	tmp.dst.pixmap = dst;
1941
	tmp.dst.width  = dst->drawable.width;
1942
	tmp.dst.height = dst->drawable.height;
1943
	tmp.dst.format = format;
1944
	tmp.dst.bo = dst_bo;
1945
	tmp.dst.x = tmp.dst.y = 0;
1946
	tmp.damage = NULL;
1947
 
1948
	sna_render_composite_redirect_init(&tmp);
1949
	if (too_large(dst->drawable.width, dst->drawable.height)) {
1950
		BoxRec extents;
1951
 
1952
		boxes_extents(box, n, &extents);
1953
		if (!sna_render_composite_redirect(sna, &tmp,
1954
						   extents.x1, extents.y1,
1955
						   extents.x2 - extents.x1,
1956
						   extents.y2 - extents.y1,
1957
						   n > 1))
1958
			return sna_tiling_fill_boxes(sna, op, format, color,
1959
						     dst, dst_bo, box, n);
1960
	}
1961
 
1962
	tmp.src.bo = sna_render_get_solid(sna, pixel);
1963
	tmp.mask.bo = NULL;
1964
 
1965
	tmp.floats_per_vertex = 2;
1966
	tmp.floats_per_rect = 6;
1967
	tmp.need_magic_ca_pass = false;
1968
 
1969
	tmp.u.gen7.flags = FILL_FLAGS(op, format);
1970
 
1971
	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
1972
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
1973
		kgem_submit(&sna->kgem);
4501 Serge 1974
		if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
1975
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
1976
			if (tmp.redirect.real_bo)
1977
				kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
1978
			return false;
1979
		}
1980
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
4304 Serge 1981
	}
1982
 
4501 Serge 1983
	gen7_align_vertex(sna, &tmp);
4304 Serge 1984
	gen7_emit_fill_state(sna, &tmp);
1985
 
1986
	do {
1987
		int n_this_time;
1988
		int16_t *v;
1989
 
1990
		n_this_time = gen7_get_rectangles(sna, &tmp, n,
1991
						  gen7_emit_fill_state);
1992
		n -= n_this_time;
1993
 
1994
		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
1995
		sna->render.vertex_used += 6 * n_this_time;
1996
		assert(sna->render.vertex_used <= sna->render.vertex_size);
1997
		do {
1998
			DBG(("	(%d, %d), (%d, %d)\n",
1999
			     box->x1, box->y1, box->x2, box->y2));
2000
 
2001
			v[0] = box->x2;
2002
			v[5] = v[1] = box->y2;
2003
			v[8] = v[4] = box->x1;
2004
			v[9] = box->y1;
2005
			v[2] = v[3]  = v[7]  = 1;
2006
			v[6] = v[10] = v[11] = 0;
2007
			v += 12; box++;
2008
		} while (--n_this_time);
2009
	} while (n);
2010
 
2011
	gen4_vertex_flush(sna);
2012
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2013
	sna_render_composite_redirect_done(sna, &tmp);
2014
	return true;
2015
}
2016
#endif
2017
 
2018
static void gen7_render_reset(struct sna *sna)
2019
{
2020
	sna->render_state.gen7.emit_flush = false;
2021
	sna->render_state.gen7.needs_invariant = true;
2022
	sna->render_state.gen7.ve_id = 3 << 2;
2023
	sna->render_state.gen7.last_primitive = -1;
2024
 
2025
	sna->render_state.gen7.num_sf_outputs = 0;
2026
	sna->render_state.gen7.samplers = -1;
2027
	sna->render_state.gen7.blend = -1;
2028
	sna->render_state.gen7.kernel = -1;
2029
	sna->render_state.gen7.drawrect_offset = -1;
2030
	sna->render_state.gen7.drawrect_limit = -1;
2031
	sna->render_state.gen7.surface_table = -1;
2032
 
4501 Serge 2033
	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
2034
		DBG(("%s: discarding unmappable vbo\n", __FUNCTION__));
2035
		discard_vbo(sna);
2036
	}
2037
 
4304 Serge 2038
	sna->render.vertex_offset = 0;
2039
	sna->render.nvertex_reloc = 0;
2040
	sna->render.vb_id = 0;
2041
}
2042
 
2043
static void gen7_render_fini(struct sna *sna)
2044
{
2045
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo);
2046
}
2047
 
4501 Serge 2048
static bool is_gt3(struct sna *sna, int devid)
4304 Serge 2049
{
2050
	assert(sna->kgem.gen == 075);
4501 Serge 2051
	return devid & 0x20;
4304 Serge 2052
}
2053
 
4501 Serge 2054
/*
 * Test the device id against a GT2 bit mask: 0x30 when is_hsw(sna) holds,
 * 0x20 otherwise.  Returns true when any masked bit is set.
 */
static bool is_gt2(struct sna *sna, int devid)
{
	int mask;

	if (is_hsw(sna))
		mask = 0x30;
	else
		mask = 0x20;

	return (devid & mask) != 0;
}
2058
 
4501 Serge 2059
/*
 * Return true when the low nibble of the device id equals 0x6.
 * The sna argument is not used.
 */
static bool is_mobile(struct sna *sna, int devid)
{
	const int low_nibble = devid & 0xf;

	(void)sna;
	return low_nibble == 0x6;
}
2063
 
4501 Serge 2064
static bool gen7_render_setup(struct sna *sna, int devid)
4304 Serge 2065
{
2066
    struct gen7_render_state *state = &sna->render_state.gen7;
2067
    struct sna_static_stream general;
2068
    struct gen7_sampler_state *ss;
2069
    int i, j, k, l, m;
2070
 
2071
	if (is_ivb(sna)) {
2072
        state->info = &ivb_gt_info;
4501 Serge 2073
		if (devid & 0xf) {
4304 Serge 2074
            state->info = &ivb_gt1_info;
4501 Serge 2075
			if (is_gt2(sna, devid))
4304 Serge 2076
                state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */
2077
        }
2078
	} else if (is_byt(sna)) {
2079
		state->info = &byt_gt_info;
2080
	} else if (is_hsw(sna)) {
2081
        state->info = &hsw_gt_info;
4501 Serge 2082
		if (devid & 0xf) {
2083
			if (is_gt3(sna, devid))
4304 Serge 2084
				state->info = &hsw_gt3_info;
4501 Serge 2085
			else if (is_gt2(sna, devid))
4304 Serge 2086
				state->info = &hsw_gt2_info;
2087
			else
2088
            state->info = &hsw_gt1_info;
2089
        }
2090
    } else
2091
        return false;
2092
 
4501 Serge 2093
	state->gt = state->info->gt;
2094
 
4304 Serge 2095
    sna_static_stream_init(&general);
2096
 
2097
    /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
2098
     * dumps, you know it points to zero.
2099
     */
2100
    null_create(&general);
2101
 
2102
    for (m = 0; m < GEN7_WM_KERNEL_COUNT; m++) {
2103
        if (wm_kernels[m].size) {
2104
            state->wm_kernel[m][1] =
2105
                sna_static_stream_add(&general,
2106
                              wm_kernels[m].data,
2107
                              wm_kernels[m].size,
2108
                              64);
2109
        } else {
2110
            if (USE_8_PIXEL_DISPATCH) {
2111
                state->wm_kernel[m][0] =
2112
                    sna_static_stream_compile_wm(sna, &general,
2113
                                     wm_kernels[m].data, 8);
2114
            }
2115
 
2116
            if (USE_16_PIXEL_DISPATCH) {
2117
                state->wm_kernel[m][1] =
2118
                    sna_static_stream_compile_wm(sna, &general,
2119
                                     wm_kernels[m].data, 16);
2120
            }
2121
 
2122
            if (USE_32_PIXEL_DISPATCH) {
2123
                state->wm_kernel[m][2] =
2124
                    sna_static_stream_compile_wm(sna, &general,
2125
                                     wm_kernels[m].data, 32);
2126
            }
2127
        }
2128
        assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
2129
    }
2130
 
2131
    ss = sna_static_stream_map(&general,
2132
                   2 * sizeof(*ss) *
2133
                   (2 +
2134
                    FILTER_COUNT * EXTEND_COUNT *
2135
                    FILTER_COUNT * EXTEND_COUNT),
2136
                   32);
2137
    state->wm_state = sna_static_stream_offsetof(&general, ss);
2138
    sampler_copy_init(ss); ss += 2;
2139
    sampler_fill_init(ss); ss += 2;
2140
    for (i = 0; i < FILTER_COUNT; i++) {
2141
        for (j = 0; j < EXTEND_COUNT; j++) {
2142
            for (k = 0; k < FILTER_COUNT; k++) {
2143
                for (l = 0; l < EXTEND_COUNT; l++) {
2144
                    sampler_state_init(ss++, i, j);
2145
                    sampler_state_init(ss++, k, l);
2146
                }
2147
            }
2148
        }
2149
    }
2150
 
2151
    state->cc_blend = gen7_composite_create_blend_state(&general);
2152
 
2153
    state->general_bo = sna_static_stream_fini(sna, &general);
2154
    return state->general_bo != NULL;
2155
}
2156
 
2157
const char *gen7_render_init(struct sna *sna, const char *backend)
2158
{
4501 Serge 2159
	int devid = intel_get_device_id(sna);
2160
 
2161
	if (!gen7_render_setup(sna, devid))
4304 Serge 2162
		return backend;
2163
 
4501 Serge 2164
	sna->kgem.context_switch = gen6_render_context_switch;
2165
	sna->kgem.retire = gen6_render_retire;
2166
	sna->kgem.expire = gen4_render_expire;
4304 Serge 2167
 
2168
#if 0
2169
#if !NO_COMPOSITE
2170
	sna->render.composite = gen7_render_composite;
2171
	sna->render.prefer_gpu |= PREFER_GPU_RENDER;
2172
#endif
2173
#if !NO_COMPOSITE_SPANS
2174
	sna->render.check_composite_spans = gen7_check_composite_spans;
2175
	sna->render.composite_spans = gen7_render_composite_spans;
4501 Serge 2176
	if (is_mobile(sna, devid) || is_gt2(sna, devid) || is_byt(sna))
4304 Serge 2177
		sna->render.prefer_gpu |= PREFER_GPU_SPANS;
2178
#endif
2179
	sna->render.video = gen7_render_video;
2180
 
2181
#if !NO_COPY_BOXES
2182
	sna->render.copy_boxes = gen7_render_copy_boxes;
2183
#endif
2184
#if !NO_COPY
2185
	sna->render.copy = gen7_render_copy;
2186
#endif
2187
 
2188
#if !NO_FILL_BOXES
2189
	sna->render.fill_boxes = gen7_render_fill_boxes;
2190
#endif
2191
#if !NO_FILL
2192
	sna->render.fill = gen7_render_fill;
2193
#endif
2194
#if !NO_FILL_ONE
2195
	sna->render.fill_one = gen7_render_fill_one;
2196
#endif
2197
#if !NO_FILL_CLEAR
2198
	sna->render.clear = gen7_render_clear;
2199
#endif
2200
#endif
2201
 
2202
    sna->render.blit_tex = gen7_blit_tex;
2203
    sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
2204
 
4501 Serge 2205
	sna->render.flush = gen4_render_flush;
4304 Serge 2206
    sna->render.reset = gen7_render_reset;
2207
    sna->render.fini = gen7_render_fini;
2208
 
2209
    sna->render.max_3d_size = GEN7_MAX_SIZE;
2210
    sna->render.max_3d_pitch = 1 << 18;
2211
	return sna->render_state.gen7.info->name;
2212
}
2213
 
2214
 
2215
static bool
2216
gen7_blit_tex(struct sna *sna,
2217
              uint8_t op, bool scale,
2218
		      PixmapPtr src, struct kgem_bo *src_bo,
2219
		      PixmapPtr mask,struct kgem_bo *mask_bo,
2220
		      PixmapPtr dst, struct kgem_bo *dst_bo,
2221
              int32_t src_x, int32_t src_y,
2222
              int32_t msk_x, int32_t msk_y,
2223
              int32_t dst_x, int32_t dst_y,
2224
              int32_t width, int32_t height,
2225
              struct sna_composite_op *tmp)
2226
{
2227
 
2228
 
2229
    tmp->op = PictOpSrc;
2230
 
2231
    tmp->dst.pixmap = dst;
2232
    tmp->dst.bo     = dst_bo;
2233
    tmp->dst.width  = dst->drawable.width;
2234
    tmp->dst.height = dst->drawable.height;
2235
    tmp->dst.format = PICT_a8r8g8b8;
2236
 
2237
 
2238
	tmp->src.repeat = RepeatNone;
2239
	tmp->src.filter = PictFilterNearest;
2240
    tmp->src.is_affine = true;
2241
 
2242
    tmp->src.bo = src_bo;
2243
	tmp->src.pict_format = PICT_x8r8g8b8;
2244
    tmp->src.card_format = gen7_get_card_format(tmp->src.pict_format);
2245
    tmp->src.width  = src->drawable.width;
2246
    tmp->src.height = src->drawable.height;
2247
 
2248
 
2249
	tmp->is_affine = tmp->src.is_affine;
2250
	tmp->has_component_alpha = false;
2251
	tmp->need_magic_ca_pass = false;
2252
 
2253
	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
2254
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
2255
    tmp->mask.is_affine = true;
2256
 
2257
    tmp->mask.bo = mask_bo;
2258
    tmp->mask.pict_format = PIXMAN_a8;
2259
    tmp->mask.card_format = gen7_get_card_format(tmp->mask.pict_format);
2260
    tmp->mask.width  = mask->drawable.width;
2261
    tmp->mask.height = mask->drawable.height;
2262
 
2263
    if( scale )
2264
    {
2265
        tmp->src.scale[0] = 1.f/width;
2266
        tmp->src.scale[1] = 1.f/height;
2267
    }
2268
    else
2269
    {
2270
        tmp->src.scale[0] = 1.f/src->drawable.width;
2271
        tmp->src.scale[1] = 1.f/src->drawable.height;
2272
    }
2273
 
2274
    tmp->mask.scale[0] = 1.f/mask->drawable.width;
2275
    tmp->mask.scale[1] = 1.f/mask->drawable.height;
2276
 
2277
 
2278
 
2279
	tmp->u.gen7.flags =
2280
		GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
2281
					      tmp->src.repeat,
2282
					      tmp->mask.filter,
2283
					      tmp->mask.repeat),
2284
			       gen7_get_blend(tmp->op,
2285
					      tmp->has_component_alpha,
2286
					      tmp->dst.format),
2287
/*			       gen7_choose_composite_kernel(tmp->op,
2288
							    tmp->mask.bo != NULL,
2289
							    tmp->has_component_alpha,
2290
							    tmp->is_affine), */
2291
                   GEN7_WM_KERNEL_MASK,
2292
			       gen4_choose_composite_emitter(sna, tmp));
2293
 
2294
	tmp->blt   = gen7_render_composite_blt;
2295
//	tmp->box   = gen7_render_composite_box;
2296
	tmp->done  = gen7_render_composite_done;
2297
 
4501 Serge 2298
	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
4304 Serge 2299
	if (!kgem_check_bo(&sna->kgem,
2300
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
2301
			   NULL)) {
2302
		kgem_submit(&sna->kgem);
2303
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
2304
	}
2305
 
4501 Serge 2306
	gen7_align_vertex(sna, tmp);
4304 Serge 2307
	gen7_emit_composite_state(sna, tmp);
2308
	return true;
2309
}