Subversion Repositories Kolibri OS

Rev

Rev 3291 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
3280 Serge 1
/*
2
 * Copyright © 2006,2008,2011 Intel Corporation
3
 * Copyright © 2007 Red Hat, Inc.
4
 *
5
 * Permission is hereby granted, free of charge, to any person obtaining a
6
 * copy of this software and associated documentation files (the "Software"),
7
 * to deal in the Software without restriction, including without limitation
8
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
 * and/or sell copies of the Software, and to permit persons to whom the
10
 * Software is furnished to do so, subject to the following conditions:
11
 *
12
 * The above copyright notice and this permission notice (including the next
13
 * paragraph) shall be included in all copies or substantial portions of the
14
 * Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
 * SOFTWARE.
23
 *
24
 * Authors:
25
 *    Wang Zhenyu 
26
 *    Eric Anholt 
27
 *    Carl Worth 
28
 *    Keith Packard 
29
 *    Chris Wilson 
30
 *
31
 */
32
 
33
#ifdef HAVE_CONFIG_H
34
#include "config.h"
35
#endif
36
 
37
#include "sna.h"
38
#include "sna_reg.h"
39
#include "sna_render.h"
40
#include "sna_render_inline.h"
41
//#include "sna_video.h"
42
 
43
#include "brw/brw.h"
44
#include "gen7_render.h"
45
#include "gen4_source.h"
46
#include "gen4_vertex.h"
47
 
48
/* Per-operation NO_* switches, all left at 0 here.  Their consumers are
 * not in this part of the file.
 */
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_BOXES 0
#define NO_FILL_ONE 0
#define NO_FILL_CLEAR 0

#define NO_RING_SWITCH 0

/* Pixel-shader dispatch widths; at least one must be selected. */
#define USE_8_PIXEL_DISPATCH 1
#define USE_16_PIXEL_DISPATCH 1
#define USE_32_PIXEL_DISPATCH 0

#if !(USE_8_PIXEL_DISPATCH || USE_16_PIXEL_DISPATCH || USE_32_PIXEL_DISPATCH)
#error "Must select at least 8, 16 or 32 pixel dispatch"
#endif

/* Largest width/height accepted by too_large(). */
#define GEN7_MAX_SIZE 16384
68
 
69
/* XXX Todo
70
 *
71
 * STR (software tiled rendering) mode. No, really.
72
 * 64x32 pixel blocks align with the rendering cache. Worth considering.
73
 */
74
 
75
#define is_aligned(x, y) (!((x) & ((y) - 1)))
76
 
77
/* Per-GT limits used when programming the fixed-function pipeline. */
struct gt_info {
	uint32_t max_vs_threads;
	uint32_t max_gs_threads;
	/* Stored pre-shifted; gen7_emit_wm() ORs it straight into 3DSTATE_PS. */
	uint32_t max_wm_threads;

	struct {
		int size;
		/* Programmed into 3DSTATE_URB_VS by gen7_emit_urb(). */
		int max_vs_entries;
		int max_gs_entries;
	} urb;
};
87
 
88
static const struct gt_info ivb_gt_info = {
89
	.max_vs_threads = 16,
90
	.max_gs_threads = 16,
91
	.max_wm_threads = (16-1) << IVB_PS_MAX_THREADS_SHIFT,
92
	.urb = { 128, 64, 64 },
93
};
94
 
95
static const struct gt_info ivb_gt1_info = {
96
	.max_vs_threads = 36,
97
	.max_gs_threads = 36,
98
	.max_wm_threads = (48-1) << IVB_PS_MAX_THREADS_SHIFT,
99
	.urb = { 128, 512, 192 },
100
};
101
 
102
static const struct gt_info ivb_gt2_info = {
103
	.max_vs_threads = 128,
104
	.max_gs_threads = 128,
105
	.max_wm_threads = (172-1) << IVB_PS_MAX_THREADS_SHIFT,
106
	.urb = { 256, 704, 320 },
107
};
108
 
109
static const struct gt_info hsw_gt_info = {
110
	.max_vs_threads = 8,
111
	.max_gs_threads = 8,
112
	.max_wm_threads =
113
		(8 - 1) << HSW_PS_MAX_THREADS_SHIFT |
114
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
115
	.urb = { 128, 64, 64 },
116
};
117
 
118
static const struct gt_info hsw_gt1_info = {
119
	.max_vs_threads = 70,
120
	.max_gs_threads = 70,
121
	.max_wm_threads =
122
		(102 - 1) << HSW_PS_MAX_THREADS_SHIFT |
123
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
124
	.urb = { 128, 640, 256 },
125
};
126
 
127
static const struct gt_info hsw_gt2_info = {
128
	.max_vs_threads = 280,
129
	.max_gs_threads = 280,
130
	.max_wm_threads =
131
		(204 - 1) << HSW_PS_MAX_THREADS_SHIFT |
132
		1 << HSW_PS_SAMPLE_MASK_SHIFT,
133
	.urb = { 256, 1664, 640 },
134
};
135
 
136
/* Pixel-shader program for the VIDEO_PACKED kernel slot, assembled from
 * pre-built .g7b fragments in this order: affine source coordinates,
 * ARGB sample, YUV->RGB, write.
 */
static const uint32_t ps_kernel_packed[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_argb.g7b"
#include "exa_wm_yuv_rgb.g7b"
#include "exa_wm_write.g7b"
};
142
 
143
/* Pixel-shader program for the VIDEO_PLANAR kernel slot, assembled from
 * pre-built .g7b fragments in this order: affine source coordinates,
 * planar sample, YUV->RGB, write.
 */
static const uint32_t ps_kernel_planar[][4] = {
#include "exa_wm_src_affine.g7b"
#include "exa_wm_src_sample_planar.g7b"
#include "exa_wm_yuv_rgb.g7b"
#include "exa_wm_write.g7b"
};
149
 
150
#define KERNEL(kernel_enum, kernel, num_surfaces) \
151
    [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), num_surfaces}
152
#define NOKERNEL(kernel_enum, func, num_surfaces) \
153
    [GEN7_WM_KERNEL_##kernel_enum] = {#kernel_enum, (void *)func, 0, num_surfaces}
154
static const struct wm_kernel_info {
155
	const char *name;
156
	const void *data;
157
	unsigned int size;
158
	int num_surfaces;
159
} wm_kernels[] = {
160
	NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
161
	NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),
162
 
163
	NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
164
	NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),
165
 
166
	NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
167
	NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),
168
 
169
	NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
170
	NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),
171
 
172
	NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
173
	NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),
174
 
175
	KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
176
	KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
177
};
178
#undef KERNEL
179
 
180
static const struct blendinfo {
181
	bool src_alpha;
182
	uint32_t src_blend;
183
	uint32_t dst_blend;
184
} gen7_blend_op[] = {
185
	/* Clear */	{0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO},
186
	/* Src */	{0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO},
187
	/* Dst */	{0, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ONE},
188
	/* Over */	{1, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
189
	/* OverReverse */ {0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ONE},
190
	/* In */	{0, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_ZERO},
191
	/* InReverse */	{1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_SRC_ALPHA},
192
	/* Out */	{0, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_ZERO},
193
	/* OutReverse */ {1, GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
194
	/* Atop */	{1, GEN7_BLENDFACTOR_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
195
	/* AtopReverse */ {1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_SRC_ALPHA},
196
	/* Xor */	{1, GEN7_BLENDFACTOR_INV_DST_ALPHA, GEN7_BLENDFACTOR_INV_SRC_ALPHA},
197
	/* Add */	{0, GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ONE},
198
};
199
 
200
/**
 * Highest-valued BLENDFACTOR used in gen7_blend_op.
 *
 * This leaves out GEN7_BLENDFACTOR_INV_DST_COLOR,
 * GEN7_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN7_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN7_BLENDFACTOR_COUNT (GEN7_BLENDFACTOR_INV_DST_ALPHA + 1)

/* Size of one blend state entry, padded to a 64-byte multiple. */
#define GEN7_BLEND_STATE_PADDED_SIZE	ALIGN(sizeof(struct gen7_blend_state), 64)

/*
 * Byte offset of the blend state for the (s, d) factor pair.  Bit 15 is
 * additionally set whenever the destination factor is not ZERO; that bit
 * is read back by GEN7_READS_DST() and masked off by GEN7_BLEND().
 *
 * Every use of an argument is parenthesized so that passing an expression
 * (for example a conditional) cannot re-associate with the comparison.
 */
#define BLEND_OFFSET(s, d) \
	(((d) != GEN7_BLENDFACTOR_ZERO) << 15 | \
	 (((s) * GEN7_BLENDFACTOR_COUNT + (d)) * GEN7_BLEND_STATE_PADDED_SIZE))

#define NO_BLEND BLEND_OFFSET(GEN7_BLENDFACTOR_ONE, GEN7_BLENDFACTOR_ZERO)
#define CLEAR BLEND_OFFSET(GEN7_BLENDFACTOR_ZERO, GEN7_BLENDFACTOR_ZERO)
217
 
218
/* Byte offset of the sampler-state pair for the given source/mask
 * filter+extend combination.  The first two pairs are skipped; they are
 * addressed directly by COPY_SAMPLER and FILL_SAMPLER below.
 */
#define SAMPLER_OFFSET(sf, se, mf, me) \
	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2) * 2 * sizeof(struct gen7_sampler_state))

#define VERTEX_2s2s 0

/* Flags used by the copy path. */
#define COPY_SAMPLER 0
#define COPY_VERTEX VERTEX_2s2s
#define COPY_FLAGS(a) \
	GEN7_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN7_WM_KERNEL_NOMASK, COPY_VERTEX)

/* Flags used by the fill path. */
#define FILL_SAMPLER (2 * sizeof(struct gen7_sampler_state))
#define FILL_VERTEX VERTEX_2s2s
#define FILL_FLAGS(op, format) \
	GEN7_SET_FLAGS(FILL_SAMPLER, gen7_get_blend((op), false, (format)), GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)
#define FILL_FLAGS_NOBLEND \
	GEN7_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN7_WM_KERNEL_NOMASK, FILL_VERTEX)

/* Packed flags word: the upper 16 bits hold sampler offset | kernel id,
 * the lower 16 bits hold blend offset | vertex id.
 */
#define GEN7_SAMPLER(f) (((f) >> 16) & 0xfff0)
#define GEN7_BLEND(f) (((f) >> 0) & 0x7ff0)
#define GEN7_READS_DST(f) (((f) >> 15) & 1)
#define GEN7_KERNEL(f) (((f) >> 16) & 0xf)
#define GEN7_VERTEX(f) (((f) >> 0) & 0xf)
#define GEN7_SET_FLAGS(S, B, K, V)  (((S) | (K)) << 16 | ((B) | (V)))

/* Emission shorthands; all of them expect a local named `sna`. */
#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
242
 
243
static inline bool too_large(int width, int height)
244
{
245
	return width > GEN7_MAX_SIZE || height > GEN7_MAX_SIZE;
246
}
247
 
248
static uint32_t gen7_get_blend(int op,
249
			       bool has_component_alpha,
250
			       uint32_t dst_format)
251
{
252
	uint32_t src, dst;
253
 
254
 
255
    src = GEN7_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
256
    dst = GEN7_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
257
 
3291 Serge 258
 
3280 Serge 259
#if 0
260
	/* If there's no dst alpha channel, adjust the blend op so that
261
	 * we'll treat it always as 1.
262
	 */
263
	if (PICT_FORMAT_A(dst_format) == 0) {
264
		if (src == GEN7_BLENDFACTOR_DST_ALPHA)
265
			src = GEN7_BLENDFACTOR_ONE;
266
		else if (src == GEN7_BLENDFACTOR_INV_DST_ALPHA)
267
			src = GEN7_BLENDFACTOR_ZERO;
268
	}
269
 
270
	/* If the source alpha is being used, then we should only be in a
271
	 * case where the source blend factor is 0, and the source blend
272
	 * value is the mask channels multiplied by the source picture's alpha.
273
	 */
274
	if (has_component_alpha && gen7_blend_op[op].src_alpha) {
275
		if (dst == GEN7_BLENDFACTOR_SRC_ALPHA)
276
			dst = GEN7_BLENDFACTOR_SRC_COLOR;
277
		else if (dst == GEN7_BLENDFACTOR_INV_SRC_ALPHA)
278
			dst = GEN7_BLENDFACTOR_INV_SRC_COLOR;
279
	}
280
#endif
281
 
282
	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
283
	     op, dst_format, PICT_FORMAT_A(dst_format),
284
	     src, dst, (int)BLEND_OFFSET(src, dst)));
285
	return BLEND_OFFSET(src, dst);
286
}
287
 
288
/*
 * Maps a picture format to the surface format used when sampling from it.
 * Unhandled formats yield (uint32_t)-1.
 */
static uint32_t gen7_get_card_format(PictFormat format)
{
	switch (format) {
	case PICT_a8r8g8b8:
		return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:
		return GEN7_SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8:
		return GEN7_SURFACEFORMAT_A8_UNORM;
	default:
		return (uint32_t)-1;
	}
}
301
 
302
/*
 * Maps a picture format to the surface format used when rendering to it.
 * Both a8r8g8b8 and x8r8g8b8 map to B8G8R8A8; unhandled formats yield
 * (uint32_t)-1.
 */
static uint32_t gen7_get_dest_format(PictFormat format)
{
	switch (format) {
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8:
		return GEN7_SURFACEFORMAT_A8_UNORM;
	default:
		return (uint32_t)-1;
	}
}
314
 
315
static int
316
gen7_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
317
{
318
	int base;
319
 
320
	if (has_mask) {
321
		if (is_ca) {
322
			if (gen7_blend_op[op].src_alpha)
323
				base = GEN7_WM_KERNEL_MASKSA;
324
			else
325
				base = GEN7_WM_KERNEL_MASKCA;
326
		} else
327
			base = GEN7_WM_KERNEL_MASK;
328
	} else
329
		base = GEN7_WM_KERNEL_NOMASK;
330
 
331
	return base + !is_affine;
332
}
333
 
334
static void
335
gen7_emit_urb(struct sna *sna)
336
{
337
	OUT_BATCH(GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
338
	OUT_BATCH(8); /* in 1KBs */
339
 
340
	/* num of VS entries must be divisible by 8 if size < 9 */
341
	OUT_BATCH(GEN7_3DSTATE_URB_VS | (2 - 2));
342
	OUT_BATCH((sna->render_state.gen7.info->urb.max_vs_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
343
		  (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
344
		  (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
345
 
346
	OUT_BATCH(GEN7_3DSTATE_URB_HS | (2 - 2));
347
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
348
		  (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
349
 
350
	OUT_BATCH(GEN7_3DSTATE_URB_DS | (2 - 2));
351
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
352
		  (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
353
 
354
	OUT_BATCH(GEN7_3DSTATE_URB_GS | (2 - 2));
355
	OUT_BATCH((0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
356
		  (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
357
}
358
 
359
static void
360
gen7_emit_state_base_address(struct sna *sna)
361
{
362
	OUT_BATCH(GEN7_STATE_BASE_ADDRESS | (10 - 2));
363
	OUT_BATCH(0); /* general */
364
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
365
				 sna->kgem.nbatch,
366
				 NULL,
367
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
368
				 BASE_ADDRESS_MODIFY));
369
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
370
				 sna->kgem.nbatch,
371
				 sna->render_state.gen7.general_bo,
372
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
373
				 BASE_ADDRESS_MODIFY));
374
	OUT_BATCH(0); /* indirect */
375
	OUT_BATCH(kgem_add_reloc(&sna->kgem,
376
				 sna->kgem.nbatch,
377
				 sna->render_state.gen7.general_bo,
378
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
379
				 BASE_ADDRESS_MODIFY));
380
 
381
	/* upper bounds, disable */
382
	OUT_BATCH(0);
383
	OUT_BATCH(BASE_ADDRESS_MODIFY);
384
	OUT_BATCH(0);
385
	OUT_BATCH(BASE_ADDRESS_MODIFY);
386
}
387
 
388
static void
389
gen7_disable_vs(struct sna *sna)
390
{
391
	/* For future reference:
392
	 * A PIPE_CONTROL with post-sync op set to 1 and a depth stall needs
393
	 * to be emitted just prior to change VS state, i.e. 3DSTATE_VS,
394
	 * 3DSTATE_URB_VS, 3DSTATE_CONSTANT_VS,
395
	 * 3DSTATE_BINDING_TABLE_POINTER_VS, 3DSTATE_SAMPLER_STATE_POINTER_VS.
396
	 *
397
	 * Here we saved by the full-flush incurred when emitting
398
	 * the batchbuffer.
399
	 */
400
	OUT_BATCH(GEN7_3DSTATE_VS | (6 - 2));
401
	OUT_BATCH(0); /* no VS kernel */
402
	OUT_BATCH(0);
403
	OUT_BATCH(0);
404
	OUT_BATCH(0);
405
	OUT_BATCH(0); /* pass-through */
406
 
407
#if 0
408
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_VS | (7 - 2));
409
	OUT_BATCH(0);
410
	OUT_BATCH(0);
411
	OUT_BATCH(0);
412
	OUT_BATCH(0);
413
	OUT_BATCH(0);
414
	OUT_BATCH(0);
415
 
416
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
417
	OUT_BATCH(0);
418
 
419
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
420
	OUT_BATCH(0);
421
#endif
422
}
423
 
424
static void
425
gen7_disable_hs(struct sna *sna)
426
{
427
	OUT_BATCH(GEN7_3DSTATE_HS | (7 - 2));
428
	OUT_BATCH(0); /* no HS kernel */
429
	OUT_BATCH(0);
430
	OUT_BATCH(0);
431
	OUT_BATCH(0);
432
	OUT_BATCH(0);
433
	OUT_BATCH(0); /* pass-through */
434
 
435
#if 0
436
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
437
	OUT_BATCH(0);
438
	OUT_BATCH(0);
439
	OUT_BATCH(0);
440
	OUT_BATCH(0);
441
	OUT_BATCH(0);
442
	OUT_BATCH(0);
443
 
444
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
445
	OUT_BATCH(0);
446
 
447
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
448
	OUT_BATCH(0);
449
#endif
450
}
451
 
452
static void
453
gen7_disable_te(struct sna *sna)
454
{
455
	OUT_BATCH(GEN7_3DSTATE_TE | (4 - 2));
456
	OUT_BATCH(0);
457
	OUT_BATCH(0);
458
	OUT_BATCH(0);
459
}
460
 
461
static void
462
gen7_disable_ds(struct sna *sna)
463
{
464
	OUT_BATCH(GEN7_3DSTATE_DS | (6 - 2));
465
	OUT_BATCH(0);
466
	OUT_BATCH(0);
467
	OUT_BATCH(0);
468
	OUT_BATCH(0);
469
	OUT_BATCH(0);
470
 
471
#if 0
472
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
473
	OUT_BATCH(0);
474
	OUT_BATCH(0);
475
	OUT_BATCH(0);
476
	OUT_BATCH(0);
477
	OUT_BATCH(0);
478
	OUT_BATCH(0);
479
 
480
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
481
	OUT_BATCH(0);
482
 
483
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
484
	OUT_BATCH(0);
485
#endif
486
}
487
 
488
static void
489
gen7_disable_gs(struct sna *sna)
490
{
491
	OUT_BATCH(GEN7_3DSTATE_GS | (7 - 2));
492
	OUT_BATCH(0); /* no GS kernel */
493
	OUT_BATCH(0);
494
	OUT_BATCH(0);
495
	OUT_BATCH(0);
496
	OUT_BATCH(0);
497
	OUT_BATCH(0); /* pass-through */
498
 
499
#if 0
500
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_GS | (7 - 2));
501
	OUT_BATCH(0);
502
	OUT_BATCH(0);
503
	OUT_BATCH(0);
504
	OUT_BATCH(0);
505
	OUT_BATCH(0);
506
	OUT_BATCH(0);
507
 
508
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
509
	OUT_BATCH(0);
510
 
511
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
512
	OUT_BATCH(0);
513
#endif
514
}
515
 
516
static void
517
gen7_disable_streamout(struct sna *sna)
518
{
519
	OUT_BATCH(GEN7_3DSTATE_STREAMOUT | (3 - 2));
520
	OUT_BATCH(0);
521
	OUT_BATCH(0);
522
}
523
 
524
static void
525
gen7_emit_sf_invariant(struct sna *sna)
526
{
527
	OUT_BATCH(GEN7_3DSTATE_SF | (7 - 2));
528
	OUT_BATCH(0);
529
	OUT_BATCH(GEN7_3DSTATE_SF_CULL_NONE);
530
	OUT_BATCH(2 << GEN7_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
531
	OUT_BATCH(0);
532
	OUT_BATCH(0);
533
	OUT_BATCH(0);
534
}
535
 
536
static void
537
gen7_emit_cc_invariant(struct sna *sna)
538
{
539
#if 0 /* unused, no change */
540
	OUT_BATCH(GEN7_3DSTATE_CC_STATE_POINTERS | (2 - 2));
541
	OUT_BATCH(0);
542
 
543
	OUT_BATCH(GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
544
	OUT_BATCH(0);
545
#endif
546
 
547
	/* XXX clear to be safe */
548
	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
549
	OUT_BATCH(0);
550
}
551
 
552
static void
553
gen7_disable_clip(struct sna *sna)
554
{
555
	OUT_BATCH(GEN7_3DSTATE_CLIP | (4 - 2));
556
	OUT_BATCH(0);
557
	OUT_BATCH(0); /* pass-through */
558
	OUT_BATCH(0);
559
 
560
	OUT_BATCH(GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
561
	OUT_BATCH(0);
562
}
563
 
564
static void
565
gen7_emit_wm_invariant(struct sna *sna)
566
{
567
	OUT_BATCH(GEN7_3DSTATE_WM | (3 - 2));
568
	OUT_BATCH(GEN7_WM_DISPATCH_ENABLE |
569
		  GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
570
	OUT_BATCH(0);
571
 
572
#if 0
573
	/* XXX length bias of 7 in old spec? */
574
	OUT_BATCH(GEN7_3DSTATE_CONSTANT_PS | (7 - 2));
575
	OUT_BATCH(0);
576
	OUT_BATCH(0);
577
	OUT_BATCH(0);
578
	OUT_BATCH(0);
579
	OUT_BATCH(0);
580
	OUT_BATCH(0);
581
#endif
582
}
583
 
584
static void
585
gen7_emit_null_depth_buffer(struct sna *sna)
586
{
587
	OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
588
	OUT_BATCH(GEN7_SURFACE_NULL << GEN7_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
589
		  GEN7_DEPTHFORMAT_D32_FLOAT << GEN7_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
590
	OUT_BATCH(0); /* disable depth, stencil and hiz */
591
	OUT_BATCH(0);
592
	OUT_BATCH(0);
593
	OUT_BATCH(0);
594
	OUT_BATCH(0);
595
 
596
#if 0
597
	OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
598
	OUT_BATCH(0);
599
	OUT_BATCH(0);
600
#endif
601
}
602
 
603
static void
604
gen7_emit_invariant(struct sna *sna)
605
{
606
	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_3D);
607
 
608
	OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE | (4 - 2));
609
	OUT_BATCH(GEN7_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
610
		  GEN7_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
611
	OUT_BATCH(0);
612
	OUT_BATCH(0);
613
 
614
	OUT_BATCH(GEN7_3DSTATE_SAMPLE_MASK | (2 - 2));
615
	OUT_BATCH(1);
616
 
617
	gen7_emit_urb(sna);
618
 
619
	gen7_emit_state_base_address(sna);
620
 
621
	gen7_disable_vs(sna);
622
	gen7_disable_hs(sna);
623
	gen7_disable_te(sna);
624
	gen7_disable_ds(sna);
625
	gen7_disable_gs(sna);
626
	gen7_disable_clip(sna);
627
	gen7_emit_sf_invariant(sna);
628
	gen7_emit_wm_invariant(sna);
629
	gen7_emit_cc_invariant(sna);
630
	gen7_disable_streamout(sna);
631
	gen7_emit_null_depth_buffer(sna);
632
 
633
	sna->render_state.gen7.needs_invariant = false;
634
}
635
 
636
static void
637
gen7_emit_cc(struct sna *sna, uint32_t blend_offset)
638
{
639
	struct gen7_render_state *render = &sna->render_state.gen7;
640
 
641
	if (render->blend == blend_offset)
642
		return;
643
 
644
	DBG(("%s: blend = %x\n", __FUNCTION__, blend_offset));
645
 
646
	/* XXX can have upto 8 blend states preload, selectable via
647
	 * Render Target Index. What other side-effects of Render Target Index?
648
	 */
649
 
650
	assert (is_aligned(render->cc_blend + blend_offset, 64));
651
	OUT_BATCH(GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
652
	OUT_BATCH((render->cc_blend + blend_offset) | 1);
653
 
654
	render->blend = blend_offset;
655
}
656
 
657
static void
658
gen7_emit_sampler(struct sna *sna, uint32_t state)
659
{
660
	if (sna->render_state.gen7.samplers == state)
661
		return;
662
 
663
	sna->render_state.gen7.samplers = state;
664
 
665
	DBG(("%s: sampler = %x\n", __FUNCTION__, state));
666
 
667
	assert (is_aligned(sna->render_state.gen7.wm_state + state, 32));
668
	OUT_BATCH(GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
669
	OUT_BATCH(sna->render_state.gen7.wm_state + state);
670
}
671
 
672
static void
673
gen7_emit_sf(struct sna *sna, bool has_mask)
674
{
675
	int num_sf_outputs = has_mask ? 2 : 1;
676
 
677
	if (sna->render_state.gen7.num_sf_outputs == num_sf_outputs)
678
		return;
679
 
680
	DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
681
	     __FUNCTION__, num_sf_outputs, 1, 0));
682
 
683
	sna->render_state.gen7.num_sf_outputs = num_sf_outputs;
684
 
685
	OUT_BATCH(GEN7_3DSTATE_SBE | (14 - 2));
686
	OUT_BATCH(num_sf_outputs << GEN7_SBE_NUM_OUTPUTS_SHIFT |
687
		  1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT |
688
		  1 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT);
689
	OUT_BATCH(0);
690
	OUT_BATCH(0); /* dw4 */
691
	OUT_BATCH(0);
692
	OUT_BATCH(0);
693
	OUT_BATCH(0);
694
	OUT_BATCH(0); /* dw8 */
695
	OUT_BATCH(0);
696
	OUT_BATCH(0);
697
	OUT_BATCH(0);
698
	OUT_BATCH(0); /* dw12 */
699
	OUT_BATCH(0);
700
	OUT_BATCH(0);
701
}
702
 
703
static void
704
gen7_emit_wm(struct sna *sna, int kernel)
705
{
706
	const uint32_t *kernels;
707
 
708
	if (sna->render_state.gen7.kernel == kernel)
709
		return;
710
 
711
	sna->render_state.gen7.kernel = kernel;
712
	kernels = sna->render_state.gen7.wm_kernel[kernel];
713
 
714
	DBG(("%s: switching to %s, num_surfaces=%d (8-wide? %d, 16-wide? %d, 32-wide? %d)\n",
715
	     __FUNCTION__,
716
	     wm_kernels[kernel].name,
717
	     wm_kernels[kernel].num_surfaces,
718
	     kernels[0], kernels[1], kernels[2]));
719
 
720
	OUT_BATCH(GEN7_3DSTATE_PS | (8 - 2));
721
	OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
722
	OUT_BATCH(1 << GEN7_PS_SAMPLER_COUNT_SHIFT |
723
		  wm_kernels[kernel].num_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
724
	OUT_BATCH(0); /* scratch address */
725
	OUT_BATCH(sna->render_state.gen7.info->max_wm_threads |
726
		  (kernels[0] ? GEN7_PS_8_DISPATCH_ENABLE : 0) |
727
		  (kernels[1] ? GEN7_PS_16_DISPATCH_ENABLE : 0) |
728
		  (kernels[2] ? GEN7_PS_32_DISPATCH_ENABLE : 0) |
729
		  GEN7_PS_ATTRIBUTE_ENABLE);
730
	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN7_PS_DISPATCH_START_GRF_SHIFT_0 |
731
		  8 << GEN7_PS_DISPATCH_START_GRF_SHIFT_1 |
732
		  6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
733
	OUT_BATCH(kernels[2]);
734
	OUT_BATCH(kernels[1]);
735
}
736
 
737
static bool
738
gen7_emit_binding_table(struct sna *sna, uint16_t offset)
739
{
740
	if (sna->render_state.gen7.surface_table == offset)
741
		return false;
742
 
743
	/* Binding table pointers */
744
	assert(is_aligned(4*offset, 32));
745
	OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
746
	OUT_BATCH(offset*4);
747
 
748
	sna->render_state.gen7.surface_table = offset;
749
	return true;
750
}
751
 
752
static bool
753
gen7_emit_drawing_rectangle(struct sna *sna,
754
			    const struct sna_composite_op *op)
755
{
756
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
757
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
758
 
759
	assert(!too_large(op->dst.x, op->dst.y));
760
	assert(!too_large(op->dst.width, op->dst.height));
761
 
762
	if (sna->render_state.gen7.drawrect_limit == limit &&
763
	    sna->render_state.gen7.drawrect_offset == offset)
764
		return true;
765
 
766
	sna->render_state.gen7.drawrect_offset = offset;
767
	sna->render_state.gen7.drawrect_limit = limit;
768
 
769
	OUT_BATCH(GEN7_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
770
	OUT_BATCH(0);
771
	OUT_BATCH(limit);
772
	OUT_BATCH(offset);
773
	return false;
774
}
775
 
776
static void
777
gen7_emit_vertex_elements(struct sna *sna,
778
			  const struct sna_composite_op *op)
779
{
780
	/*
781
	 * vertex data in vertex buffer
782
	 *    position: (x, y)
783
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
784
	 *    texture coordinate 1 if (has_mask is true): same as above
785
	 */
786
	struct gen7_render_state *render = &sna->render_state.gen7;
787
	uint32_t src_format, dw;
788
	int id = GEN7_VERTEX(op->u.gen7.flags);
789
	bool has_mask;
790
 
791
	DBG(("%s: setup id=%d\n", __FUNCTION__, id));
792
 
793
	if (render->ve_id == id)
794
		return;
795
	render->ve_id = id;
796
 
797
	/* The VUE layout
798
	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
799
	 *    dword 4-7: position (x, y, 1.0, 1.0),
800
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
801
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
802
	 *
803
	 * dword 4-15 are fetched from vertex buffer
804
	 */
805
	has_mask = (id >> 2) != 0;
806
	OUT_BATCH(GEN7_3DSTATE_VERTEX_ELEMENTS |
807
		((2 * (3 + has_mask)) + 1 - 2));
808
 
809
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
810
		  GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT << GEN7_VE0_FORMAT_SHIFT |
811
 
812
	OUT_BATCH(GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_0_SHIFT |
813
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT |
814
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
815
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_3_SHIFT);
816
 
817
	/* x,y */
818
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
819
		  GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT |
820
 
821
	OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT |
822
		  GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT |
823
		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
824
		  GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT);
825
 
826
	/* u0, v0, w0 */
827
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
828
	dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
829
	switch (id & 3) {
830
	default:
831
		assert(0);
832
	case 0:
833
		src_format = GEN7_SURFACEFORMAT_R16G16_SSCALED;
834
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
835
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
836
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
837
		break;
838
	case 1:
839
		src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
840
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
841
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
842
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
843
		break;
844
	case 2:
845
		src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
846
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
847
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
848
		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
849
		break;
850
	case 3:
851
		src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
852
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
853
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
854
		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
855
		break;
856
	}
857
	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
858
		  src_format << GEN7_VE0_FORMAT_SHIFT |
859
		  4 << GEN7_VE0_OFFSET_SHIFT);
860
	OUT_BATCH(dw);
861
 
862
	/* u1, v1, w1 */
863
	if (has_mask) {
864
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
865
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
866
		dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
867
		switch (id >> 2) {
868
		case 1:
869
			src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
870
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
871
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
872
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
873
			break;
874
		default:
875
			assert(0);
876
		case 2:
877
			src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
878
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
879
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
880
			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
881
			break;
882
		case 3:
883
			src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
884
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
885
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
886
			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
887
			break;
888
		}
889
		OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
890
			  src_format << GEN7_VE0_FORMAT_SHIFT |
891
			  offset << GEN7_VE0_OFFSET_SHIFT);
892
		OUT_BATCH(dw);
893
	}
894
}
895
 
896
inline static void
897
gen7_emit_pipe_invalidate(struct sna *sna)
898
{
899
	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
900
	OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH |
901
		  GEN7_PIPE_CONTROL_TC_FLUSH |
902
		  GEN7_PIPE_CONTROL_CS_STALL);
903
	OUT_BATCH(0);
904
	OUT_BATCH(0);
905
}
906
 
907
inline static void
908
gen7_emit_pipe_flush(struct sna *sna)
909
{
910
	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
911
	OUT_BATCH(GEN7_PIPE_CONTROL_WC_FLUSH);
912
	OUT_BATCH(0);
913
	OUT_BATCH(0);
914
}
915
 
916
inline static void
917
gen7_emit_pipe_stall(struct sna *sna)
918
{
919
	OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
920
	OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL |
921
		  GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD);
922
	OUT_BATCH(0);
923
	OUT_BATCH(0);
924
}
925
 
926
static void
927
gen7_emit_state(struct sna *sna,
928
		const struct sna_composite_op *op,
929
		uint16_t wm_binding_table)
930
{
931
	bool need_stall;
932
 
933
	if (sna->render_state.gen7.emit_flush)
934
		gen7_emit_pipe_flush(sna);
935
 
936
	gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
937
	gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags));
938
	gen7_emit_sf(sna, GEN7_VERTEX(op->u.gen7.flags) >> 2);
939
	gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
940
	gen7_emit_vertex_elements(sna, op);
941
 
942
	need_stall = gen7_emit_binding_table(sna, wm_binding_table);
943
	need_stall &= gen7_emit_drawing_rectangle(sna, op);
944
 
945
	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
946
		gen7_emit_pipe_invalidate(sna);
947
		kgem_clear_dirty(&sna->kgem);
948
		if (op->dst.bo->exec)
949
			kgem_bo_mark_dirty(op->dst.bo);
950
		need_stall = false;
951
	}
952
	if (need_stall)
953
		gen7_emit_pipe_stall(sna);
954
 
955
	sna->render_state.gen7.emit_flush = GEN7_READS_DST(op->u.gen7.flags);
956
}
957
 
958
static bool gen7_magic_ca_pass(struct sna *sna,
959
			       const struct sna_composite_op *op)
960
{
961
	struct gen7_render_state *state = &sna->render_state.gen7;
962
 
963
	if (!op->need_magic_ca_pass)
964
		return false;
965
 
966
	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
967
	     sna->render.vertex_start, sna->render.vertex_index));
968
 
969
	gen7_emit_pipe_stall(sna);
970
 
971
	gen7_emit_cc(sna,
972
		     GEN7_BLEND(gen7_get_blend(PictOpAdd, true,
973
					       op->dst.format)));
974
	gen7_emit_wm(sna,
975
		     gen7_choose_composite_kernel(PictOpAdd,
976
						  true, true,
977
						  op->is_affine));
978
 
979
	OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
980
	OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
981
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
982
	OUT_BATCH(sna->render.vertex_start);
983
	OUT_BATCH(1);	/* single instance */
984
	OUT_BATCH(0);	/* start instance location */
985
	OUT_BATCH(0);	/* index buffer offset, ignored */
986
 
987
	state->last_primitive = sna->kgem.nbatch;
988
	return true;
989
}
990
 
991
/*
 * Reserve a small block at the current position of the static stream.
 * The block is used as a run of zeros for the legacy border color and
 * depth-stencil state.
 * NOTE(review): the two 64s are presumably (length, alignment) -- confirm
 * against sna_static_stream_map().
 */
static void null_create(struct sna_static_stream *stream)
{
	(void)sna_static_stream_map(stream, 64, 64);
}
996
 
997
static void
998
sampler_state_init(struct gen7_sampler_state *sampler_state,
999
		   sampler_filter_t filter,
1000
		   sampler_extend_t extend)
1001
{
1002
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */
1003
 
1004
	/* We use the legacy mode to get the semantics specified by
1005
	 * the Render extension. */
1006
	sampler_state->ss0.default_color_mode = GEN7_BORDER_COLOR_MODE_LEGACY;
1007
 
1008
	switch (filter) {
1009
	default:
1010
	case SAMPLER_FILTER_NEAREST:
1011
		sampler_state->ss0.min_filter = GEN7_MAPFILTER_NEAREST;
1012
		sampler_state->ss0.mag_filter = GEN7_MAPFILTER_NEAREST;
1013
		break;
1014
	case SAMPLER_FILTER_BILINEAR:
1015
		sampler_state->ss0.min_filter = GEN7_MAPFILTER_LINEAR;
1016
		sampler_state->ss0.mag_filter = GEN7_MAPFILTER_LINEAR;
1017
		break;
1018
	}
1019
 
1020
	switch (extend) {
1021
	default:
1022
	case SAMPLER_EXTEND_NONE:
1023
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
1024
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
1025
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP_BORDER;
1026
		break;
1027
	case SAMPLER_EXTEND_REPEAT:
1028
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
1029
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
1030
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_WRAP;
1031
		break;
1032
	case SAMPLER_EXTEND_PAD:
1033
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
1034
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
1035
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_CLAMP;
1036
		break;
1037
	case SAMPLER_EXTEND_REFLECT:
1038
		sampler_state->ss3.r_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
1039
		sampler_state->ss3.s_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
1040
		sampler_state->ss3.t_wrap_mode = GEN7_TEXCOORDMODE_MIRROR;
1041
		break;
1042
	}
1043
}
1044
 
1045
static void
1046
sampler_copy_init(struct gen7_sampler_state *ss)
1047
{
1048
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1049
	ss->ss3.non_normalized_coord = 1;
1050
 
1051
	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1052
}
1053
 
1054
static void
1055
sampler_fill_init(struct gen7_sampler_state *ss)
1056
{
1057
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
1058
	ss->ss3.non_normalized_coord = 1;
1059
 
1060
	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1061
}
1062
 
1063
static uint32_t
1064
gen7_tiling_bits(uint32_t tiling)
1065
{
1066
	switch (tiling) {
1067
	default: assert(0);
1068
	case I915_TILING_NONE: return 0;
1069
	case I915_TILING_X: return GEN7_SURFACE_TILED;
1070
	case I915_TILING_Y: return GEN7_SURFACE_TILED | GEN7_SURFACE_TILED_Y;
1071
	}
1072
}
1073
 
1074
/**
1075
 * Sets up the common fields for a surface state buffer for the given
1076
 * picture in the given surface state buffer.
1077
 */
1078
static uint32_t
1079
gen7_bind_bo(struct sna *sna,
1080
	     struct kgem_bo *bo,
1081
	     uint32_t width,
1082
	     uint32_t height,
1083
	     uint32_t format,
1084
	     bool is_dst)
1085
{
1086
	uint32_t *ss;
1087
	uint32_t domains;
1088
	int offset;
1089
	uint32_t is_scanout = is_dst && bo->scanout;
1090
 
1091
	COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32);
1092
 
1093
	/* After the first bind, we manage the cache domains within the batch */
1094
	offset = kgem_bo_get_binding(bo, format | is_scanout << 31);
1095
	if (offset) {
1096
		if (is_dst)
1097
			kgem_bo_mark_dirty(bo);
1098
		return offset * sizeof(uint32_t);
1099
	}
1100
 
1101
	offset = sna->kgem.surface -=
1102
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
1103
	ss = sna->kgem.batch + offset;
1104
	ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
1105
		 gen7_tiling_bits(bo->tiling) |
1106
		 format << GEN7_SURFACE_FORMAT_SHIFT);
1107
	if (is_dst)
1108
		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
1109
	else
1110
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
1111
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
1112
	ss[2] = ((width - 1)  << GEN7_SURFACE_WIDTH_SHIFT |
1113
		 (height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
1114
	ss[3] = (bo->pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
1115
	ss[4] = 0;
1116
	ss[5] = is_scanout ? 0 : 3 << 16;
1117
	ss[6] = 0;
1118
	ss[7] = 0;
1119
	if (sna->kgem.gen == 075)
1120
		ss[7] |= HSW_SURFACE_SWIZZLE(RED, GREEN, BLUE, ALPHA);
1121
 
1122
	kgem_bo_set_binding(bo, format | is_scanout << 31, offset);
1123
 
1124
	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
1125
	     offset, bo->handle, ss[1],
1126
	     format, width, height, bo->pitch, bo->tiling,
1127
	     domains & 0xffff ? "render" : "sampler"));
1128
 
1129
	return offset * sizeof(uint32_t);
1130
}
1131
 
1132
static void gen7_emit_vertex_buffer(struct sna *sna,
1133
				    const struct sna_composite_op *op)
1134
{
1135
	int id = GEN7_VERTEX(op->u.gen7.flags);
1136
 
1137
	OUT_BATCH(GEN7_3DSTATE_VERTEX_BUFFERS | (5 - 2));
1138
	OUT_BATCH(id << GEN7_VB0_BUFFER_INDEX_SHIFT |
1139
		  GEN7_VB0_VERTEXDATA |
1140
		  GEN7_VB0_ADDRESS_MODIFY_ENABLE |
1141
		  4*op->floats_per_vertex << GEN7_VB0_BUFFER_PITCH_SHIFT);
1142
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
1143
	OUT_BATCH(0);
1144
	OUT_BATCH(~0); /* max address: disabled */
1145
	OUT_BATCH(0);
1146
 
1147
	sna->render.vb_id |= 1 << id;
1148
}
1149
 
1150
static void gen7_emit_primitive(struct sna *sna)
1151
{
1152
	if (sna->kgem.nbatch == sna->render_state.gen7.last_primitive) {
1153
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
1154
		return;
1155
	}
1156
 
1157
	OUT_BATCH(GEN7_3DPRIMITIVE | (7- 2));
1158
	OUT_BATCH(GEN7_3DPRIMITIVE_VERTEX_SEQUENTIAL | _3DPRIM_RECTLIST);
1159
	sna->render.vertex_offset = sna->kgem.nbatch;
1160
	OUT_BATCH(0);	/* vertex count, to be filled in later */
1161
	OUT_BATCH(sna->render.vertex_index);
1162
	OUT_BATCH(1);	/* single instance */
1163
	OUT_BATCH(0);	/* start instance location */
1164
	OUT_BATCH(0);	/* index buffer offset, ignored */
1165
	sna->render.vertex_start = sna->render.vertex_index;
1166
 
1167
	sna->render_state.gen7.last_primitive = sna->kgem.nbatch;
1168
}
1169
 
1170
static bool gen7_rectangle_begin(struct sna *sna,
1171
				 const struct sna_composite_op *op)
1172
{
1173
	int id = 1 << GEN7_VERTEX(op->u.gen7.flags);
1174
	int ndwords;
1175
 
1176
	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
1177
		return true;
1178
 
1179
	ndwords = op->need_magic_ca_pass ? 60 : 6;
1180
	if ((sna->render.vb_id & id) == 0)
1181
		ndwords += 5;
1182
	if (!kgem_check_batch(&sna->kgem, ndwords))
1183
		return false;
1184
 
1185
	if ((sna->render.vb_id & id) == 0)
1186
		gen7_emit_vertex_buffer(sna, op);
1187
 
1188
	gen7_emit_primitive(sna);
1189
	return true;
1190
}
1191
 
1192
static int gen7_get_rectangles__flush(struct sna *sna,
1193
				      const struct sna_composite_op *op)
1194
{
1195
	/* Preventing discarding new vbo after lock contention */
1196
	if (sna_vertex_wait__locked(&sna->render)) {
1197
		int rem = vertex_space(sna);
1198
		if (rem > op->floats_per_rect)
1199
			return rem;
1200
	}
1201
 
1202
	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 6))
1203
		return 0;
1204
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
1205
		return 0;
1206
 
1207
	if (sna->render.vertex_offset) {
1208
		gen4_vertex_flush(sna);
1209
		if (gen7_magic_ca_pass(sna, op)) {
1210
			gen7_emit_pipe_stall(sna);
1211
			gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
1212
			gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
1213
		}
1214
	}
1215
 
1216
	return gen4_vertex_finish(sna);
1217
}
1218
 
1219
inline static int gen7_get_rectangles(struct sna *sna,
1220
				      const struct sna_composite_op *op,
1221
				      int want,
1222
				      void (*emit_state)(struct sna *sna, const struct sna_composite_op *op))
1223
{
1224
	int rem;
1225
 
1226
	assert(want);
1227
 
1228
start:
1229
	rem = vertex_space(sna);
1230
	if (unlikely(rem < op->floats_per_rect)) {
1231
		DBG(("flushing vbo for %s: %d < %d\n",
1232
		     __FUNCTION__, rem, op->floats_per_rect));
1233
		rem = gen7_get_rectangles__flush(sna, op);
1234
		if (unlikely(rem == 0))
1235
			goto flush;
1236
	}
1237
 
1238
	if (unlikely(sna->render.vertex_offset == 0)) {
1239
		if (!gen7_rectangle_begin(sna, op))
1240
			goto flush;
1241
		else
1242
			goto start;
1243
	}
1244
 
1245
	assert(op->floats_per_rect >= vertex_space(sna));
1246
	assert(rem <= vertex_space(sna));
1247
	if (want > 1 && want * op->floats_per_rect > rem)
1248
		want = rem / op->floats_per_rect;
1249
 
1250
	assert(want > 0);
1251
	sna->render.vertex_index += 3*want;
1252
	return want;
1253
 
1254
flush:
1255
	if (sna->render.vertex_offset) {
1256
		gen4_vertex_flush(sna);
1257
		gen7_magic_ca_pass(sna, op);
1258
	}
1259
	sna_vertex_wait__locked(&sna->render);
1260
	_kgem_submit(&sna->kgem);
1261
	emit_state(sna, op);
1262
	goto start;
1263
}
1264
 
1265
inline static uint32_t *gen7_composite_get_binding_table(struct sna *sna,
1266
							 uint16_t *offset)
1267
{
1268
	uint32_t *table;
1269
 
1270
	sna->kgem.surface -=
1271
		sizeof(struct gen7_surface_state) / sizeof(uint32_t);
1272
	/* Clear all surplus entries to zero in case of prefetch */
1273
	table = memset(sna->kgem.batch + sna->kgem.surface,
1274
		       0, sizeof(struct gen7_surface_state));
1275
 
1276
	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
1277
 
1278
	*offset = sna->kgem.surface;
1279
	return table;
1280
}
1281
 
1282
static void
1283
gen7_get_batch(struct sna *sna, const struct sna_composite_op *op)
1284
{
1285
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
1286
 
1287
	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
1288
		DBG(("%s: flushing batch: %d < %d+%d\n",
1289
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
1290
		     150, 4*8));
1291
		_kgem_submit(&sna->kgem);
1292
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1293
	}
1294
 
1295
	assert(sna->kgem.mode == KGEM_RENDER);
1296
	assert(sna->kgem.ring == KGEM_RENDER);
1297
 
1298
	if (sna->render_state.gen7.needs_invariant)
1299
		gen7_emit_invariant(sna);
1300
}
1301
 
1302
static void gen7_emit_composite_state(struct sna *sna,
1303
				      const struct sna_composite_op *op)
1304
{
1305
	uint32_t *binding_table;
1306
	uint16_t offset;
1307
 
1308
	gen7_get_batch(sna, op);
1309
 
1310
	binding_table = gen7_composite_get_binding_table(sna, &offset);
1311
 
1312
	binding_table[0] =
1313
		gen7_bind_bo(sna,
1314
			    op->dst.bo, op->dst.width, op->dst.height,
1315
			    gen7_get_dest_format(op->dst.format),
1316
			    true);
1317
	binding_table[1] =
1318
		gen7_bind_bo(sna,
1319
			     op->src.bo, op->src.width, op->src.height,
1320
			     op->src.card_format,
1321
			     false);
1322
	if (op->mask.bo) {
1323
		binding_table[2] =
1324
			gen7_bind_bo(sna,
1325
				     op->mask.bo,
1326
				     op->mask.width,
1327
				     op->mask.height,
1328
				     op->mask.card_format,
1329
				     false);
1330
	}
1331
 
1332
	if (sna->kgem.surface == offset &&
1333
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen7.surface_table) == *(uint64_t*)binding_table &&
1334
	    (op->mask.bo == NULL ||
1335
	     sna->kgem.batch[sna->render_state.gen7.surface_table+2] == binding_table[2])) {
1336
		sna->kgem.surface += sizeof(struct gen7_surface_state) / sizeof(uint32_t);
1337
		offset = sna->render_state.gen7.surface_table;
1338
	}
1339
 
1340
	gen7_emit_state(sna, op, offset);
1341
}
1342
 
1343
static void
1344
gen7_align_vertex(struct sna *sna, const struct sna_composite_op *op)
1345
{
1346
	if (op->floats_per_vertex != sna->render_state.gen7.floats_per_vertex) {
1347
		if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
1348
			gen4_vertex_finish(sna);
1349
 
1350
		DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
1351
		     sna->render_state.gen7.floats_per_vertex,
1352
		     op->floats_per_vertex,
1353
		     sna->render.vertex_index,
1354
		     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
1355
		sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
1356
		sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
1357
		sna->render_state.gen7.floats_per_vertex = op->floats_per_vertex;
1358
	}
1359
}
1360
 
3291 Serge 1361
fastcall static void
1362
gen7_render_composite_blt(struct sna *sna,
1363
			  const struct sna_composite_op *op,
1364
			  const struct sna_composite_rectangles *r)
1365
{
1366
	gen7_get_rectangles(sna, op, 1, gen7_emit_composite_state);
1367
	op->prim_emit(sna, op, r);
1368
}
3280 Serge 1369
static uint32_t
1370
gen7_composite_create_blend_state(struct sna_static_stream *stream)
1371
{
1372
	char *base, *ptr;
1373
	int src, dst;
1374
 
1375
	base = sna_static_stream_map(stream,
1376
				     GEN7_BLENDFACTOR_COUNT * GEN7_BLENDFACTOR_COUNT * GEN7_BLEND_STATE_PADDED_SIZE,
1377
				     64);
1378
 
1379
	ptr = base;
1380
	for (src = 0; src < GEN7_BLENDFACTOR_COUNT; src++) {
1381
		for (dst= 0; dst < GEN7_BLENDFACTOR_COUNT; dst++) {
1382
			struct gen7_blend_state *blend =
1383
				(struct gen7_blend_state *)ptr;
1384
 
1385
			blend->blend0.dest_blend_factor = dst;
1386
			blend->blend0.source_blend_factor = src;
1387
			blend->blend0.blend_func = GEN7_BLENDFUNCTION_ADD;
1388
			blend->blend0.blend_enable =
1389
				!(dst == GEN7_BLENDFACTOR_ZERO && src == GEN7_BLENDFACTOR_ONE);
1390
 
1391
			blend->blend1.post_blend_clamp_enable = 1;
1392
			blend->blend1.pre_blend_clamp_enable = 1;
1393
 
1394
			ptr += GEN7_BLEND_STATE_PADDED_SIZE;
1395
		}
1396
	}
1397
 
1398
	return sna_static_stream_offsetof(stream, base);
1399
}
1400
 
1401
 
1402
 
1403
static void gen7_render_composite_done(struct sna *sna,
1404
				       const struct sna_composite_op *op)
1405
{
1406
	if (sna->render.vertex_offset) {
1407
		gen4_vertex_flush(sna);
1408
		gen7_magic_ca_pass(sna, op);
1409
	}
1410
}
1411
 
1412
 
1413
static bool
1414
gen7_blit_tex(struct sna *sna,
3769 Serge 1415
              uint8_t op, bool scale,
3280 Serge 1416
		      PixmapPtr src, struct kgem_bo *src_bo,
1417
		      PixmapPtr mask,struct kgem_bo *mask_bo,
1418
		      PixmapPtr dst, struct kgem_bo *dst_bo,
1419
              int32_t src_x, int32_t src_y,
1420
              int32_t msk_x, int32_t msk_y,
1421
              int32_t dst_x, int32_t dst_y,
1422
              int32_t width, int32_t height,
1423
              struct sna_composite_op *tmp)
1424
{
1425
 
1426
 
1427
    tmp->op = PictOpSrc;
1428
 
1429
    tmp->dst.pixmap = dst;
1430
    tmp->dst.bo     = dst_bo;
1431
    tmp->dst.width  = dst->drawable.width;
1432
    tmp->dst.height = dst->drawable.height;
1433
    tmp->dst.format = PICT_x8r8g8b8;
1434
 
1435
 
1436
	tmp->src.repeat = RepeatNone;
1437
	tmp->src.filter = PictFilterNearest;
1438
    tmp->src.is_affine = true;
1439
 
1440
    tmp->src.bo = src_bo;
1441
	tmp->src.pict_format = PICT_x8r8g8b8;
1442
    tmp->src.card_format = gen7_get_card_format(tmp->src.pict_format);
1443
    tmp->src.width  = src->drawable.width;
1444
    tmp->src.height = src->drawable.height;
1445
 
1446
 
1447
	tmp->is_affine = tmp->src.is_affine;
1448
	tmp->has_component_alpha = false;
1449
	tmp->need_magic_ca_pass = false;
1450
 
1451
	tmp->mask.repeat = SAMPLER_EXTEND_NONE;
1452
	tmp->mask.filter = SAMPLER_FILTER_NEAREST;
1453
    tmp->mask.is_affine = true;
1454
 
1455
    tmp->mask.bo = mask_bo;
1456
    tmp->mask.pict_format = PIXMAN_a8;
1457
    tmp->mask.card_format = gen7_get_card_format(tmp->mask.pict_format);
1458
    tmp->mask.width  = mask->drawable.width;
1459
    tmp->mask.height = mask->drawable.height;
1460
 
3769 Serge 1461
    if( scale )
1462
    {
1463
        tmp->src.scale[0] = 1.f/width;
1464
        tmp->src.scale[1] = 1.f/height;
1465
    }
1466
    else
1467
    {
1468
        tmp->src.scale[0] = 1.f/src->drawable.width;
1469
        tmp->src.scale[1] = 1.f/src->drawable.height;
1470
    }
3280 Serge 1471
 
1472
    tmp->mask.scale[0] = 1.f/mask->drawable.width;
1473
    tmp->mask.scale[1] = 1.f/mask->drawable.height;
1474
 
1475
 
1476
 
1477
	tmp->u.gen7.flags =
1478
		GEN7_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
1479
					      tmp->src.repeat,
1480
					      tmp->mask.filter,
1481
					      tmp->mask.repeat),
1482
			       gen7_get_blend(tmp->op,
1483
					      tmp->has_component_alpha,
1484
					      tmp->dst.format),
1485
/*			       gen7_choose_composite_kernel(tmp->op,
1486
							    tmp->mask.bo != NULL,
1487
							    tmp->has_component_alpha,
1488
							    tmp->is_affine), */
1489
                   GEN7_WM_KERNEL_MASK,
1490
			       gen4_choose_composite_emitter(tmp));
1491
 
1492
	tmp->blt   = gen7_render_composite_blt;
1493
//	tmp->box   = gen7_render_composite_box;
1494
	tmp->done  = gen7_render_composite_done;
1495
 
1496
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
1497
	if (!kgem_check_bo(&sna->kgem,
1498
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
1499
			   NULL)) {
1500
		kgem_submit(&sna->kgem);
1501
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1502
	}
1503
 
1504
	gen7_emit_composite_state(sna, tmp);
1505
	gen7_align_vertex(sna, tmp);
1506
	return true;
1507
}
1508
 
1509
 
1510
 
1511
 
1512
 
3291 Serge 1513
 
1514
 
1515
 
1516
 
1517
 
1518
 
1519
 
1520
 
1521
 
1522
 
1523
 
1524
 
1525
 
1526
 
1527
 
1528
 
1529
 
1530
 
1531
 
1532
 
1533
 
1534
 
1535
 
1536
 
1537
 
1538
 
1539
 
1540
 
1541
 
1542
 
1543
 
1544
 
1545
 
1546
 
1547
 
1548
 
1549
 
1550
 
1551
 
1552
 
1553
 
1554
 
1555
 
1556
 
1557
 
1558
 
1559
 
1560
 
1561
 
1562
 
1563
 
1564
 
1565
 
1566
 
1567
 
1568
 
1569
 
1570
 
1571
 
1572
 
1573
 
1574
 
1575
 
1576
 
1577
 
1578
 
1579
 
1580
 
1581
 
1582
 
1583
 
1584
 
1585
 
1586
 
1587
 
1588
 
1589
 
1590
 
1591
 
1592
 
1593
 
1594
 
1595
 
1596
 
1597
 
1598
 
1599
 
3280 Serge 1600
static void gen7_render_flush(struct sna *sna)
1601
{
1602
	gen4_vertex_close(sna);
1603
 
1604
	assert(sna->render.vb_id == 0);
1605
	assert(sna->render.vertex_offset == 0);
1606
}
1607
 
1608
static void
1609
gen7_render_context_switch(struct kgem *kgem,
1610
			   int new_mode)
1611
{
1612
	if (kgem->nbatch) {
1613
		DBG(("%s: switch rings %d -> %d\n",
1614
		     __FUNCTION__, kgem->mode, new_mode));
1615
		_kgem_submit(kgem);
1616
	}
1617
 
1618
	kgem->ring = new_mode;
1619
}
1620
 
1621
static void
1622
gen7_render_retire(struct kgem *kgem)
1623
{
1624
	struct sna *sna;
1625
 
1626
	if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire))
1627
		kgem->ring = kgem->mode;
1628
 
1629
	sna = container_of(kgem, struct sna, kgem);
1630
	if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
1631
		DBG(("%s: resetting idle vbo\n", __FUNCTION__));
1632
		sna->render.vertex_used = 0;
1633
		sna->render.vertex_index = 0;
1634
	}
1635
}
1636
 
1637
static void
1638
gen7_render_expire(struct kgem *kgem)
1639
{
1640
	struct sna *sna;
1641
 
1642
	sna = container_of(kgem, struct sna, kgem);
1643
	if (sna->render.vbo && !sna->render.vertex_used) {
1644
		DBG(("%s: discarding vbo\n", __FUNCTION__));
1645
		kgem_bo_destroy(kgem, sna->render.vbo);
1646
		sna->render.vbo = NULL;
1647
		sna->render.vertices = sna->render.vertex_data;
1648
		sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
1649
		sna->render.vertex_used = 0;
1650
		sna->render.vertex_index = 0;
1651
	}
1652
}
1653
 
1654
static void gen7_render_reset(struct sna *sna)
1655
{
1656
	sna->render_state.gen7.emit_flush = false;
1657
	sna->render_state.gen7.needs_invariant = true;
1658
	sna->render_state.gen7.ve_id = 3 << 2;
1659
	sna->render_state.gen7.last_primitive = -1;
1660
 
1661
	sna->render_state.gen7.num_sf_outputs = 0;
1662
	sna->render_state.gen7.samplers = -1;
1663
	sna->render_state.gen7.blend = -1;
1664
	sna->render_state.gen7.kernel = -1;
1665
	sna->render_state.gen7.drawrect_offset = -1;
1666
	sna->render_state.gen7.drawrect_limit = -1;
1667
	sna->render_state.gen7.surface_table = -1;
1668
 
1669
	sna->render.vertex_offset = 0;
1670
	sna->render.nvertex_reloc = 0;
1671
	sna->render.vb_id = 0;
1672
}
1673
 
1674
static void gen7_render_fini(struct sna *sna)
1675
{
1676
	kgem_bo_destroy(&sna->kgem, sna->render_state.gen7.general_bo);
1677
}
1678
 
1679
static bool is_gt2(struct sna *sna)
1680
{
1681
	return DEVICE_ID(sna->PciInfo) & 0x20;
1682
}
1683
 
1684
static bool is_mobile(struct sna *sna)
1685
{
1686
	return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6;
1687
}
1688
 
1689
static bool gen7_render_setup(struct sna *sna)
1690
{
1691
    struct gen7_render_state *state = &sna->render_state.gen7;
1692
    struct sna_static_stream general;
1693
    struct gen7_sampler_state *ss;
1694
    int i, j, k, l, m;
1695
 
1696
    if (sna->kgem.gen == 070) {
1697
        state->info = &ivb_gt_info;
1698
        if (DEVICE_ID(sna->PciInfo) & 0xf) {
1699
            state->info = &ivb_gt1_info;
1700
            if (is_gt2(sna))
1701
                state->info = &ivb_gt2_info; /* XXX requires GT_MODE WiZ disabled */
1702
        }
1703
    } else if (sna->kgem.gen == 075) {
1704
        state->info = &hsw_gt_info;
1705
        if (DEVICE_ID(sna->PciInfo) & 0xf) {
1706
            state->info = &hsw_gt1_info;
1707
            if (is_gt2(sna))
1708
                state->info = &hsw_gt2_info;
1709
        }
1710
    } else
1711
        return false;
1712
 
1713
    sna_static_stream_init(&general);
1714
 
1715
    /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
1716
     * dumps, you know it points to zero.
1717
     */
1718
    null_create(&general);
1719
 
1720
    for (m = 0; m < GEN7_WM_KERNEL_COUNT; m++) {
1721
        if (wm_kernels[m].size) {
1722
            state->wm_kernel[m][1] =
1723
                sna_static_stream_add(&general,
1724
                              wm_kernels[m].data,
1725
                              wm_kernels[m].size,
1726
                              64);
1727
        } else {
1728
            if (USE_8_PIXEL_DISPATCH) {
1729
                state->wm_kernel[m][0] =
1730
                    sna_static_stream_compile_wm(sna, &general,
1731
                                     wm_kernels[m].data, 8);
1732
            }
1733
 
1734
            if (USE_16_PIXEL_DISPATCH) {
1735
                state->wm_kernel[m][1] =
1736
                    sna_static_stream_compile_wm(sna, &general,
1737
                                     wm_kernels[m].data, 16);
1738
            }
1739
 
1740
            if (USE_32_PIXEL_DISPATCH) {
1741
                state->wm_kernel[m][2] =
1742
                    sna_static_stream_compile_wm(sna, &general,
1743
                                     wm_kernels[m].data, 32);
1744
            }
1745
        }
1746
        assert(state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]);
1747
    }
1748
 
1749
    ss = sna_static_stream_map(&general,
1750
                   2 * sizeof(*ss) *
1751
                   (2 +
1752
                    FILTER_COUNT * EXTEND_COUNT *
1753
                    FILTER_COUNT * EXTEND_COUNT),
1754
                   32);
1755
    state->wm_state = sna_static_stream_offsetof(&general, ss);
1756
    sampler_copy_init(ss); ss += 2;
1757
    sampler_fill_init(ss); ss += 2;
1758
    for (i = 0; i < FILTER_COUNT; i++) {
1759
        for (j = 0; j < EXTEND_COUNT; j++) {
1760
            for (k = 0; k < FILTER_COUNT; k++) {
1761
                for (l = 0; l < EXTEND_COUNT; l++) {
1762
                    sampler_state_init(ss++, i, j);
1763
                    sampler_state_init(ss++, k, l);
1764
                }
1765
            }
1766
        }
1767
    }
1768
 
1769
    state->cc_blend = gen7_composite_create_blend_state(&general);
1770
 
1771
    state->general_bo = sna_static_stream_fini(sna, &general);
1772
    return state->general_bo != NULL;
1773
}
1774
 
1775
bool gen7_render_init(struct sna *sna)
1776
{
1777
    if (!gen7_render_setup(sna))
1778
        return false;
1779
 
1780
    sna->kgem.context_switch = gen7_render_context_switch;
1781
    sna->kgem.retire = gen7_render_retire;
1782
    sna->kgem.expire = gen7_render_expire;
1783
 
1784
    sna->render.blit_tex = gen7_blit_tex;
1785
 
1786
    sna->render.flush = gen7_render_flush;
1787
    sna->render.reset = gen7_render_reset;
1788
    sna->render.fini = gen7_render_fini;
1789
 
1790
    sna->render.max_3d_size = GEN7_MAX_SIZE;
1791
    sna->render.max_3d_pitch = 1 << 18;
3291 Serge 1792
    sna->render.caps = HW_BIT_BLIT | HW_TEX_BLIT;
1793
 
3280 Serge 1794
    return true;
1795
}
1796