Subversion Repositories Kolibri OS

Rev

Rev 3258 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
3254 Serge 1
/*
2
 * Copyright © 2006,2008,2011 Intel Corporation
3
 * Copyright © 2007 Red Hat, Inc.
4
 *
5
 * Permission is hereby granted, free of charge, to any person obtaining a
6
 * copy of this software and associated documentation files (the "Software"),
7
 * to deal in the Software without restriction, including without limitation
8
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
 * and/or sell copies of the Software, and to permit persons to whom the
10
 * Software is furnished to do so, subject to the following conditions:
11
 *
12
 * The above copyright notice and this permission notice (including the next
13
 * paragraph) shall be included in all copies or substantial portions of the
14
 * Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
 * SOFTWARE.
23
 *
24
 * Authors:
25
 *    Wang Zhenyu 
26
 *    Eric Anholt 
27
 *    Carl Worth 
28
 *    Keith Packard 
29
 *    Chris Wilson 
30
 *
31
 */
32
 
33
#ifdef HAVE_CONFIG_H
34
#include "config.h"
35
#endif
36
 
37
#include "sna.h"
38
#include "sna_reg.h"
39
#include "sna_render.h"
40
//#include "sna_render_inline.h"
41
//#include "sna_video.h"
42
 
43
#include "brw/brw.h"
44
#include "gen6_render.h"
45
 
46
#include "gen4_vertex.h"
47
 
48
#define NO_COMPOSITE 0
49
#define NO_COMPOSITE_SPANS 0
50
#define NO_COPY 0
51
#define NO_COPY_BOXES 0
52
#define NO_FILL 0
53
#define NO_FILL_BOXES 0
54
#define NO_FILL_ONE 0
55
#define NO_FILL_CLEAR 0
56
 
57
#define NO_RING_SWITCH 1
58
#define PREFER_RENDER 0
59
 
60
#define USE_8_PIXEL_DISPATCH 1
61
#define USE_16_PIXEL_DISPATCH 1
62
#define USE_32_PIXEL_DISPATCH 0
63
 
64
#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
65
#error "Must select at least 8, 16 or 32 pixel dispatch"
66
#endif
67
 
68
#define GEN6_MAX_SIZE 8192
69
 
70
/* Per-GT hardware limits: maximum thread counts per fixed-function
 * stage and the URB (Unified Return Buffer) allocation parameters.
 */
struct gt_info {
	int max_vs_threads;
	int max_gs_threads;
	int max_wm_threads;
	struct {
		int size;		/* URB size, in KB */
		int max_vs_entries;
		int max_gs_entries;
	} urb;
};
80
 
81
static const struct gt_info gt1_info = {
82
	.max_vs_threads = 24,
83
	.max_gs_threads = 21,
84
	.max_wm_threads = 40,
85
	.urb = { 32, 256, 256 },
86
};
87
 
88
static const struct gt_info gt2_info = {
89
	.max_vs_threads = 60,
90
	.max_gs_threads = 60,
91
	.max_wm_threads = 80,
92
	.urb = { 64, 256, 256 },
93
};
94
 
95
/* Pre-assembled pixel-shader programs for video: packed (e.g. YUY2)
 * and planar (e.g. YV12) YUV -> RGB conversion, one 128-bit
 * instruction per row. Generated offline into the .g6b headers.
 */
static const uint32_t ps_kernel_packed[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_yuv_rgb.g6b"
#include "exa_wm_write.g6b"
};

static const uint32_t ps_kernel_planar[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_planar.g6b"
#include "exa_wm_yuv_rgb.g6b"
#include "exa_wm_write.g6b"
};
108
 
109
#define NOKERNEL(kernel_enum, func, ns) \
110
    [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns}
111
#define KERNEL(kernel_enum, kernel, ns) \
112
    [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), ns}
113
 
114
static const struct wm_kernel_info {
115
	const char *name;
116
	const void *data;
117
	unsigned int size;
118
	unsigned int num_surfaces;
119
} wm_kernels[] = {
120
	NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
121
	NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),
122
 
123
	NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
124
	NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),
125
 
126
	NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
127
	NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),
128
 
129
	NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
130
	NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),
131
 
132
	NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
133
	NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),
134
 
135
	KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
136
	KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
137
};
138
#undef KERNEL
139
 
140
static const struct blendinfo {
141
	bool src_alpha;
142
	uint32_t src_blend;
143
	uint32_t dst_blend;
144
} gen6_blend_op[] = {
145
	/* Clear */	{0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO},
146
	/* Src */	{0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO},
147
	/* Dst */	{0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ONE},
148
	/* Over */	{1, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
149
	/* OverReverse */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ONE},
150
	/* In */	{0, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
151
	/* InReverse */	{1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_SRC_ALPHA},
152
	/* Out */	{0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
153
	/* OutReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
154
	/* Atop */	{1, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
155
	/* AtopReverse */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_SRC_ALPHA},
156
	/* Xor */	{1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
157
	/* Add */	{0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ONE},
158
};
159
 
160
/**
 * Highest-valued BLENDFACTOR used in gen6_blend_op.
 *
 * This leaves out GEN6_BLENDFACTOR_INV_DST_COLOR,
 * GEN6_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN6_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN6_BLENDFACTOR_COUNT (GEN6_BLENDFACTOR_INV_DST_ALPHA + 1)

#define GEN6_BLEND_STATE_PADDED_SIZE	ALIGN(sizeof(struct gen6_blend_state), 64)

/* Byte offset of the pre-baked blend state for a (src, dst) factor pair. */
#define BLEND_OFFSET(s, d) \
	(((s) * GEN6_BLENDFACTOR_COUNT + (d)) * GEN6_BLEND_STATE_PADDED_SIZE)

#define NO_BLEND BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO)
#define CLEAR BLEND_OFFSET(GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO)

/* Byte offset of the pre-baked sampler-state pair for the given
 * (filter, extend) combination of source and mask; the "+2" skips the
 * dedicated copy/fill sampler slots at the start of the table.
 */
#define SAMPLER_OFFSET(sf, se, mf, me) \
	(((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me) + 2) * 2 * sizeof(struct gen6_sampler_state))

#define VERTEX_2s2s 0

#define COPY_SAMPLER 0
#define COPY_VERTEX VERTEX_2s2s
#define COPY_FLAGS(a) GEN6_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN6_WM_KERNEL_NOMASK, COPY_VERTEX)

#define FILL_SAMPLER (2 * sizeof(struct gen6_sampler_state))
#define FILL_VERTEX VERTEX_2s2s
#define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
#define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)

/* The composite-op flag word packs sampler+kernel into the high half
 * and blend+vertex layout into the low half; the extractors below must
 * mirror GEN6_SET_FLAGS exactly.
 */
#define GEN6_SAMPLER(f) (((f) >> 16) & 0xfff0)
#define GEN6_BLEND(f) (((f) >> 0) & 0xfff0)
#define GEN6_KERNEL(f) (((f) >> 16) & 0xf)
#define GEN6_VERTEX(f) (((f) >> 0) & 0xf)
#define GEN6_SET_FLAGS(S, B, K, V)  (((S) | (K)) << 16 | ((B) | (V)))

#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
200
 
201
static inline bool too_large(int width, int height)
202
{
203
	return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE;
204
}
205
 
206
static uint32_t gen6_get_blend(int op,
207
			       bool has_component_alpha,
208
			       uint32_t dst_format)
209
{
210
	uint32_t src, dst;
211
 
212
//    src = GEN6_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
213
//    dst = GEN6_BLENDFACTOR_ZERO; //gen6_blend_op[op].dst_blend;
214
 
215
    src = GEN6_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
216
    dst = GEN6_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
217
 
218
#if 0
219
	/* If there's no dst alpha channel, adjust the blend op so that
220
	 * we'll treat it always as 1.
221
	 */
222
	if (PICT_FORMAT_A(dst_format) == 0) {
223
		if (src == GEN6_BLENDFACTOR_DST_ALPHA)
224
			src = GEN6_BLENDFACTOR_ONE;
225
		else if (src == GEN6_BLENDFACTOR_INV_DST_ALPHA)
226
			src = GEN6_BLENDFACTOR_ZERO;
227
	}
228
 
229
	/* If the source alpha is being used, then we should only be in a
230
	 * case where the source blend factor is 0, and the source blend
231
	 * value is the mask channels multiplied by the source picture's alpha.
232
	 */
233
	if (has_component_alpha && gen6_blend_op[op].src_alpha) {
234
		if (dst == GEN6_BLENDFACTOR_SRC_ALPHA)
235
			dst = GEN6_BLENDFACTOR_SRC_COLOR;
236
		else if (dst == GEN6_BLENDFACTOR_INV_SRC_ALPHA)
237
			dst = GEN6_BLENDFACTOR_INV_SRC_COLOR;
238
	}
239
 
240
	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
241
	     op, dst_format, PICT_FORMAT_A(dst_format),
242
	     src, dst, (int)BLEND_OFFSET(src, dst)));
243
#endif
244
 
245
	return BLEND_OFFSET(src, dst);
246
}
247
 
248
/* Map a Picture format to a gen6 sampler surface format.
 *
 * NOTE(review): the port currently forces B8G8R8A8_UNORM for every
 * input; the full mapping is kept below for reference — confirm all
 * callers really only use ARGB32 before relying on this.
 */
static uint32_t gen6_get_card_format(PictFormat format)
{
    return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;

/*
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
		return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:
		return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:
		return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:
		return GEN6_SURFACEFORMAT_R8G8B8X8_UNORM;
	case PICT_a2r10g10b10:
		return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:
		return GEN6_SURFACEFORMAT_B10G10R10X2_UNORM;
	case PICT_r8g8b8:
		return GEN6_SURFACEFORMAT_R8G8B8_UNORM;
	case PICT_r5g6b5:
		return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:
		return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN6_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
		return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
 */
}
281
 
282
/* Map a Picture format to a gen6 render-target surface format.
 *
 * NOTE(review): like gen6_get_card_format(), this port forces
 * B8G8R8A8_UNORM; the full mapping is preserved under #if 0.
 */
static uint32_t gen6_get_dest_format(PictFormat format)
{
    return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;

#if 0
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_r5g6b5:
		return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN6_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
#endif
}
314
 
315
#if 0
/* Picture validation helpers used by the full X11 composite paths;
 * compiled out in this port, kept for when those paths return.
 */

static bool gen6_check_dst_format(PictFormat format)
{
	if (gen6_get_dest_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

static bool gen6_check_format(uint32_t format)
{
	if (gen6_get_card_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

static uint32_t gen6_filter(uint32_t filter)
{
	switch (filter) {
	default:
		assert(0);
		/* fallthrough */
	case PictFilterNearest:
		return SAMPLER_FILTER_NEAREST;
	case PictFilterBilinear:
		return SAMPLER_FILTER_BILINEAR;
	}
}

static uint32_t gen6_check_filter(PicturePtr picture)
{
	switch (picture->filter) {
	case PictFilterNearest:
	case PictFilterBilinear:
		return true;
	default:
		return false;
	}
}

static uint32_t gen6_repeat(uint32_t repeat)
{
	switch (repeat) {
	default:
		assert(0);
		/* fallthrough */
	case RepeatNone:
		return SAMPLER_EXTEND_NONE;
	case RepeatNormal:
		return SAMPLER_EXTEND_REPEAT;
	case RepeatPad:
		return SAMPLER_EXTEND_PAD;
	case RepeatReflect:
		return SAMPLER_EXTEND_REFLECT;
	}
}

static bool gen6_check_repeat(PicturePtr picture)
{
	if (!picture->repeat)
		return true;

	switch (picture->repeatType) {
	case RepeatNone:
	case RepeatNormal:
	case RepeatPad:
	case RepeatReflect:
		return true;
	default:
		return false;
	}
}
#endif
390
 
391
static int
392
gen6_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
393
{
394
	int base;
395
 
396
	if (has_mask) {
397
/*
398
		if (is_ca) {
399
			if (gen6_blend_op[op].src_alpha)
3258 Serge 400
				base = GEN6_WM_KERNEL_MASKSA;
3254 Serge 401
			else
402
				base = GEN6_WM_KERNEL_MASKCA;
403
		} else
404
			base = GEN6_WM_KERNEL_MASK;
405
*/
406
	} else
407
		base = GEN6_WM_KERNEL_NOMASK;
408
 
409
	return base + !is_affine;
410
}
411
 
412
static void
413
gen6_emit_urb(struct sna *sna)
414
{
415
	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
416
	OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
417
		  (sna->render_state.gen6.info->urb.max_vs_entries << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
418
	OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
419
		  (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
420
}
421
 
422
static void
423
gen6_emit_state_base_address(struct sna *sna)
424
{
425
	OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2));
426
	OUT_BATCH(0); /* general */
427
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
428
				 sna->kgem.nbatch,
429
				 NULL,
430
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
431
				 BASE_ADDRESS_MODIFY));
432
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
433
				 sna->kgem.nbatch,
434
				 sna->render_state.gen6.general_bo,
435
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
436
				 BASE_ADDRESS_MODIFY));
437
	OUT_BATCH(0); /* indirect */
438
	OUT_BATCH(kgem_add_reloc(&sna->kgem,
439
				 sna->kgem.nbatch,
440
				 sna->render_state.gen6.general_bo,
441
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
442
				 BASE_ADDRESS_MODIFY));
443
 
444
	/* upper bounds, disable */
445
	OUT_BATCH(0);
446
	OUT_BATCH(BASE_ADDRESS_MODIFY);
447
	OUT_BATCH(0);
448
	OUT_BATCH(BASE_ADDRESS_MODIFY);
449
}
450
 
451
static void
452
gen6_emit_viewports(struct sna *sna)
453
{
454
	OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
455
		  GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
456
		  (4 - 2));
457
	OUT_BATCH(0);
458
	OUT_BATCH(0);
459
	OUT_BATCH(0);
460
}
461
 
462
static void
463
gen6_emit_vs(struct sna *sna)
464
{
465
	/* disable VS constant buffer */
466
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
467
	OUT_BATCH(0);
468
	OUT_BATCH(0);
469
	OUT_BATCH(0);
470
	OUT_BATCH(0);
471
 
472
	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
473
	OUT_BATCH(0); /* no VS kernel */
474
	OUT_BATCH(0);
475
	OUT_BATCH(0);
476
	OUT_BATCH(0);
477
	OUT_BATCH(0); /* pass-through */
478
}
479
 
480
static void
481
gen6_emit_gs(struct sna *sna)
482
{
483
	/* disable GS constant buffer */
484
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
485
	OUT_BATCH(0);
486
	OUT_BATCH(0);
487
	OUT_BATCH(0);
488
	OUT_BATCH(0);
489
 
490
	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
491
	OUT_BATCH(0); /* no GS kernel */
492
	OUT_BATCH(0);
493
	OUT_BATCH(0);
494
	OUT_BATCH(0);
495
	OUT_BATCH(0);
496
	OUT_BATCH(0); /* pass-through */
497
}
498
 
499
static void
500
gen6_emit_clip(struct sna *sna)
501
{
502
	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
503
	OUT_BATCH(0);
504
	OUT_BATCH(0); /* pass-through */
505
	OUT_BATCH(0);
506
}
507
 
508
static void
509
gen6_emit_wm_constants(struct sna *sna)
510
{
511
	/* disable WM constant buffer */
512
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
513
	OUT_BATCH(0);
514
	OUT_BATCH(0);
515
	OUT_BATCH(0);
516
	OUT_BATCH(0);
517
}
518
 
519
static void
520
gen6_emit_null_depth_buffer(struct sna *sna)
521
{
522
	OUT_BATCH(GEN6_3DSTATE_DEPTH_BUFFER | (7 - 2));
523
	OUT_BATCH(GEN6_SURFACE_NULL << GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
524
		  GEN6_DEPTHFORMAT_D32_FLOAT << GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
525
	OUT_BATCH(0);
526
	OUT_BATCH(0);
527
	OUT_BATCH(0);
528
	OUT_BATCH(0);
529
	OUT_BATCH(0);
530
 
531
	OUT_BATCH(GEN6_3DSTATE_CLEAR_PARAMS | (2 - 2));
532
	OUT_BATCH(0);
533
}
534
 
535
static void
536
gen6_emit_invariant(struct sna *sna)
537
{
538
	OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D);
539
 
540
	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
541
	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
542
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
543
	OUT_BATCH(0);
544
 
545
	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
546
	OUT_BATCH(1);
547
 
548
	gen6_emit_urb(sna);
549
 
550
	gen6_emit_state_base_address(sna);
551
 
552
	gen6_emit_viewports(sna);
553
	gen6_emit_vs(sna);
554
	gen6_emit_gs(sna);
555
	gen6_emit_clip(sna);
556
	gen6_emit_wm_constants(sna);
557
	gen6_emit_null_depth_buffer(sna);
558
 
559
	sna->render_state.gen6.needs_invariant = false;
560
}
561
 
562
static bool
563
gen6_emit_cc(struct sna *sna, int blend)
564
{
565
	struct gen6_render_state *render = &sna->render_state.gen6;
566
 
567
	if (render->blend == blend)
568
		return blend != NO_BLEND;
569
 
570
	DBG(("%s: blend = %x\n", __FUNCTION__, blend));
571
 
572
	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
573
	OUT_BATCH((render->cc_blend + blend) | 1);
574
	if (render->blend == (unsigned)-1) {
575
		OUT_BATCH(1);
576
		OUT_BATCH(1);
577
	} else {
578
		OUT_BATCH(0);
579
		OUT_BATCH(0);
580
	}
581
 
582
	render->blend = blend;
583
	return blend != NO_BLEND;
584
}
585
 
586
static void
587
gen6_emit_sampler(struct sna *sna, uint32_t state)
588
{
589
	if (sna->render_state.gen6.samplers == state)
590
		return;
591
 
592
	sna->render_state.gen6.samplers = state;
593
 
594
	DBG(("%s: sampler = %x\n", __FUNCTION__, state));
595
 
596
	OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
597
		  GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
598
		  (4 - 2));
599
	OUT_BATCH(0); /* VS */
600
	OUT_BATCH(0); /* GS */
601
	OUT_BATCH(sna->render_state.gen6.wm_state + state);
602
}
603
 
604
static void
605
gen6_emit_sf(struct sna *sna, bool has_mask)
606
{
607
	int num_sf_outputs = has_mask ? 2 : 1;
608
 
609
	if (sna->render_state.gen6.num_sf_outputs == num_sf_outputs)
610
		return;
611
 
612
	DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
613
	     __FUNCTION__, num_sf_outputs, 1, 0));
614
 
615
	sna->render_state.gen6.num_sf_outputs = num_sf_outputs;
616
 
617
	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
618
	OUT_BATCH(num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT |
619
		  1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT |
620
		  1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT);
621
	OUT_BATCH(0);
622
	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
623
	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
624
	OUT_BATCH(0);
625
	OUT_BATCH(0);
626
	OUT_BATCH(0);
627
	OUT_BATCH(0);
628
	OUT_BATCH(0); /* DW9 */
629
	OUT_BATCH(0);
630
	OUT_BATCH(0);
631
	OUT_BATCH(0);
632
	OUT_BATCH(0);
633
	OUT_BATCH(0); /* DW14 */
634
	OUT_BATCH(0);
635
	OUT_BATCH(0);
636
	OUT_BATCH(0);
637
	OUT_BATCH(0);
638
	OUT_BATCH(0); /* DW19 */
639
}
640
 
641
static void
642
gen6_emit_wm(struct sna *sna, unsigned int kernel, bool has_mask)
643
{
644
	const uint32_t *kernels;
645
 
646
	if (sna->render_state.gen6.kernel == kernel)
647
		return;
648
 
649
	sna->render_state.gen6.kernel = kernel;
650
	kernels = sna->render_state.gen6.wm_kernel[kernel];
651
 
652
	DBG(("%s: switching to %s, num_surfaces=%d (8-pixel? %d, 16-pixel? %d,32-pixel? %d)\n",
653
	     __FUNCTION__,
654
	     wm_kernels[kernel].name, wm_kernels[kernel].num_surfaces,
655
	    kernels[0], kernels[1], kernels[2]));
656
 
657
	OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
658
	OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
659
	OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
660
		  wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
661
	OUT_BATCH(0); /* scratch space */
662
	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT |
663
		  8 << GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT |
664
		  6 << GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT);
665
	OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT |
666
		  (kernels[0] ? GEN6_3DSTATE_WM_8_DISPATCH_ENABLE : 0) |
667
		  (kernels[1] ? GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) |
668
		  (kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) |
669
		  GEN6_3DSTATE_WM_DISPATCH_ENABLE);
670
	OUT_BATCH((1 + has_mask) << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT |
671
		  GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
672
	OUT_BATCH(kernels[2]);
673
	OUT_BATCH(kernels[1]);
674
}
675
 
676
static bool
677
gen6_emit_binding_table(struct sna *sna, uint16_t offset)
678
{
679
	if (sna->render_state.gen6.surface_table == offset)
680
		return false;
681
 
682
	/* Binding table pointers */
683
	OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS |
684
		  GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
685
		  (4 - 2));
686
	OUT_BATCH(0);		/* vs */
687
	OUT_BATCH(0);		/* gs */
688
	/* Only the PS uses the binding table */
689
	OUT_BATCH(offset*4);
690
 
691
	sna->render_state.gen6.surface_table = offset;
692
	return true;
693
}
694
 
695
static bool
696
gen6_emit_drawing_rectangle(struct sna *sna,
697
			    const struct sna_composite_op *op)
698
{
699
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
700
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
701
 
702
	assert(!too_large(op->dst.x, op->dst.y));
703
	assert(!too_large(op->dst.width, op->dst.height));
704
 
705
	if (sna->render_state.gen6.drawrect_limit  == limit &&
706
	    sna->render_state.gen6.drawrect_offset == offset)
707
		return false;
708
 
709
	/* [DevSNB-C+{W/A}] Before any depth stall flush (including those
710
	 * produced by non-pipelined state commands), software needs to first
711
	 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
712
	 * 0.
713
	 *
714
	 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
715
	 * BEFORE the pipe-control with a post-sync op and no write-cache
716
	 * flushes.
717
	 */
718
	if (!sna->render_state.gen6.first_state_packet) {
719
	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
720
	OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
721
		  GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
722
	OUT_BATCH(0);
723
	OUT_BATCH(0);
724
	}
725
 
726
	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
727
	OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME);
728
	OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
729
				 sna->render_state.gen6.general_bo,
730
				 I915_GEM_DOMAIN_INSTRUCTION << 16 |
731
				 I915_GEM_DOMAIN_INSTRUCTION,
732
				 64));
733
	OUT_BATCH(0);
734
 
735
	OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
736
	OUT_BATCH(0);
737
	OUT_BATCH(limit);
738
	OUT_BATCH(offset);
739
 
740
	sna->render_state.gen6.drawrect_offset = offset;
741
	sna->render_state.gen6.drawrect_limit = limit;
742
	return true;
743
}
744
 
745
static void
746
gen6_emit_vertex_elements(struct sna *sna,
747
			  const struct sna_composite_op *op)
748
{
749
	/*
750
	 * vertex data in vertex buffer
751
	 *    position: (x, y)
752
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
753
	 *    texture coordinate 1 if (has_mask is true): same as above
754
	 */
755
	struct gen6_render_state *render = &sna->render_state.gen6;
756
	uint32_t src_format, dw;
757
	int id = GEN6_VERTEX(op->u.gen6.flags);
758
	bool has_mask;
759
 
760
	DBG(("%s: setup id=%d\n", __FUNCTION__, id));
761
 
762
	if (render->ve_id == id)
763
		return;
764
	render->ve_id = id;
765
 
766
	/* The VUE layout
767
	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
768
	 *    dword 4-7: position (x, y, 1.0, 1.0),
769
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
770
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
771
	 *
772
	 * dword 4-15 are fetched from vertex buffer
773
	 */
774
	has_mask = (id >> 2) != 0;
775
	OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS |
776
		((2 * (3 + has_mask)) + 1 - 2));
777
 
778
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
779
		  GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
780
 
781
	OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
782
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
783
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
784
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);
785
 
786
	/* x,y */
787
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
788
		  GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
789
 
790
	OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
791
		  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
792
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
793
		  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
794
 
795
	/* u0, v0, w0 */
796
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
797
	dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
798
	switch (id & 3) {
799
	default:
800
		assert(0);
801
	case 0:
802
		src_format = GEN6_SURFACEFORMAT_R16G16_SSCALED;
803
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
804
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
805
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
806
		break;
807
	case 1:
808
		src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
809
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
810
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
811
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
812
		break;
813
	case 2:
814
		src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
815
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
816
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
817
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
818
		break;
819
	case 3:
820
		src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
821
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
822
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
823
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
824
		break;
825
	}
826
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
827
		  src_format << VE0_FORMAT_SHIFT |
828
		  4 << VE0_OFFSET_SHIFT);
829
	OUT_BATCH(dw);
830
 
831
	/* u1, v1, w1 */
832
	if (has_mask) {
833
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
834
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
835
		dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
836
		switch (id >> 2) {
837
		case 1:
838
			src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
839
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
840
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
841
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
842
			break;
843
		default:
844
			assert(0);
845
		case 2:
846
			src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
847
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
848
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
849
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
850
			break;
851
		case 3:
852
			src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
853
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
854
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
855
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
856
			break;
857
		}
858
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
859
			  src_format << VE0_FORMAT_SHIFT |
860
			  offset << VE0_OFFSET_SHIFT);
861
		OUT_BATCH(dw);
862
	}
863
}
864
 
865
static void
866
gen6_emit_flush(struct sna *sna)
867
{
868
	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
869
	OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH |
870
		  GEN6_PIPE_CONTROL_TC_FLUSH |
871
		  GEN6_PIPE_CONTROL_CS_STALL);
872
	OUT_BATCH(0);
873
	OUT_BATCH(0);
874
}
875
 
876
static void
877
gen6_emit_state(struct sna *sna,
878
		const struct sna_composite_op *op,
879
		uint16_t wm_binding_table)
880
{
881
	bool need_stall = wm_binding_table & 1;
882
 
883
	if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)))
884
		need_stall = false;
885
	gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags));
886
	gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2);
887
	gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2);
888
	gen6_emit_vertex_elements(sna, op);
889
 
890
	need_stall |= gen6_emit_binding_table(sna, wm_binding_table & ~1);
891
	if (gen6_emit_drawing_rectangle(sna, op))
892
		need_stall = false;
893
	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
894
        gen6_emit_flush(sna);
895
        kgem_clear_dirty(&sna->kgem);
896
		if (op->dst.bo->exec)
897
		kgem_bo_mark_dirty(op->dst.bo);
898
		need_stall = false;
899
	}
900
	if (need_stall) {
901
		OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
902
		OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
903
			  GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
904
		OUT_BATCH(0);
905
		OUT_BATCH(0);
906
	}
907
	sna->render_state.gen6.first_state_packet = false;
908
}
909
 
910
static bool gen6_magic_ca_pass(struct sna *sna,
911
			       const struct sna_composite_op *op)
912
{
913
	struct gen6_render_state *state = &sna->render_state.gen6;
914
 
915
	if (!op->need_magic_ca_pass)
916
		return false;
917
 
918
	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
919
	     sna->render.vertex_start, sna->render.vertex_index));
920
 
921
	gen6_emit_flush(sna);
922
 
923
	gen6_emit_cc(sna, gen6_get_blend(PictOpAdd, true, op->dst.format));
924
	gen6_emit_wm(sna,
925
		     gen6_choose_composite_kernel(PictOpAdd,
926
						  true, true,
927
						  op->is_affine),
928
		     true);
929
 
930
	OUT_BATCH(GEN6_3DPRIMITIVE |
931
		  GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
932
		  _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
933
 
934
		  4);
935
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
936
	OUT_BATCH(sna->render.vertex_start);
937
	OUT_BATCH(1);	/* single instance */
938
	OUT_BATCH(0);	/* start instance location */
939
	OUT_BATCH(0);	/* index buffer offset, ignored */
940
 
941
	state->last_primitive = sna->kgem.nbatch;
942
	return true;
943
}
944
 
945
typedef struct gen6_surface_state_padded {
946
	struct gen6_surface_state state;
947
	char pad[32 - sizeof(struct gen6_surface_state)];
948
} gen6_surface_state_padded;
949
 
950
static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}
955
 
956
static void scratch_create(struct sna_static_stream *stream)
{
	/* 64 bytes of scratch space for random writes, such as
	 * the pipe-control w/a.
	 */
	sna_static_stream_map(stream, 64, 64);
}
963
 
964
static void
965
sampler_state_init(struct gen6_sampler_state *sampler_state,
966
		   sampler_filter_t filter,
967
		   sampler_extend_t extend)
968
{
969
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */
970
 
971
	/* We use the legacy mode to get the semantics specified by
972
	 * the Render extension. */
973
	sampler_state->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY;
974
 
975
	switch (filter) {
976
	default:
977
	case SAMPLER_FILTER_NEAREST:
978
		sampler_state->ss0.min_filter = GEN6_MAPFILTER_NEAREST;
979
		sampler_state->ss0.mag_filter = GEN6_MAPFILTER_NEAREST;
980
		break;
981
	case SAMPLER_FILTER_BILINEAR:
982
		sampler_state->ss0.min_filter = GEN6_MAPFILTER_LINEAR;
983
		sampler_state->ss0.mag_filter = GEN6_MAPFILTER_LINEAR;
984
		break;
985
	}
986
 
987
	switch (extend) {
988
	default:
989
	case SAMPLER_EXTEND_NONE:
990
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
991
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
992
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
993
		break;
994
	case SAMPLER_EXTEND_REPEAT:
995
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
996
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
997
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
998
		break;
999
	case SAMPLER_EXTEND_PAD:
1000
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
1001
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
1002
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
1003
		break;
1004
	case SAMPLER_EXTEND_REFLECT:
1005
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
1006
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
1007
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
1008
		break;
1009
	}
1010
}
1011
 
1012
/* Initialise the sampler pair used by the copy paths: source sampled
 * with unnormalised (pixel) coordinates, mask slot left at defaults. */
static void
sampler_copy_init(struct gen6_sampler_state *ss)
{
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
	ss->ss3.non_normalized_coord = 1;	/* coords in pixels, not [0,1] */

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
1020
 
1021
/* Initialise the sampler pair used by the fill paths: a repeating
 * source sampled with unnormalised coordinates, mask slot defaulted. */
static void
sampler_fill_init(struct gen6_sampler_state *ss)
{
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
	ss->ss3.non_normalized_coord = 1;	/* coords in pixels, not [0,1] */

	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
}
1029
 
1030
/* Translate an I915_TILING_* value into GEN6 surface-state tiling bits.
 * This port only uses linear (untiled) surfaces, so the function is a
 * stub that always reports "no tiling"; the original mapping is kept
 * below, disabled, for reference. */
static uint32_t
gen6_tiling_bits(uint32_t tiling)
{
	(void)tiling;	/* tiling is ignored in the linear-only port */
	return 0;
#if 0
	switch (tiling) {
	default: assert(0);
	case I915_TILING_NONE: return 0;
	case I915_TILING_X: return GEN6_SURFACE_TILED;
	case I915_TILING_Y: return GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y;
	}
#endif
}
1043
 
1044
/**
 * Sets up the common fields for a surface state buffer for the given
 * picture in the given surface state buffer.
 *
 * Returns the byte offset of the surface state within the batch.  A
 * previously created binding for the same (format, scanout) pair is
 * reused rather than re-emitted.
 */
static int
gen6_bind_bo(struct sna *sna,
         struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t *ss;
	uint32_t domains;
	uint16_t offset;
	uint32_t is_scanout = is_dst && bo->scanout;

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format | is_scanout << 31);
	if (offset) {
		DBG(("[%x]  bo(handle=%d), format=%d, reuse %s binding\n",
		     offset, bo->handle, format,
		     is_dst ? "render" : "sampler"));
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		return offset * sizeof(uint32_t);
	}

	/* Carve a new padded surface-state slot from the top of the batch. */
	offset = sna->kgem.surface -=
		sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;
	ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT |
		 GEN6_SURFACE_BLEND_ENABLED |
		 format << GEN6_SURFACE_FORMAT_SHIFT);
	/* Destination surfaces are both read and written by the render
	 * engine; sources are only read by the sampler. */
	if (is_dst)
		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
	else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
	ss[2] = ((width - 1)  << GEN6_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN6_SURFACE_HEIGHT_SHIFT);
	assert(bo->pitch <= (1 << 18));	/* hardware pitch field limit */
	ss[3] = (gen6_tiling_bits(bo->tiling) |
		 (bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT);
	ss[4] = 0;
	/* NOTE(review): ss[5] presumably selects cacheability — scanout
	 * surfaces must stay uncached; confirm against gen6 PRM. */
	ss[5] = is_scanout ? 0 : 3 << 16;

	kgem_bo_set_binding(bo, format | is_scanout << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}
1100
 
1101
/* Emit a 3DSTATE_VERTEX_BUFFERS packet for the operation's vertex
 * layout.  The buffer address is left as a relocation to be filled in
 * when the vbo is finally bound. */
static void gen6_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = GEN6_VERTEX(op->u.gen6.flags);

	OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3);
	OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
		  4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT);
	/* Remember where the address dword lives so it can be relocated
	 * against the vertex bo at flush time. */
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH(0);
	OUT_BATCH(~0); /* max address: disabled */
	OUT_BATCH(0);

	/* Mark this vertex layout as bound for the current batch. */
	sna->render.vb_id |= 1 << id;
}
1116
 
1117
/* Start (or continue) a RECTLIST primitive.  If the previous batch
 * entry was also a primitive we simply re-open it by pointing
 * vertex_offset back at its vertex-count dword. */
static void gen6_emit_primitive(struct sna *sna)
{
	if (sna->kgem.nbatch == sna->render_state.gen6.last_primitive) {
		DBG(("%s: continuing previous primitive, start=%d, index=%d\n",
		     __FUNCTION__,
		     sna->render.vertex_start,
		     sna->render.vertex_index));
		/* Reuse the still-open 3DPRIMITIVE: its count dword is 5
		 * dwords back from the current batch pointer. */
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN6_3DPRIMITIVE |
		  GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
		  4);
	/* The vertex count is patched in later by gen4_vertex_flush(). */
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;
	DBG(("%s: started new primitive: index=%d\n",
	     __FUNCTION__, sna->render.vertex_start));

	sna->render_state.gen6.last_primitive = sna->kgem.nbatch;
}
1145
 
1146
/* Ensure a primitive is open and the vertex buffer is bound before
 * emitting rectangles.  Returns false if the batch lacks room and must
 * be flushed first. */
static bool gen6_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	int id = 1 << GEN6_VERTEX(op->u.gen6.flags);
	int ndwords;

	/* If another thread finished the wait and a primitive is already
	 * open, there is nothing to do. */
	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	/* Worst-case space: CA pass needs the larger reserve; binding the
	 * vertex buffer costs another 5 dwords. */
	ndwords = op->need_magic_ca_pass ? 60 : 6;
	if ((sna->render.vb_id & id) == 0)
		ndwords += 5;
	if (!kgem_check_batch(&sna->kgem, ndwords))
		return false;

	if ((sna->render.vb_id & id) == 0)
		gen6_emit_vertex_buffer(sna, op);

	gen6_emit_primitive(sna);
	return true;
}
1167
 
1168
/* Slow path when the vbo is full: wait for outstanding users, flush the
 * current primitive (running the CA pass if needed) and start a new
 * vertex buffer.  Returns the free vertex space, or 0 if the whole
 * batch must be submitted first. */
static int gen6_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Preventing discarding new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	/* Make sure the flush + potential CA replay fits in the batch. */
	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		/* The CA pass changes blend/WM state; restore the op's own
		 * state before any further rectangles are emitted. */
		if (gen6_magic_ca_pass(sna, op)) {
			gen6_emit_flush(sna);
			gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags));
			gen6_emit_wm(sna,
				     GEN6_KERNEL(op->u.gen6.flags),
				     GEN6_VERTEX(op->u.gen6.flags) >> 2);
		}
	}

	return gen4_vertex_finish(sna);
}
1196
 
1197
/* Reserve vertex space for up to 'want' rectangles, flushing the vbo
 * and/or submitting the batch (then re-emitting state via emit_state)
 * as necessary.  Returns the number of rectangles actually granted
 * (>= 1); vertex_index is advanced by 3 vertices per rectangle. */
inline static int gen6_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *, const struct sna_composite_op *op))
{
	int rem;

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen6_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0 &&
		     !gen6_rectangle_begin(sna, op)))
		goto flush;

	/* Clamp the grant to however many whole rectangles still fit. */
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	assert(want > 0);
	sna->render.vertex_index += 3*want;
	return want;

flush:
	/* Out of room: close the primitive, submit the batch, re-emit the
	 * operation's state and retry from the top. */
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen6_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}
1235
 
1236
/* Allocate a zeroed binding-table slot at the top of the batch and
 * return a pointer to it; *offset receives its dword index. */
inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna,
							 uint16_t *offset)
{
	uint32_t *table;

	sna->kgem.surface -=
		sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
	/* Clear all surplus entries to zero in case of prefetch */
	table = memset(sna->kgem.batch + sna->kgem.surface,
		       0, sizeof(struct gen6_surface_state_padded));

	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));

	*offset = sna->kgem.surface;
	return table;
}
1252
 
1253
/* Prepare the batch for a render operation: switch to the render ring,
 * submit if there is not enough room for state + surfaces, and emit
 * the invariant state if needed.  Returns whether the destination bo
 * has pending dirty state (callers fold this into the state emission). */
static bool
gen6_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen6.needs_invariant)
		gen6_emit_invariant(sna);

	return kgem_bo_is_dirty(op->dst.bo);
}
1271
 
1272
/* Emit the full pipeline state for a composite operation: binding table
 * entries for dst/src/mask and then the derived render state.  If the
 * new binding table is identical to the previous one, the slot is
 * released and the old table reused. */
static void gen6_emit_composite_state(struct sna *sna,
                      const struct sna_composite_op *op)
{
    uint32_t *binding_table;
    uint16_t offset;
    bool dirty;

	dirty = gen6_get_batch(sna, op);

    binding_table = gen6_composite_get_binding_table(sna, &offset);

    binding_table[0] =
        gen6_bind_bo(sna,
                op->dst.bo, op->dst.width, op->dst.height,
			    gen6_get_dest_format(op->dst.format),
			    true);
    binding_table[1] =
        gen6_bind_bo(sna,
                 op->src.bo, op->src.width, op->src.height,
                 op->src.card_format,
			     false);
    if (op->mask.bo) {
        binding_table[2] =
            gen6_bind_bo(sna,
                     op->mask.bo,
                     op->mask.width,
                     op->mask.height,
                     op->mask.card_format,
				     false);
    }

    /* Dedupe: if no surfaces were (re)emitted and the first two entries
     * (and the mask entry, when present) match the previous table,
     * reclaim the slot and point at the previous table instead. */
    if (sna->kgem.surface == offset &&
        *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table &&
        (op->mask.bo == NULL ||
         sna->kgem.batch[sna->render_state.gen6.surface_table+2] == binding_table[2])) {
        sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
        offset = sna->render_state.gen6.surface_table;
    }

    /* The dirty flag rides in bit 0 of the offset argument. */
    gen6_emit_state(sna, op, offset | dirty);
}
1313
 
1314
/* Realign the vertex buffer when the vertex stride changes between
 * operations, so that vertex_index * floats_per_vertex lands on the
 * current write position. */
static void
gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	assert (sna->render.vertex_offset == 0);
	if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) {
		/* Not enough room left for even two rectangles at the new
		 * stride: flush to a fresh vbo instead of realigning. */
		if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
			gen4_vertex_finish(sna);

		DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
		     sna->render_state.gen6.floats_per_vertex,
		     op->floats_per_vertex,
		     sna->render.vertex_index,
		     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
		/* Round up to the next whole vertex at the new stride. */
		sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
		sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
		sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex;
	}
	assert((sna->render.vertex_used % op->floats_per_vertex) == 0);
}
1333
 
1334
#if 0
1335
 
1336
/* Emit a single composite rectangle via the op's prim_emit vfunc.
 * (Currently compiled out with the rest of the composite fast paths.) */
fastcall static void
gen6_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);
	op->prim_emit(sna, op, r);
}
1344
 
1345
/* Composite a single box, using the same offsets for dst, src and mask. */
fastcall static void
gen6_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	struct sna_composite_rectangles r;

	gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);

	DBG(("  %s: (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width  = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.src = r.mask = r.dst;

	op->prim_emit(sna, op, &r);
}
1366
 
1367
/* Composite an array of boxes one rectangle at a time through
 * prim_emit, batching only as many per pass as the vbo allows. */
static void
gen6_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("composite_boxes(%d)\n", nbox));

	do {
		int nbox_this_time;

		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			/* src and mask reuse the dst offsets. */
			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width  = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.src = r.mask = r.dst;

			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}
1399
 
1400
/* Composite an array of boxes via the bulk emit_boxes vfunc, writing
 * vertices for a whole chunk of boxes at once. */
static void
gen6_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Claim the vertex space up front, then fill it in bulk. */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}
1423
 
1424
/* Threaded variant of gen6_render_composite_boxes: vertex space is
 * reserved under the vertex lock, then released while the (potentially
 * slow) vertex emission runs outside the lock. */
static void
gen6_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		/* Pin the reserved range, then drop the lock while writing. */
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
1455
 
1456
#endif
1457
 
1458
#ifndef MAX
/* NOTE(review): classic double-evaluation macro — arguments with side
 * effects (e.g. MAX(i++, j)) are evaluated twice.  Acceptable for the
 * simple operands used in this file. */
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
1461
 
1462
/* Pre-generate every (src factor, dst factor) blend state pairing into
 * the static stream; gen6_get_blend() later indexes this table.
 * Returns the stream offset of the table base. */
static uint32_t
gen6_composite_create_blend_state(struct sna_static_stream *stream)
{
	char *base, *ptr;
	int src, dst;

	base = sna_static_stream_map(stream,
				     GEN6_BLENDFACTOR_COUNT * GEN6_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE,
				     64);

	ptr = base;
	for (src = 0; src < GEN6_BLENDFACTOR_COUNT; src++) {
		for (dst= 0; dst < GEN6_BLENDFACTOR_COUNT; dst++) {
			struct gen6_blend_state *blend =
				(struct gen6_blend_state *)ptr;

			blend->blend0.dest_blend_factor = dst;
			blend->blend0.source_blend_factor = src;
			blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD;
			/* ONE/ZERO is a plain copy: disable blending entirely. */
			blend->blend0.blend_enable =
				!(dst == GEN6_BLENDFACTOR_ZERO && src == GEN6_BLENDFACTOR_ONE);

			blend->blend1.post_blend_clamp_enable = 1;
			blend->blend1.pre_blend_clamp_enable = 1;

			ptr += GEN6_BLEND_STATE_PADDED_SIZE;
		}
	}

	return sna_static_stream_offsetof(stream, base);
}
1493
 
1494
#if 0
1495
 
1496
/* Bind one plane of a video frame as a sampler surface; unlike
 * gen6_bind_bo() this supports a byte offset into the bo and takes an
 * explicit pitch.  Returns the surface-state byte offset. */
static uint32_t gen6_bind_video_source(struct sna *sna,
				       struct kgem_bo *src_bo,
				       uint32_t src_offset,
				       int src_width,
				       int src_height,
				       int src_pitch,
				       uint32_t src_surf_format)
{
	struct gen6_surface_state *ss;

	sna->kgem.surface -= sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);

	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
	ss->ss0.surface_type = GEN6_SURFACE_2D;
	ss->ss0.surface_format = src_surf_format;

	/* Sampler-only read; src_offset selects the plane within the bo. */
	ss->ss1.base_addr =
		kgem_add_reloc(&sna->kgem,
			       sna->kgem.surface + 1,
			       src_bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       src_offset);

	ss->ss2.width  = src_width - 1;
	ss->ss2.height = src_height - 1;
	ss->ss3.pitch  = src_pitch - 1;

	return sna->kgem.surface * sizeof(uint32_t);
}
1525
 
1526
/* Emit binding table + state for a video blit: one render target plus
 * either a single packed-YUV source surface or six planar sub-surfaces
 * (two per Y/U/V plane). */
static void gen6_emit_video_state(struct sna *sna,
				  const struct sna_composite_op *op)
{
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format;
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;
	int n_src, n;

	dirty = gen6_get_batch(sna, op);

	/* Plane base offsets; entries are duplicated because the planar
	 * kernel samples each plane through two binding-table slots. */
	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		src_surf_format = GEN6_SURFACEFORMAT_R8_UNORM;
		src_width[1]  = src_width[0]  = frame->width;
		src_height[1] = src_height[0] = frame->height;
		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
		/* Chroma planes are subsampled by 2 in both dimensions. */
		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
			frame->width / 2;
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
			frame->height / 2;
		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
			frame->pitch[0];
		n_src = 6;
	} else {
		if (frame->id == FOURCC_UYVY)
			src_surf_format = GEN6_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format = GEN6_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0]  = frame->width;
		src_height[0] = frame->height;
		src_pitch[0]  = frame->pitch[0];
		n_src = 1;
	}

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen6_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format);
	}

	gen6_emit_state(sna, op, offset | dirty);
}
1593
 
1594
/* Render a video frame into the destination pixmap, one textured
 * rectangle per clip box, scaling from (src_w,src_h) to (drw_w,drw_h).
 * Returns false if the pixmap cannot be moved to the GPU. */
static bool
gen6_render_video(struct sna *sna,
		  struct sna_video *video,
		  struct sna_video_frame *frame,
		  RegionPtr dstRegion,
		  short src_w, short src_h,
		  short drw_w, short drw_h,
		  short dx, short dy,
		  PixmapPtr pixmap)
{
	struct sna_composite_op tmp;
	int nbox, pix_xoff, pix_yoff;
	float src_scale_x, src_scale_y;
	struct sna_pixmap *priv;
	unsigned filter;
	BoxPtr box;

	DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
	     __FUNCTION__, src_w, src_h, drw_w, drw_h,
	     REGION_NUM_RECTS(dstRegion),
	     REGION_EXTENTS(NULL, dstRegion)->x1,
	     REGION_EXTENTS(NULL, dstRegion)->y1,
	     REGION_EXTENTS(NULL, dstRegion)->x2,
	     REGION_EXTENTS(NULL, dstRegion)->y2));

	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
	if (priv == NULL)
		return false;

	memset(&tmp, 0, sizeof(tmp));

	tmp.dst.pixmap = pixmap;
	tmp.dst.width  = pixmap->drawable.width;
	tmp.dst.height = pixmap->drawable.height;
	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
	tmp.dst.bo = priv->gpu_bo;

	tmp.src.bo = frame->bo;
	tmp.mask.bo = NULL;

	/* 3 floats per vertex (x, y, one texcoord pair packed), 3 vertices
	 * per rectangle. */
	tmp.floats_per_vertex = 3;
	tmp.floats_per_rect = 9;

	/* Only filter when actually scaling. */
	if (src_w == drw_w && src_h == drw_h)
		filter = SAMPLER_FILTER_NEAREST;
	else
		filter = SAMPLER_FILTER_BILINEAR;

	tmp.u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
					       SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
			       NO_BLEND,
			       is_planar_fourcc(frame->id) ?
			       GEN6_WM_KERNEL_VIDEO_PLANAR :
			       GEN6_WM_KERNEL_VIDEO_PACKED,
			       2);
	tmp.priv = frame;

	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_emit_video_state(sna, &tmp);
	gen6_align_vertex(sna, &tmp);

	/* Set up the offset for translating from the given region (in screen
	 * coordinates) to the backing pixmap.
	 */
#ifdef COMPOSITE
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
#else
	pix_xoff = 0;
	pix_yoff = 0;
#endif

	/* Use normalized texture coordinates */
	src_scale_x = ((float)src_w / frame->width) / (float)drw_w;
	src_scale_y = ((float)src_h / frame->height) / (float)drw_h;

	box = REGION_RECTS(dstRegion);
	nbox = REGION_NUM_RECTS(dstRegion);
	while (nbox--) {
		BoxRec r;

		r.x1 = box->x1 + pix_xoff;
		r.x2 = box->x2 + pix_xoff;
		r.y1 = box->y1 + pix_yoff;
		r.y2 = box->y2 + pix_yoff;

		gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state);

		/* RECTLIST: bottom-right, bottom-left, top-left. */
		OUT_VERTEX(r.x2, r.y2);
		OUT_VERTEX_F((box->x2 - dx) * src_scale_x);
		OUT_VERTEX_F((box->y2 - dy) * src_scale_y);

		OUT_VERTEX(r.x1, r.y2);
		OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
		OUT_VERTEX_F((box->y2 - dy) * src_scale_y);

		OUT_VERTEX(r.x1, r.y1);
		OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
		OUT_VERTEX_F((box->y1 - dy) * src_scale_y);

		if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
			sna_damage_add_box(&priv->gpu_damage, &r);
			sna_damage_subtract_box(&priv->cpu_damage, &r);
		}
		box++;
	}
	priv->clear = false;

	gen4_vertex_flush(sna);
	return true;
}
1712
 
1713
/* Prepare a source/mask picture as a composite channel.  Returns 1 on
 * success, 0 if the picture reduced to a clear/solid that still needs
 * initialisation by the caller, and -1 on failure.  Falls back to
 * software fixups for gradients, alpha maps and unsupported
 * repeat/filter/format combinations. */
static int
gen6_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->card_format = -1;	/* resolved later or via convert */

	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	/* No drawable: gradient or other source-only picture. */
	if (picture->pDrawable == NULL) {
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fixup, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen6_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen6_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	/* A pure integer translation can be folded into the coordinates,
	 * letting the sampler run untransformed with nearest filtering. */
	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen6_get_card_format(picture->format);
	if (channel->card_format == (unsigned)-1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	/* Pixmaps beyond the hardware limits need an extracted copy. */
	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
		DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
		     pixmap->drawable.width, pixmap->drawable.height));
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);
	}

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}
1806
 
1807
inline static void gen6_composite_channel_convert(struct sna_composite_channel *channel)
1808
{
1809
	channel->repeat = gen6_repeat(channel->repeat);
1810
	channel->filter = gen6_filter(channel->filter);
1811
	if (channel->card_format == (unsigned)-1)
1812
		channel->card_format = gen6_get_card_format(channel->pict_format);
1813
	assert(channel->card_format != (unsigned)-1);
1814
}
1815
 
1816
/* Finish a composite operation: flush pending vertices, run the
 * component-alpha fixup pass if required, and release the channel bos. */
static void gen6_render_composite_done(struct sna *sna,
                       const struct sna_composite_op *op)
{
    DBG(("%s\n", __FUNCTION__));

	assert(!sna->render.active);
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
        gen6_magic_ca_pass(sna, op);
    }

	if (op->mask.bo)
		kgem_bo_destroy(&sna->kgem, op->mask.bo);
	if (op->src.bo)
		kgem_bo_destroy(&sna->kgem, op->src.bo);

	/* Redirect cleanup disabled in this port. */
//   sna_render_composite_redirect_done(sna, op);
}
1834
 
1835
/* Resolve the destination picture into op->dst: pick a GPU bo for the
 * drawable (preferring/forcing GPU for render), record damage tracking
 * and drawable deltas, and redirect through a proxy if the target
 * exceeds the hardware size limits.  Returns false on failure. */
static bool
gen6_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h)
{
	BoxRec box;

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.format = dst->format;
	op->dst.width = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;

	/* Hint the bo chooser with the affected area; an empty extent
	 * falls back to the whole picture. */
	if (w && h) {
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
					  PREFER_GPU | FORCE_GPU | RENDER_GPU,
					  &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);

	if (too_large(op->dst.width, op->dst.height) &&
	    !sna_render_composite_redirect(sna, op, x, y, w, h))
		return false;

	return true;
}
1881
 
1882
 
1883
 
1884
/* Prepare a render composite operation on the gen6 (Sandy Bridge) 3D
 * pipeline.
 *
 * Tries cheaper paths first (BLT engine, fallback, tiling), then
 * resolves the source and optional mask channels, selects the blend
 * state/WM kernel, and installs the per-op emitters into *tmp.
 * Returns true when *tmp is ready for use; on failure all bo
 * references acquired along the way are released via the goto chain. */
static bool
gen6_render_composite(struct sna *sna,
              uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
              int16_t src_x, int16_t src_y,
              int16_t msk_x, int16_t msk_y,
              int16_t dst_x, int16_t dst_y,
              int16_t width, int16_t height,
              struct sna_composite_op *tmp)
{
	if (op >= ARRAY_SIZE(gen6_blend_op))
		return false;

    DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
         width, height, sna->kgem.ring));

	/* An unmasked composite may be cheaper on the BLT engine. */
	if (mask == NULL &&
	    try_blt(sna, dst, src, width, height) &&
	    sna_blt_composite(sna, op,
			      src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      tmp, false))
		return true;

	if (gen6_composite_fallback(sna, src, mask, dst))
		return false;

	/* Operations too large for one pass are split into tiles. */
	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x, src_y,
					    msk_x, msk_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	/* Clear behaves as Src with a transparent source. */
	if (op == PictOpClear)
		op = PictOpSrc;
	tmp->op = op;
	if (!gen6_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height))
		return false;

	/* Resolve the source channel: -1 = error, 0 = replace with a
	 * solid (e.g. degenerate gradient), 1 = ready. */
	switch (gen6_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		/* Did we just switch rings to prepare the source? */
		if (mask == NULL &&
		    prefer_blt_composite(sna, tmp) &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		gen6_composite_channel_convert(&tmp->src);
		break;
	}

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	tmp->mask.bo = NULL;
    tmp->mask.filter = SAMPLER_FILTER_NEAREST;
    tmp->mask.repeat = SAMPLER_EXTEND_NONE;

	if (mask) {
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			tmp->has_component_alpha = true;

			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value.  We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (gen6_blend_op[op].src_alpha &&
			    (gen6_blend_op[op].src_blend != GEN6_BLENDFACTOR_ZERO)) {
				if (op != PictOpOver)
					goto cleanup_src;

				/* Over is emulated as OutReverse plus a
				 * second "magic" Add pass. */
				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		/* Reuse the source channel when the mask refers to the
		 * same picture/transform, else resolve it separately. */
		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, msk_x, msk_y)) {
			switch (gen6_composite_picture(sna, mask, &tmp->mask,
						       msk_x, msk_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				goto cleanup_src;
			case 0:
				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
					goto cleanup_src;
				/* fall through to fixup */
			case 1:
				gen6_composite_channel_convert(&tmp->mask);
				break;
			}
		}

		tmp->is_affine &= tmp->mask.is_affine;
	}

	/* Pack sampler, blend, kernel and vertex-emitter selection into
	 * the per-op gen6 state word. */
	tmp->u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
					      tmp->src.repeat,
					      tmp->mask.filter,
					      tmp->mask.repeat),
			       gen6_get_blend(tmp->op,
					      tmp->has_component_alpha,
					      tmp->dst.format),
			       gen6_choose_composite_kernel(tmp->op,
							    tmp->mask.bo != NULL,
							    tmp->has_component_alpha,
							    tmp->is_affine),
			       gen4_choose_composite_emitter(tmp));

	tmp->blt   = gen6_render_composite_blt;
    tmp->box   = gen6_render_composite_box;
	tmp->boxes = gen6_render_composite_boxes__blt;
	if (tmp->emit_boxes) {
		tmp->boxes = gen6_render_composite_boxes;
		tmp->thread_boxes = gen6_render_composite_boxes__thread;
	}
	tmp->done  = gen6_render_composite_done;

	/* Make sure all three bos fit in the current batch; if not,
	 * flush once and retry before giving up. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL))
			goto cleanup_mask;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

    gen6_emit_composite_state(sna, tmp);
    gen6_align_vertex(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo)
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
	if (tmp->src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
	if (tmp->redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
	return false;
}
2053
 
2054
#if !NO_COMPOSITE_SPANS
2055
/* Emit a single span rectangle: reserve space for one rect in the
 * batch and let the op's prim_emit write the vertices with the given
 * opacity. */
fastcall static void
gen6_render_composite_spans_box(struct sna *sna,
				const struct sna_composite_spans_op *op,
				const BoxRec *box, float opacity)
{
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
	     __FUNCTION__,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y,
	     box->x1, box->y1,
	     box->x2 - box->x1,
	     box->y2 - box->y1));

	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_composite_state);
	op->prim_emit(sna, op, box, opacity);
}
2072
 
2073
static void
2074
gen6_render_composite_spans_boxes(struct sna *sna,
2075
				  const struct sna_composite_spans_op *op,
2076
				  const BoxRec *box, int nbox,
2077
				  float opacity)
2078
{
2079
	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
2080
	     __FUNCTION__, nbox,
2081
	     op->base.src.offset[0], op->base.src.offset[1],
2082
	     opacity,
2083
	     op->base.dst.x, op->base.dst.y));
2084
 
2085
	do {
2086
		int nbox_this_time;
2087
 
2088
		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
2089
						     gen6_emit_composite_state);
2090
		nbox -= nbox_this_time;
2091
 
2092
		do {
2093
			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
2094
			     box->x1, box->y1,
2095
			     box->x2 - box->x1,
2096
			     box->y2 - box->y1));
2097
 
2098
			op->prim_emit(sna, op, box++, opacity);
2099
		} while (--nbox_this_time);
2100
	} while (nbox);
2101
}
2102
 
2103
/* Threaded span emission: reserve vertex space under the render lock,
 * then fill the vertices outside the lock so multiple threads can
 * generate geometry concurrently. */
fastcall static void
gen6_render_composite_spans_boxes__thread(struct sna *sna,
					  const struct sna_composite_spans_op *op,
					  const struct sna_opacity_box *box,
					  int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Claim the vertex range while still holding the lock;
		 * the actual writes happen unlocked below. */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
2138
 
2139
fastcall static void
2140
gen6_render_composite_spans_done(struct sna *sna,
2141
				 const struct sna_composite_spans_op *op)
2142
{
2143
	DBG(("%s()\n", __FUNCTION__));
2144
	assert(!sna->render.active);
2145
 
2146
	if (sna->render.vertex_offset)
2147
		gen4_vertex_flush(sna);
2148
 
2149
	if (op->base.src.bo)
2150
		kgem_bo_destroy(&sna->kgem, op->base.src.bo);
2151
 
2152
	sna_render_composite_redirect_done(sna, &op->base);
2153
}
2154
 
2155
/* Decide whether a spans composite can run on the gen6 render path.
 *
 * Rejects unknown blend ops, operations that would fall back to
 * software, and tiled operations whose destination is not on the GPU.
 * For non-rectilinear spans the destination's bo busyness is used as a
 * heuristic for whether the GPU path is worthwhile. */
static bool
gen6_check_composite_spans(struct sna *sna,
			   uint8_t op, PicturePtr src, PicturePtr dst,
			   int16_t width, int16_t height,
			   unsigned flags)
{
	DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
	     __FUNCTION__, op, width, height, flags));

	if (op >= ARRAY_SIZE(gen6_blend_op))
		return false;

	if (gen6_composite_fallback(sna, src, NULL, dst)) {
		DBG(("%s: operation would fallback\n", __FUNCTION__));
		return false;
	}

	if (need_tiling(sna, width, height) &&
	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
		DBG(("%s: fallback, tiled operation not on GPU\n",
		     __FUNCTION__));
		return false;
	}

	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
		struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
		assert(priv);

		/* A busy CPU bo means we cannot render inplace anyway. */
		if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
			return true;

		if (flags & COMPOSITE_SPANS_INPLACE_HINT)
			return false;

		/* Only worth the GPU setup if the target is in flight. */
		return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
	}

	return true;
}
2194
 
2195
/* Prepare a spans composite operation (source modulated by per-box
 * opacity, as produced by the trapezoid rasteriser).
 *
 * Mirrors gen6_render_composite but without a mask channel: the
 * opacity is fed through the OPACITY WM kernel instead.  Returns true
 * with *tmp ready for use; on failure releases acquired references. */
static bool
gen6_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x,  int16_t src_y,
			    int16_t dst_x,  int16_t dst_y,
			    int16_t width,  int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
	     width, height, flags, sna->kgem.ring));

	assert(gen6_check_composite_spans(sna, op, src, dst, width, height, flags));

	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	tmp->base.op = op;
	if (!gen6_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height))
		return false;

	/* Resolve the source channel: -1 = error, 0 = replace with a
	 * solid, 1 = ready. */
	switch (gen6_composite_picture(sna, src, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		gen6_composite_channel_convert(&tmp->base.src);
		break;
	}
	tmp->base.mask.bo = NULL;

	tmp->base.is_affine = tmp->base.src.is_affine;
	tmp->base.need_magic_ca_pass = false;

	/* Opacity kernel; the second sampler slot is unused and set to
	 * a harmless NEAREST/PAD state. */
	tmp->base.u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
					      tmp->base.src.repeat,
					      SAMPLER_FILTER_NEAREST,
					      SAMPLER_EXTEND_PAD),
			       gen6_get_blend(tmp->base.op, false, tmp->base.dst.format),
			       GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine,
			       gen4_choose_spans_emitter(tmp));

	tmp->box   = gen6_render_composite_spans_box;
	tmp->boxes = gen6_render_composite_spans_boxes;
	if (tmp->emit_boxes)
		tmp->thread_boxes = gen6_render_composite_spans_boxes__thread;
	tmp->done  = gen6_render_composite_spans_done;

	/* Ensure batch space for both bos, flushing once if needed. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->base.dst.bo, tmp->base.src.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->base.dst.bo, tmp->base.src.bo,
				   NULL))
			goto cleanup_src;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_emit_composite_state(sna, &tmp->base);
	gen6_align_vertex(sna, &tmp->base);
	return true;

cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}
2283
#endif
2284
 
2285
#endif
2286
 
2287
/* Emit the render state for a copy operation: bind the destination and
 * source surfaces and flush the composite state.
 *
 * If the freshly written binding table is identical to the previous
 * one (compared as a single 64-bit word covering both entries), the
 * surface-state allocation is rolled back and the previous table is
 * reused to save batch space. */
static void
gen6_emit_copy_state(struct sna *sna,
		     const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);

	/* Dedupe: reuse the previous binding table if unchanged. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	gen6_emit_state(sna, op, offset | dirty);
}
2318
 
2319
#if 0
2320
 
2321
static inline bool prefer_blt_copy(struct sna *sna,
2322
				   struct kgem_bo *src_bo,
2323
				   struct kgem_bo *dst_bo,
2324
				   unsigned flags)
2325
{
2326
	if (flags & COPY_SYNC)
2327
		return false;
2328
 
2329
	if (PREFER_RENDER)
2330
		return PREFER_RENDER > 0;
2331
 
2332
	if (sna->kgem.ring == KGEM_BLT)
2333
		return true;
2334
 
2335
	if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags))
2336
		return true;
2337
 
2338
	if (untiled_tlb_miss(src_bo) ||
2339
	    untiled_tlb_miss(dst_bo))
2340
		return true;
2341
 
2342
	if (!prefer_blt_ring(sna, dst_bo, flags))
2343
		return false;
2344
 
2345
	return (prefer_blt_bo(sna, src_bo) >= 0 &&
2346
		prefer_blt_bo(sna, dst_bo) > 0);
2347
}
2348
 
2349
/* Compute the bounding box (union) of n boxes, n >= 1. */
inline static void boxes_extents(const BoxRec *box, int n, BoxRec *extents)
{
	int i;

	*extents = box[0];
	for (i = 1; i < n; i++) {
		if (extents->x1 > box[i].x1)
			extents->x1 = box[i].x1;
		if (extents->x2 < box[i].x2)
			extents->x2 = box[i].x2;

		if (extents->y1 > box[i].y1)
			extents->y1 = box[i].y1;
		if (extents->y2 < box[i].y2)
			extents->y2 = box[i].y2;
	}
}
2366
 
2367
static inline bool
2368
overlaps(struct sna *sna,
2369
	 struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
2370
	 struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
2371
	 const BoxRec *box, int n, BoxRec *extents)
2372
{
2373
	if (src_bo != dst_bo)
2374
		return false;
2375
 
2376
	boxes_extents(box, n, extents);
2377
	return (extents->x2 + src_dx > extents->x1 + dst_dx &&
2378
		extents->x1 + src_dx < extents->x2 + dst_dx &&
2379
		extents->y2 + src_dy > extents->y1 + dst_dy &&
2380
		extents->y1 + src_dy < extents->y2 + dst_dy);
2381
}
2382
 
2383
static bool
2384
gen6_render_copy_boxes(struct sna *sna, uint8_t alu,
2385
		       PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
2386
		       PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
2387
		       const BoxRec *box, int n, unsigned flags)
2388
{
2389
	struct sna_composite_op tmp;
2390
	BoxRec extents;
2391
 
2392
	DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? %d\n",
2393
	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu,
2394
	     src_bo == dst_bo,
2395
	     overlaps(sna,
2396
		      src_bo, src_dx, src_dy,
2397
		      dst_bo, dst_dx, dst_dy,
2398
		      box, n, &extents)));
2399
 
2400
	if (prefer_blt_copy(sna, src_bo, dst_bo, flags) &&
2401
	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
2402
	    sna_blt_copy_boxes(sna, alu,
2403
			       src_bo, src_dx, src_dy,
2404
			       dst_bo, dst_dx, dst_dy,
2405
			       dst->drawable.bitsPerPixel,
2406
			       box, n))
2407
		return true;
2408
 
2409
	if (!(alu == GXcopy || alu == GXclear)) {
2410
fallback_blt:
2411
		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
2412
			return false;
2413
 
2414
		return sna_blt_copy_boxes_fallback(sna, alu,
2415
						   src, src_bo, src_dx, src_dy,
2416
						   dst, dst_bo, dst_dx, dst_dy,
2417
						   box, n);
2418
	}
2419
 
2420
	if (overlaps(sna,
2421
		     src_bo, src_dx, src_dy,
2422
		     dst_bo, dst_dx, dst_dy,
2423
		     box, n, &extents)) {
2424
		if (too_large(extents.x2-extents.x1, extents.y2-extents.y1))
2425
			goto fallback_blt;
2426
 
2427
		if (can_switch_to_blt(sna, dst_bo, flags) &&
2428
		    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
2429
		    sna_blt_copy_boxes(sna, alu,
2430
				       src_bo, src_dx, src_dy,
2431
				       dst_bo, dst_dx, dst_dy,
2432
				       dst->drawable.bitsPerPixel,
2433
				       box, n))
2434
			return true;
2435
 
2436
		return sna_render_copy_boxes__overlap(sna, alu,
2437
						      src, src_bo, src_dx, src_dy,
2438
						      dst, dst_bo, dst_dx, dst_dy,
2439
						      box, n, &extents);
2440
	}
2441
 
2442
	if (dst->drawable.depth == src->drawable.depth) {
2443
		tmp.dst.format = sna_render_format_for_depth(dst->drawable.depth);
2444
		tmp.src.pict_format = tmp.dst.format;
2445
	} else {
2446
		tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
2447
		tmp.src.pict_format = sna_format_for_depth(src->drawable.depth);
2448
	}
2449
	if (!gen6_check_format(tmp.src.pict_format))
2450
		goto fallback_blt;
2451
 
2452
	tmp.dst.pixmap = dst;
2453
	tmp.dst.width  = dst->drawable.width;
2454
	tmp.dst.height = dst->drawable.height;
2455
	tmp.dst.bo = dst_bo;
2456
	tmp.dst.x = tmp.dst.y = 0;
2457
	tmp.damage = NULL;
2458
 
2459
	sna_render_composite_redirect_init(&tmp);
2460
	if (too_large(tmp.dst.width, tmp.dst.height)) {
2461
		int i;
2462
 
2463
		extents = box[0];
2464
		for (i = 1; i < n; i++) {
2465
			if (box[i].x1 < extents.x1)
2466
				extents.x1 = box[i].x1;
2467
			if (box[i].y1 < extents.y1)
2468
				extents.y1 = box[i].y1;
2469
 
2470
			if (box[i].x2 > extents.x2)
2471
				extents.x2 = box[i].x2;
2472
			if (box[i].y2 > extents.y2)
2473
				extents.y2 = box[i].y2;
2474
		}
2475
 
2476
		if (!sna_render_composite_redirect(sna, &tmp,
2477
						   extents.x1 + dst_dx,
2478
						   extents.y1 + dst_dy,
2479
						   extents.x2 - extents.x1,
2480
						   extents.y2 - extents.y1))
2481
			goto fallback_tiled;
2482
 
2483
		dst_dx += tmp.dst.x;
2484
		dst_dy += tmp.dst.y;
2485
 
2486
		tmp.dst.x = tmp.dst.y = 0;
2487
	}
2488
 
2489
	tmp.src.card_format = gen6_get_card_format(tmp.src.pict_format);
2490
	if (too_large(src->drawable.width, src->drawable.height)) {
2491
		int i;
2492
 
2493
		extents = box[0];
2494
		for (i = 1; i < n; i++) {
2495
			if (extents.x1 < box[i].x1)
2496
				extents.x1 = box[i].x1;
2497
			if (extents.y1 < box[i].y1)
2498
				extents.y1 = box[i].y1;
2499
 
2500
			if (extents.x2 > box[i].x2)
2501
				extents.x2 = box[i].x2;
2502
			if (extents.y2 > box[i].y2)
2503
				extents.y2 = box[i].y2;
2504
		}
2505
 
2506
		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
2507
					       extents.x1 + src_dx,
2508
					       extents.y1 + src_dy,
2509
					       extents.x2 - extents.x1,
2510
					       extents.y2 - extents.y1)) {
2511
			DBG(("%s: unable to extract partial pixmap\n", __FUNCTION__));
2512
			goto fallback_tiled_dst;
2513
		}
2514
 
2515
		src_dx += tmp.src.offset[0];
2516
		src_dy += tmp.src.offset[1];
2517
	} else {
2518
		tmp.src.bo = src_bo;
2519
		tmp.src.width  = src->drawable.width;
2520
		tmp.src.height = src->drawable.height;
2521
	}
2522
 
2523
	tmp.mask.bo = NULL;
2524
 
2525
	tmp.floats_per_vertex = 2;
2526
	tmp.floats_per_rect = 6;
2527
	tmp.need_magic_ca_pass = 0;
2528
 
2529
	tmp.u.gen6.flags = COPY_FLAGS(alu);
2530
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
2531
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER);
2532
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX);
2533
 
2534
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
2535
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
2536
		kgem_submit(&sna->kgem);
2537
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
2538
			DBG(("%s: too large for a single operation\n",
2539
			     __FUNCTION__));
2540
			goto fallback_tiled_src;
2541
		}
2542
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
2543
	}
2544
 
2545
	gen6_emit_copy_state(sna, &tmp);
2546
	gen6_align_vertex(sna, &tmp);
2547
 
2548
	do {
2549
		int16_t *v;
2550
		int n_this_time;
2551
 
2552
		n_this_time = gen6_get_rectangles(sna, &tmp, n,
2553
						  gen6_emit_copy_state);
2554
		n -= n_this_time;
2555
 
2556
		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
2557
		sna->render.vertex_used += 6 * n_this_time;
2558
		assert(sna->render.vertex_used <= sna->render.vertex_size);
2559
		do {
2560
 
2561
			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
2562
			     box->x1 + src_dx, box->y1 + src_dy,
2563
			     box->x1 + dst_dx, box->y1 + dst_dy,
2564
			     box->x2 - box->x1, box->y2 - box->y1));
2565
			v[0] = box->x2 + dst_dx;
2566
			v[2] = box->x2 + src_dx;
2567
			v[1]  = v[5] = box->y2 + dst_dy;
2568
			v[3]  = v[7] = box->y2 + src_dy;
2569
			v[8]  = v[4] = box->x1 + dst_dx;
2570
			v[10] = v[6] = box->x1 + src_dx;
2571
			v[9]  = box->y1 + dst_dy;
2572
			v[11] = box->y1 + src_dy;
2573
			v += 12; box++;
2574
		} while (--n_this_time);
2575
	} while (n);
2576
 
2577
	gen4_vertex_flush(sna);
2578
	sna_render_composite_redirect_done(sna, &tmp);
2579
	if (tmp.src.bo != src_bo)
2580
		kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2581
	return true;
2582
 
2583
fallback_tiled_src:
2584
	if (tmp.src.bo != src_bo)
2585
		kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2586
fallback_tiled_dst:
2587
	if (tmp.redirect.real_bo)
2588
		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
2589
fallback_tiled:
2590
	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
2591
	    sna_blt_copy_boxes(sna, alu,
2592
			       src_bo, src_dx, src_dy,
2593
			       dst_bo, dst_dx, dst_dy,
2594
			       dst->drawable.bitsPerPixel,
2595
			       box, n))
2596
		return true;
2597
 
2598
	return sna_tiling_copy_boxes(sna, alu,
2599
				     src, src_bo, src_dx, src_dy,
2600
				     dst, dst_bo, dst_dx, dst_dy,
2601
				     box, n);
2602
}
2603
 
2604
#endif
2605
 
2606
static void
2607
gen6_render_copy_blt(struct sna *sna,
2608
		     const struct sna_copy_op *op,
2609
		     int16_t sx, int16_t sy,
2610
		     int16_t w,  int16_t h,
2611
		     int16_t dx, int16_t dy)
2612
{
2613
	int16_t *v;
2614
 
2615
	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_copy_state);
2616
 
2617
	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
2618
	sna->render.vertex_used += 6;
2619
	assert(sna->render.vertex_used <= sna->render.vertex_size);
2620
 
2621
	v[0]  = dx+w; v[1]  = dy+h;
2622
	v[2]  = sx+w; v[3]  = sy+h;
2623
	v[4]  = dx;   v[5]  = dy+h;
2624
	v[6]  = sx;   v[7]  = sy+h;
2625
	v[8]  = dx;   v[9]  = dy;
2626
	v[10] = sx;   v[11] = sy;
2627
}
2628
 
2629
static void
2630
gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
2631
{
2632
	DBG(("%s()\n", __FUNCTION__));
2633
 
2634
	assert(!sna->render.active);
2635
	if (sna->render.vertex_offset)
2636
		gen4_vertex_flush(sna);
2637
}
2638
 
2639
/* Prepare a streaming copy operation between two pixmaps.
 *
 * Hard-codes a8r8g8b8 for both channels (this KolibriOS port only
 * drives 32bpp surfaces — TODO confirm), installs the copy blt/done
 * hooks and ensures batch space for both bos.
 *
 * NOTE(review): the "fallback:" label at the top is jumped to when
 * kgem_check_bo still fails after a kgem_submit; since nothing changes
 * between iterations this can loop indefinitely if the bos never fit.
 * Upstream's fallback delegated to the BLT copy path instead — verify
 * whether that path was removed intentionally in this port. */
static bool
gen6_render_copy(struct sna *sna, uint8_t alu,
		 PixmapPtr src, struct kgem_bo *src_bo,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 struct sna_copy_op *op)
{
	DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
	     __FUNCTION__, alu,
	     src->drawable.width, src->drawable.height,
	     dst->drawable.width, dst->drawable.height));

fallback:

    op->base.dst.format = PIXMAN_a8r8g8b8;
	op->base.src.pict_format = op->base.dst.format;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.bo = dst_bo;

	op->base.src.bo = src_bo;
	op->base.src.card_format =
		gen6_get_card_format(op->base.src.pict_format);
	op->base.src.width  = src->drawable.width;
	op->base.src.height = src->drawable.height;

	op->base.mask.bo = NULL;

	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen6.flags = COPY_FLAGS(alu);
	assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER);
	assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX);

	/* Ensure both bos fit in the batch, flushing once if needed. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
			goto fallback;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_emit_copy_state(sna, &op->base);
	gen6_align_vertex(sna, &op->base);

	op->blt  = gen6_render_copy_blt;
	op->done = gen6_render_copy_done;
	return true;
}
2691
 
2692
#if 0
2693
 
2694
/* Emit the render state for a solid fill: bind the destination and a
 * 1x1 B8G8R8A8 solid-colour source, reusing the previous binding table
 * when the two entries are identical (compared as one 64-bit word). */
static void
gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	/* The solid source is a 1x1 surface sampled with repeat. */
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, 1, 1,
			     GEN6_SURFACEFORMAT_B8G8R8A8_UNORM,
			     false);

	/* Dedupe: reuse the previous binding table if unchanged. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface +=
			sizeof(struct gen6_surface_state_padded)/sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	gen6_emit_state(sna, op, offset | dirty);
}
2725
 
2726
static inline bool prefer_blt_fill(struct sna *sna,
2727
				   struct kgem_bo *bo)
2728
{
2729
	if (PREFER_RENDER)
2730
		return PREFER_RENDER < 0;
2731
 
2732
	if (untiled_tlb_miss(bo))
2733
		return true;
2734
 
2735
	return prefer_blt_ring(sna, bo, 0) || prefer_blt_bo(sna, bo) >= 0;
2736
}
2737
 
2738
/* Fill a list of boxes with a solid colour using the render ring.
 *
 * Tries the BLT engine first when preferable or when the destination
 * format is not renderable; otherwise converts the colour to a
 * premultiplied a8r8g8b8 pixel, binds it as a 1x1 solid source and
 * emits one rect per box.  Oversized destinations are redirected or
 * handed to the tiled fill path. */
static bool
gen6_render_fill_boxes(struct sna *sna,
		       CARD8 op,
		       PictFormat format,
		       const xRenderColor *color,
		       PixmapPtr dst, struct kgem_bo *dst_bo,
		       const BoxRec *box, int n)
{
	struct sna_composite_op tmp;
	uint32_t pixel;

	DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
	     __FUNCTION__, op,
	     color->red, color->green, color->blue, color->alpha, (int)format));

	if (op >= ARRAY_SIZE(gen6_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	/* BLT path: only Src-like ops map onto a blitter alu. */
	if (prefer_blt_fill(sna, dst_bo) || !gen6_check_dst_format(format)) {
		uint8_t alu = GXinvalid;

		if (op <= PictOpSrc) {
			pixel = 0;
			if (op == PictOpClear)
				alu = GXclear;
			else if (sna_get_pixel_from_rgba(&pixel,
							 color->red,
							 color->green,
							 color->blue,
							 color->alpha,
							 format))
				alu = GXcopy;
		}

		if (alu != GXinvalid &&
		    sna_blt_fill_boxes(sna, alu,
				       dst_bo, dst->drawable.bitsPerPixel,
				       pixel, box, n))
			return true;

		/* No render path either for unrenderable formats. */
		if (!gen6_check_dst_format(format))
			return false;
	}

	/* Convert the colour once to a premultiplied ARGB32 pixel. */
	if (op == PictOpClear) {
		pixel = 0;
		op = PictOpSrc;
	} else if (!sna_get_pixel_from_rgba(&pixel,
					    color->red,
					    color->green,
					    color->blue,
					    color->alpha,
					    PICT_a8r8g8b8))
		return false;

	DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
	     __FUNCTION__, pixel, n,
	     box[0].x1, box[0].y1, box[0].x2, box[0].y2));

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = format;
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	sna_render_composite_redirect_init(&tmp);
	if (too_large(dst->drawable.width, dst->drawable.height)) {
		BoxRec extents;

		/* Redirect to a proxy covering the union of the boxes,
		 * else hand off to the tiled fill path. */
		boxes_extents(box, n, &extents);
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1, extents.y1,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1))
			return sna_tiling_fill_boxes(sna, op, format, color,
						     dst, dst_bo, box, n);
	}

	tmp.src.bo = sna_render_get_solid(sna, pixel);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS(op, format);
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
	}

	gen6_emit_fill_state(sna, &tmp);
	gen6_align_vertex(sna, &tmp);

	do {
		int n_this_time;
		int16_t *v;

		n_this_time = gen6_get_rectangles(sna, &tmp, n,
						  gen6_emit_fill_state);
		n -= n_this_time;

		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
		sna->render.vertex_used += 6 * n_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);
		do {
			DBG(("	(%d, %d), (%d, %d)\n",
			     box->x1, box->y1, box->x2, box->y2));

			/* dst coordinates plus constant (1,1)/(0,0)
			 * texcoords into the 1x1 solid source. */
			v[0] = box->x2;
			v[5] = v[1] = box->y2;
			v[8] = v[4] = box->x1;
			v[9] = box->y1;
			v[2] = v[3]  = v[7]  = 1;
			v[6] = v[10] = v[11] = 0;
			v += 12; box++;
		} while (--n_this_time);
	} while (n);

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	sna_render_composite_redirect_done(sna, &tmp);
	return true;
}
2871
 
2872
static void
2873
gen6_render_op_fill_blt(struct sna *sna,
2874
			const struct sna_fill_op *op,
2875
			int16_t x, int16_t y, int16_t w, int16_t h)
2876
{
2877
	int16_t *v;
2878
 
2879
	DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));
2880
 
2881
	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state);
2882
 
2883
	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
2884
	sna->render.vertex_used += 6;
2885
	assert(sna->render.vertex_used <= sna->render.vertex_size);
2886
 
2887
	v[0] = x+w;
2888
	v[4] = v[8] = x;
2889
	v[1] = v[5] = y+h;
2890
	v[9] = y;
2891
 
2892
	v[2] = v[3]  = v[7]  = 1;
2893
	v[6] = v[10] = v[11] = 0;
2894
}
2895
 
2896
fastcall static void
2897
gen6_render_op_fill_box(struct sna *sna,
2898
			const struct sna_fill_op *op,
2899
			const BoxRec *box)
2900
{
2901
	int16_t *v;
2902
 
2903
	DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
2904
	     box->x1, box->y1, box->x2, box->y2));
2905
 
2906
	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state);
2907
 
2908
	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
2909
	sna->render.vertex_used += 6;
2910
	assert(sna->render.vertex_used <= sna->render.vertex_size);
2911
 
2912
	v[0] = box->x2;
2913
	v[8] = v[4] = box->x1;
2914
	v[5] = v[1] = box->y2;
2915
	v[9] = box->y1;
2916
 
2917
	v[7] = v[2]  = v[3]  = 1;
2918
	v[6] = v[10] = v[11] = 0;
2919
}
2920
 
2921
fastcall static void
2922
gen6_render_op_fill_boxes(struct sna *sna,
2923
			  const struct sna_fill_op *op,
2924
			  const BoxRec *box,
2925
			  int nbox)
2926
{
2927
	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
2928
	     box->x1, box->y1, box->x2, box->y2, nbox));
2929
 
2930
	do {
2931
		int nbox_this_time;
2932
		int16_t *v;
2933
 
2934
		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
2935
						     gen6_emit_fill_state);
2936
		nbox -= nbox_this_time;
2937
 
2938
		v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
2939
		sna->render.vertex_used += 6 * nbox_this_time;
2940
		assert(sna->render.vertex_used <= sna->render.vertex_size);
2941
 
2942
		do {
2943
			v[0] = box->x2;
2944
			v[8] = v[4] = box->x1;
2945
			v[5] = v[1] = box->y2;
2946
			v[9] = box->y1;
2947
			v[7] = v[2]  = v[3]  = 1;
2948
			v[6] = v[10] = v[11] = 0;
2949
			box++; v += 12;
2950
		} while (--nbox_this_time);
2951
	} while (nbox);
2952
}
2953
 
2954
static void
2955
gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op)
2956
{
2957
	DBG(("%s()\n", __FUNCTION__));
2958
 
2959
	assert(!sna->render.active);
2960
	if (sna->render.vertex_offset)
2961
		gen4_vertex_flush(sna);
2962
	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
2963
}
2964
 
2965
/* Set up a reusable solid-fill operation on dst/dst_bo.  On success the
 * caller drives the fill through op->blt/op->box/op->boxes and must call
 * op->done to flush and release the solid-source bo.
 *
 * Falls back to the blitter when it is preferred/already engaged, or when
 * the alu is not a plain copy/clear, or when the target exceeds the 3D
 * pipeline limits; in those cases the return value is whatever
 * sna_blt_fill() reports.
 */
static bool
gen6_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color,
		 struct sna_fill_op *op)
{
	DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color));

	/* Prefer the BLT ring when it is already the better choice. */
	if (prefer_blt_fill(sna, dst_bo) &&
	    sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 op))
		return true;

	/* The RENDER path only handles GXcopy/GXclear and sizes that fit
	 * the 3D pipeline; otherwise delegate to the blitter entirely. */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return sna_blt_fill(sna, alu,
				    dst_bo, dst->drawable.bitsPerPixel,
				    color,
				    op);

	if (alu == GXclear)
		color = 0;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	op->base.dst.bo = dst_bo;
	op->base.dst.x = op->base.dst.y = 0;

	/* Solid colours are sampled from a small cached source bo;
	 * released again in gen6_render_op_fill_done(). */
	op->base.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	op->base.mask.bo = NULL;

	op->base.need_magic_ca_pass = false;
	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(op->base.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(op->base.u.gen6.flags) == FILL_VERTEX);

	/* Ensure the destination fits in the batch; submit and retry if not. */
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
	}

	gen6_emit_fill_state(sna, &op->base);
	gen6_align_vertex(sna, &op->base);

	op->blt  = gen6_render_op_fill_blt;
	op->box  = gen6_render_op_fill_box;
	op->boxes = gen6_render_op_fill_boxes;
	op->done = gen6_render_op_fill_done;
	return true;
}
3026
 
3027
static bool
3028
gen6_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
3029
			     uint32_t color,
3030
			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
3031
			     uint8_t alu)
3032
{
3033
	BoxRec box;
3034
 
3035
	box.x1 = x1;
3036
	box.y1 = y1;
3037
	box.x2 = x2;
3038
	box.y2 = y2;
3039
 
3040
	return sna_blt_fill_boxes(sna, alu,
3041
				  bo, dst->drawable.bitsPerPixel,
3042
				  color, &box, 1);
3043
}
3044
 
3045
/* Fill a single rectangle (x1,y1)-(x2,y2) with a solid colour, choosing
 * between the BLT and RENDER rings.  Unlike gen6_render_fill() this is a
 * one-shot: it emits the rectangle and flushes immediately.
 *
 * Returns false only when batch space for the target bo cannot be made
 * even after submitting the current batch.
 */
static bool
gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
		     uint32_t color,
		     int16_t x1, int16_t y1,
		     int16_t x2, int16_t y2,
		     uint8_t alu)
{
	struct sna_composite_op tmp;
	int16_t *v;

	/* Prefer to use the BLT if already engaged */
	if (prefer_blt_fill(sna, bo) &&
	    gen6_render_fill_one_try_blt(sna, dst, bo, color,
					 x1, y1, x2, y2, alu))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return gen6_render_fill_one_try_blt(sna, dst, bo, color,
						    x1, y1, x2, y2, alu);

	if (alu == GXclear)
		color = 0;

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	/* Cached 1x1 solid source; released before returning. */
	tmp.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	/* Make room in the batch; on a second failure give up cleanly,
	 * dropping the solid-source reference. */
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
	}

	gen6_emit_fill_state(sna, &tmp);
	gen6_align_vertex(sna, &tmp);

	gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);

	DBG(("	(%d, %d), (%d, %d)\n", x1, y1, x2, y2));

	/* Three vertices of four int16s each; see gen6_render_op_fill_blt. */
	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = x2;
	v[8] = v[4] = x1;
	v[5] = v[1] = y2;
	v[9] = y1;
	v[7] = v[2]  = v[3]  = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}
3123
 
3124
static bool
3125
gen6_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
3126
{
3127
	BoxRec box;
3128
 
3129
	box.x1 = 0;
3130
	box.y1 = 0;
3131
	box.x2 = dst->drawable.width;
3132
	box.y2 = dst->drawable.height;
3133
 
3134
	return sna_blt_fill_boxes(sna, GXclear,
3135
				  bo, dst->drawable.bitsPerPixel,
3136
				  0, &box, 1);
3137
}
3138
 
3139
/* Clear an entire pixmap to zero, via the BLT ring when it is already
 * engaged (or when the target is too large for RENDER), otherwise via a
 * one-shot solid fill on the 3D pipeline.
 *
 * Returns false only when batch space for the target bo cannot be made
 * even after submitting the current batch.
 */
static bool
gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	struct sna_composite_op tmp;
	int16_t *v;

	DBG(("%s: %dx%d\n",
	     __FUNCTION__,
	     dst->drawable.width,
	     dst->drawable.height));

	/* Prefer to use the BLT if, and only if, already engaged */
	if (sna->kgem.ring == KGEM_BLT &&
	    gen6_render_clear_try_blt(sna, dst, bo))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(dst->drawable.width, dst->drawable.height))
		return gen6_render_clear_try_blt(sna, dst, bo);

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	/* Solid black/transparent (pixel value 0) source. */
	tmp.src.bo = sna_render_get_solid(sna, 0);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	/* Make room in the batch; on a second failure give up cleanly. */
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
	}

	gen6_emit_fill_state(sna, &tmp);
	gen6_align_vertex(sna, &tmp);

	gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);

	/* One full-surface rectangle; see gen6_render_op_fill_blt layout. */
	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = dst->drawable.width;
	v[5] = v[1] = dst->drawable.height;
	v[8] = v[4] = 0;
	v[9] = 0;

	v[7] = v[2]  = v[3]  = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}
3263 Serge 3208
#endif
3254 Serge 3209
 
3210
static void gen6_render_flush(struct sna *sna)
3211
{
3212
	gen4_vertex_close(sna);
3213
 
3214
	assert(sna->render.vb_id == 0);
3215
	assert(sna->render.vertex_offset == 0);
3216
}
3217
 
3263 Serge 3218
static void
3219
gen6_render_context_switch(struct kgem *kgem,
3220
			   int new_mode)
3221
{
3222
	if (kgem->nbatch) {
3223
		DBG(("%s: from %d to %d\n", __FUNCTION__, kgem->mode, new_mode));
3224
		_kgem_submit(kgem);
3225
	}
3254 Serge 3226
 
3263 Serge 3227
	kgem->ring = new_mode;
3228
}
3229
 
3254 Serge 3230
static void
3231
gen6_render_retire(struct kgem *kgem)
3232
{
3233
	struct sna *sna;
3234
 
3235
	if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire))
3236
		kgem->ring = kgem->mode;
3237
 
3238
	sna = container_of(kgem, struct sna, kgem);
3239
	if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
3240
		DBG(("%s: resetting idle vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
3241
		sna->render.vertex_used = 0;
3242
		sna->render.vertex_index = 0;
3243
	}
3244
}
3245
 
3263 Serge 3246
static void
3247
gen6_render_expire(struct kgem *kgem)
3248
{
3249
	struct sna *sna;
3254 Serge 3250
 
3263 Serge 3251
	sna = container_of(kgem, struct sna, kgem);
3252
	if (sna->render.vbo && !sna->render.vertex_used) {
3253
		DBG(("%s: discarding vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
3254
		kgem_bo_destroy(kgem, sna->render.vbo);
3255
		assert(!sna->render.active);
3256
		sna->render.vbo = NULL;
3257
		sna->render.vertices = sna->render.vertex_data;
3258
		sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
3259
		sna->render.vertex_used = 0;
3260
		sna->render.vertex_index = 0;
3261
	}
3262
}
3263
 
3254 Serge 3264
static void gen6_render_reset(struct sna *sna)
3265
{
3266
	sna->render_state.gen6.needs_invariant = true;
3267
	sna->render_state.gen6.first_state_packet = true;
3268
	sna->render_state.gen6.ve_id = 3 << 2;
3269
	sna->render_state.gen6.last_primitive = -1;
3270
 
3271
	sna->render_state.gen6.num_sf_outputs = 0;
3272
	sna->render_state.gen6.samplers = -1;
3273
	sna->render_state.gen6.blend = -1;
3274
	sna->render_state.gen6.kernel = -1;
3275
	sna->render_state.gen6.drawrect_offset = -1;
3276
	sna->render_state.gen6.drawrect_limit = -1;
3277
	sna->render_state.gen6.surface_table = -1;
3278
 
3279
	sna->render.vertex_offset = 0;
3280
	sna->render.nvertex_reloc = 0;
3281
	sna->render.vb_id = 0;
3282
}
3283
 
3284
static void gen6_render_fini(struct sna *sna)
3285
{
3258 Serge 3286
    kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo);
3254 Serge 3287
}
3288
 
3289
static bool is_gt2(struct sna *sna)
3290
{
3291
	return DEVICE_ID(sna->PciInfo) & 0x30;
3292
}
3293
 
3294
static bool is_mobile(struct sna *sna)
3295
{
3296
	return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6;
3297
}
3298
 
3299
static bool gen6_render_setup(struct sna *sna)
3300
{
3301
	struct gen6_render_state *state = &sna->render_state.gen6;
3302
	struct sna_static_stream general;
3303
	struct gen6_sampler_state *ss;
3304
	int i, j, k, l, m;
3305
 
3306
	state->info = >1_info;
3307
	if (is_gt2(sna))
3308
		state->info = >2_info; /* XXX requires GT_MODE WiZ disabled */
3309
 
3310
    sna_static_stream_init(&general);
3311
 
3312
	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
3313
	 * dumps, you know it points to zero.
3314
	 */
3315
    null_create(&general);
3316
    scratch_create(&general);
3317
 
3318
	for (m = 0; m < GEN6_KERNEL_COUNT; m++) {
3319
		if (wm_kernels[m].size) {
3320
			state->wm_kernel[m][1] =
3321
			sna_static_stream_add(&general,
3322
					       wm_kernels[m].data,
3323
					       wm_kernels[m].size,
3324
					       64);
3325
		} else {
3326
			if (USE_8_PIXEL_DISPATCH) {
3327
				state->wm_kernel[m][0] =
3328
					sna_static_stream_compile_wm(sna, &general,
3329
								     wm_kernels[m].data, 8);
3330
			}
3331
 
3332
			if (USE_16_PIXEL_DISPATCH) {
3333
				state->wm_kernel[m][1] =
3334
					sna_static_stream_compile_wm(sna, &general,
3335
								     wm_kernels[m].data, 16);
3336
			}
3337
 
3338
			if (USE_32_PIXEL_DISPATCH) {
3339
				state->wm_kernel[m][2] =
3340
					sna_static_stream_compile_wm(sna, &general,
3341
								     wm_kernels[m].data, 32);
3342
			}
3343
		}
3344
		if ((state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]) == 0) {
3345
			state->wm_kernel[m][1] =
3346
				sna_static_stream_compile_wm(sna, &general,
3347
							     wm_kernels[m].data, 16);
3348
		}
3349
	}
3350
 
3351
	ss = sna_static_stream_map(&general,
3352
				   2 * sizeof(*ss) *
3353
				   (2 +
3354
				   FILTER_COUNT * EXTEND_COUNT *
3355
				    FILTER_COUNT * EXTEND_COUNT),
3356
				   32);
3357
	state->wm_state = sna_static_stream_offsetof(&general, ss);
3358
	sampler_copy_init(ss); ss += 2;
3359
	sampler_fill_init(ss); ss += 2;
3360
	for (i = 0; i < FILTER_COUNT; i++) {
3361
		for (j = 0; j < EXTEND_COUNT; j++) {
3362
			for (k = 0; k < FILTER_COUNT; k++) {
3363
				for (l = 0; l < EXTEND_COUNT; l++) {
3364
					sampler_state_init(ss++, i, j);
3365
					sampler_state_init(ss++, k, l);
3366
				}
3367
			}
3368
		}
3369
	}
3370
 
3371
    state->cc_blend = gen6_composite_create_blend_state(&general);
3372
 
3373
    state->general_bo = sna_static_stream_fini(sna, &general);
3374
    return state->general_bo != NULL;
3375
}
3376
 
3377
/* Public entry point: initialise the gen6 render backend and install its
 * hooks on sna->kgem and sna->render.  Most RENDER-path hooks are left
 * commented out in this port; only copy/flush/reset/fini are live.
 *
 * Returns false if gen6_render_setup() fails to build the static state.
 */
bool gen6_render_init(struct sna *sna)
{
    if (!gen6_render_setup(sna))
		return false;

	sna->kgem.context_switch = gen6_render_context_switch;
      sna->kgem.retire = gen6_render_retire;
	sna->kgem.expire = gen6_render_expire;

//    sna->render.composite = gen6_render_composite;
//    sna->render.video = gen6_render_video;

//    sna->render.copy_boxes = gen6_render_copy_boxes;
    sna->render.copy = gen6_render_copy;

//    sna->render.fill_boxes = gen6_render_fill_boxes;
//    sna->render.fill = gen6_render_fill;
//    sna->render.fill_one = gen6_render_fill_one;
//    sna->render.clear = gen6_render_clear;

    sna->render.flush = gen6_render_flush;
    sna->render.reset = gen6_render_reset;
	sna->render.fini = gen6_render_fini;

    /* Limits of the gen6 3D pipeline for a single surface. */
    sna->render.max_3d_size = GEN6_MAX_SIZE;
    sna->render.max_3d_pitch = 1 << 18;
	return true;
}
3256 Serge 3405
 
3406
 
3407
void gen4_vertex_flush(struct sna *sna)
3408
{
3409
	DBG(("%s[%x] = %d\n", __FUNCTION__,
3410
	     4*sna->render.vertex_offset,
3411
	     sna->render.vertex_index - sna->render.vertex_start));
3412
 
3413
	assert(sna->render.vertex_offset);
3414
	assert(sna->render.vertex_index > sna->render.vertex_start);
3415
 
3416
	sna->kgem.batch[sna->render.vertex_offset] =
3417
		sna->render.vertex_index - sna->render.vertex_start;
3418
	sna->render.vertex_offset = 0;
3419
}
3420
 
3421
/* Retire the current vertex bo (fixing up its batch relocations) and
 * allocate a fresh, mappable one, copying over any vertices already
 * buffered in the embedded vertex_data array.
 *
 * Returns the number of free vertex slots in the new buffer, or 0 when
 * no bo could be mapped and we fell back to the embedded array.
 */
int gen4_vertex_finish(struct sna *sna)
{
	struct kgem_bo *bo;
	unsigned int i;
	unsigned hint, size;

	DBG(("%s: used=%d / %d\n", __FUNCTION__,
	     sna->render.vertex_used, sna->render.vertex_size));
	assert(sna->render.vertex_offset == 0);
	assert(sna->render.vertex_used);

//	sna_vertex_wait__locked(&sna->render);

	/* Note: we only need dword alignment (currently) */

	/* Detach the old vbo: resolve all pending vertex-buffer relocs
	 * against it before dropping our reference. */
	bo = sna->render.vbo;
	if (bo) {
		for (i = 0; i < sna->render.nvertex_reloc; i++) {
			DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
			     i, sna->render.vertex_reloc[i]));

			sna->kgem.batch[sna->render.vertex_reloc[i]] =
				kgem_add_reloc(&sna->kgem,
					       sna->render.vertex_reloc[i], bo,
					       I915_GEM_DOMAIN_VERTEX << 16,
					       0);
		}

		assert(!sna->render.active);
		sna->render.nvertex_reloc = 0;
		sna->render.vertex_used = 0;
		sna->render.vertex_index = 0;
		sna->render.vbo = NULL;
		sna->render.vb_id = 0;

		kgem_bo_destroy(&sna->kgem, bo);
	}

	/* If we just recycled a bo, allow a cached/unthrottled allocation. */
	hint = CREATE_GTT_MAP;
	if (bo)
		hint |= CREATE_CACHED | CREATE_NO_THROTTLE;

	/* Try 256KiB first, halving down to 16KiB under memory pressure. */
	size = 256*1024;
	assert(!sna->render.active);
	sna->render.vertices = NULL;
	sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
	while (sna->render.vbo == NULL && size > 16*1024) {
		size /= 2;
		sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
	}
	if (sna->render.vbo == NULL)
		sna->render.vbo = kgem_create_linear(&sna->kgem,
						     256*1024, CREATE_GTT_MAP);
	if (sna->render.vbo)
		sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
	if (sna->render.vertices == NULL) {
		/* Mapping failed: fall back to the embedded array. */
		if (sna->render.vbo) {
			kgem_bo_destroy(&sna->kgem, sna->render.vbo);
			sna->render.vbo = NULL;
		}
		sna->render.vertices = sna->render.vertex_data;
		sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
		return 0;
	}

	/* Carry over vertices already accumulated in the embedded array. */
	if (sna->render.vertex_used) {
		DBG(("%s: copying initial buffer x %d to handle=%d\n",
		     __FUNCTION__,
		     sna->render.vertex_used,
		     sna->render.vbo->handle));
		assert(sizeof(float)*sna->render.vertex_used <=
		       __kgem_bo_size(sna->render.vbo));
		memcpy(sna->render.vertices,
		       sna->render.vertex_data,
		       sizeof(float)*sna->render.vertex_used);
	}

	/* Vertex indices are 16-bit; clamp the usable size accordingly. */
	size = __kgem_bo_size(sna->render.vbo)/4;
	if (size >= UINT16_MAX)
		size = UINT16_MAX - 1;

	DBG(("%s: create vbo handle=%d, size=%d\n",
	     __FUNCTION__, sna->render.vbo->handle, size));

	sna->render.vertex_size = size;
	return sna->render.vertex_size - sna->render.vertex_used;
}
3508
 
3263 Serge 3509
/* Finalise vertex state at the end of a batch: decide where the vertex
 * data lives (existing vbo, inline in the batch, or a freshly written
 * bo), resolve all pending vertex-buffer relocations against it, and
 * release anything no longer needed.
 */
void gen4_vertex_close(struct sna *sna)
{
	struct kgem_bo *bo, *free_bo = NULL;
	unsigned int i, delta = 0;

	assert(sna->render.vertex_offset == 0);
	/* Nothing bound this batch -- nothing to resolve. */
	if (!sna->render.vb_id)
		return;

	DBG(("%s: used=%d, vbo active? %d, vb=%x, nreloc=%d\n",
	     __FUNCTION__, sna->render.vertex_used, sna->render.vbo ? sna->render.vbo->handle : 0,
	     sna->render.vb_id, sna->render.nvertex_reloc));

	assert(!sna->render.active);

	bo = sna->render.vbo;
	if (bo) {
		if (sna->render.vertex_size - sna->render.vertex_used < 64) {
			/* Nearly full: retire it and fall back to the
			 * embedded array for the next batch. */
			DBG(("%s: discarding vbo (full), handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
			sna->render.vbo = NULL;
			sna->render.vertices = sna->render.vertex_data;
			sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
			free_bo = bo;
		} else if (IS_CPU_MAP(bo->map) && !sna->kgem.has_llc) {
			/* Without LLC a CPU map is incoherent with the GPU;
			 * switch to a GTT mapping (or discard on failure). */
			DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
			sna->render.vertices =
				kgem_bo_map__gtt(&sna->kgem, sna->render.vbo);
			if (sna->render.vertices == NULL) {
				sna->render.vbo = NULL;
				sna->render.vertices = sna->render.vertex_data;
				sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
				free_bo = bo;
			}

		}
	} else {
		if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
			/* Small enough: append the vertices directly into
			 * the batch buffer and point the relocs at it. */
			DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
			     sna->render.vertex_used, sna->kgem.nbatch));
			memcpy(sna->kgem.batch + sna->kgem.nbatch,
			       sna->render.vertex_data,
			       sna->render.vertex_used * 4);
			delta = sna->kgem.nbatch * 4;
			bo = NULL;
			sna->kgem.nbatch += sna->render.vertex_used;
		} else {
			/* Otherwise upload into a throwaway linear bo. */
			bo = kgem_create_linear(&sna->kgem,
						4*sna->render.vertex_used,
						CREATE_NO_THROTTLE);
			if (bo && !kgem_bo_write(&sna->kgem, bo,
						 sna->render.vertex_data,
						 4*sna->render.vertex_used)) {
				kgem_bo_destroy(&sna->kgem, bo);
				bo = NULL;
			}
			DBG(("%s: new vbo: %d\n", __FUNCTION__,
			     sna->render.vertex_used));
			free_bo = bo;
		}
	}

	/* Resolve every recorded vertex-buffer relocation against the
	 * chosen backing store (bo==NULL means inline in the batch). */
	assert(sna->render.nvertex_reloc);
	for (i = 0; i < sna->render.nvertex_reloc; i++) {
		DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
		     i, sna->render.vertex_reloc[i]));

		sna->kgem.batch[sna->render.vertex_reloc[i]] =
			kgem_add_reloc(&sna->kgem,
				       sna->render.vertex_reloc[i], bo,
				       I915_GEM_DOMAIN_VERTEX << 16,
				       delta);
	}
	sna->render.nvertex_reloc = 0;
	sna->render.vb_id = 0;

	if (sna->render.vbo == NULL) {
		assert(!sna->render.active);
		sna->render.vertex_used = 0;
		sna->render.vertex_index = 0;
		assert(sna->render.vertices == sna->render.vertex_data);
		assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data));
	}

	if (free_bo)
		kgem_bo_destroy(&sna->kgem, free_bo);
}
3595