Subversion Repositories Kolibri OS

Rev

Rev 3254 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
3254 Serge 1
/*
 * Copyright © 2006,2008,2011 Intel Corporation
 * Copyright © 2007 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Wang Zhenyu
 *    Eric Anholt
 *    Carl Worth
 *    Keith Packard
 *    Chris Wilson
 *
 */
33
#ifdef HAVE_CONFIG_H
34
#include "config.h"
35
#endif
36
 
37
#include "sna.h"
38
#include "sna_reg.h"
39
#include "sna_render.h"
40
//#include "sna_render_inline.h"
41
//#include "sna_video.h"
42
 
43
#include "brw/brw.h"
44
#include "gen6_render.h"
45
 
46
#include "gen4_vertex.h"
47
 
48
/* Per-path debug switches: setting one to 1 disables the corresponding
 * accelerated path and forces the fallback. */
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_BOXES 0
#define NO_FILL_ONE 0
#define NO_FILL_CLEAR 0

#define NO_RING_SWITCH 1
#define PREFER_RENDER 0

/* Which pixel-shader dispatch widths to advertise in 3DSTATE_WM. */
#define USE_8_PIXEL_DISPATCH 1
#define USE_16_PIXEL_DISPATCH 1
#define USE_32_PIXEL_DISPATCH 0

#if !USE_8_PIXEL_DISPATCH && !USE_16_PIXEL_DISPATCH && !USE_32_PIXEL_DISPATCH
#error "Must select at least 8, 16 or 32 pixel dispatch"
#endif

/* Maximum 2D surface extent (in pixels) this backend will render to. */
#define GEN6_MAX_SIZE 8192
/* Per-GT hardware limits used when programming the fixed-function
 * pipeline (thread counts and URB partitioning). */
struct gt_info {
	int max_vs_threads;
	int max_gs_threads;
	int max_wm_threads;
	struct {
		int size;		/* URB size, in KB */
		int max_vs_entries;
		int max_gs_entries;
	} urb;
};
static const struct gt_info gt1_info = {
82
	.max_vs_threads = 24,
83
	.max_gs_threads = 21,
84
	.max_wm_threads = 40,
85
	.urb = { 32, 256, 256 },
86
};
87
 
88
static const struct gt_info gt2_info = {
89
	.max_vs_threads = 60,
90
	.max_gs_threads = 60,
91
	.max_wm_threads = 80,
92
	.urb = { 64, 256, 256 },
93
};
94
 
95
/* Pre-assembled pixel-shader for packed-YUV (e.g. YUY2) video:
 * sample, colourspace-convert, write. */
static const uint32_t ps_kernel_packed[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_argb.g6b"
#include "exa_wm_yuv_rgb.g6b"
#include "exa_wm_write.g6b"
};

/* Pre-assembled pixel-shader for planar YUV (separate Y/U/V surfaces). */
static const uint32_t ps_kernel_planar[][4] = {
#include "exa_wm_src_affine.g6b"
#include "exa_wm_src_sample_planar.g6b"
#include "exa_wm_yuv_rgb.g6b"
#include "exa_wm_write.g6b"
};
#define NOKERNEL(kernel_enum, func, ns) \
110
    [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns}
111
#define KERNEL(kernel_enum, kernel, ns) \
112
    [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), ns}
113
 
114
static const struct wm_kernel_info {
115
	const char *name;
116
	const void *data;
117
	unsigned int size;
118
	unsigned int num_surfaces;
119
} wm_kernels[] = {
120
	NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
121
	NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),
122
 
123
	NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
124
	NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),
125
 
126
	NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
127
	NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),
128
 
129
	NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
130
	NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),
131
 
132
	NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
133
	NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),
134
 
135
	KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
136
	KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
137
};
138
#undef KERNEL
139
 
140
static const struct blendinfo {
141
	bool src_alpha;
142
	uint32_t src_blend;
143
	uint32_t dst_blend;
144
} gen6_blend_op[] = {
145
	/* Clear */	{0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO},
146
	/* Src */	{0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO},
147
	/* Dst */	{0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ONE},
148
	/* Over */	{1, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
149
	/* OverReverse */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ONE},
150
	/* In */	{0, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
151
	/* InReverse */	{1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_SRC_ALPHA},
152
	/* Out */	{0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ZERO},
153
	/* OutReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
154
	/* Atop */	{1, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
155
	/* AtopReverse */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_SRC_ALPHA},
156
	/* Xor */	{1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA},
157
	/* Add */	{0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ONE},
158
};
159
 
160
/**
 * Highest-valued BLENDFACTOR used in gen6_blend_op.
 *
 * This leaves out GEN6_BLENDFACTOR_INV_DST_COLOR,
 * GEN6_BLENDFACTOR_INV_CONST_{COLOR,ALPHA},
 * GEN6_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA}
 */
#define GEN6_BLENDFACTOR_COUNT (GEN6_BLENDFACTOR_INV_DST_ALPHA + 1)

#define GEN6_BLEND_STATE_PADDED_SIZE	ALIGN(sizeof(struct gen6_blend_state), 64)

/* Byte offset of the pre-baked blend state for a (src, dst) factor pair. */
#define BLEND_OFFSET(s, d) \
	(((s) * GEN6_BLENDFACTOR_COUNT + (d)) * GEN6_BLEND_STATE_PADDED_SIZE)

#define NO_BLEND BLEND_OFFSET(GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO)
#define CLEAR BLEND_OFFSET(GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO)

/* Byte offset of the pre-baked sampler pair for the given
 * (src filter, src extend, mask filter, mask extend) combination. */
#define SAMPLER_OFFSET(sf, se, mf, me) \
	(((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me) + 2) * 2 * sizeof(struct gen6_sampler_state))

#define VERTEX_2s2s 0

#define COPY_SAMPLER 0
#define COPY_VERTEX VERTEX_2s2s
#define COPY_FLAGS(a) GEN6_SET_FLAGS(COPY_SAMPLER, (a) == GXcopy ? NO_BLEND : CLEAR, GEN6_WM_KERNEL_NOMASK, COPY_VERTEX)

#define FILL_SAMPLER (2 * sizeof(struct gen6_sampler_state))
#define FILL_VERTEX VERTEX_2s2s
#define FILL_FLAGS(op, format) GEN6_SET_FLAGS(FILL_SAMPLER, gen6_get_blend((op), false, (format)), GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)
#define FILL_FLAGS_NOBLEND GEN6_SET_FLAGS(FILL_SAMPLER, NO_BLEND, GEN6_WM_KERNEL_NOMASK, FILL_VERTEX)

/* Pack/unpack of the 32-bit per-op flags word: sampler offset and WM
 * kernel index live in the high half, blend offset and vertex layout
 * id in the low half. */
#define GEN6_SAMPLER(f) (((f) >> 16) & 0xfff0)
#define GEN6_BLEND(f) (((f) >> 0) & 0xfff0)
#define GEN6_KERNEL(f) (((f) >> 16) & 0xf)
#define GEN6_VERTEX(f) (((f) >> 0) & 0xf)
#define GEN6_SET_FLAGS(S, B, K, V)  (((S) | (K)) << 16 | ((B) | (V)))

#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
static inline bool too_large(int width, int height)
202
{
203
	return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE;
204
}
205
 
206
static uint32_t gen6_get_blend(int op,
207
			       bool has_component_alpha,
208
			       uint32_t dst_format)
209
{
210
	uint32_t src, dst;
211
 
212
//    src = GEN6_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
213
//    dst = GEN6_BLENDFACTOR_ZERO; //gen6_blend_op[op].dst_blend;
214
 
215
    src = GEN6_BLENDFACTOR_ONE; //gen6_blend_op[op].src_blend;
216
    dst = GEN6_BLENDFACTOR_INV_SRC_ALPHA; //gen6_blend_op[op].dst_blend;
217
 
218
#if 0
219
	/* If there's no dst alpha channel, adjust the blend op so that
220
	 * we'll treat it always as 1.
221
	 */
222
	if (PICT_FORMAT_A(dst_format) == 0) {
223
		if (src == GEN6_BLENDFACTOR_DST_ALPHA)
224
			src = GEN6_BLENDFACTOR_ONE;
225
		else if (src == GEN6_BLENDFACTOR_INV_DST_ALPHA)
226
			src = GEN6_BLENDFACTOR_ZERO;
227
	}
228
 
229
	/* If the source alpha is being used, then we should only be in a
230
	 * case where the source blend factor is 0, and the source blend
231
	 * value is the mask channels multiplied by the source picture's alpha.
232
	 */
233
	if (has_component_alpha && gen6_blend_op[op].src_alpha) {
234
		if (dst == GEN6_BLENDFACTOR_SRC_ALPHA)
235
			dst = GEN6_BLENDFACTOR_SRC_COLOR;
236
		else if (dst == GEN6_BLENDFACTOR_INV_SRC_ALPHA)
237
			dst = GEN6_BLENDFACTOR_INV_SRC_COLOR;
238
	}
239
 
240
	DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n",
241
	     op, dst_format, PICT_FORMAT_A(dst_format),
242
	     src, dst, (int)BLEND_OFFSET(src, dst)));
243
#endif
244
 
245
	return BLEND_OFFSET(src, dst);
246
}
247
 
248
/* Map a Picture format to a sampler SURFACEFORMAT.
 *
 * NOTE(review): hard-wired to B8G8R8A8 in this port; the full switch
 * is kept commented out for when more formats are supported. */
static uint32_t gen6_get_card_format(PictFormat format)
{
    return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;

/*
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
		return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_x8r8g8b8:
		return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
	case PICT_a8b8g8r8:
		return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_x8b8g8r8:
		return GEN6_SURFACEFORMAT_R8G8B8X8_UNORM;
	case PICT_a2r10g10b10:
		return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_x2r10g10b10:
		return GEN6_SURFACEFORMAT_B10G10R10X2_UNORM;
	case PICT_r8g8b8:
		return GEN6_SURFACEFORMAT_R8G8B8_UNORM;
	case PICT_r5g6b5:
		return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_a1r5g5b5:
		return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN6_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
		return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
 */
}
/* Map a Picture format to a render-target SURFACEFORMAT.
 *
 * NOTE(review): hard-wired to B8G8R8A8 in this port; the full switch
 * is kept under #if 0 for when more formats are supported. */
static uint32_t gen6_get_dest_format(PictFormat format)
{
    return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;

#if 0
	switch (format) {
	default:
		return -1;
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
		return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM;
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
		return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
	case PICT_r5g6b5:
		return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
	case PICT_x1r5g5b5:
	case PICT_a1r5g5b5:
		return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
	case PICT_a8:
		return GEN6_SURFACEFORMAT_A8_UNORM;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM;
	}
#endif
}
#if 0

/* Validators and enum translators, compiled out in this port because
 * the format/filter/repeat helpers above are currently hard-wired.
 * Kept for when the full Render paths are restored. */

static bool gen6_check_dst_format(PictFormat format)
{
	if (gen6_get_dest_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

static bool gen6_check_format(uint32_t format)
{
	if (gen6_get_card_format(format) != -1)
		return true;

	DBG(("%s: unhandled format: %x\n", __FUNCTION__, (int)format));
	return false;
}

static uint32_t gen6_filter(uint32_t filter)
{
	switch (filter) {
	default:
		assert(0);
		/* fallthrough */
	case PictFilterNearest:
		return SAMPLER_FILTER_NEAREST;
	case PictFilterBilinear:
		return SAMPLER_FILTER_BILINEAR;
	}
}

static uint32_t gen6_check_filter(PicturePtr picture)
{
	switch (picture->filter) {
	case PictFilterNearest:
	case PictFilterBilinear:
		return true;
	default:
		return false;
	}
}

static uint32_t gen6_repeat(uint32_t repeat)
{
	switch (repeat) {
	default:
		assert(0);
		/* fallthrough */
	case RepeatNone:
		return SAMPLER_EXTEND_NONE;
	case RepeatNormal:
		return SAMPLER_EXTEND_REPEAT;
	case RepeatPad:
		return SAMPLER_EXTEND_PAD;
	case RepeatReflect:
		return SAMPLER_EXTEND_REFLECT;
	}
}

static bool gen6_check_repeat(PicturePtr picture)
{
	if (!picture->repeat)
		return true;

	switch (picture->repeatType) {
	case RepeatNone:
	case RepeatNormal:
	case RepeatPad:
	case RepeatReflect:
		return true;
	default:
		return false;
	}
}
#endif
static int
392
gen6_choose_composite_kernel(int op, bool has_mask, bool is_ca, bool is_affine)
393
{
394
	int base;
395
 
396
	if (has_mask) {
397
/*
398
		if (is_ca) {
399
			if (gen6_blend_op[op].src_alpha)
400
				base = GEN6_WM_KERNEL_MASKCA_SRCALPHA;
401
			else
402
				base = GEN6_WM_KERNEL_MASKCA;
403
		} else
404
			base = GEN6_WM_KERNEL_MASK;
405
*/
406
	} else
407
		base = GEN6_WM_KERNEL_NOMASK;
408
 
409
	return base + !is_affine;
410
}
411
 
412
static void
413
gen6_emit_urb(struct sna *sna)
414
{
415
	OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2));
416
	OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
417
		  (sna->render_state.gen6.info->urb.max_vs_entries << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
418
	OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
419
		  (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
420
}
421
 
422
static void
423
gen6_emit_state_base_address(struct sna *sna)
424
{
425
	OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2));
426
	OUT_BATCH(0); /* general */
427
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */
428
				 sna->kgem.nbatch,
429
				 NULL,
430
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
431
				 BASE_ADDRESS_MODIFY));
432
	OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */
433
				 sna->kgem.nbatch,
434
				 sna->render_state.gen6.general_bo,
435
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
436
				 BASE_ADDRESS_MODIFY));
437
	OUT_BATCH(0); /* indirect */
438
	OUT_BATCH(kgem_add_reloc(&sna->kgem,
439
				 sna->kgem.nbatch,
440
				 sna->render_state.gen6.general_bo,
441
				 I915_GEM_DOMAIN_INSTRUCTION << 16,
442
				 BASE_ADDRESS_MODIFY));
443
 
444
	/* upper bounds, disable */
445
	OUT_BATCH(0);
446
	OUT_BATCH(BASE_ADDRESS_MODIFY);
447
	OUT_BATCH(0);
448
	OUT_BATCH(BASE_ADDRESS_MODIFY);
449
}
450
 
451
static void
452
gen6_emit_viewports(struct sna *sna)
453
{
454
	OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
455
		  GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
456
		  (4 - 2));
457
	OUT_BATCH(0);
458
	OUT_BATCH(0);
459
	OUT_BATCH(0);
460
}
461
 
462
static void
463
gen6_emit_vs(struct sna *sna)
464
{
465
	/* disable VS constant buffer */
466
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
467
	OUT_BATCH(0);
468
	OUT_BATCH(0);
469
	OUT_BATCH(0);
470
	OUT_BATCH(0);
471
 
472
	OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2));
473
	OUT_BATCH(0); /* no VS kernel */
474
	OUT_BATCH(0);
475
	OUT_BATCH(0);
476
	OUT_BATCH(0);
477
	OUT_BATCH(0); /* pass-through */
478
}
479
 
480
static void
481
gen6_emit_gs(struct sna *sna)
482
{
483
	/* disable GS constant buffer */
484
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
485
	OUT_BATCH(0);
486
	OUT_BATCH(0);
487
	OUT_BATCH(0);
488
	OUT_BATCH(0);
489
 
490
	OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2));
491
	OUT_BATCH(0); /* no GS kernel */
492
	OUT_BATCH(0);
493
	OUT_BATCH(0);
494
	OUT_BATCH(0);
495
	OUT_BATCH(0);
496
	OUT_BATCH(0); /* pass-through */
497
}
498
 
499
static void
500
gen6_emit_clip(struct sna *sna)
501
{
502
	OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2));
503
	OUT_BATCH(0);
504
	OUT_BATCH(0); /* pass-through */
505
	OUT_BATCH(0);
506
}
507
 
508
static void
509
gen6_emit_wm_constants(struct sna *sna)
510
{
511
	/* disable WM constant buffer */
512
	OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
513
	OUT_BATCH(0);
514
	OUT_BATCH(0);
515
	OUT_BATCH(0);
516
	OUT_BATCH(0);
517
}
518
 
519
static void
520
gen6_emit_null_depth_buffer(struct sna *sna)
521
{
522
	OUT_BATCH(GEN6_3DSTATE_DEPTH_BUFFER | (7 - 2));
523
	OUT_BATCH(GEN6_SURFACE_NULL << GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT |
524
		  GEN6_DEPTHFORMAT_D32_FLOAT << GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT);
525
	OUT_BATCH(0);
526
	OUT_BATCH(0);
527
	OUT_BATCH(0);
528
	OUT_BATCH(0);
529
	OUT_BATCH(0);
530
 
531
	OUT_BATCH(GEN6_3DSTATE_CLEAR_PARAMS | (2 - 2));
532
	OUT_BATCH(0);
533
}
534
 
535
static void
536
gen6_emit_invariant(struct sna *sna)
537
{
538
	OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D);
539
 
540
	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
541
	OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
542
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
543
	OUT_BATCH(0);
544
 
545
	OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
546
	OUT_BATCH(1);
547
 
548
	gen6_emit_urb(sna);
549
 
550
	gen6_emit_state_base_address(sna);
551
 
552
	gen6_emit_viewports(sna);
553
	gen6_emit_vs(sna);
554
	gen6_emit_gs(sna);
555
	gen6_emit_clip(sna);
556
	gen6_emit_wm_constants(sna);
557
	gen6_emit_null_depth_buffer(sna);
558
 
559
	sna->render_state.gen6.needs_invariant = false;
560
}
561
 
562
static bool
563
gen6_emit_cc(struct sna *sna, int blend)
564
{
565
	struct gen6_render_state *render = &sna->render_state.gen6;
566
 
567
	if (render->blend == blend)
568
		return blend != NO_BLEND;
569
 
570
	DBG(("%s: blend = %x\n", __FUNCTION__, blend));
571
 
572
	OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
573
	OUT_BATCH((render->cc_blend + blend) | 1);
574
	if (render->blend == (unsigned)-1) {
575
		OUT_BATCH(1);
576
		OUT_BATCH(1);
577
	} else {
578
		OUT_BATCH(0);
579
		OUT_BATCH(0);
580
	}
581
 
582
	render->blend = blend;
583
	return blend != NO_BLEND;
584
}
585
 
586
static void
587
gen6_emit_sampler(struct sna *sna, uint32_t state)
588
{
589
	if (sna->render_state.gen6.samplers == state)
590
		return;
591
 
592
	sna->render_state.gen6.samplers = state;
593
 
594
	DBG(("%s: sampler = %x\n", __FUNCTION__, state));
595
 
596
	OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
597
		  GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
598
		  (4 - 2));
599
	OUT_BATCH(0); /* VS */
600
	OUT_BATCH(0); /* GS */
601
	OUT_BATCH(sna->render_state.gen6.wm_state + state);
602
}
603
 
604
static void
605
gen6_emit_sf(struct sna *sna, bool has_mask)
606
{
607
	int num_sf_outputs = has_mask ? 2 : 1;
608
 
609
	if (sna->render_state.gen6.num_sf_outputs == num_sf_outputs)
610
		return;
611
 
612
	DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n",
613
	     __FUNCTION__, num_sf_outputs, 1, 0));
614
 
615
	sna->render_state.gen6.num_sf_outputs = num_sf_outputs;
616
 
617
	OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2));
618
	OUT_BATCH(num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT |
619
		  1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT |
620
		  1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT);
621
	OUT_BATCH(0);
622
	OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE);
623
	OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
624
	OUT_BATCH(0);
625
	OUT_BATCH(0);
626
	OUT_BATCH(0);
627
	OUT_BATCH(0);
628
	OUT_BATCH(0); /* DW9 */
629
	OUT_BATCH(0);
630
	OUT_BATCH(0);
631
	OUT_BATCH(0);
632
	OUT_BATCH(0);
633
	OUT_BATCH(0); /* DW14 */
634
	OUT_BATCH(0);
635
	OUT_BATCH(0);
636
	OUT_BATCH(0);
637
	OUT_BATCH(0);
638
	OUT_BATCH(0); /* DW19 */
639
}
640
 
641
static void
642
gen6_emit_wm(struct sna *sna, unsigned int kernel, bool has_mask)
643
{
644
	const uint32_t *kernels;
645
 
646
	if (sna->render_state.gen6.kernel == kernel)
647
		return;
648
 
649
	sna->render_state.gen6.kernel = kernel;
650
	kernels = sna->render_state.gen6.wm_kernel[kernel];
651
 
652
	DBG(("%s: switching to %s, num_surfaces=%d (8-pixel? %d, 16-pixel? %d,32-pixel? %d)\n",
653
	     __FUNCTION__,
654
	     wm_kernels[kernel].name, wm_kernels[kernel].num_surfaces,
655
	    kernels[0], kernels[1], kernels[2]));
656
 
657
	OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2));
658
	OUT_BATCH(kernels[0] ?: kernels[1] ?: kernels[2]);
659
	OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT |
660
		  wm_kernels[kernel].num_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT);
661
	OUT_BATCH(0); /* scratch space */
662
	OUT_BATCH((kernels[0] ? 4 : kernels[1] ? 6 : 8) << GEN6_3DSTATE_WM_DISPATCH_0_START_GRF_SHIFT |
663
		  8 << GEN6_3DSTATE_WM_DISPATCH_1_START_GRF_SHIFT |
664
		  6 << GEN6_3DSTATE_WM_DISPATCH_2_START_GRF_SHIFT);
665
	OUT_BATCH((sna->render_state.gen6.info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT |
666
		  (kernels[0] ? GEN6_3DSTATE_WM_8_DISPATCH_ENABLE : 0) |
667
		  (kernels[1] ? GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) |
668
		  (kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) |
669
		  GEN6_3DSTATE_WM_DISPATCH_ENABLE);
670
	OUT_BATCH((1 + has_mask) << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT |
671
		  GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
672
	OUT_BATCH(kernels[2]);
673
	OUT_BATCH(kernels[1]);
674
}
675
 
676
static bool
677
gen6_emit_binding_table(struct sna *sna, uint16_t offset)
678
{
679
	if (sna->render_state.gen6.surface_table == offset)
680
		return false;
681
 
682
	/* Binding table pointers */
683
	OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS |
684
		  GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS |
685
		  (4 - 2));
686
	OUT_BATCH(0);		/* vs */
687
	OUT_BATCH(0);		/* gs */
688
	/* Only the PS uses the binding table */
689
	OUT_BATCH(offset*4);
690
 
691
	sna->render_state.gen6.surface_table = offset;
692
	return true;
693
}
694
 
695
static bool
696
gen6_emit_drawing_rectangle(struct sna *sna,
697
			    const struct sna_composite_op *op)
698
{
699
	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
700
	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
701
 
702
	assert(!too_large(op->dst.x, op->dst.y));
703
	assert(!too_large(op->dst.width, op->dst.height));
704
 
705
	if (sna->render_state.gen6.drawrect_limit  == limit &&
706
	    sna->render_state.gen6.drawrect_offset == offset)
707
		return false;
708
 
709
	/* [DevSNB-C+{W/A}] Before any depth stall flush (including those
710
	 * produced by non-pipelined state commands), software needs to first
711
	 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
712
	 * 0.
713
	 *
714
	 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
715
	 * BEFORE the pipe-control with a post-sync op and no write-cache
716
	 * flushes.
717
	 */
718
	if (!sna->render_state.gen6.first_state_packet) {
719
	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
720
	OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
721
		  GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
722
	OUT_BATCH(0);
723
	OUT_BATCH(0);
724
	}
725
 
726
	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
727
	OUT_BATCH(GEN6_PIPE_CONTROL_WRITE_TIME);
728
	OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
729
				 sna->render_state.gen6.general_bo,
730
				 I915_GEM_DOMAIN_INSTRUCTION << 16 |
731
				 I915_GEM_DOMAIN_INSTRUCTION,
732
				 64));
733
	OUT_BATCH(0);
734
 
735
	OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2));
736
	OUT_BATCH(0);
737
	OUT_BATCH(limit);
738
	OUT_BATCH(offset);
739
 
740
	sna->render_state.gen6.drawrect_offset = offset;
741
	sna->render_state.gen6.drawrect_limit = limit;
742
	return true;
743
}
744
 
745
static void
746
gen6_emit_vertex_elements(struct sna *sna,
747
			  const struct sna_composite_op *op)
748
{
749
	/*
750
	 * vertex data in vertex buffer
751
	 *    position: (x, y)
752
	 *    texture coordinate 0: (u0, v0) if (is_affine is true) else (u0, v0, w0)
753
	 *    texture coordinate 1 if (has_mask is true): same as above
754
	 */
755
	struct gen6_render_state *render = &sna->render_state.gen6;
756
	uint32_t src_format, dw;
757
	int id = GEN6_VERTEX(op->u.gen6.flags);
758
	bool has_mask;
759
 
760
	DBG(("%s: setup id=%d\n", __FUNCTION__, id));
761
 
762
	if (render->ve_id == id)
763
		return;
764
	render->ve_id = id;
765
 
766
	/* The VUE layout
767
	 *    dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
768
	 *    dword 4-7: position (x, y, 1.0, 1.0),
769
	 *    dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0)
770
	 *    dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0)
771
	 *
772
	 * dword 4-15 are fetched from vertex buffer
773
	 */
774
	has_mask = (id >> 2) != 0;
775
	OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS |
776
		((2 * (3 + has_mask)) + 1 - 2));
777
 
778
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
779
		  GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
780
 
781
	OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT |
782
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT |
783
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
784
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT);
785
 
786
	/* x,y */
787
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
788
		  GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
789
 
790
	OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
791
		  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
792
		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
793
		  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
794
 
795
	/* u0, v0, w0 */
796
	DBG(("%s: first channel %d floats, offset=4b\n", __FUNCTION__, id & 3));
797
	dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
798
	switch (id & 3) {
799
	default:
800
		assert(0);
801
	case 0:
802
		src_format = GEN6_SURFACEFORMAT_R16G16_SSCALED;
803
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
804
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
805
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
806
		break;
807
	case 1:
808
		src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
809
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
810
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
811
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
812
		break;
813
	case 2:
814
		src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
815
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
816
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
817
		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
818
		break;
819
	case 3:
820
		src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
821
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
822
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
823
		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
824
		break;
825
	}
826
	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
827
		  src_format << VE0_FORMAT_SHIFT |
828
		  4 << VE0_OFFSET_SHIFT);
829
	OUT_BATCH(dw);
830
 
831
	/* u1, v1, w1 */
832
	if (has_mask) {
833
		unsigned offset = 4 + ((id & 3) ?: 1) * sizeof(float);
834
		DBG(("%s: second channel %d floats, offset=%db\n", __FUNCTION__, id >> 2, offset));
835
		dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
836
		switch (id >> 2) {
837
		case 1:
838
			src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
839
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
840
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
841
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
842
			break;
843
		default:
844
			assert(0);
845
		case 2:
846
			src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
847
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
848
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
849
			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
850
			break;
851
		case 3:
852
			src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
853
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
854
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
855
			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
856
			break;
857
		}
858
		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
859
			  src_format << VE0_FORMAT_SHIFT |
860
			  offset << VE0_OFFSET_SHIFT);
861
		OUT_BATCH(dw);
862
	}
863
}
864
 
865
static void
866
gen6_emit_flush(struct sna *sna)
867
{
868
	OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
869
	OUT_BATCH(GEN6_PIPE_CONTROL_WC_FLUSH |
870
		  GEN6_PIPE_CONTROL_TC_FLUSH |
871
		  GEN6_PIPE_CONTROL_CS_STALL);
872
	OUT_BATCH(0);
873
	OUT_BATCH(0);
874
}
875
 
876
static void
877
gen6_emit_state(struct sna *sna,
878
		const struct sna_composite_op *op,
879
		uint16_t wm_binding_table)
880
{
881
	bool need_stall = wm_binding_table & 1;
882
 
883
	if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)))
884
		need_stall = false;
885
	gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags));
886
	gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2);
887
	gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2);
888
	gen6_emit_vertex_elements(sna, op);
889
 
890
	need_stall |= gen6_emit_binding_table(sna, wm_binding_table & ~1);
891
	if (gen6_emit_drawing_rectangle(sna, op))
892
		need_stall = false;
893
	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
894
        gen6_emit_flush(sna);
895
        kgem_clear_dirty(&sna->kgem);
896
		if (op->dst.bo->exec)
897
		kgem_bo_mark_dirty(op->dst.bo);
898
		need_stall = false;
899
	}
900
	if (need_stall) {
901
		OUT_BATCH(GEN6_PIPE_CONTROL | (4 - 2));
902
		OUT_BATCH(GEN6_PIPE_CONTROL_CS_STALL |
903
			  GEN6_PIPE_CONTROL_STALL_AT_SCOREBOARD);
904
		OUT_BATCH(0);
905
		OUT_BATCH(0);
906
	}
907
	sna->render_state.gen6.first_state_packet = false;
908
}
909
 
910
static bool gen6_magic_ca_pass(struct sna *sna,
911
			       const struct sna_composite_op *op)
912
{
913
	struct gen6_render_state *state = &sna->render_state.gen6;
914
 
915
	if (!op->need_magic_ca_pass)
916
		return false;
917
 
918
	DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__,
919
	     sna->render.vertex_start, sna->render.vertex_index));
920
 
921
	gen6_emit_flush(sna);
922
 
923
	gen6_emit_cc(sna, gen6_get_blend(PictOpAdd, true, op->dst.format));
924
	gen6_emit_wm(sna,
925
		     gen6_choose_composite_kernel(PictOpAdd,
926
						  true, true,
927
						  op->is_affine),
928
		     true);
929
 
930
	OUT_BATCH(GEN6_3DPRIMITIVE |
931
		  GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
932
		  _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |
933
 
934
		  4);
935
	OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start);
936
	OUT_BATCH(sna->render.vertex_start);
937
	OUT_BATCH(1);	/* single instance */
938
	OUT_BATCH(0);	/* start instance location */
939
	OUT_BATCH(0);	/* index buffer offset, ignored */
940
 
941
	state->last_primitive = sna->kgem.nbatch;
942
	return true;
943
}
944
 
945
typedef struct gen6_surface_state_padded {
946
	struct gen6_surface_state state;
947
	char pad[32 - sizeof(struct gen6_surface_state)];
948
} gen6_surface_state_padded;
949
 
950
/* Reserve 64 zeroed bytes in the static stream, used for the legacy
 * border color and the null depth-stencil state. */
static void null_create(struct sna_static_stream *stream)
{
	/* A bunch of zeros useful for legacy border color and depth-stencil */
	sna_static_stream_map(stream, 64, 64);
}
/* Reserve a 64-byte scratch slot in the static stream. */
static void scratch_create(struct sna_static_stream *stream)
{
	/* 64 bytes of scratch space for random writes, such as
	 * the pipe-control w/a.
	 */
	sna_static_stream_map(stream, 64, 64);
}
static void
965
sampler_state_init(struct gen6_sampler_state *sampler_state,
966
		   sampler_filter_t filter,
967
		   sampler_extend_t extend)
968
{
969
	sampler_state->ss0.lod_preclamp = 1;	/* GL mode */
970
 
971
	/* We use the legacy mode to get the semantics specified by
972
	 * the Render extension. */
973
	sampler_state->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY;
974
 
975
	switch (filter) {
976
	default:
977
	case SAMPLER_FILTER_NEAREST:
978
		sampler_state->ss0.min_filter = GEN6_MAPFILTER_NEAREST;
979
		sampler_state->ss0.mag_filter = GEN6_MAPFILTER_NEAREST;
980
		break;
981
	case SAMPLER_FILTER_BILINEAR:
982
		sampler_state->ss0.min_filter = GEN6_MAPFILTER_LINEAR;
983
		sampler_state->ss0.mag_filter = GEN6_MAPFILTER_LINEAR;
984
		break;
985
	}
986
 
987
	switch (extend) {
988
	default:
989
	case SAMPLER_EXTEND_NONE:
990
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
991
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
992
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER;
993
		break;
994
	case SAMPLER_EXTEND_REPEAT:
995
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
996
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
997
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP;
998
		break;
999
	case SAMPLER_EXTEND_PAD:
1000
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
1001
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
1002
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP;
1003
		break;
1004
	case SAMPLER_EXTEND_REFLECT:
1005
		sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
1006
		sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
1007
		sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR;
1008
		break;
1009
	}
1010
}
1011
 
1012
static void
1013
sampler_copy_init(struct gen6_sampler_state *ss)
1014
{
1015
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1016
	ss->ss3.non_normalized_coord = 1;
1017
 
1018
	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1019
}
1020
 
1021
static void
1022
sampler_fill_init(struct gen6_sampler_state *ss)
1023
{
1024
	sampler_state_init(ss, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_REPEAT);
1025
	ss->ss3.non_normalized_coord = 1;
1026
 
1027
	sampler_state_init(ss+1, SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE);
1028
}
1029
 
1030
/* Translate an I915_TILING_* value into SURFACE_STATE tiling bits.
 * This port only ever allocates linear (untiled) buffers, so the
 * answer is always 0.  The upstream driver mapped I915_TILING_X to
 * GEN6_SURFACE_TILED and I915_TILING_Y to
 * GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y here. */
static uint32_t
gen6_tiling_bits(uint32_t tiling)
{
	(void)tiling;	/* unused while tiling is disabled */
	return 0;
}
1043
 
1044
/**
 * Sets up the common fields for a surface state buffer for the given
 * picture in the given surface state buffer.
 *
 * Returns the byte offset of the SURFACE_STATE entry within the batch,
 * reusing a previously-emitted entry for the same bo/format when one
 * is cached on the bo.
 */
static int
gen6_bind_bo(struct sna *sna,
         struct kgem_bo *bo,
	     uint32_t width,
	     uint32_t height,
	     uint32_t format,
	     bool is_dst)
{
	uint32_t *ss;
	uint32_t domains;
	uint16_t offset;
	uint32_t is_scanout = is_dst && bo->scanout;

	/* After the first bind, we manage the cache domains within the batch */
	offset = kgem_bo_get_binding(bo, format | is_scanout << 31);
	if (offset) {
		DBG(("[%x]  bo(handle=%d), format=%d, reuse %s binding\n",
		     offset, bo->handle, format,
		     is_dst ? "render" : "sampler"));
		if (is_dst)
			kgem_bo_mark_dirty(bo);
		return offset * sizeof(uint32_t);
	}

	/* Carve a fresh entry out of the surface area at the top of the batch. */
	offset = sna->kgem.surface -=
		sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
	ss = sna->kgem.batch + offset;
	ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT |
		 GEN6_SURFACE_BLEND_ENABLED |
		 format << GEN6_SURFACE_FORMAT_SHIFT);
	/* Destination surfaces are both read and written by the render
	 * engine; sources are only read by the sampler. */
	if (is_dst)
		domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER;
	else
		domains = I915_GEM_DOMAIN_SAMPLER << 16;
	/* ss[1] holds the bo address, patched via relocation at exec. */
	ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
	ss[2] = ((width - 1)  << GEN6_SURFACE_WIDTH_SHIFT |
		 (height - 1) << GEN6_SURFACE_HEIGHT_SHIFT);
	assert(bo->pitch <= (1 << 18));
	ss[3] = (gen6_tiling_bits(bo->tiling) |
		 (bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT);
	ss[4] = 0;
	/* NOTE(review): ss[5] looks like cacheability control — scanout
	 * left uncached (0), everything else 3<<16; confirm against the
	 * SNB PRM SURFACE_STATE definition. */
	ss[5] = is_scanout ? 0 : 3 << 16;

	/* Remember this binding on the bo for reuse within the batch. */
	kgem_bo_set_binding(bo, format | is_scanout << 31, offset);

	DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n",
	     offset, bo->handle, ss[1],
	     format, width, height, bo->pitch, bo->tiling,
	     domains & 0xffff ? "render" : "sampler"));

	return offset * sizeof(uint32_t);
}
1100
 
1101
/* Emit 3DSTATE_VERTEX_BUFFERS for the vertex layout `id'.  The buffer
 * address dword is left as 0 and its batch position recorded in
 * vertex_reloc[] so it can be relocated once the vbo is finalised. */
static void gen6_emit_vertex_buffer(struct sna *sna,
				    const struct sna_composite_op *op)
{
	int id = GEN6_VERTEX(op->u.gen6.flags);

	OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3);
	OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
		  4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT);
	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
	OUT_BATCH(0);
	OUT_BATCH(~0); /* max address: disabled */
	OUT_BATCH(0);

	/* Mark this layout as bound for the rest of the batch. */
	sna->render.vb_id |= 1 << id;
}
1116
 
1117
/* Open a 3DPRIMITIVE (RECTLIST) packet.  The vertex count is left as 0
 * and patched later through render.vertex_offset.  If the last thing in
 * the batch is still our previous primitive, reopen it instead of
 * emitting a new packet. */
static void gen6_emit_primitive(struct sna *sna)
{
	if (sna->kgem.nbatch == sna->render_state.gen6.last_primitive) {
		DBG(("%s: continuing previous primitive, start=%d, index=%d\n",
		     __FUNCTION__,
		     sna->render.vertex_start,
		     sna->render.vertex_index));
		/* Point vertex_offset back at the count dword of the
		 * still-open 6-dword packet. */
		sna->render.vertex_offset = sna->kgem.nbatch - 5;
		return;
	}

	OUT_BATCH(GEN6_3DPRIMITIVE |
		  GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
		  _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT |

		  4);
	sna->render.vertex_offset = sna->kgem.nbatch;
	OUT_BATCH(0);	/* vertex count, to be filled in later */
	OUT_BATCH(sna->render.vertex_index);
	OUT_BATCH(1);	/* single instance */
	OUT_BATCH(0);	/* start instance location */
	OUT_BATCH(0);	/* index buffer offset, ignored */
	sna->render.vertex_start = sna->render.vertex_index;
	DBG(("%s: started new primitive: index=%d\n",
	     __FUNCTION__, sna->render.vertex_start));

	sna->render_state.gen6.last_primitive = sna->kgem.nbatch;
}
1145
 
1146
static bool gen6_rectangle_begin(struct sna *sna,
1147
				 const struct sna_composite_op *op)
1148
{
1149
	int id = 1 << GEN6_VERTEX(op->u.gen6.flags);
1150
	int ndwords;
1151
 
1152
	ndwords = op->need_magic_ca_pass ? 60 : 6;
1153
	if ((sna->render.vb_id & id) == 0)
1154
		ndwords += 5;
1155
	if (!kgem_check_batch(&sna->kgem, ndwords))
1156
		return false;
1157
 
1158
	if ((sna->render.vb_id & id) == 0)
1159
		gen6_emit_vertex_buffer(sna, op);
1160
 
1161
	gen6_emit_primitive(sna);
1162
	return true;
1163
}
1164
 
1165
/* Out of vbo space: close the open primitive (and run the CA fixup if
 * required), then start a fresh vertex buffer.  Returns the available
 * vertex space, or 0 if the whole batch must be submitted first. */
static int gen6_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{

	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 65 : 5))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 2))
		return 0;

	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		if (gen6_magic_ca_pass(sna, op)) {
			/* The CA pass replaced our blend/WM state;
			 * restore the operation's own settings. */
			gen6_emit_flush(sna);
			gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags));
			gen6_emit_wm(sna,
				     GEN6_KERNEL(op->u.gen6.flags),
				     GEN6_VERTEX(op->u.gen6.flags) >> 2);
		}
	}

	return gen4_vertex_finish(sna);
}
1187
 
1188
/* Reserve vertex space for up to `want' rectangles (3 vertices each),
 * flushing the vbo and/or submitting the batch as needed; emit_state is
 * invoked to rebuild the render state in a fresh batch.  Returns the
 * number of rectangles actually reserved (>= 1). */
inline static int gen6_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want,
				      void (*emit_state)(struct sna *, const struct sna_composite_op *op))
{
	int rem;

start:
	rem = vertex_space(sna);
	if (unlikely(rem < op->floats_per_rect)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen6_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	/* Open a primitive if none is currently in flight. */
	if (unlikely(sna->render.vertex_offset == 0 &&
		     !gen6_rectangle_begin(sna, op)))
		goto flush;

	/* Trim the request to what actually fits in the vbo. */
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;

	assert(want > 0);
	sna->render.vertex_index += 3*want;
	return want;

flush:
	/* Batch exhausted: finish the open primitive, submit, rebuild
	 * the state in the new batch and retry from the top. */
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
		gen6_magic_ca_pass(sna, op);
	}
//   sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	emit_state(sna, op);
	goto start;
}
1226
 
1227
inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna,
1228
							 uint16_t *offset)
1229
{
1230
	uint32_t *table;
1231
 
1232
	sna->kgem.surface -=
1233
		sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
1234
	/* Clear all surplus entries to zero in case of prefetch */
1235
	table = memset(sna->kgem.batch + sna->kgem.surface,
1236
		       0, sizeof(struct gen6_surface_state_padded));
1237
 
1238
	DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface));
1239
 
1240
	*offset = sna->kgem.surface;
1241
	return table;
1242
}
1243
 
1244
/* Prepare the batch for a render operation: switch to the RENDER ring,
 * guarantee room for the state plus four surface entries (submitting
 * the old batch if not), and emit the per-batch invariant state.
 * Returns the dirty status of the destination bo. */
static bool
gen6_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
		DBG(("%s: flushing batch: %d < %d+%d\n",
		     __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch,
		     150, 4*8));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen6.needs_invariant)
		gen6_emit_invariant(sna);

	return kgem_bo_is_dirty(op->dst.bo);
}
1262
 
1263
/* Bind the dst/src/mask surfaces into a fresh binding table and emit
 * the render state for a composite operation. */
static void gen6_emit_composite_state(struct sna *sna,
                      const struct sna_composite_op *op)
{
    uint32_t *binding_table;
    uint16_t offset;
    bool dirty;

	dirty = gen6_get_batch(sna, op);

    binding_table = gen6_composite_get_binding_table(sna, &offset);

    binding_table[0] =
        gen6_bind_bo(sna,
                op->dst.bo, op->dst.width, op->dst.height,
			    gen6_get_dest_format(op->dst.format),
			    true);
    binding_table[1] =
        gen6_bind_bo(sna,
                 op->src.bo, op->src.width, op->src.height,
                 op->src.card_format,
			     false);
    if (op->mask.bo) {
        binding_table[2] =
            gen6_bind_bo(sna,
                     op->mask.bo,
                     op->mask.width,
                     op->mask.height,
                     op->mask.card_format,
				     false);
    }

    /* If this table is identical to the previous one (compare the first
     * two entries as a u64, and the mask entry when present), discard
     * the new table and reuse the old offset to skip redundant state. */
    if (sna->kgem.surface == offset &&
        *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table &&
        (op->mask.bo == NULL ||
         sna->kgem.batch[sna->render_state.gen6.surface_table+2] == binding_table[2])) {
        sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
        offset = sna->render_state.gen6.surface_table;
    }

    /* NOTE(review): the bool `dirty' is OR'ed into the low bit of the
     * (aligned) offset; presumably gen6_emit_state() unpacks it — confirm. */
    gen6_emit_state(sna, op, offset | dirty);
}
1304
 
1305
/* Re-align the vertex buffer when the vertex stride changes so that
 * vertex_index * floats_per_vertex again coincides with vertex_used. */
static void
gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op)
{
	assert (sna->render.vertex_offset == 0);
	if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) {
		/* Not enough room left for even two rects of the new
		 * stride: start a fresh vbo instead. */
		if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
			gen4_vertex_finish(sna);

		DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
		     sna->render_state.gen6.floats_per_vertex,
		     op->floats_per_vertex,
		     sna->render.vertex_index,
		     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
		/* Round up to the next whole vertex of the new stride. */
		sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
		sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
		sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex;
	}
	assert((sna->render.vertex_used % op->floats_per_vertex) == 0);
}
1324
 
1325
#if 0
1326
 
1327
/* Emit one composite rectangle via the op's prim_emit vertex emitter.
 * (Inside an #if 0 region — currently compiled out.) */
fastcall static void
gen6_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);
	op->prim_emit(sna, op, r);
}
1335
 
1336
/* Composite a single box, using identical src/mask/dst offsets.
 * (Inside an #if 0 region — currently compiled out.) */
fastcall static void
gen6_render_composite_box(struct sna *sna,
			  const struct sna_composite_op *op,
			  const BoxRec *box)
{
	struct sna_composite_rectangles r;

	gen6_get_rectangles(sna, op, 1, gen6_emit_composite_state);

	DBG(("  %s: (%d, %d), (%d, %d)\n",
	     __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	r.dst.x = box->x1;
	r.dst.y = box->y1;
	r.width  = box->x2 - box->x1;
	r.height = box->y2 - box->y1;
	r.src = r.mask = r.dst;

	op->prim_emit(sna, op, &r);
}
1357
 
1358
/* Composite a list of boxes one rectangle at a time through prim_emit.
 * (Inside an #if 0 region — currently compiled out.) */
static void
gen6_render_composite_boxes__blt(struct sna *sna,
				 const struct sna_composite_op *op,
				 const BoxRec *box, int nbox)
{
	DBG(("composite_boxes(%d)\n", nbox));

	do {
		int nbox_this_time;

		/* Reserve as many rectangles as fit in this batch. */
		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d), (%d, %d)\n",
			     __FUNCTION__,
			     box->x1, box->y1, box->x2, box->y2));

			r.dst.x = box->x1;
			r.dst.y = box->y1;
			r.width  = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.src = r.mask = r.dst;

			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}
1390
 
1391
/* Composite a list of boxes using the op's bulk emit_boxes vertex
 * writer.  (Inside an #if 0 region — currently compiled out.) */
static void
gen6_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Claim the vertex space up front, then let emit_boxes
		 * fill it in one go. */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;
	} while (nbox);
}
1414
 
1415
/* Threaded variant of composite_boxes: vertex space is reserved under
 * the render lock, then the lock is dropped while the vertices are
 * written.  (Inside an #if 0 region — currently compiled out.) */
static void
gen6_render_composite_boxes__thread(struct sna *sna,
				    const struct sna_composite_op *op,
				    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, op, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;

		/* Pin the reservation, release the lock for the copy. */
		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
1446
 
1447
#endif
1448
 
1449
#ifndef MAX
/* NOTE: evaluates its arguments twice — do not pass expressions with
 * side effects. */
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
1452
 
1453
static uint32_t
1454
gen6_composite_create_blend_state(struct sna_static_stream *stream)
1455
{
1456
	char *base, *ptr;
1457
	int src, dst;
1458
 
1459
	base = sna_static_stream_map(stream,
1460
				     GEN6_BLENDFACTOR_COUNT * GEN6_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE,
1461
				     64);
1462
 
1463
	ptr = base;
1464
	for (src = 0; src < GEN6_BLENDFACTOR_COUNT; src++) {
1465
		for (dst= 0; dst < GEN6_BLENDFACTOR_COUNT; dst++) {
1466
			struct gen6_blend_state *blend =
1467
				(struct gen6_blend_state *)ptr;
1468
 
1469
			blend->blend0.dest_blend_factor = dst;
1470
			blend->blend0.source_blend_factor = src;
1471
			blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD;
1472
			blend->blend0.blend_enable =
1473
				!(dst == GEN6_BLENDFACTOR_ZERO && src == GEN6_BLENDFACTOR_ONE);
1474
 
1475
			blend->blend1.post_blend_clamp_enable = 1;
1476
			blend->blend1.pre_blend_clamp_enable = 1;
1477
 
1478
			ptr += GEN6_BLEND_STATE_PADDED_SIZE;
1479
		}
1480
	}
1481
 
1482
	return sna_static_stream_offsetof(stream, base);
1483
}
1484
 
1485
#if 0
1486
 
1487
/* Bind one plane/view of a video frame as a sampler surface, starting
 * src_offset bytes into the frame bo.  Returns the byte offset of the
 * SURFACE_STATE entry.  (Inside an #if 0 region — compiled out.) */
static uint32_t gen6_bind_video_source(struct sna *sna,
				       struct kgem_bo *src_bo,
				       uint32_t src_offset,
				       int src_width,
				       int src_height,
				       int src_pitch,
				       uint32_t src_surf_format)
{
	struct gen6_surface_state *ss;

	sna->kgem.surface -= sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);

	ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
	ss->ss0.surface_type = GEN6_SURFACE_2D;
	ss->ss0.surface_format = src_surf_format;

	/* Address patched by relocation; sampler read-only domain. */
	ss->ss1.base_addr =
		kgem_add_reloc(&sna->kgem,
			       sna->kgem.surface + 1,
			       src_bo,
			       I915_GEM_DOMAIN_SAMPLER << 16,
			       src_offset);

	/* Hardware encodes dimensions and pitch as value minus one. */
	ss->ss2.width  = src_width - 1;
	ss->ss2.height = src_height - 1;
	ss->ss3.pitch  = src_pitch - 1;

	return sna->kgem.surface * sizeof(uint32_t);
}
1516
 
1517
/* Emit the render state for a video blit.  The frame is carried in
 * op->priv.  Planar formats bind six source views (Y twice, then V
 * twice, then U twice, per the src_surf_base table below); packed YUV
 * binds a single view.  (Inside an #if 0 region — compiled out.) */
static void gen6_emit_video_state(struct sna *sna,
				  const struct sna_composite_op *op)
{
	struct sna_video_frame *frame = op->priv;
	uint32_t src_surf_format;
	uint32_t src_surf_base[6];
	int src_width[6];
	int src_height[6];
	int src_pitch[6];
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;
	int n_src, n;

	dirty = gen6_get_batch(sna, op);

	src_surf_base[0] = 0;
	src_surf_base[1] = 0;
	src_surf_base[2] = frame->VBufOffset;
	src_surf_base[3] = frame->VBufOffset;
	src_surf_base[4] = frame->UBufOffset;
	src_surf_base[5] = frame->UBufOffset;

	if (is_planar_fourcc(frame->id)) {
		/* Full-resolution Y plane; half-resolution U/V planes. */
		src_surf_format = GEN6_SURFACEFORMAT_R8_UNORM;
		src_width[1]  = src_width[0]  = frame->width;
		src_height[1] = src_height[0] = frame->height;
		src_pitch[1]  = src_pitch[0]  = frame->pitch[1];
		src_width[4]  = src_width[5]  = src_width[2]  = src_width[3] =
			frame->width / 2;
		src_height[4] = src_height[5] = src_height[2] = src_height[3] =
			frame->height / 2;
		src_pitch[4]  = src_pitch[5]  = src_pitch[2]  = src_pitch[3] =
			frame->pitch[0];
		n_src = 6;
	} else {
		if (frame->id == FOURCC_UYVY)
			src_surf_format = GEN6_SURFACEFORMAT_YCRCB_SWAPY;
		else
			src_surf_format = GEN6_SURFACEFORMAT_YCRCB_NORMAL;

		src_width[0]  = frame->width;
		src_height[0] = frame->height;
		src_pitch[0]  = frame->pitch[0];
		n_src = 1;
	}

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	for (n = 0; n < n_src; n++) {
		binding_table[1+n] =
			gen6_bind_video_source(sna,
					       frame->bo,
					       src_surf_base[n],
					       src_width[n],
					       src_height[n],
					       src_pitch[n],
					       src_surf_format);
	}

	gen6_emit_state(sna, op, offset | dirty);
}
1584
 
1585
static bool
1586
gen6_render_video(struct sna *sna,
1587
		  struct sna_video *video,
1588
		  struct sna_video_frame *frame,
1589
		  RegionPtr dstRegion,
1590
		  short src_w, short src_h,
1591
		  short drw_w, short drw_h,
1592
		  short dx, short dy,
1593
		  PixmapPtr pixmap)
1594
{
1595
	struct sna_composite_op tmp;
1596
	int nbox, pix_xoff, pix_yoff;
1597
	float src_scale_x, src_scale_y;
1598
	struct sna_pixmap *priv;
1599
	unsigned filter;
1600
	BoxPtr box;
1601
 
1602
	DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n",
1603
	     __FUNCTION__, src_w, src_h, drw_w, drw_h,
1604
	     REGION_NUM_RECTS(dstRegion),
1605
	     REGION_EXTENTS(NULL, dstRegion)->x1,
1606
	     REGION_EXTENTS(NULL, dstRegion)->y1,
1607
	     REGION_EXTENTS(NULL, dstRegion)->x2,
1608
	     REGION_EXTENTS(NULL, dstRegion)->y2));
1609
 
1610
	priv = sna_pixmap_force_to_gpu(pixmap, MOVE_READ | MOVE_WRITE);
1611
	if (priv == NULL)
1612
		return false;
1613
 
1614
	memset(&tmp, 0, sizeof(tmp));
1615
 
1616
	tmp.dst.pixmap = pixmap;
1617
	tmp.dst.width  = pixmap->drawable.width;
1618
	tmp.dst.height = pixmap->drawable.height;
1619
	tmp.dst.format = sna_render_format_for_depth(pixmap->drawable.depth);
1620
	tmp.dst.bo = priv->gpu_bo;
1621
 
1622
	tmp.src.bo = frame->bo;
1623
	tmp.mask.bo = NULL;
1624
 
1625
	tmp.floats_per_vertex = 3;
1626
	tmp.floats_per_rect = 9;
1627
 
1628
	if (src_w == drw_w && src_h == drw_h)
1629
		filter = SAMPLER_FILTER_NEAREST;
1630
	else
1631
		filter = SAMPLER_FILTER_BILINEAR;
1632
 
1633
	tmp.u.gen6.flags =
1634
		GEN6_SET_FLAGS(SAMPLER_OFFSET(filter, SAMPLER_EXTEND_PAD,
1635
					       SAMPLER_FILTER_NEAREST, SAMPLER_EXTEND_NONE),
1636
			       NO_BLEND,
1637
			       is_planar_fourcc(frame->id) ?
1638
			       GEN6_WM_KERNEL_VIDEO_PLANAR :
1639
			       GEN6_WM_KERNEL_VIDEO_PACKED,
1640
			       2);
1641
	tmp.priv = frame;
1642
 
1643
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
1644
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
1645
		kgem_submit(&sna->kgem);
1646
		assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
1647
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1648
	}
1649
 
1650
	gen6_emit_video_state(sna, &tmp, frame);
1651
	gen6_align_vertex(sna, &tmp);
1652
 
1653
	/* Set up the offset for translating from the given region (in screen
1654
	 * coordinates) to the backing pixmap.
1655
	 */
1656
#ifdef COMPOSITE
1657
	pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
1658
	pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
1659
#else
1660
	pix_xoff = 0;
1661
	pix_yoff = 0;
1662
#endif
1663
 
1664
	/* Use normalized texture coordinates */
1665
	src_scale_x = ((float)src_w / frame->width) / (float)drw_w;
1666
	src_scale_y = ((float)src_h / frame->height) / (float)drw_h;
1667
 
1668
	box = REGION_RECTS(dstRegion);
1669
	nbox = REGION_NUM_RECTS(dstRegion);
1670
	while (nbox--) {
1671
		BoxRec r;
1672
 
1673
		r.x1 = box->x1 + pix_xoff;
1674
		r.x2 = box->x2 + pix_xoff;
1675
		r.y1 = box->y1 + pix_yoff;
1676
		r.y2 = box->y2 + pix_yoff;
1677
 
1678
		gen6_get_rectangles(sna, &tmp, 1, gen6_emit_video_state);
1679
 
1680
		OUT_VERTEX(r.x2, r.y2);
1681
		OUT_VERTEX_F((box->x2 - dx) * src_scale_x);
1682
		OUT_VERTEX_F((box->y2 - dy) * src_scale_y);
1683
 
1684
		OUT_VERTEX(r.x1, r.y2);
1685
		OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
1686
		OUT_VERTEX_F((box->y2 - dy) * src_scale_y);
1687
 
1688
		OUT_VERTEX(r.x1, r.y1);
1689
		OUT_VERTEX_F((box->x1 - dx) * src_scale_x);
1690
		OUT_VERTEX_F((box->y1 - dy) * src_scale_y);
1691
 
1692
		if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
1693
			sna_damage_add_box(&priv->gpu_damage, &r);
1694
			sna_damage_subtract_box(&priv->cpu_damage, &r);
1695
		}
1696
		box++;
1697
	}
1698
	priv->clear = false;
1699
 
1700
	gen4_vertex_flush(sna);
1701
	return true;
1702
}
1703
 
1704
/* Resolve a source/mask picture into a composite channel, applying any
 * needed fixups (solid, gradient, alpha-map, unsupported repeat/filter
 * or format, oversized pixmap).  Returns 1 on success, 0 if the caller
 * should substitute a transparent solid, -1 on failure.
 * (Inside an #if 0 region — currently compiled out.) */
static int
gen6_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = false;
	channel->card_format = -1;

	/* Constant colour: no surface needed at all. */
	if (sna_picture_is_solid(picture, &color))
		return gen4_channel_init_solid(sna, channel, color);

	/* Source pictures (gradients) have no drawable. */
	if (picture->pDrawable == NULL) {
		int ret;

		if (picture->pSourcePict->type == SourcePictTypeLinear)
			return gen4_channel_init_linear(sna, picture, channel,
							x, y,
							w, h,
							dst_x, dst_y);

		DBG(("%s -- fixup, gradient\n", __FUNCTION__));
		ret = -1;
		if (!precise)
			ret = sna_render_picture_approximate_gradient(sna, picture, channel,
								      x, y, w, h, dst_x, dst_y);
		if (ret == -1)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fixup, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (!gen6_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen6_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	/* Pure integer translations are folded into the coordinates so
	 * the sampler can run untransformed. */
	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;
	} else
		channel->transform = picture->transform;

	channel->pict_format = picture->format;
	channel->card_format = gen6_get_card_format(picture->format);
	if (channel->card_format == (unsigned)-1)
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);

	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
		DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__,
		     pixmap->drawable.width, pixmap->drawable.height));
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);
	}

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}
1797
 
1798
/* Translate the channel's generic repeat/filter/format values into
 * their gen6 hardware encodings. */
inline static void gen6_composite_channel_convert(struct sna_composite_channel *channel)
{
	channel->repeat = gen6_repeat(channel->repeat);
	channel->filter = gen6_filter(channel->filter);
	if (channel->card_format == (unsigned)-1)
		channel->card_format = gen6_get_card_format(channel->pict_format);
	assert(channel->card_format != (unsigned)-1);
}
1806
 
1807
/* Finish a composite operation: flush any outstanding vertices and run
 * the component-alpha fixup pass if one is pending.  The bo-release and
 * redirect teardown are stubbed out in this port. */
static void gen6_render_composite_done(struct sna *sna,
                       const struct sna_composite_op *op)
{
    DBG(("%s\n", __FUNCTION__));

	assert(!sna->render.active);
	if (sna->render.vertex_offset) {
		gen4_vertex_flush(sna);
        gen6_magic_ca_pass(sna, op);
    }

//   if (op->mask.bo)
//       kgem_bo_destroy(&sna->kgem, op->mask.bo);
//   if (op->src.bo)
//       kgem_bo_destroy(&sna->kgem, op->src.bo);

//   sna_render_composite_redirect_done(sna, op);
}
1825
 
1826
/* Resolve the destination picture to a GPU bo, record the drawable
 * deltas, and redirect through a proxy if the pixmap exceeds the
 * hardware size limits.  Returns false if no usable GPU target. */
static bool
gen6_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst,
			  int x, int y, int w, int h)
{
	BoxRec box;

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.format = dst->format;
	op->dst.width = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;

	/* Hint the migration logic with the region we intend to draw;
	 * an empty extent falls back to the whole picture. */
	if (w && h) {
		box.x1 = x;
		box.y1 = y;
		box.x2 = x + w;
		box.y2 = y + h;
	} else
		sna_render_picture_extents(dst, &box);

	op->dst.bo = sna_drawable_use_bo (dst->pDrawable,
					  PREFER_GPU | FORCE_GPU | RENDER_GPU,
					  &box, &op->damage);
	if (op->dst.bo == NULL)
		return false;

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d),damage=%p\n",
	     __FUNCTION__,
	     op->dst.pixmap, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y,
	     op->damage ? *op->damage : (void *)-1));

	assert(op->dst.bo->proxy == NULL);

	if (too_large(op->dst.width, op->dst.height) &&
	    !sna_render_composite_redirect(sna, op, x, y, w, h))
		return false;

	return true;
}
1872
 
1873
 
1874
 
1875
/*
 * Prepare a Render composite operation (op, src [, mask] -> dst) on the
 * gen6 3D pipeline.  Tries the BLT engine first for simple cases, falls
 * back or tiles when unsupported/too large, then initialises the source
 * and mask channels, selects blend/kernel/vertex flags and the emitters,
 * and emits the initial pipeline state.  Returns true with *tmp filled
 * in on success.
 */
static bool
gen6_render_composite(struct sna *sna,
              uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
              int16_t src_x, int16_t src_y,
              int16_t msk_x, int16_t msk_y,
              int16_t dst_x, int16_t dst_y,
              int16_t width, int16_t height,
              struct sna_composite_op *tmp)
{
	if (op >= ARRAY_SIZE(gen6_blend_op))
		return false;

    DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
         width, height, sna->kgem.ring));

	/* Unmasked ops may be cheaper on the BLT ring. */
	if (mask == NULL &&
	    try_blt(sna, dst, src, width, height) &&
	    sna_blt_composite(sna, op,
			      src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      tmp, false))
		return true;

	if (gen6_composite_fallback(sna, src, mask, dst))
		return false;

	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x, src_y,
					    msk_x, msk_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	/* Clear is Src with a transparent (solid) source. */
	if (op == PictOpClear)
		op = PictOpSrc;
	tmp->op = op;
	if (!gen6_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height))
		return false;

	/* -1: failure, 0: solid fixup required, 1: channel ready. */
	switch (gen6_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		/* Did we just switch rings to prepare the source? */
		if (mask == NULL &&
		    prefer_blt_composite(sna, tmp) &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		gen6_composite_channel_convert(&tmp->src);
		break;
	}

	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = false;
	tmp->need_magic_ca_pass = false;

	tmp->mask.bo = NULL;
    tmp->mask.filter = SAMPLER_FILTER_NEAREST;
    tmp->mask.repeat = SAMPLER_EXTEND_NONE;

	if (mask) {
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			tmp->has_component_alpha = true;

			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value.  We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (gen6_blend_op[op].src_alpha &&
			    (gen6_blend_op[op].src_blend != GEN6_BLENDFACTOR_ZERO)) {
				if (op != PictOpOver)
					goto cleanup_src;

				/* Emulate CA Over with OutReverse + a second Add pass. */
				tmp->need_magic_ca_pass = true;
				tmp->op = PictOpOutReverse;
			}
		}

		/* Identical src/mask transforms may share one channel. */
		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, msk_x, msk_y)) {
			switch (gen6_composite_picture(sna, mask, &tmp->mask,
						       msk_x, msk_y,
						       width, height,
						       dst_x, dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				goto cleanup_src;
			case 0:
				if (!gen4_channel_init_solid(sna, &tmp->mask, 0))
					goto cleanup_src;
				/* fall through to fixup */
			case 1:
				gen6_composite_channel_convert(&tmp->mask);
				break;
			}
		}

		tmp->is_affine &= tmp->mask.is_affine;
	}

	/* Pack sampler, blend, WM kernel and vertex-emitter choice into the
	 * per-op state word. */
	tmp->u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->src.filter,
					      tmp->src.repeat,
					      tmp->mask.filter,
					      tmp->mask.repeat),
			       gen6_get_blend(tmp->op,
					      tmp->has_component_alpha,
					      tmp->dst.format),
			       gen6_choose_composite_kernel(tmp->op,
							    tmp->mask.bo != NULL,
							    tmp->has_component_alpha,
							    tmp->is_affine),
			       gen4_choose_composite_emitter(tmp));

	tmp->blt   = gen6_render_composite_blt;
    tmp->box   = gen6_render_composite_box;
	tmp->boxes = gen6_render_composite_boxes__blt;
	if (tmp->emit_boxes) {
		tmp->boxes = gen6_render_composite_boxes;
		tmp->thread_boxes = gen6_render_composite_boxes__thread;
	}
	tmp->done  = gen6_render_composite_done;

	/* NOTE(port): upstream checks kgem_check_bo() here and jumps to
	 * cleanup_mask on failure; this port emits state directly, leaving
	 * the cleanup_mask label below unreferenced — TODO confirm. */
    gen6_emit_composite_state(sna, tmp);
    gen6_align_vertex(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo)
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
	if (tmp->src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
	if (tmp->redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
	return false;
}
2034
 
2035
#if !NO_COMPOSITE_SPANS
2036
/*
 * Emit one opacity-weighted span rectangle: reserve vertex space for a
 * single rectangle (re-emitting state if the batch wrapped), then let
 * the op's prim_emit write the vertices.
 */
fastcall static void
gen6_render_composite_spans_box(struct sna *sna,
				const struct sna_composite_spans_op *op,
				const BoxRec *box, float opacity)
{
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
	     __FUNCTION__,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y,
	     box->x1, box->y1,
	     box->x2 - box->x1,
	     box->y2 - box->y1));

	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_composite_state);
	op->prim_emit(sna, op, box, opacity);
}
2053
 
2054
/*
 * Emit a batch of opacity-weighted span rectangles, chunking by however
 * many rectangles fit in the vertex buffer per iteration.
 */
static void
gen6_render_composite_spans_boxes(struct sna *sna,
				  const struct sna_composite_spans_op *op,
				  const BoxRec *box, int nbox,
				  float opacity)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     opacity,
	     op->base.dst.x, op->base.dst.y));

	do {
		int nbox_this_time;

		/* May return fewer than nbox if the vertex buffer is short. */
		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
						     gen6_emit_composite_state);
		nbox -= nbox_this_time;

		do {
			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
			     box->x1, box->y1,
			     box->x2 - box->x1,
			     box->y2 - box->y1));

			op->prim_emit(sna, op, box++, opacity);
		} while (--nbox_this_time);
	} while (nbox);
}
2083
 
2084
/*
 * Thread-safe span emission: vertex space is reserved while holding the
 * render vertex lock, the reservation is pinned with acquire, and the
 * actual vertex writes (op->emit_boxes) happen unlocked so multiple
 * threads can fill disjoint regions of the vertex buffer concurrently.
 */
fastcall static void
gen6_render_composite_spans_boxes__thread(struct sna *sna,
					  const struct sna_composite_spans_op *op,
					  const struct sna_opacity_box *box,
					  int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->base.src.offset[0], op->base.src.offset[1],
	     op->base.dst.x, op->base.dst.y));

	sna_vertex_lock(&sna->render);
	do {
		int nbox_this_time;
		float *v;

		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
						     gen6_emit_composite_state);
		assert(nbox_this_time);
		nbox -= nbox_this_time;

		/* Claim our slice of the vertex buffer under the lock... */
		v = sna->render.vertices + sna->render.vertex_used;
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;

		sna_vertex_acquire__locked(&sna->render);
		sna_vertex_unlock(&sna->render);

		/* ...and fill it outside the lock. */
		op->emit_boxes(op, box, nbox_this_time, v);
		box += nbox_this_time;

		sna_vertex_lock(&sna->render);
		sna_vertex_release__locked(&sna->render);
	} while (nbox);
	sna_vertex_unlock(&sna->render);
}
2119
 
2120
/*
 * Finish a spans operation: flush pending vertices, release the source
 * bo and undo any destination redirection.
 */
fastcall static void
gen6_render_composite_spans_done(struct sna *sna,
				 const struct sna_composite_spans_op *op)
{
	DBG(("%s()\n", __FUNCTION__));
	assert(!sna->render.active);

	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);

	if (op->base.src.bo)
		kgem_bo_destroy(&sna->kgem, op->base.src.bo);

	sna_render_composite_redirect_done(sna, &op->base);
}
2135
 
2136
/*
 * Decide whether a spans composite can run on the gen6 render pipeline.
 * Rejects unknown blend ops, operations that would fall back anyway,
 * and tiled operations whose target is not GPU-resident.  For
 * non-rectilinear spans, only proceed when the CPU path would stall on
 * a busy bo.
 */
static bool
gen6_check_composite_spans(struct sna *sna,
			   uint8_t op, PicturePtr src, PicturePtr dst,
			   int16_t width, int16_t height,
			   unsigned flags)
{
	DBG(("%s: op=%d, width=%d, height=%d, flags=%x\n",
	     __FUNCTION__, op, width, height, flags));

	if (op >= ARRAY_SIZE(gen6_blend_op))
		return false;

	if (gen6_composite_fallback(sna, src, NULL, dst)) {
		DBG(("%s: operation would fallback\n", __FUNCTION__));
		return false;
	}

	if (need_tiling(sna, width, height) &&
	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
		DBG(("%s: fallback, tiled operation not on GPU\n",
		     __FUNCTION__));
		return false;
	}

	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
		struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
		assert(priv);

		/* A busy bo means waiting for the GPU either way; prefer
		 * keeping the work on the render ring. */
		if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo))
			return true;

		if (flags & COMPOSITE_SPANS_INPLACE_HINT)
			return false;

		return priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo);
	}

	return true;
}
2175
 
2176
/*
 * Prepare an opacity-spans composite operation (op, src -> dst with a
 * per-rectangle opacity, used for trapezoid rasterisation).  Tiles when
 * too large, initialises the source channel, packs the pipeline flags
 * with the OPACITY WM kernel, and emits initial state.  Returns true
 * with *tmp filled in on success.
 */
static bool
gen6_render_composite_spans(struct sna *sna,
			    uint8_t op,
			    PicturePtr src,
			    PicturePtr dst,
			    int16_t src_x,  int16_t src_y,
			    int16_t dst_x,  int16_t dst_y,
			    int16_t width,  int16_t height,
			    unsigned flags,
			    struct sna_composite_spans_op *tmp)
{
	DBG(("%s: %dx%d with flags=%x, current mode=%d\n", __FUNCTION__,
	     width, height, flags, sna->kgem.ring));

	assert(gen6_check_composite_spans(sna, op, src, dst, width, height, flags));

	if (need_tiling(sna, width, height)) {
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
		     __FUNCTION__, width, height));
		return sna_tiling_composite_spans(op, src, dst,
						  src_x, src_y, dst_x, dst_y,
						  width, height, flags, tmp);
	}

	tmp->base.op = op;
	if (!gen6_composite_set_target(sna, &tmp->base, dst,
				       dst_x, dst_y, width, height))
		return false;

	/* -1: failure, 0: solid fixup required, 1: channel ready. */
	switch (gen6_composite_picture(sna, src, &tmp->base.src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		if (!gen4_channel_init_solid(sna, &tmp->base.src, 0))
			goto cleanup_dst;
		/* fall through to fixup */
	case 1:
		gen6_composite_channel_convert(&tmp->base.src);
		break;
	}
	tmp->base.mask.bo = NULL;

	tmp->base.is_affine = tmp->base.src.is_affine;
	tmp->base.need_magic_ca_pass = false;

	/* The "mask" sampler slot carries the opacity channel. */
	tmp->base.u.gen6.flags =
		GEN6_SET_FLAGS(SAMPLER_OFFSET(tmp->base.src.filter,
					      tmp->base.src.repeat,
					      SAMPLER_FILTER_NEAREST,
					      SAMPLER_EXTEND_PAD),
			       gen6_get_blend(tmp->base.op, false, tmp->base.dst.format),
			       GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine,
			       gen4_choose_spans_emitter(tmp));

	tmp->box   = gen6_render_composite_spans_box;
	tmp->boxes = gen6_render_composite_spans_boxes;
	if (tmp->emit_boxes)
		tmp->thread_boxes = gen6_render_composite_spans_boxes__thread;
	tmp->done  = gen6_render_composite_spans_done;

	/* Make sure both bos fit in this batch; flush and retry once. */
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
	if (!kgem_check_bo(&sna->kgem,
			   tmp->base.dst.bo, tmp->base.src.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->base.dst.bo, tmp->base.src.bo,
				   NULL))
			goto cleanup_src;
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	gen6_emit_composite_state(sna, &tmp->base);
	gen6_align_vertex(sna, &tmp->base);
	return true;

cleanup_src:
	if (tmp->base.src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
cleanup_dst:
	if (tmp->base.redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
	return false;
}
2264
#endif
2265
 
2266
#endif
2267
 
2268
/*
 * Emit the surface bindings and pipeline state for a copy operation:
 * dst as a render target, src as a sampled texture.  If the new binding
 * pair is identical to the previous surface table, the freshly written
 * entries are discarded and the old table is reused to save batch space.
 */
static void
gen6_emit_copy_state(struct sna *sna,
		     const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, op->src.width, op->src.height,
			     op->src.card_format,
			     false);

	/* Dedup: both entries match the previous table, so rewind. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	gen6_emit_state(sna, op, offset | dirty);
}
2299
 
2300
#if 0
2301
 
2302
/*
 * Heuristic: should this copy run on the BLT ring rather than the 3D
 * pipeline?  Favours the BLT for self-copies, untiled TLB-miss-prone
 * bos, and when the BLT ring is already active; COPY_SYNC forces the
 * render path.
 */
static inline bool prefer_blt_copy(struct sna *sna,
				   struct kgem_bo *src_bo,
				   struct kgem_bo *dst_bo,
				   unsigned flags)
{
	if (flags & COPY_SYNC)
		return false;

	/* Compile-time override: >0 forces BLT, <0 forces render. */
	if (PREFER_RENDER)
		return PREFER_RENDER > 0;

	if (sna->kgem.ring == KGEM_BLT)
		return true;

	if (src_bo == dst_bo && can_switch_to_blt(sna, dst_bo, flags))
		return true;

	if (untiled_tlb_miss(src_bo) ||
	    untiled_tlb_miss(dst_bo))
		return true;

	if (!prefer_blt_ring(sna, dst_bo, flags))
		return false;

	return (prefer_blt_bo(sna, src_bo) >= 0 &&
		prefer_blt_bo(sna, dst_bo) > 0);
}
2329
 
2330
/* Compute the bounding box (union) of n boxes; n must be >= 1. */
inline static void boxes_extents(const BoxRec *box, int n, BoxRec *extents)
{
	int i;

	*extents = box[0];
	for (i = 1; i < n; i++) {
		if (extents->x1 > box[i].x1)
			extents->x1 = box[i].x1;
		if (extents->x2 < box[i].x2)
			extents->x2 = box[i].x2;

		if (extents->y1 > box[i].y1)
			extents->y1 = box[i].y1;
		if (extents->y2 < box[i].y2)
			extents->y2 = box[i].y2;
	}
}
2347
 
2348
static inline bool
2349
overlaps(struct sna *sna,
2350
	 struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
2351
	 struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
2352
	 const BoxRec *box, int n, BoxRec *extents)
2353
{
2354
	if (src_bo != dst_bo)
2355
		return false;
2356
 
2357
	boxes_extents(box, n, extents);
2358
	return (extents->x2 + src_dx > extents->x1 + dst_dx &&
2359
		extents->x1 + src_dx < extents->x2 + dst_dx &&
2360
		extents->y2 + src_dy > extents->y1 + dst_dy &&
2361
		extents->y1 + src_dy < extents->y2 + dst_dy);
2362
}
2363
 
2364
static bool
2365
gen6_render_copy_boxes(struct sna *sna, uint8_t alu,
2366
		       PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
2367
		       PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
2368
		       const BoxRec *box, int n, unsigned flags)
2369
{
2370
	struct sna_composite_op tmp;
2371
	BoxRec extents;
2372
 
2373
	DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d, overlaps? %d\n",
2374
	     __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu,
2375
	     src_bo == dst_bo,
2376
	     overlaps(sna,
2377
		      src_bo, src_dx, src_dy,
2378
		      dst_bo, dst_dx, dst_dy,
2379
		      box, n, &extents)));
2380
 
2381
	if (prefer_blt_copy(sna, src_bo, dst_bo, flags) &&
2382
	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
2383
	    sna_blt_copy_boxes(sna, alu,
2384
			       src_bo, src_dx, src_dy,
2385
			       dst_bo, dst_dx, dst_dy,
2386
			       dst->drawable.bitsPerPixel,
2387
			       box, n))
2388
		return true;
2389
 
2390
	if (!(alu == GXcopy || alu == GXclear)) {
2391
fallback_blt:
2392
		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
2393
			return false;
2394
 
2395
		return sna_blt_copy_boxes_fallback(sna, alu,
2396
						   src, src_bo, src_dx, src_dy,
2397
						   dst, dst_bo, dst_dx, dst_dy,
2398
						   box, n);
2399
	}
2400
 
2401
	if (overlaps(sna,
2402
		     src_bo, src_dx, src_dy,
2403
		     dst_bo, dst_dx, dst_dy,
2404
		     box, n, &extents)) {
2405
		if (too_large(extents.x2-extents.x1, extents.y2-extents.y1))
2406
			goto fallback_blt;
2407
 
2408
		if (can_switch_to_blt(sna, dst_bo, flags) &&
2409
		    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
2410
		    sna_blt_copy_boxes(sna, alu,
2411
				       src_bo, src_dx, src_dy,
2412
				       dst_bo, dst_dx, dst_dy,
2413
				       dst->drawable.bitsPerPixel,
2414
				       box, n))
2415
			return true;
2416
 
2417
		return sna_render_copy_boxes__overlap(sna, alu,
2418
						      src, src_bo, src_dx, src_dy,
2419
						      dst, dst_bo, dst_dx, dst_dy,
2420
						      box, n, &extents);
2421
	}
2422
 
2423
	if (dst->drawable.depth == src->drawable.depth) {
2424
		tmp.dst.format = sna_render_format_for_depth(dst->drawable.depth);
2425
		tmp.src.pict_format = tmp.dst.format;
2426
	} else {
2427
		tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
2428
		tmp.src.pict_format = sna_format_for_depth(src->drawable.depth);
2429
	}
2430
	if (!gen6_check_format(tmp.src.pict_format))
2431
		goto fallback_blt;
2432
 
2433
	tmp.dst.pixmap = dst;
2434
	tmp.dst.width  = dst->drawable.width;
2435
	tmp.dst.height = dst->drawable.height;
2436
	tmp.dst.bo = dst_bo;
2437
	tmp.dst.x = tmp.dst.y = 0;
2438
	tmp.damage = NULL;
2439
 
2440
	sna_render_composite_redirect_init(&tmp);
2441
	if (too_large(tmp.dst.width, tmp.dst.height)) {
2442
		int i;
2443
 
2444
		extents = box[0];
2445
		for (i = 1; i < n; i++) {
2446
			if (box[i].x1 < extents.x1)
2447
				extents.x1 = box[i].x1;
2448
			if (box[i].y1 < extents.y1)
2449
				extents.y1 = box[i].y1;
2450
 
2451
			if (box[i].x2 > extents.x2)
2452
				extents.x2 = box[i].x2;
2453
			if (box[i].y2 > extents.y2)
2454
				extents.y2 = box[i].y2;
2455
		}
2456
 
2457
		if (!sna_render_composite_redirect(sna, &tmp,
2458
						   extents.x1 + dst_dx,
2459
						   extents.y1 + dst_dy,
2460
						   extents.x2 - extents.x1,
2461
						   extents.y2 - extents.y1))
2462
			goto fallback_tiled;
2463
 
2464
		dst_dx += tmp.dst.x;
2465
		dst_dy += tmp.dst.y;
2466
 
2467
		tmp.dst.x = tmp.dst.y = 0;
2468
	}
2469
 
2470
	tmp.src.card_format = gen6_get_card_format(tmp.src.pict_format);
2471
	if (too_large(src->drawable.width, src->drawable.height)) {
2472
		int i;
2473
 
2474
		extents = box[0];
2475
		for (i = 1; i < n; i++) {
2476
			if (extents.x1 < box[i].x1)
2477
				extents.x1 = box[i].x1;
2478
			if (extents.y1 < box[i].y1)
2479
				extents.y1 = box[i].y1;
2480
 
2481
			if (extents.x2 > box[i].x2)
2482
				extents.x2 = box[i].x2;
2483
			if (extents.y2 > box[i].y2)
2484
				extents.y2 = box[i].y2;
2485
		}
2486
 
2487
		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
2488
					       extents.x1 + src_dx,
2489
					       extents.y1 + src_dy,
2490
					       extents.x2 - extents.x1,
2491
					       extents.y2 - extents.y1)) {
2492
			DBG(("%s: unable to extract partial pixmap\n", __FUNCTION__));
2493
			goto fallback_tiled_dst;
2494
		}
2495
 
2496
		src_dx += tmp.src.offset[0];
2497
		src_dy += tmp.src.offset[1];
2498
	} else {
2499
		tmp.src.bo = src_bo;
2500
		tmp.src.width  = src->drawable.width;
2501
		tmp.src.height = src->drawable.height;
2502
	}
2503
 
2504
	tmp.mask.bo = NULL;
2505
 
2506
	tmp.floats_per_vertex = 2;
2507
	tmp.floats_per_rect = 6;
2508
	tmp.need_magic_ca_pass = 0;
2509
 
2510
	tmp.u.gen6.flags = COPY_FLAGS(alu);
2511
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
2512
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER);
2513
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX);
2514
 
2515
	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
2516
	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
2517
		kgem_submit(&sna->kgem);
2518
		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
2519
			DBG(("%s: too large for a single operation\n",
2520
			     __FUNCTION__));
2521
			goto fallback_tiled_src;
2522
		}
2523
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
2524
	}
2525
 
2526
	gen6_emit_copy_state(sna, &tmp);
2527
	gen6_align_vertex(sna, &tmp);
2528
 
2529
	do {
2530
		int16_t *v;
2531
		int n_this_time;
2532
 
2533
		n_this_time = gen6_get_rectangles(sna, &tmp, n,
2534
						  gen6_emit_copy_state);
2535
		n -= n_this_time;
2536
 
2537
		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
2538
		sna->render.vertex_used += 6 * n_this_time;
2539
		assert(sna->render.vertex_used <= sna->render.vertex_size);
2540
		do {
2541
 
2542
			DBG(("	(%d, %d) -> (%d, %d) + (%d, %d)\n",
2543
			     box->x1 + src_dx, box->y1 + src_dy,
2544
			     box->x1 + dst_dx, box->y1 + dst_dy,
2545
			     box->x2 - box->x1, box->y2 - box->y1));
2546
			v[0] = box->x2 + dst_dx;
2547
			v[2] = box->x2 + src_dx;
2548
			v[1]  = v[5] = box->y2 + dst_dy;
2549
			v[3]  = v[7] = box->y2 + src_dy;
2550
			v[8]  = v[4] = box->x1 + dst_dx;
2551
			v[10] = v[6] = box->x1 + src_dx;
2552
			v[9]  = box->y1 + dst_dy;
2553
			v[11] = box->y1 + src_dy;
2554
			v += 12; box++;
2555
		} while (--n_this_time);
2556
	} while (n);
2557
 
2558
	gen4_vertex_flush(sna);
2559
	sna_render_composite_redirect_done(sna, &tmp);
2560
	if (tmp.src.bo != src_bo)
2561
		kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2562
	return true;
2563
 
2564
fallback_tiled_src:
2565
	if (tmp.src.bo != src_bo)
2566
		kgem_bo_destroy(&sna->kgem, tmp.src.bo);
2567
fallback_tiled_dst:
2568
	if (tmp.redirect.real_bo)
2569
		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
2570
fallback_tiled:
2571
	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
2572
	    sna_blt_copy_boxes(sna, alu,
2573
			       src_bo, src_dx, src_dy,
2574
			       dst_bo, dst_dx, dst_dy,
2575
			       dst->drawable.bitsPerPixel,
2576
			       box, n))
2577
		return true;
2578
 
2579
	return sna_tiling_copy_boxes(sna, alu,
2580
				     src, src_bo, src_dx, src_dy,
2581
				     dst, dst_bo, dst_dx, dst_dy,
2582
				     box, n);
2583
}
2584
 
2585
#endif
2586
 
2587
static void
2588
gen6_render_copy_blt(struct sna *sna,
2589
		     const struct sna_copy_op *op,
2590
		     int16_t sx, int16_t sy,
2591
		     int16_t w,  int16_t h,
2592
		     int16_t dx, int16_t dy)
2593
{
2594
	int16_t *v;
2595
 
2596
	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_copy_state);
2597
 
2598
	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
2599
	sna->render.vertex_used += 6;
2600
	assert(sna->render.vertex_used <= sna->render.vertex_size);
2601
 
2602
	v[0]  = dx+w; v[1]  = dy+h;
2603
	v[2]  = sx+w; v[3]  = sy+h;
2604
	v[4]  = dx;   v[5]  = dy+h;
2605
	v[6]  = sx;   v[7]  = sy+h;
2606
	v[8]  = dx;   v[9]  = dy;
2607
	v[10] = sx;   v[11] = sy;
2608
}
2609
 
2610
/* Finish a copy operation: flush any vertices still buffered. */
static void
gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
{
	DBG(("%s()\n", __FUNCTION__));

	assert(!sna->render.active);
	if (sna->render.vertex_offset)
		gen4_vertex_flush(sna);
}
2619
 
2620
static bool
2621
gen6_render_copy(struct sna *sna, uint8_t alu,
2622
		 PixmapPtr src, struct kgem_bo *src_bo,
2623
		 PixmapPtr dst, struct kgem_bo *dst_bo,
2624
		 struct sna_copy_op *op)
2625
{
2626
	DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n",
2627
	     __FUNCTION__, alu,
2628
	     src->drawable.width, src->drawable.height,
2629
	     dst->drawable.width, dst->drawable.height));
2630
 
2631
fallback:
2632
 
2633
    op->base.dst.format = PIXMAN_a8r8g8b8;
2634
	op->base.src.pict_format = op->base.dst.format;
2635
 
2636
	op->base.dst.pixmap = dst;
2637
	op->base.dst.width  = dst->drawable.width;
2638
	op->base.dst.height = dst->drawable.height;
2639
	op->base.dst.bo = dst_bo;
2640
 
2641
	op->base.src.bo = src_bo;
2642
	op->base.src.card_format =
2643
		gen6_get_card_format(op->base.src.pict_format);
2644
	op->base.src.width  = src->drawable.width;
2645
	op->base.src.height = src->drawable.height;
2646
 
2647
	op->base.mask.bo = NULL;
2648
 
2649
	op->base.floats_per_vertex = 2;
2650
	op->base.floats_per_rect = 6;
2651
 
2652
	op->base.u.gen6.flags = COPY_FLAGS(alu);
2653
	assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
2654
	assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER);
2655
	assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX);
2656
 
2657
 
2658
	gen6_emit_copy_state(sna, &op->base);
2659
	gen6_align_vertex(sna, &op->base);
2660
 
2661
	op->blt  = gen6_render_copy_blt;
2662
	op->done = gen6_render_copy_done;
2663
	return true;
2664
}
2665
 
2666
#if 0
2667
 
2668
/*
 * Emit the surface bindings and pipeline state for a solid fill:
 * dst as a render target, the 1x1 solid-colour bo as the source
 * texture.  Identical consecutive surface tables are deduplicated,
 * as in gen6_emit_copy_state().
 */
static void
gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
{
	uint32_t *binding_table;
	uint16_t offset;
	bool dirty;

	dirty = gen6_get_batch(sna, op);

	binding_table = gen6_composite_get_binding_table(sna, &offset);

	binding_table[0] =
		gen6_bind_bo(sna,
			     op->dst.bo, op->dst.width, op->dst.height,
			     gen6_get_dest_format(op->dst.format),
			     true);
	binding_table[1] =
		gen6_bind_bo(sna,
			     op->src.bo, 1, 1,
			     GEN6_SURFACEFORMAT_B8G8R8A8_UNORM,
			     false);

	/* Dedup: both entries match the previous table, so rewind. */
	if (sna->kgem.surface == offset &&
	    *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) {
		sna->kgem.surface +=
			sizeof(struct gen6_surface_state_padded)/sizeof(uint32_t);
		offset = sna->render_state.gen6.surface_table;
	}

	gen6_emit_state(sna, op, offset | dirty);
}
2699
 
2700
static inline bool prefer_blt_fill(struct sna *sna,
2701
				   struct kgem_bo *bo)
2702
{
2703
	if (PREFER_RENDER)
2704
		return PREFER_RENDER < 0;
2705
 
2706
	if (untiled_tlb_miss(bo))
2707
		return true;
2708
 
2709
	return prefer_blt_ring(sna, bo, 0) || prefer_blt_bo(sna, bo) >= 0;
2710
}
2711
 
2712
/*
 * Fill a list of boxes with a solid colour using the Render operator
 * `op`.  Tries the BLT engine first when favourable, redirects through
 * a proxy when the target exceeds gen6 surface limits, and otherwise
 * samples a cached 1x1 solid bo on the 3D pipeline.
 */
static bool
gen6_render_fill_boxes(struct sna *sna,
		       CARD8 op,
		       PictFormat format,
		       const xRenderColor *color,
		       PixmapPtr dst, struct kgem_bo *dst_bo,
		       const BoxRec *box, int n)
{
	struct sna_composite_op tmp;
	uint32_t pixel;

	DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n",
	     __FUNCTION__, op,
	     color->red, color->green, color->blue, color->alpha, (int)format));

	if (op >= ARRAY_SIZE(gen6_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	/* BLT path: only Clear/Src map onto a raster alu. */
	if (prefer_blt_fill(sna, dst_bo) || !gen6_check_dst_format(format)) {
		uint8_t alu = GXinvalid;

		if (op <= PictOpSrc) {
			pixel = 0;
			if (op == PictOpClear)
				alu = GXclear;
			else if (sna_get_pixel_from_rgba(&pixel,
							 color->red,
							 color->green,
							 color->blue,
							 color->alpha,
							 format))
				alu = GXcopy;
		}

		if (alu != GXinvalid &&
		    sna_blt_fill_boxes(sna, alu,
				       dst_bo, dst->drawable.bitsPerPixel,
				       pixel, box, n))
			return true;

		if (!gen6_check_dst_format(format))
			return false;
	}

	/* Render path works on an a8r8g8b8 solid; Clear becomes Src(0). */
	if (op == PictOpClear) {
		pixel = 0;
		op = PictOpSrc;
	} else if (!sna_get_pixel_from_rgba(&pixel,
					    color->red,
					    color->green,
					    color->blue,
					    color->alpha,
					    PICT_a8r8g8b8))
		return false;

	DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n",
	     __FUNCTION__, pixel, n,
	     box[0].x1, box[0].y1, box[0].x2, box[0].y2));

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = format;
	tmp.dst.bo = dst_bo;
	tmp.dst.x = tmp.dst.y = 0;
	tmp.damage = NULL;

	sna_render_composite_redirect_init(&tmp);
	if (too_large(dst->drawable.width, dst->drawable.height)) {
		BoxRec extents;

		boxes_extents(box, n, &extents);
		if (!sna_render_composite_redirect(sna, &tmp,
						   extents.x1, extents.y1,
						   extents.x2 - extents.x1,
						   extents.y2 - extents.y1))
			return sna_tiling_fill_boxes(sna, op, format, color,
						     dst, dst_bo, box, n);
	}

	tmp.src.bo = sna_render_get_solid(sna, pixel);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS(op, format);
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
	}

	gen6_emit_fill_state(sna, &tmp);
	gen6_align_vertex(sna, &tmp);

	do {
		int n_this_time;
		int16_t *v;

		n_this_time = gen6_get_rectangles(sna, &tmp, n,
						  gen6_emit_fill_state);
		n -= n_this_time;

		v = (int16_t *)(sna->render.vertices + sna->render.vertex_used);
		sna->render.vertex_used += 6 * n_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);
		do {
			DBG(("	(%d, %d), (%d, %d)\n",
			     box->x1, box->y1, box->x2, box->y2));

			/* (x, y) corners plus constant (1,1)/(0,0) texcoords
			 * into the 1x1 solid source. */
			v[0] = box->x2;
			v[5] = v[1] = box->y2;
			v[8] = v[4] = box->x1;
			v[9] = box->y1;
			v[2] = v[3]  = v[7]  = 1;
			v[6] = v[10] = v[11] = 0;
			v += 12; box++;
		} while (--n_this_time);
	} while (n);

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
	sna_render_composite_redirect_done(sna, &tmp);
	return true;
}
2845
 
2846
/* Emit one fill rectangle at (x, y) with size (w, h). */
static void
gen6_render_op_fill_blt(struct sna *sna,
			const struct sna_fill_op *op,
			int16_t x, int16_t y, int16_t w, int16_t h)
{
	int16_t *v;

	DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));

	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* Corner positions... */
	v[0] = x+w;
	v[4] = v[8] = x;
	v[1] = v[5] = y+h;
	v[9] = y;

	/* ...and constant texcoords into the 1x1 solid source. */
	v[2] = v[3]  = v[7]  = 1;
	v[6] = v[10] = v[11] = 0;
}
2869
 
2870
/* Emit one fill rectangle given as a BoxRec. */
fastcall static void
gen6_render_op_fill_box(struct sna *sna,
			const struct sna_fill_op *op,
			const BoxRec *box)
{
	int16_t *v;

	DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2));

	gen6_get_rectangles(sna, &op->base, 1, gen6_emit_fill_state);

	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* Corner positions... */
	v[0] = box->x2;
	v[8] = v[4] = box->x1;
	v[5] = v[1] = box->y2;
	v[9] = box->y1;

	/* ...and constant texcoords into the 1x1 solid source. */
	v[7] = v[2]  = v[3]  = 1;
	v[6] = v[10] = v[11] = 0;
}
2894
 
2895
/*
 * Emit a batch of fill rectangles, chunking by however many fit in the
 * vertex buffer per iteration.
 */
fastcall static void
gen6_render_op_fill_boxes(struct sna *sna,
			  const struct sna_fill_op *op,
			  const BoxRec *box,
			  int nbox)
{
	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
	     box->x1, box->y1, box->x2, box->y2, nbox));

	do {
		int nbox_this_time;
		int16_t *v;

		nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox,
						     gen6_emit_fill_state);
		nbox -= nbox_this_time;

		v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
		sna->render.vertex_used += 6 * nbox_this_time;
		assert(sna->render.vertex_used <= sna->render.vertex_size);

		do {
			/* Same layout as gen6_render_op_fill_box(). */
			v[0] = box->x2;
			v[8] = v[4] = box->x1;
			v[5] = v[1] = box->y2;
			v[9] = box->y1;
			v[7] = v[2]  = v[3]  = 1;
			v[6] = v[10] = v[11] = 0;
			box++; v += 12;
		} while (--nbox_this_time);
	} while (nbox);
}
2927
 
2928
static void
2929
gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op)
2930
{
2931
	DBG(("%s()\n", __FUNCTION__));
2932
 
2933
	assert(!sna->render.active);
2934
	if (sna->render.vertex_offset)
2935
		gen4_vertex_flush(sna);
2936
	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
2937
}
2938
 
2939
static bool
gen6_render_fill(struct sna *sna, uint8_t alu,
		 PixmapPtr dst, struct kgem_bo *dst_bo,
		 uint32_t color,
		 struct sna_fill_op *op)
{
	/* Prepare a reusable solid-fill operation on dst_bo.  Prefers the
	 * blitter when it is already engaged or when the 3D pipeline
	 * cannot handle the target; otherwise sets up a render-ring fill
	 * and populates op's blt/box/boxes/done vtable.  Returns true on
	 * success. */
	DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color));

	/* Prefer the BLT if it is already engaged. */
	if (prefer_blt_fill(sna, dst_bo) &&
	    sna_blt_fill(sna, alu,
			 dst_bo, dst->drawable.bitsPerPixel,
			 color,
			 op))
		return true;

	/* Must use the BLT for raster ops the shader path cannot express,
	 * or when the target exceeds the 3D pipeline size limits. */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return sna_blt_fill(sna, alu,
				    dst_bo, dst->drawable.bitsPerPixel,
				    color,
				    op);

	if (alu == GXclear)
		color = 0;

	op->base.dst.pixmap = dst;
	op->base.dst.width  = dst->drawable.width;
	op->base.dst.height = dst->drawable.height;
	op->base.dst.format = sna_format_for_depth(dst->drawable.depth);
	op->base.dst.bo = dst_bo;
	op->base.dst.x = op->base.dst.y = 0;

	/* Solid-colour source bo; released in gen6_render_op_fill_done(). */
	op->base.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	op->base.mask.bo = NULL;

	op->base.need_magic_ca_pass = false;
	op->base.floats_per_vertex = 2;
	op->base.floats_per_rect = 6;

	op->base.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(op->base.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(op->base.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(op->base.u.gen6.flags) == FILL_VERTEX);

	/* Make room in the batch for the destination bo; after a submit
	 * it must fit, hence the assert rather than an error return. */
	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL)) {
		kgem_submit(&sna->kgem);
		assert(kgem_check_bo(&sna->kgem, dst_bo, NULL));
	}

	gen6_emit_fill_state(sna, &op->base);
	gen6_align_vertex(sna, &op->base);

	op->blt  = gen6_render_op_fill_blt;
	op->box  = gen6_render_op_fill_box;
	op->boxes = gen6_render_op_fill_boxes;
	op->done = gen6_render_op_fill_done;
	return true;
}
3000
 
3001
static bool
3002
gen6_render_fill_one_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
3003
			     uint32_t color,
3004
			     int16_t x1, int16_t y1, int16_t x2, int16_t y2,
3005
			     uint8_t alu)
3006
{
3007
	BoxRec box;
3008
 
3009
	box.x1 = x1;
3010
	box.y1 = y1;
3011
	box.x2 = x2;
3012
	box.y2 = y2;
3013
 
3014
	return sna_blt_fill_boxes(sna, alu,
3015
				  bo, dst->drawable.bitsPerPixel,
3016
				  color, &box, 1);
3017
}
3018
 
3019
static bool
gen6_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
		     uint32_t color,
		     int16_t x1, int16_t y1,
		     int16_t x2, int16_t y2,
		     uint8_t alu)
{
	/* Fill the single rectangle (x1,y1)-(x2,y2) on bo with a solid
	 * colour, choosing between the blitter and the render ring.
	 * Returns false only when the bo cannot be fitted into a batch. */
	struct sna_composite_op tmp;
	int16_t *v;

	/* Prefer to use the BLT if already engaged */
	if (prefer_blt_fill(sna, bo) &&
	    gen6_render_fill_one_try_blt(sna, dst, bo, color,
					 x1, y1, x2, y2, alu))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (!(alu == GXcopy || alu == GXclear) ||
	    too_large(dst->drawable.width, dst->drawable.height))
		return gen6_render_fill_one_try_blt(sna, dst, bo, color,
						    x1, y1, x2, y2, alu);

	if (alu == GXclear)
		color = 0;

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	/* Solid-colour source; destroyed again before returning. */
	tmp.src.bo =
		sna_render_get_solid(sna,
				     sna_rgba_for_color(color,
							dst->drawable.depth));
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	/* Unlike gen6_render_fill(), failure to fit the bo after a
	 * submit is reported to the caller instead of asserted. */
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
			kgem_bo_destroy(&sna->kgem, tmp.src.bo);
			return false;
		}
	}

	gen6_emit_fill_state(sna, &tmp);
	gen6_align_vertex(sna, &tmp);

	gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);

	DBG(("	(%d, %d), (%d, %d)\n", x1, y1, x2, y2));

	/* Emit the three rectangle vertices: (x2,y2), (x1,y2), (x1,y1). */
	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = x2;
	v[8] = v[4] = x1;
	v[5] = v[1] = y2;
	v[9] = y1;
	v[7] = v[2]  = v[3]  = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}
3097
 
3098
static bool
3099
gen6_render_clear_try_blt(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
3100
{
3101
	BoxRec box;
3102
 
3103
	box.x1 = 0;
3104
	box.y1 = 0;
3105
	box.x2 = dst->drawable.width;
3106
	box.y2 = dst->drawable.height;
3107
 
3108
	return sna_blt_fill_boxes(sna, GXclear,
3109
				  bo, dst->drawable.bitsPerPixel,
3110
				  0, &box, 1);
3111
}
3112
 
3113
static bool
gen6_render_clear(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo)
{
	/* Clear an entire pixmap to zero, using the blitter when it is
	 * already engaged or the target is too large for RENDER. */
	struct sna_composite_op tmp;
	int16_t *v;

	DBG(("%s: %dx%d\n",
	     __FUNCTION__,
	     dst->drawable.width,
	     dst->drawable.height));

	/* Prefer to use the BLT if, and only if, already engaged */
	if (sna->kgem.ring == KGEM_BLT &&
	    gen6_render_clear_try_blt(sna, dst, bo))
		return true;

	/* Must use the BLT if we can't RENDER... */
	if (too_large(dst->drawable.width, dst->drawable.height))
		return gen6_render_clear_try_blt(sna, dst, bo);

	tmp.dst.pixmap = dst;
	tmp.dst.width  = dst->drawable.width;
	tmp.dst.height = dst->drawable.height;
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
	tmp.dst.bo = bo;
	tmp.dst.x = tmp.dst.y = 0;

	/* All-zero (transparent black) solid source. */
	tmp.src.bo = sna_render_get_solid(sna, 0);
	tmp.mask.bo = NULL;

	tmp.floats_per_vertex = 2;
	tmp.floats_per_rect = 6;
	tmp.need_magic_ca_pass = false;

	tmp.u.gen6.flags = FILL_FLAGS_NOBLEND;
	assert(GEN6_KERNEL(tmp.u.gen6.flags) == GEN6_WM_KERNEL_NOMASK);
	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == FILL_SAMPLER);
	assert(GEN6_VERTEX(tmp.u.gen6.flags) == FILL_VERTEX);

	/* NOTE(review): unlike the fill paths there is no kgem_check_bo()
	 * before emitting state here — presumably the bo is known to fit,
	 * but this should be verified against the upstream driver. */

	gen6_emit_fill_state(sna, &tmp);
	gen6_align_vertex(sna, &tmp);

	gen6_get_rectangles(sna, &tmp, 1, gen6_emit_fill_state);

	/* One full-pixmap rectangle: (w,h), (0,h), (0,0). */
	v = (int16_t *)&sna->render.vertices[sna->render.vertex_used];
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	v[0] = dst->drawable.width;
	v[5] = v[1] = dst->drawable.height;
	v[8] = v[4] = 0;
	v[9] = 0;

	v[7] = v[2]  = v[3]  = 1;
	v[6] = v[10] = v[11] = 0;

	gen4_vertex_flush(sna);
	kgem_bo_destroy(&sna->kgem, tmp.src.bo);

	return true;
}
3176
 
3177
static void gen6_render_flush(struct sna *sna)
3178
{
3179
	gen4_vertex_close(sna);
3180
 
3181
	assert(sna->render.vb_id == 0);
3182
	assert(sna->render.vertex_offset == 0);
3183
}
3184
 
3185
#endif
3186
 
3187
static void
3188
gen6_render_retire(struct kgem *kgem)
3189
{
3190
	struct sna *sna;
3191
 
3192
	if (kgem->ring && (kgem->has_semaphores || !kgem->need_retire))
3193
		kgem->ring = kgem->mode;
3194
 
3195
	sna = container_of(kgem, struct sna, kgem);
3196
	if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
3197
		DBG(("%s: resetting idle vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
3198
		sna->render.vertex_used = 0;
3199
		sna->render.vertex_index = 0;
3200
	}
3201
}
3202
 
3203
 
3204
static void gen6_render_reset(struct sna *sna)
3205
{
3206
	sna->render_state.gen6.needs_invariant = true;
3207
	sna->render_state.gen6.first_state_packet = true;
3208
	sna->render_state.gen6.ve_id = 3 << 2;
3209
	sna->render_state.gen6.last_primitive = -1;
3210
 
3211
	sna->render_state.gen6.num_sf_outputs = 0;
3212
	sna->render_state.gen6.samplers = -1;
3213
	sna->render_state.gen6.blend = -1;
3214
	sna->render_state.gen6.kernel = -1;
3215
	sna->render_state.gen6.drawrect_offset = -1;
3216
	sna->render_state.gen6.drawrect_limit = -1;
3217
	sna->render_state.gen6.surface_table = -1;
3218
 
3219
	sna->render.vertex_offset = 0;
3220
	sna->render.nvertex_reloc = 0;
3221
	sna->render.vb_id = 0;
3222
}
3223
 
3224
static void gen6_render_fini(struct sna *sna)
{
	/* Teardown is a no-op in this port; the general state bo is
	 * intentionally kept alive (destruction disabled below). */
//   kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo);
}
3228
 
3229
static bool is_gt2(struct sna *sna)
3230
{
3231
	return DEVICE_ID(sna->PciInfo) & 0x30;
3232
}
3233
 
3234
static bool is_mobile(struct sna *sna)
3235
{
3236
	return (DEVICE_ID(sna->PciInfo) & 0xf) == 0x6;
3237
}
3238
 
3239
static bool gen6_render_setup(struct sna *sna)
3240
{
3241
	struct gen6_render_state *state = &sna->render_state.gen6;
3242
	struct sna_static_stream general;
3243
	struct gen6_sampler_state *ss;
3244
	int i, j, k, l, m;
3245
 
3246
	state->info = >1_info;
3247
	if (is_gt2(sna))
3248
		state->info = >2_info; /* XXX requires GT_MODE WiZ disabled */
3249
 
3250
    sna_static_stream_init(&general);
3251
 
3252
	/* Zero pad the start. If you see an offset of 0x0 in the batchbuffer
3253
	 * dumps, you know it points to zero.
3254
	 */
3255
    null_create(&general);
3256
    scratch_create(&general);
3257
 
3258
	for (m = 0; m < GEN6_KERNEL_COUNT; m++) {
3259
		if (wm_kernels[m].size) {
3260
			state->wm_kernel[m][1] =
3261
			sna_static_stream_add(&general,
3262
					       wm_kernels[m].data,
3263
					       wm_kernels[m].size,
3264
					       64);
3265
		} else {
3266
			if (USE_8_PIXEL_DISPATCH) {
3267
				state->wm_kernel[m][0] =
3268
					sna_static_stream_compile_wm(sna, &general,
3269
								     wm_kernels[m].data, 8);
3270
			}
3271
 
3272
			if (USE_16_PIXEL_DISPATCH) {
3273
				state->wm_kernel[m][1] =
3274
					sna_static_stream_compile_wm(sna, &general,
3275
								     wm_kernels[m].data, 16);
3276
			}
3277
 
3278
			if (USE_32_PIXEL_DISPATCH) {
3279
				state->wm_kernel[m][2] =
3280
					sna_static_stream_compile_wm(sna, &general,
3281
								     wm_kernels[m].data, 32);
3282
			}
3283
		}
3284
		if ((state->wm_kernel[m][0]|state->wm_kernel[m][1]|state->wm_kernel[m][2]) == 0) {
3285
			state->wm_kernel[m][1] =
3286
				sna_static_stream_compile_wm(sna, &general,
3287
							     wm_kernels[m].data, 16);
3288
		}
3289
	}
3290
 
3291
	ss = sna_static_stream_map(&general,
3292
				   2 * sizeof(*ss) *
3293
				   (2 +
3294
				   FILTER_COUNT * EXTEND_COUNT *
3295
				    FILTER_COUNT * EXTEND_COUNT),
3296
				   32);
3297
	state->wm_state = sna_static_stream_offsetof(&general, ss);
3298
	sampler_copy_init(ss); ss += 2;
3299
	sampler_fill_init(ss); ss += 2;
3300
	for (i = 0; i < FILTER_COUNT; i++) {
3301
		for (j = 0; j < EXTEND_COUNT; j++) {
3302
			for (k = 0; k < FILTER_COUNT; k++) {
3303
				for (l = 0; l < EXTEND_COUNT; l++) {
3304
					sampler_state_init(ss++, i, j);
3305
					sampler_state_init(ss++, k, l);
3306
				}
3307
			}
3308
		}
3309
	}
3310
 
3311
    state->cc_blend = gen6_composite_create_blend_state(&general);
3312
 
3313
    state->general_bo = sna_static_stream_fini(sna, &general);
3314
    return state->general_bo != NULL;
3315
}
3316
 
3317
bool gen6_render_init(struct sna *sna)
{
	/* Backend entry point: build the static hardware state and
	 * install the gen6 callbacks.  Most acceleration hooks are still
	 * disabled in this port (left commented out below); only retire,
	 * copy, reset and fini are live. */
	if (!gen6_render_setup(sna))
		return false;

//    sna->kgem.context_switch = gen6_render_context_switch;
	sna->kgem.retire = gen6_render_retire;

//    sna->render.composite = gen6_render_composite;
//    sna->render.video = gen6_render_video;

//    sna->render.copy_boxes = gen6_render_copy_boxes;
	sna->render.copy = gen6_render_copy;

//    sna->render.fill_boxes = gen6_render_fill_boxes;
//    sna->render.fill = gen6_render_fill;
//    sna->render.fill_one = gen6_render_fill_one;
//    sna->render.clear = gen6_render_clear;

//    sna->render.flush = gen6_render_flush;
	sna->render.reset = gen6_render_reset;
	sna->render.fini = gen6_render_fini;

	/* Hardware limits for the gen6 3D pipeline. */
	sna->render.max_3d_size = GEN6_MAX_SIZE;
	sna->render.max_3d_pitch = 1 << 18;
	return true;
}
3256 Serge 3344
 
3345
 
3346
void gen4_vertex_flush(struct sna *sna)
{
	/* Patch the deferred vertex-count dword of the open 3DPRIMITIVE
	 * command in the batch with the number of vertices emitted since
	 * the primitive was started, then mark the primitive closed. */
	DBG(("%s[%x] = %d\n", __FUNCTION__,
	     4*sna->render.vertex_offset,
	     sna->render.vertex_index - sna->render.vertex_start));

	/* A primitive must be open and non-empty. */
	assert(sna->render.vertex_offset);
	assert(sna->render.vertex_index > sna->render.vertex_start);

	sna->kgem.batch[sna->render.vertex_offset] =
		sna->render.vertex_index - sna->render.vertex_start;
	sna->render.vertex_offset = 0;
}
3359
 
3360
int gen4_vertex_finish(struct sna *sna)
{
	/* Retire the current vertex buffer: patch every pending batch
	 * relocation that referenced it, then allocate and map a fresh
	 * vbo (carrying over any staged vertices).  Returns the number
	 * of floats still free in the new buffer, or 0 when falling back
	 * to the embedded vertex_data staging array. */
	struct kgem_bo *bo;
	unsigned int i;
	unsigned hint, size;

	DBG(("%s: used=%d / %d\n", __FUNCTION__,
	     sna->render.vertex_used, sna->render.vertex_size));
	assert(sna->render.vertex_offset == 0);
	assert(sna->render.vertex_used);

//	sna_vertex_wait__locked(&sna->render);

	/* Note: we only need dword alignment (currently) */

	bo = sna->render.vbo;
	if (bo) {
		/* Resolve each recorded batch slot to the final bo
		 * address now that the vbo contents are complete. */
		for (i = 0; i < sna->render.nvertex_reloc; i++) {
			DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
			     i, sna->render.vertex_reloc[i]));

			sna->kgem.batch[sna->render.vertex_reloc[i]] =
				kgem_add_reloc(&sna->kgem,
					       sna->render.vertex_reloc[i], bo,
					       I915_GEM_DOMAIN_VERTEX << 16,
					       0);
		}

		assert(!sna->render.active);
		sna->render.nvertex_reloc = 0;
		sna->render.vertex_used = 0;
		sna->render.vertex_index = 0;
		sna->render.vbo = NULL;
		sna->render.vb_id = 0;

		kgem_bo_destroy(&sna->kgem, bo);
	}

	hint = CREATE_GTT_MAP;
	if (bo)
		hint |= CREATE_CACHED | CREATE_NO_THROTTLE;

	/* Try successively smaller buffers before the last-resort
	 * unhinted allocation. */
	size = 256*1024;
	assert(!sna->render.active);
	sna->render.vertices = NULL;
	sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
	while (sna->render.vbo == NULL && size > 16*1024) {
		size /= 2;
		sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
	}
	if (sna->render.vbo == NULL)
		sna->render.vbo = kgem_create_linear(&sna->kgem,
						     256*1024, CREATE_GTT_MAP);
	if (sna->render.vbo)
		sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
	if (sna->render.vertices == NULL) {
		/* Allocation or mapping failed: fall back to the
		 * embedded staging array. */
		if (sna->render.vbo) {
			kgem_bo_destroy(&sna->kgem, sna->render.vbo);
			sna->render.vbo = NULL;
		}
		sna->render.vertices = sna->render.vertex_data;
		sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
		return 0;
	}

	if (sna->render.vertex_used) {
		/* Copy over vertices that were staged before the switch. */
		DBG(("%s: copying initial buffer x %d to handle=%d\n",
		     __FUNCTION__,
		     sna->render.vertex_used,
		     sna->render.vbo->handle));
		assert(sizeof(float)*sna->render.vertex_used <=
		       __kgem_bo_size(sna->render.vbo));
		memcpy(sna->render.vertices,
		       sna->render.vertex_data,
		       sizeof(float)*sna->render.vertex_used);
	}

	/* Vertex indices are 16-bit, so clamp the usable element count. */
	size = __kgem_bo_size(sna->render.vbo)/4;
	if (size >= UINT16_MAX)
		size = UINT16_MAX - 1;

	DBG(("%s: create vbo handle=%d, size=%d\n",
	     __FUNCTION__, sna->render.vbo->handle, size));

	sna->render.vertex_size = size;
	return sna->render.vertex_size - sna->render.vertex_used;
}
3447
 
3448
/* Stub for this port: bo mapping is not implemented, so callers such as
 * gen4_vertex_finish() always take the static vertex_data fallback path.
 * Fixes: removed the stray ';' after the function body and silenced the
 * unused-parameter warnings. */
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
{
	(void)kgem;
	(void)bo;
	return NULL;
}
3452
 
3453