Subversion Repositories Kolibri OS

Rev

Rev 4359 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
4304 Serge 1
/*
2
 * Copyright © 2010-2011 Intel Corporation
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
 * SOFTWARE.
22
 *
23
 * Authors:
24
 *    Chris Wilson <chris@chris-wilson.co.uk>
25
 *
26
 */
27
 
28
#ifdef HAVE_CONFIG_H
29
#include "config.h"
30
#endif
31
 
32
#include "sna.h"
33
#include "sna_render.h"
34
#include "sna_render_inline.h"
35
#include "sna_reg.h"
36
//#include "sna_video.h"
37
 
38
#include "gen3_render.h"
39
 
40
/*
 * Compile-time switches for the render paths.
 * NOTE(review): presumably setting a NO_* flag to 1 disables the matching
 * acceleration hook; none of them is referenced in this part of the file,
 * so confirm at the point of use.
 */
#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0

/* NOTE(review): looks like a preference for the BLT engine on fills - confirm. */
#define PREFER_BLT_FILL 1
49
 
50
/*
 * Kind of fragment-shader input selected for a composite channel
 * (stored in channel->u.gen3.type).  The values are packed into the shader
 * cache id by gen3_composite_emit_shader(), so they must stay below 16.
 */
enum {
	SHADER_NONE     = 0,	/* channel unused; a mask of this type means "no mask" */
	SHADER_ZERO     = 1,	/* all components zero */
	SHADER_BLACK    = 2,	/* colour zero, alpha one */
	SHADER_WHITE    = 3,	/* all components one */
	SHADER_CONSTANT = 4,	/* solid colour read from an interpolated register */
	SHADER_LINEAR   = 5,	/* linear gradient: computed coordinate + texture lookup */
	SHADER_RADIAL   = 6,	/* radial gradient: computed coordinate + texture lookup */
	SHADER_TEXTURE  = 7,	/* plain texture lookup */
	SHADER_OPACITY  = 8,	/* mask only: coverage taken from the texcoord X value */
};
61
 
62
/* Largest width/height accepted by too_large() for the 3D pipeline. */
#define MAX_3D_SIZE 2048
/* NOTE(review): presumably the largest surface pitch in bytes - confirm at use. */
#define MAX_3D_PITCH 8192
64
 
65
/*
 * Emission shorthands.  All three expand to a call that names `sna`, so they
 * can only be used where a variable called `sna` is in scope.
 */
#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_BATCH_F(v) batch_emit_float(sna, v)
#define OUT_VERTEX(v) vertex_emit(sna, v)
68
 
69
/*
 * Selects the formula used by gen3_radial_coord() (channel->u.gen3.mode).
 */
enum gen3_radial_mode {
	RADIAL_ONE,	/* single rsq/mad evaluation */
	RADIAL_TWO	/* solves the quadratic A*t^2 + B*t + C = 0 */
};
73
 
74
static const struct blendinfo {
75
	bool dst_alpha;
76
	bool src_alpha;
77
	uint32_t src_blend;
78
	uint32_t dst_blend;
79
} gen3_blend_op[] = {
80
	/* Clear */	{0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO},
81
	/* Src */	{0, 0, BLENDFACT_ONE, BLENDFACT_ZERO},
82
	/* Dst */	{0, 0, BLENDFACT_ZERO, BLENDFACT_ONE},
83
	/* Over */	{0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA},
84
	/* OverReverse */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE},
85
	/* In */	{1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO},
86
	/* InReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA},
87
	/* Out */	{1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO},
88
	/* OutReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA},
89
	/* Atop */	{1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
90
	/* AtopReverse */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA},
91
	/* Xor */	{1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
92
	/* Add */	{0, 0, BLENDFACT_ONE, BLENDFACT_ONE},
93
};
94
 
95
/*
 * S6 value with colour writes enabled and the blend unit configured as
 * src * ONE + dst * ZERO.  S6_CBUF_BLEND_ENABLE is not set, so blending
 * is off.
 */
#define S6_COLOR_WRITE_ONLY \
	(S6_COLOR_WRITE_ENABLE | \
	 (BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) | \
	 (BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT) | \
	 (BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT))
100
 
101
static const struct formatinfo {
102
	unsigned int fmt, xfmt;
103
	uint32_t card_fmt;
104
	bool rb_reversed;
105
} gen3_tex_formats[] = {
106
	{PICT_a8, 0, MAPSURF_8BIT | MT_8BIT_A8, false},
107
	{PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, false},
108
	{PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, false},
109
	{PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, false},
110
	{PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false},
111
	{PICT_a2r10g10b10, PICT_x2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010, false},
112
	{PICT_a2b10g10r10, PICT_x2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010, false},
113
	{PICT_r5g6b5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, false},
114
	{PICT_b5g6r5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, true},
115
	{PICT_a1r5g5b5, PICT_x1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555, false},
116
	{PICT_a1b5g5r5, PICT_x1b5g5r5, MAPSURF_16BIT | MT_16BIT_ARGB1555, true},
117
	{PICT_a4r4g4b4, PICT_x4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444, false},
118
	{PICT_a4b4g4r4, PICT_x4b4g4r4, MAPSURF_16BIT | MT_16BIT_ARGB4444, true},
119
};
120
 
121
/* Alias: forwards its argument unchanged to pixman_fixed_to_double(). */
#define xFixedToDouble(f) pixman_fixed_to_double(f)
122
 
123
static inline bool too_large(int width, int height)
124
{
125
	return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
126
}
127
 
128
static inline uint32_t gen3_buf_tiling(uint32_t tiling)
129
{
130
	uint32_t v = 0;
131
	switch (tiling) {
132
	case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y;
133
	case I915_TILING_X: v |= BUF_3D_TILED_SURFACE;
134
	case I915_TILING_NONE: break;
135
	}
136
	return v;
137
}
138
static uint32_t gen3_get_blend_cntl(int op,
139
				    bool has_component_alpha,
140
				    uint32_t dst_format)
141
{
142
	uint32_t sblend;
143
	uint32_t dblend;
144
 
145
    sblend = BLENDFACT_ONE;
146
    dblend = BLENDFACT_INV_SRC_ALPHA;
147
 
148
#if 0
149
	if (op <= PictOpSrc) /* for clear and src disable blending */
150
		return S6_COLOR_WRITE_ONLY;
151
 
152
	/* If there's no dst alpha channel, adjust the blend op so that we'll
153
	 * treat it as always 1.
154
	 */
155
	if (gen3_blend_op[op].dst_alpha) {
156
		if (PICT_FORMAT_A(dst_format) == 0) {
157
			if (sblend == BLENDFACT_DST_ALPHA)
158
				sblend = BLENDFACT_ONE;
159
			else if (sblend == BLENDFACT_INV_DST_ALPHA)
160
				sblend = BLENDFACT_ZERO;
161
		}
162
 
163
		/* gen3 engine reads 8bit color buffer into green channel
164
		 * in cases like color buffer blending etc., and also writes
165
		 * back green channel.  So with dst_alpha blend we should use
166
		 * color factor. See spec on "8-bit rendering".
167
		 */
168
		if (dst_format == PICT_a8) {
169
			if (sblend == BLENDFACT_DST_ALPHA)
170
				sblend = BLENDFACT_DST_COLR;
171
			else if (sblend == BLENDFACT_INV_DST_ALPHA)
172
				sblend = BLENDFACT_INV_DST_COLR;
173
		}
174
	}
175
 
176
	/* If the source alpha is being used, then we should only be in a case
177
	 * where the source blend factor is 0, and the source blend value is the
178
	 * mask channels multiplied by the source picture's alpha.
179
	 */
180
	if (has_component_alpha && gen3_blend_op[op].src_alpha) {
181
		if (dblend == BLENDFACT_SRC_ALPHA)
182
			dblend = BLENDFACT_SRC_COLR;
183
		else if (dblend == BLENDFACT_INV_SRC_ALPHA)
184
			dblend = BLENDFACT_INV_SRC_COLR;
185
	}
186
#endif
187
 
188
	return (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
189
		BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
190
		sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT |
191
		dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
192
}
193
static bool gen3_dst_rb_reversed(uint32_t format)
194
{
195
	switch (format) {
196
	case PICT_a8r8g8b8:
197
	case PICT_x8r8g8b8:
198
	case PICT_r5g6b5:
199
	case PICT_a1r5g5b5:
200
	case PICT_x1r5g5b5:
201
	case PICT_a2r10g10b10:
202
	case PICT_x2r10g10b10:
203
	case PICT_a8:
204
	case PICT_a4r4g4b4:
205
	case PICT_x4r4g4b4:
206
		return false;
207
	default:
208
		return true;
209
	}
210
}
211
 
212
/* Place a bias value in the horizontal (bit 20) / vertical (bit 16) field. */
#define DSTORG_HORT_BIAS(x)             ((x)<<20)
#define DSTORG_VERT_BIAS(x)             ((x)<<16)
214
 
215
static uint32_t gen3_get_dst_format(uint32_t format)
216
{
217
#define BIAS (DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8))
218
	switch (format) {
219
	default:
220
	case PICT_a8r8g8b8:
221
	case PICT_x8r8g8b8:
222
	case PICT_a8b8g8r8:
223
	case PICT_x8b8g8r8:
224
		return BIAS | COLR_BUF_ARGB8888;
225
	case PICT_r5g6b5:
226
	case PICT_b5g6r5:
227
		return BIAS | COLR_BUF_RGB565;
228
	case PICT_a1r5g5b5:
229
	case PICT_x1r5g5b5:
230
	case PICT_a1b5g5r5:
231
	case PICT_x1b5g5r5:
232
		return BIAS | COLR_BUF_ARGB1555;
233
	case PICT_a2r10g10b10:
234
	case PICT_x2r10g10b10:
235
	case PICT_a2b10g10r10:
236
	case PICT_x2b10g10r10:
237
		return BIAS | COLR_BUF_ARGB2AAA;
238
	case PICT_a8:
239
		return BIAS | COLR_BUF_8BIT;
240
	case PICT_a4r4g4b4:
241
	case PICT_x4r4g4b4:
242
	case PICT_a4b4g4r4:
243
	case PICT_x4b4g4r4:
244
		return BIAS | COLR_BUF_ARGB4444;
245
	}
246
#undef BIAS
247
}
248
 
249
 
250
#if 0
251
/*
 * Check that a picture's repeat mode is one this backend handles.
 *
 * Returns true when repeating is off, or when repeatType is None, Normal,
 * Pad or Reflect; false for any other repeatType.
 */
static bool gen3_check_repeat(PicturePtr p)
{
	if (!p->repeat)
		return true;

	switch (p->repeatType) {
	case RepeatNone:
	case RepeatNormal:
	case RepeatPad:
	case RepeatReflect:
		return true;
	default:
		return false;
	}
}
266
 
267
static uint32_t gen3_filter(uint32_t filter)
268
{
269
	switch (filter) {
270
	default:
271
		assert(0);
272
	case PictFilterNearest:
273
		return (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
274
			FILTER_NEAREST << SS2_MIN_FILTER_SHIFT |
275
			MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
276
	case PictFilterBilinear:
277
		return (FILTER_LINEAR  << SS2_MAG_FILTER_SHIFT |
278
			FILTER_LINEAR  << SS2_MIN_FILTER_SHIFT |
279
			MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
280
	}
281
}
282
 
283
/*
 * Check that a picture's filter is one gen3_filter() can translate.
 *
 * Returns true for Nearest and Bilinear, false otherwise.
 */
static bool gen3_check_filter(PicturePtr p)
{
	switch (p->filter) {
	case PictFilterNearest:
	case PictFilterBilinear:
		return true;
	default:
		return false;
	}
}
293
fastcall static void
294
gen3_emit_composite_primitive_identity_gradient(struct sna *sna,
295
						const struct sna_composite_op *op,
296
						const struct sna_composite_rectangles *r)
297
{
298
	int16_t dst_x, dst_y;
299
	int16_t src_x, src_y;
300
 
301
	dst_x = r->dst.x + op->dst.x;
302
	dst_y = r->dst.y + op->dst.y;
303
	src_x = r->src.x + op->src.offset[0];
304
	src_y = r->src.y + op->src.offset[1];
305
 
306
	gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
307
	OUT_VERTEX(src_x + r->width);
308
	OUT_VERTEX(src_y + r->height);
309
 
310
	gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
311
	OUT_VERTEX(src_x);
312
	OUT_VERTEX(src_y + r->height);
313
 
314
	gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
315
	OUT_VERTEX(src_x);
316
	OUT_VERTEX(src_y);
317
}
318
 
319
fastcall static void
320
gen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op,
321
					    const BoxRec *box, int nbox,
322
					    float *v)
323
{
324
	do {
325
		v[0] = box->x2;
326
		v[1] = box->y2;
327
		v[2] = box->x2 + op->src.offset[0];
328
		v[3] = box->y2 + op->src.offset[1];
329
 
330
		v[4] = box->x1;
331
		v[5] = box->y2;
332
		v[6] = box->x1 + op->src.offset[0];
333
		v[7] = box->y2 + op->src.offset[1];
334
 
335
		v[8] = box->x1;
336
		v[9] = box->y1;
337
		v[10] = box->x1 + op->src.offset[0];
338
		v[11] = box->y1 + op->src.offset[1];
339
 
340
		v += 12;
341
		box++;
342
	} while (--nbox);
343
}
344
fastcall static void
345
gen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op,
346
					  const BoxRec *box, int nbox,
347
					  float *v)
348
{
349
	const PictTransform *transform = op->src.transform;
350
 
351
	do {
352
		v[0] = box->x2;
353
		v[1] = box->y2;
354
		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
355
					    box->y2 + op->src.offset[1],
356
					    transform, op->src.scale,
357
					    &v[2], &v[3]);
358
 
359
		v[4] = box->x1;
360
		v[5] = box->y2;
361
		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
362
					    box->y2 + op->src.offset[1],
363
					    transform, op->src.scale,
364
					    &v[6], &v[7]);
365
 
366
		v[8] = box->x1;
367
		v[9] = box->y1;
368
		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
369
					    box->y1 + op->src.offset[1],
370
					    transform, op->src.scale,
371
					    &v[10], &v[11]);
372
 
373
		box++;
374
		v += 12;
375
	} while (--nbox);
376
}
377
 
378
fastcall static void
379
gen3_emit_composite_primitive_identity_source(struct sna *sna,
380
					      const struct sna_composite_op *op,
381
					      const struct sna_composite_rectangles *r)
382
{
383
	float w = r->width;
384
	float h = r->height;
385
	float *v;
386
 
387
	v = sna->render.vertices + sna->render.vertex_used;
388
	sna->render.vertex_used += 12;
389
 
390
	v[8] = v[4] = r->dst.x + op->dst.x;
391
	v[0] = v[4] + w;
392
 
393
	v[9] = r->dst.y + op->dst.y;
394
	v[5] = v[1] = v[9] + h;
395
 
396
	v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
397
	v[2] = v[6] + w * op->src.scale[0];
398
 
399
	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
400
	v[7] = v[3] = v[11] + h * op->src.scale[1];
401
}
402
 
403
fastcall static void
404
gen3_emit_composite_boxes_identity_source(const struct sna_composite_op *op,
405
					  const BoxRec *box, int nbox,
406
					  float *v)
407
{
408
	do {
409
		v[0] = box->x2 + op->dst.x;
410
		v[8] = v[4] = box->x1 + op->dst.x;
411
		v[5] = v[1] = box->y2 + op->dst.y;
412
		v[9] = box->y1 + op->dst.y;
413
 
414
		v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
415
		v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
416
 
417
		v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
418
		v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
419
 
420
		v += 12;
421
		box++;
422
	} while (--nbox);
423
}
424
 
425
fastcall static void
426
gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna,
427
							const struct sna_composite_op *op,
428
							const struct sna_composite_rectangles *r)
429
{
430
	float w = r->width;
431
	float h = r->height;
432
	float *v;
433
 
434
	v = sna->render.vertices + sna->render.vertex_used;
435
	sna->render.vertex_used += 12;
436
 
437
	v[8] = v[4] = r->dst.x;
438
	v[9] = r->dst.y;
439
 
440
	v[0] = v[4] + w;
441
	v[5] = v[1] = v[9] + h;
442
 
443
	v[10] = v[6] = r->src.x * op->src.scale[0];
444
	v[11] = r->src.y * op->src.scale[1];
445
 
446
	v[2] = v[6] + w * op->src.scale[0];
447
	v[7] = v[3] = v[11] + h * op->src.scale[1];
448
}
449
fastcall static void
450
gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna,
451
						     const struct sna_composite_op *op,
452
						     const struct sna_composite_rectangles *r)
453
{
454
	float w = r->width;
455
	float h = r->height;
456
	float *v;
457
 
458
	v = sna->render.vertices + sna->render.vertex_used;
459
	sna->render.vertex_used += 12;
460
 
461
	v[8] = v[4] = r->dst.x + op->dst.x;
462
	v[0] = v[4] + w;
463
 
464
	v[9] = r->dst.y + op->dst.y;
465
	v[5] = v[1] = v[9] + h;
466
 
467
	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
468
	v[2] = v[6] + w * op->mask.scale[0];
469
 
470
	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
471
	v[7] = v[3] = v[11] + h * op->mask.scale[1];
472
}
473
#endif
474
 
475
fastcall static void
476
gen3_emit_composite_primitive_identity_source_mask(struct sna *sna,
477
						   const struct sna_composite_op *op,
478
						   const struct sna_composite_rectangles *r)
479
{
480
	float dst_x, dst_y;
481
	float src_x, src_y;
482
	float msk_x, msk_y;
483
	float w, h;
484
	float *v;
485
 
486
	dst_x = r->dst.x + op->dst.x;
487
	dst_y = r->dst.y + op->dst.y;
488
	src_x = r->src.x + op->src.offset[0];
489
	src_y = r->src.y + op->src.offset[1];
490
	msk_x = r->mask.x + op->mask.offset[0];
491
	msk_y = r->mask.y + op->mask.offset[1];
492
	w = r->width;
493
	h = r->height;
494
 
495
	v = sna->render.vertices + sna->render.vertex_used;
496
	sna->render.vertex_used += 18;
497
 
498
	v[0] = dst_x + w;
499
	v[1] = dst_y + h;
500
	v[2] = (src_x + w) * op->src.scale[0];
501
	v[3] = (src_y + h) * op->src.scale[1];
502
	v[4] = (msk_x + w) * op->mask.scale[0];
503
	v[5] = (msk_y + h) * op->mask.scale[1];
504
 
505
	v[6] = dst_x;
506
	v[7] = v[1];
507
	v[8] = src_x * op->src.scale[0];
508
	v[9] = v[3];
509
	v[10] = msk_x * op->mask.scale[0];
510
	v[11] =v[5];
511
 
512
	v[12] = v[6];
513
	v[13] = dst_y;
514
	v[14] = v[8];
515
	v[15] = src_y * op->src.scale[1];
516
	v[16] = v[10];
517
	v[17] = msk_y * op->mask.scale[1];
518
}
519
 
520
 
521
 
522
 
523
 
524
 
525
 
526
 
527
 
528
 
529
 
530
 
531
 
532
 
533
 
534
 
535
 
536
 
537
 
538
 
539
 
540
 
541
 
542
 
543
 
544
 
545
 
546
 
547
 
548
 
549
 
550
 
551
 
552
 
553
 
554
 
555
 
556
 
557
 
558
 
559
 
560
 
561
 
562
 
563
 
564
 
565
 
566
 
567
 
568
 
569
 
570
 
571
 
572
 
573
 
574
 
575
 
576
 
577
 
578
 
579
 
580
 
581
 
582
static inline void
583
gen3_2d_perspective(struct sna *sna, int in, int out)
584
{
585
	gen3_fs_rcp(out, 0, gen3_fs_operand(in, W, W, W, W));
586
	gen3_fs_mul(out,
587
		    gen3_fs_operand(in, X, Y, ZERO, ONE),
588
		    gen3_fs_operand_reg(out));
589
}
590
 
591
static inline void
592
gen3_linear_coord(struct sna *sna,
593
		  const struct sna_composite_channel *channel,
594
		  int in, int out)
595
{
596
	int c = channel->u.gen3.constants;
597
 
598
	if (!channel->is_affine) {
599
		gen3_2d_perspective(sna, in, FS_U0);
600
		in = FS_U0;
601
	}
602
 
603
	gen3_fs_mov(out, gen3_fs_operand_zero());
604
	gen3_fs_dp3(out, MASK_X,
605
		    gen3_fs_operand(in, X, Y, ONE, ZERO),
606
		    gen3_fs_operand_reg(c));
607
}
608
 
609
static void
610
gen3_radial_coord(struct sna *sna,
611
		  const struct sna_composite_channel *channel,
612
		  int in, int out)
613
{
614
	int c = channel->u.gen3.constants;
615
 
616
	if (!channel->is_affine) {
617
		gen3_2d_perspective(sna, in, FS_U0);
618
		in = FS_U0;
619
	}
620
 
621
	switch (channel->u.gen3.mode) {
622
	case RADIAL_ONE:
623
		/*
624
		   pdx = (x - c1x) / dr, pdy = (y - c1y) / dr;
625
		   r? = pdx*pdx + pdy*pdy
626
		   t = r?/sqrt(r?) - r1/dr;
627
		   */
628
		gen3_fs_mad(FS_U0, MASK_X | MASK_Y,
629
			    gen3_fs_operand(in, X, Y, ZERO, ZERO),
630
			    gen3_fs_operand(c, Z, Z, ZERO, ZERO),
631
			    gen3_fs_operand(c, NEG_X, NEG_Y, ZERO, ZERO));
632
		gen3_fs_dp2add(FS_U0, MASK_X,
633
			       gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
634
			       gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
635
			       gen3_fs_operand_zero());
636
		gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U0, X, X, X, X));
637
		gen3_fs_mad(out, 0,
638
			    gen3_fs_operand(FS_U0, X, ZERO, ZERO, ZERO),
639
			    gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
640
			    gen3_fs_operand(c, W, ZERO, ZERO, ZERO));
641
		break;
642
 
643
	case RADIAL_TWO:
644
		/*
645
		   pdx = x - c1x, pdy = y - c1y;
646
		   A = dx? + dy? - dr?
647
		   B = -2*(pdx*dx + pdy*dy + r1*dr);
648
		   C = pdx? + pdy? - r1?;
649
		   det = B*B - 4*A*C;
650
		   t = (-B + sqrt (det)) / (2 * A)
651
		   */
652
 
653
		/* u0.x = pdx, u0.y = pdy, u[0].z = r1; */
654
		gen3_fs_add(FS_U0,
655
			    gen3_fs_operand(in, X, Y, ZERO, ZERO),
656
			    gen3_fs_operand(c, X, Y, Z, ZERO));
657
		/* u0.x = pdx, u0.y = pdy, u[0].z = r1, u[0].w = B; */
658
		gen3_fs_dp3(FS_U0, MASK_W,
659
			    gen3_fs_operand(FS_U0, X, Y, ONE, ZERO),
660
			    gen3_fs_operand(c+1, X, Y, Z, ZERO));
661
		/* u1.x = pdx? + pdy? - r1?; [C] */
662
		gen3_fs_dp3(FS_U1, MASK_X,
663
			    gen3_fs_operand(FS_U0, X, Y, Z, ZERO),
664
			    gen3_fs_operand(FS_U0, X, Y, NEG_Z, ZERO));
665
		/* u1.x = C, u1.y = B, u1.z=-4*A; */
666
		gen3_fs_mov_masked(FS_U1, MASK_Y, gen3_fs_operand(FS_U0, W, W, W, W));
667
		gen3_fs_mov_masked(FS_U1, MASK_Z, gen3_fs_operand(c, W, W, W, W));
668
		/* u1.x = B? - 4*A*C */
669
		gen3_fs_dp2add(FS_U1, MASK_X,
670
			       gen3_fs_operand(FS_U1, X, Y, ZERO, ZERO),
671
			       gen3_fs_operand(FS_U1, Z, Y, ZERO, ZERO),
672
			       gen3_fs_operand_zero());
673
		/* out.x = -B + sqrt (B? - 4*A*C), */
674
		gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U1, X, X, X, X));
675
		gen3_fs_mad(out, MASK_X,
676
			    gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
677
			    gen3_fs_operand(FS_U1, X, ZERO, ZERO, ZERO),
678
			    gen3_fs_operand(FS_U0, NEG_W, ZERO, ZERO, ZERO));
679
		/* out.x = (-B + sqrt (B? - 4*A*C)) / (2 * A), */
680
		gen3_fs_mul(out,
681
			    gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
682
			    gen3_fs_operand(c+1, W, ZERO, ZERO, ZERO));
683
		break;
684
	}
685
}
686
 
687
static void
688
gen3_composite_emit_shader(struct sna *sna,
689
			   const struct sna_composite_op *op,
690
			   uint8_t blend)
691
{
692
	bool dst_is_alpha = PIXMAN_FORMAT_RGB(op->dst.format) == 0;
693
	const struct sna_composite_channel *src, *mask;
694
	struct gen3_render_state *state = &sna->render_state.gen3;
695
	uint32_t shader_offset, id;
696
	int src_reg, mask_reg;
697
	int t, length;
698
 
699
	src = &op->src;
700
	mask = &op->mask;
701
	if (mask->u.gen3.type == SHADER_NONE)
702
		mask = NULL;
703
 
704
	id = (src->u.gen3.type |
705
	      src->is_affine << 4 |
706
	      src->alpha_fixup << 5 |
707
	      src->rb_reversed << 6);
708
	if (mask) {
709
		id |= (mask->u.gen3.type << 8 |
710
		       mask->is_affine << 12 |
711
		       gen3_blend_op[blend].src_alpha << 13 |
712
		       op->has_component_alpha << 14 |
713
		       mask->alpha_fixup << 15 |
714
		       mask->rb_reversed << 16);
715
	}
716
	id |= dst_is_alpha << 24;
717
	id |= op->rb_reversed << 25;
718
 
719
	if (id == state->last_shader)
720
		return;
721
 
722
	state->last_shader = id;
723
 
724
	shader_offset = sna->kgem.nbatch++;
725
	t = 0;
726
	switch (src->u.gen3.type) {
727
	case SHADER_NONE:
728
	case SHADER_OPACITY:
729
		assert(0);
730
	case SHADER_ZERO:
731
	case SHADER_BLACK:
732
	case SHADER_WHITE:
733
		break;
734
	case SHADER_CONSTANT:
735
		gen3_fs_dcl(FS_T8);
736
		src_reg = FS_T8;
737
		break;
738
	case SHADER_TEXTURE:
739
	case SHADER_RADIAL:
740
	case SHADER_LINEAR:
741
		gen3_fs_dcl(FS_S0);
742
		gen3_fs_dcl(FS_T0);
743
		t++;
744
		break;
745
	}
746
 
747
	if (mask == NULL) {
748
		switch (src->u.gen3.type) {
749
		case SHADER_ZERO:
750
			gen3_fs_mov(FS_OC, gen3_fs_operand_zero());
751
			goto done;
752
		case SHADER_BLACK:
753
			if (dst_is_alpha)
754
				gen3_fs_mov(FS_OC, gen3_fs_operand_one());
755
			else
756
				gen3_fs_mov(FS_OC, gen3_fs_operand(FS_R0, ZERO, ZERO, ZERO, ONE));
757
			goto done;
758
		case SHADER_WHITE:
759
			gen3_fs_mov(FS_OC, gen3_fs_operand_one());
760
			goto done;
761
		}
762
		if (src->alpha_fixup && dst_is_alpha) {
763
			gen3_fs_mov(FS_OC, gen3_fs_operand_one());
764
			goto done;
765
		}
766
		/* No mask, so load directly to output color */
767
		if (src->u.gen3.type != SHADER_CONSTANT) {
768
			if (dst_is_alpha || src->rb_reversed ^ op->rb_reversed)
769
				src_reg = FS_R0;
770
			else
771
				src_reg = FS_OC;
772
		}
773
		switch (src->u.gen3.type) {
774
		case SHADER_LINEAR:
775
			gen3_linear_coord(sna, src, FS_T0, FS_R0);
776
			gen3_fs_texld(src_reg, FS_S0, FS_R0);
777
			break;
778
 
779
		case SHADER_RADIAL:
780
			gen3_radial_coord(sna, src, FS_T0, FS_R0);
781
			gen3_fs_texld(src_reg, FS_S0, FS_R0);
782
			break;
783
 
784
		case SHADER_TEXTURE:
785
			if (src->is_affine)
786
				gen3_fs_texld(src_reg, FS_S0, FS_T0);
787
			else
788
				gen3_fs_texldp(src_reg, FS_S0, FS_T0);
789
			break;
790
 
791
		case SHADER_NONE:
792
		case SHADER_WHITE:
793
		case SHADER_BLACK:
794
		case SHADER_ZERO:
795
			assert(0);
796
		case SHADER_CONSTANT:
797
			break;
798
		}
799
 
800
		if (src_reg != FS_OC) {
801
			if (src->alpha_fixup)
802
				gen3_fs_mov(FS_OC,
803
					    src->rb_reversed ^ op->rb_reversed ?
804
					    gen3_fs_operand(src_reg, Z, Y, X, ONE) :
805
					    gen3_fs_operand(src_reg, X, Y, Z, ONE));
806
			else if (dst_is_alpha)
807
				gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, W, W, W, W));
808
			else if (src->rb_reversed ^ op->rb_reversed)
809
				gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, Z, Y, X, W));
810
			else
811
				gen3_fs_mov(FS_OC, gen3_fs_operand_reg(src_reg));
812
		} else if (src->alpha_fixup)
813
			gen3_fs_mov_masked(FS_OC, MASK_W, gen3_fs_operand_one());
814
	} else {
815
		int out_reg = FS_OC;
816
		if (op->rb_reversed)
817
			out_reg = FS_U0;
818
 
819
		switch (mask->u.gen3.type) {
820
		case SHADER_CONSTANT:
821
			gen3_fs_dcl(FS_T9);
822
			mask_reg = FS_T9;
823
			break;
824
		case SHADER_TEXTURE:
825
		case SHADER_LINEAR:
826
		case SHADER_RADIAL:
827
			gen3_fs_dcl(FS_S0 + t);
828
			/* fall through */
829
		case SHADER_OPACITY:
830
			gen3_fs_dcl(FS_T0 + t);
831
			break;
832
		case SHADER_ZERO:
833
		case SHADER_BLACK:
834
			assert(0);
835
		case SHADER_NONE:
836
		case SHADER_WHITE:
837
			break;
838
		}
839
 
840
		t = 0;
841
		switch (src->u.gen3.type) {
842
		case SHADER_LINEAR:
843
			gen3_linear_coord(sna, src, FS_T0, FS_R0);
844
			gen3_fs_texld(FS_R0, FS_S0, FS_R0);
845
			src_reg = FS_R0;
846
			t++;
847
			break;
848
 
849
		case SHADER_RADIAL:
850
			gen3_radial_coord(sna, src, FS_T0, FS_R0);
851
			gen3_fs_texld(FS_R0, FS_S0, FS_R0);
852
			src_reg = FS_R0;
853
			t++;
854
			break;
855
 
856
		case SHADER_TEXTURE:
857
			if (src->is_affine)
858
				gen3_fs_texld(FS_R0, FS_S0, FS_T0);
859
			else
860
				gen3_fs_texldp(FS_R0, FS_S0, FS_T0);
861
			src_reg = FS_R0;
862
			t++;
863
			break;
864
 
865
		case SHADER_CONSTANT:
866
		case SHADER_NONE:
867
		case SHADER_ZERO:
868
		case SHADER_BLACK:
869
		case SHADER_WHITE:
870
			break;
871
		}
872
		if (src->alpha_fixup)
873
			gen3_fs_mov_masked(src_reg, MASK_W, gen3_fs_operand_one());
874
		if (src->rb_reversed)
875
			gen3_fs_mov(src_reg, gen3_fs_operand(src_reg, Z, Y, X, W));
876
 
877
		switch (mask->u.gen3.type) {
878
		case SHADER_LINEAR:
879
			gen3_linear_coord(sna, mask, FS_T0 + t, FS_R1);
880
			gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
881
			mask_reg = FS_R1;
882
			break;
883
 
884
		case SHADER_RADIAL:
885
			gen3_radial_coord(sna, mask, FS_T0 + t, FS_R1);
886
			gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
887
			mask_reg = FS_R1;
888
			break;
889
 
890
		case SHADER_TEXTURE:
891
			if (mask->is_affine)
892
				gen3_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t);
893
			else
894
				gen3_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t);
895
			mask_reg = FS_R1;
896
			break;
897
 
898
		case SHADER_OPACITY:
899
			switch (src->u.gen3.type) {
900
			case SHADER_BLACK:
901
			case SHADER_WHITE:
902
				if (dst_is_alpha || src->u.gen3.type == SHADER_WHITE) {
903
					gen3_fs_mov(out_reg,
904
						    gen3_fs_operand(FS_T0 + t, X, X, X, X));
905
				} else {
906
					gen3_fs_mov(out_reg,
907
						    gen3_fs_operand(FS_T0 + t, ZERO, ZERO, ZERO, X));
908
				}
909
				break;
910
			default:
911
				if (dst_is_alpha) {
912
					gen3_fs_mul(out_reg,
913
						    gen3_fs_operand(src_reg, W, W, W, W),
914
						    gen3_fs_operand(FS_T0 + t, X, X, X, X));
915
				} else {
916
					gen3_fs_mul(out_reg,
917
						    gen3_fs_operand(src_reg, X, Y, Z, W),
918
						    gen3_fs_operand(FS_T0 + t, X, X, X, X));
919
				}
920
			}
921
			goto mask_done;
922
 
923
		case SHADER_CONSTANT:
924
		case SHADER_ZERO:
925
		case SHADER_BLACK:
926
		case SHADER_WHITE:
927
		case SHADER_NONE:
928
			break;
929
		}
930
		if (mask->alpha_fixup)
931
			gen3_fs_mov_masked(mask_reg, MASK_W, gen3_fs_operand_one());
932
		if (mask->rb_reversed)
933
			gen3_fs_mov(mask_reg, gen3_fs_operand(mask_reg, Z, Y, X, W));
934
 
935
		if (dst_is_alpha) {
936
			switch (src->u.gen3.type) {
937
			case SHADER_BLACK:
938
			case SHADER_WHITE:
939
				gen3_fs_mov(out_reg,
940
					    gen3_fs_operand(mask_reg, W, W, W, W));
941
				break;
942
			default:
943
				gen3_fs_mul(out_reg,
944
					    gen3_fs_operand(src_reg, W, W, W, W),
945
					    gen3_fs_operand(mask_reg, W, W, W, W));
946
				break;
947
			}
948
		} else {
949
			/* If component alpha is active in the mask and the blend
950
			 * operation uses the source alpha, then we know we don't
951
			 * need the source value (otherwise we would have hit a
952
			 * fallback earlier), so we provide the source alpha (src.A *
953
			 * mask.X) as output color.
954
			 * Conversely, if CA is set and we don't need the source alpha,
955
			 * then we produce the source value (src.X * mask.X) and the
956
			 * source alpha is unused.  Otherwise, we provide the non-CA
957
			 * source value (src.X * mask.A).
958
			 */
959
			if (op->has_component_alpha) {
960
				switch (src->u.gen3.type) {
961
				case SHADER_BLACK:
962
					if (gen3_blend_op[blend].src_alpha)
963
						gen3_fs_mov(out_reg,
964
							    gen3_fs_operand_reg(mask_reg));
965
					else
966
						gen3_fs_mov(out_reg,
967
							    gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
968
					break;
969
				case SHADER_WHITE:
970
					gen3_fs_mov(out_reg,
971
						    gen3_fs_operand_reg(mask_reg));
972
					break;
973
				default:
974
					if (gen3_blend_op[blend].src_alpha)
975
						gen3_fs_mul(out_reg,
976
							    gen3_fs_operand(src_reg, W, W, W, W),
977
							    gen3_fs_operand_reg(mask_reg));
978
					else
979
						gen3_fs_mul(out_reg,
980
							    gen3_fs_operand_reg(src_reg),
981
							    gen3_fs_operand_reg(mask_reg));
982
					break;
983
				}
984
			} else {
985
				switch (src->u.gen3.type) {
986
				case SHADER_WHITE:
987
					gen3_fs_mov(out_reg,
988
						    gen3_fs_operand(mask_reg, W, W, W, W));
989
					break;
990
				case SHADER_BLACK:
991
					gen3_fs_mov(out_reg,
992
						    gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
993
					break;
994
				default:
995
					gen3_fs_mul(out_reg,
996
						    gen3_fs_operand_reg(src_reg),
997
						    gen3_fs_operand(mask_reg, W, W, W, W));
998
					break;
999
				}
1000
			}
1001
		}
1002
mask_done:
1003
		if (op->rb_reversed)
1004
			gen3_fs_mov(FS_OC, gen3_fs_operand(FS_U0, Z, Y, X, W));
1005
	}
1006
 
1007
done:
1008
	length = sna->kgem.nbatch - shader_offset;
1009
	sna->kgem.batch[shader_offset] =
1010
		_3DSTATE_PIXEL_SHADER_PROGRAM | (length - 2);
1011
}
1012
 
1013
static uint32_t gen3_ms_tiling(uint32_t tiling)
1014
{
1015
	uint32_t v = 0;
1016
	switch (tiling) {
1017
	case I915_TILING_Y: v |= MS3_TILE_WALK;
1018
	case I915_TILING_X: v |= MS3_TILED_SURFACE;
1019
	case I915_TILING_NONE: break;
1020
	}
1021
	return v;
1022
}
1023
 
1024
static void gen3_emit_invariant(struct sna *sna)
1025
{
1026
	/* Disable independent alpha blend */
1027
	OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | IAB_MODIFY_ENABLE |
1028
		  IAB_MODIFY_FUNC | BLENDFUNC_ADD << IAB_FUNC_SHIFT |
1029
		  IAB_MODIFY_SRC_FACTOR | BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT |
1030
		  IAB_MODIFY_DST_FACTOR | BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT);
1031
 
1032
	OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
1033
		  CSB_TCB(0, 0) |
1034
		  CSB_TCB(1, 1) |
1035
		  CSB_TCB(2, 2) |
1036
		  CSB_TCB(3, 3) |
1037
		  CSB_TCB(4, 4) |
1038
		  CSB_TCB(5, 5) |
1039
		  CSB_TCB(6, 6) |
1040
		  CSB_TCB(7, 7));
1041
 
1042
	OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 3);
1043
	OUT_BATCH(0); /* Disable texture coordinate wrap-shortest */
1044
	OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) |
1045
		  S4_LINE_WIDTH_ONE |
1046
		  S4_CULLMODE_NONE |
1047
		  S4_VFMT_XY);
1048
	OUT_BATCH(0); /* Disable fog/stencil. *Enable* write mask. */
1049
	OUT_BATCH(S6_COLOR_WRITE_ONLY); /* Disable blending, depth */
1050
 
1051
	OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
1052
	OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);
1053
 
1054
	OUT_BATCH(_3DSTATE_LOAD_INDIRECT);
1055
	OUT_BATCH(0x00000000);
1056
 
1057
	OUT_BATCH(_3DSTATE_STIPPLE);
1058
	OUT_BATCH(0x00000000);
1059
 
1060
	sna->render_state.gen3.need_invariant = false;
1061
}
1062
 
1063
#define MAX_OBJECTS 3 /* worst case: dst + src + mask  */
1064
 
1065
static void
1066
gen3_get_batch(struct sna *sna, const struct sna_composite_op *op)
1067
{
1068
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
1069
 
1070
	if (!kgem_check_batch(&sna->kgem, 200)) {
1071
		DBG(("%s: flushing batch: size %d > %d\n",
1072
		     __FUNCTION__, 200,
1073
		     sna->kgem.surface-sna->kgem.nbatch));
1074
		kgem_submit(&sna->kgem);
1075
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1076
	}
1077
 
1078
	if (!kgem_check_reloc(&sna->kgem, MAX_OBJECTS)) {
1079
		DBG(("%s: flushing batch: reloc %d >= %d\n",
1080
		     __FUNCTION__,
1081
		     sna->kgem.nreloc,
1082
		     (int)KGEM_RELOC_SIZE(&sna->kgem) - MAX_OBJECTS));
1083
		kgem_submit(&sna->kgem);
1084
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1085
	}
1086
 
1087
	if (!kgem_check_exec(&sna->kgem, MAX_OBJECTS)) {
1088
		DBG(("%s: flushing batch: exec %d >= %d\n",
1089
		     __FUNCTION__,
1090
		     sna->kgem.nexec,
1091
		     (int)KGEM_EXEC_SIZE(&sna->kgem) - MAX_OBJECTS - 1));
1092
		kgem_submit(&sna->kgem);
1093
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
1094
	}
1095
 
1096
	if (sna->render_state.gen3.need_invariant)
1097
		gen3_emit_invariant(sna);
1098
#undef MAX_OBJECTS
1099
}
1100
 
1101
static void gen3_emit_target(struct sna *sna,
1102
			     struct kgem_bo *bo,
1103
			     int width,
1104
			     int height,
1105
			     int format)
1106
{
1107
	struct gen3_render_state *state = &sna->render_state.gen3;
1108
 
1109
	assert(!too_large(width, height));
1110
 
1111
	/* BUF_INFO is an implicit flush, so skip if the target is unchanged. */
1112
	assert(bo->unique_id != 0);
1113
	if (bo->unique_id != state->current_dst) {
1114
		uint32_t v;
1115
 
1116
		DBG(("%s: setting new target id=%d, handle=%d\n",
1117
		     __FUNCTION__, bo->unique_id, bo->handle));
1118
 
1119
		OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
1120
		OUT_BATCH(BUF_3D_ID_COLOR_BACK |
1121
			  gen3_buf_tiling(bo->tiling) |
1122
			  bo->pitch);
1123
		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
1124
					 bo,
1125
					 I915_GEM_DOMAIN_RENDER << 16 |
1126
					 I915_GEM_DOMAIN_RENDER,
1127
					 0));
1128
 
1129
		OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
1130
		OUT_BATCH(gen3_get_dst_format(format));
1131
 
1132
		v = DRAW_YMAX(height - 1) | DRAW_XMAX(width - 1);
1133
		if (v != state->last_drawrect_limit) {
1134
			OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
1135
			OUT_BATCH(0); /* XXX dither origin? */
1136
			OUT_BATCH(0);
1137
			OUT_BATCH(v);
1138
			OUT_BATCH(0);
1139
			state->last_drawrect_limit = v;
1140
		}
1141
 
1142
		state->current_dst = bo->unique_id;
1143
	}
1144
	assert(bo->exec);
1145
	kgem_bo_mark_dirty(bo);
1146
}
1147
 
1148
static void gen3_emit_composite_state(struct sna *sna,
1149
				      const struct sna_composite_op *op)
1150
{
1151
	struct gen3_render_state *state = &sna->render_state.gen3;
1152
	uint32_t map[4];
1153
	uint32_t sampler[4];
1154
	struct kgem_bo *bo[2];
1155
	unsigned int tex_count, n;
1156
	uint32_t ss2;
1157
 
1158
	gen3_get_batch(sna, op);
1159
 
1160
	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
1161
		if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
1162
			OUT_BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
1163
		else
1164
			OUT_BATCH(_3DSTATE_MODES_5_CMD |
1165
				  PIPELINE_FLUSH_RENDER_CACHE |
1166
				  PIPELINE_FLUSH_TEXTURE_CACHE);
1167
		kgem_clear_dirty(&sna->kgem);
1168
	}
1169
 
1170
	gen3_emit_target(sna,
1171
			 op->dst.bo,
1172
			 op->dst.width,
1173
			 op->dst.height,
1174
			 op->dst.format);
1175
 
1176
	ss2 = ~0;
1177
	tex_count = 0;
1178
	switch (op->src.u.gen3.type) {
1179
	case SHADER_OPACITY:
1180
	case SHADER_NONE:
1181
		assert(0);
1182
	case SHADER_ZERO:
1183
	case SHADER_BLACK:
1184
	case SHADER_WHITE:
1185
		break;
1186
	case SHADER_CONSTANT:
1187
		if (op->src.u.gen3.mode != state->last_diffuse) {
1188
			OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
1189
			OUT_BATCH(op->src.u.gen3.mode);
1190
			state->last_diffuse = op->src.u.gen3.mode;
1191
		}
1192
		break;
1193
	case SHADER_LINEAR:
1194
	case SHADER_RADIAL:
1195
	case SHADER_TEXTURE:
1196
		ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
1197
		ss2 |= S2_TEXCOORD_FMT(tex_count,
1198
				       op->src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
1199
		assert(op->src.card_format);
1200
		map[tex_count * 2 + 0] =
1201
			op->src.card_format |
1202
			gen3_ms_tiling(op->src.bo->tiling) |
1203
			(op->src.height - 1) << MS3_HEIGHT_SHIFT |
1204
			(op->src.width - 1) << MS3_WIDTH_SHIFT;
1205
		map[tex_count * 2 + 1] =
1206
			(op->src.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;
1207
 
1208
		sampler[tex_count * 2 + 0] = op->src.filter;
1209
		sampler[tex_count * 2 + 1] =
1210
			op->src.repeat |
1211
			tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
1212
		bo[tex_count] = op->src.bo;
1213
		tex_count++;
1214
		break;
1215
	}
1216
	switch (op->mask.u.gen3.type) {
1217
	case SHADER_NONE:
1218
	case SHADER_ZERO:
1219
	case SHADER_BLACK:
1220
	case SHADER_WHITE:
1221
		break;
1222
	case SHADER_CONSTANT:
1223
		if (op->mask.u.gen3.mode != state->last_specular) {
1224
			OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
1225
			OUT_BATCH(op->mask.u.gen3.mode);
1226
			state->last_specular = op->mask.u.gen3.mode;
1227
		}
1228
		break;
1229
	case SHADER_LINEAR:
1230
	case SHADER_RADIAL:
1231
	case SHADER_TEXTURE:
1232
		ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
1233
		ss2 |= S2_TEXCOORD_FMT(tex_count,
1234
				       op->mask.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
1235
		assert(op->mask.card_format);
1236
		map[tex_count * 2 + 0] =
1237
			op->mask.card_format |
1238
			gen3_ms_tiling(op->mask.bo->tiling) |
1239
			(op->mask.height - 1) << MS3_HEIGHT_SHIFT |
1240
			(op->mask.width - 1) << MS3_WIDTH_SHIFT;
1241
		map[tex_count * 2 + 1] =
1242
			(op->mask.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;
1243
 
1244
		sampler[tex_count * 2 + 0] = op->mask.filter;
1245
		sampler[tex_count * 2 + 1] =
1246
			op->mask.repeat |
1247
			tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
1248
		bo[tex_count] = op->mask.bo;
1249
		tex_count++;
1250
		break;
1251
	case SHADER_OPACITY:
1252
		ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
1253
		ss2 |= S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_1D);
1254
		break;
1255
	}
1256
 
1257
	{
1258
		uint32_t blend_offset = sna->kgem.nbatch;
1259
 
1260
		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
1261
		OUT_BATCH(ss2);
1262
		OUT_BATCH(gen3_get_blend_cntl(op->op,
1263
					      op->has_component_alpha,
1264
					      op->dst.format));
1265
 
1266
		if (memcmp(sna->kgem.batch + state->last_blend + 1,
1267
			   sna->kgem.batch + blend_offset + 1,
1268
			   2 * 4) == 0)
1269
			sna->kgem.nbatch = blend_offset;
1270
		else
1271
			state->last_blend = blend_offset;
1272
	}
1273
 
1274
	if (op->u.gen3.num_constants) {
1275
		int count = op->u.gen3.num_constants;
1276
		if (state->last_constants) {
1277
			int last = sna->kgem.batch[state->last_constants+1];
1278
			if (last == (1 << (count >> 2)) - 1 &&
1279
			    memcmp(&sna->kgem.batch[state->last_constants+2],
1280
				   op->u.gen3.constants,
1281
				   count * sizeof(uint32_t)) == 0)
1282
				count = 0;
1283
		}
1284
		if (count) {
1285
			state->last_constants = sna->kgem.nbatch;
1286
			OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | count);
1287
			OUT_BATCH((1 << (count >> 2)) - 1);
1288
 
1289
			memcpy(sna->kgem.batch + sna->kgem.nbatch,
1290
			       op->u.gen3.constants,
1291
			       count * sizeof(uint32_t));
1292
			sna->kgem.nbatch += count;
1293
		}
1294
	}
1295
 
1296
	if (tex_count != 0) {
1297
		uint32_t rewind;
1298
 
1299
		n = 0;
1300
		if (tex_count == state->tex_count) {
1301
			for (; n < tex_count; n++) {
1302
				if (map[2*n+0] != state->tex_map[2*n+0] ||
1303
				    map[2*n+1] != state->tex_map[2*n+1] ||
1304
				    state->tex_handle[n] != bo[n]->handle ||
1305
				    state->tex_delta[n] != bo[n]->delta)
1306
					break;
1307
			}
1308
		}
1309
		if (n < tex_count) {
1310
			OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
1311
			OUT_BATCH((1 << tex_count) - 1);
1312
			for (n = 0; n < tex_count; n++) {
1313
				OUT_BATCH(kgem_add_reloc(&sna->kgem,
1314
							 sna->kgem.nbatch,
1315
							 bo[n],
1316
							 I915_GEM_DOMAIN_SAMPLER<< 16,
1317
							 0));
1318
				OUT_BATCH(map[2*n + 0]);
1319
				OUT_BATCH(map[2*n + 1]);
1320
 
1321
				state->tex_map[2*n+0] = map[2*n+0];
1322
				state->tex_map[2*n+1] = map[2*n+1];
1323
				state->tex_handle[n] = bo[n]->handle;
1324
				state->tex_delta[n] = bo[n]->delta;
1325
			}
1326
			state->tex_count = n;
1327
		}
1328
 
1329
		rewind = sna->kgem.nbatch;
1330
		OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count));
1331
		OUT_BATCH((1 << tex_count) - 1);
1332
		for (n = 0; n < tex_count; n++) {
1333
			OUT_BATCH(sampler[2*n + 0]);
1334
			OUT_BATCH(sampler[2*n + 1]);
1335
			OUT_BATCH(0);
1336
		}
1337
		if (state->last_sampler &&
1338
		    memcmp(&sna->kgem.batch[state->last_sampler+1],
1339
			   &sna->kgem.batch[rewind + 1],
1340
			   (3*tex_count + 1)*sizeof(uint32_t)) == 0)
1341
			sna->kgem.nbatch = rewind;
1342
		else
1343
			state->last_sampler = rewind;
1344
	}
1345
 
1346
	gen3_composite_emit_shader(sna, op, op->op);
1347
}
1348
 
1349
static bool gen3_magic_ca_pass(struct sna *sna,
1350
			       const struct sna_composite_op *op)
1351
{
1352
	if (!op->need_magic_ca_pass)
1353
		return false;
1354
 
1355
	DBG(("%s(%d)\n", __FUNCTION__,
1356
	     sna->render.vertex_index - sna->render.vertex_start));
1357
 
1358
	OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
1359
	OUT_BATCH(gen3_get_blend_cntl(PictOpAdd, true, op->dst.format));
1360
	gen3_composite_emit_shader(sna, op, PictOpAdd);
1361
 
1362
	OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
1363
		  (sna->render.vertex_index - sna->render.vertex_start));
1364
	OUT_BATCH(sna->render.vertex_start);
1365
 
1366
	sna->render_state.gen3.last_blend = 0;
1367
	return true;
1368
}
1369
 
1370
static void gen3_vertex_flush(struct sna *sna)
1371
{
1372
	assert(sna->render.vertex_offset);
1373
 
1374
	DBG(("%s[%x] = %d\n", __FUNCTION__,
1375
	     4*sna->render.vertex_offset,
1376
	     sna->render.vertex_index - sna->render.vertex_start));
1377
 
1378
	sna->kgem.batch[sna->render.vertex_offset] =
1379
		PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
1380
		(sna->render.vertex_index - sna->render.vertex_start);
1381
	sna->kgem.batch[sna->render.vertex_offset + 1] =
1382
		sna->render.vertex_start;
1383
 
1384
	sna->render.vertex_offset = 0;
1385
}
1386
 
1387
static int gen3_vertex_finish(struct sna *sna)
1388
{
1389
	struct kgem_bo *bo;
1390
 
1391
	DBG(("%s: used=%d/%d, vbo active? %d\n",
1392
	     __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
1393
	     sna->render.vbo ? sna->render.vbo->handle : 0));
1394
	assert(sna->render.vertex_offset == 0);
1395
	assert(sna->render.vertex_used);
1396
	assert(sna->render.vertex_used <= sna->render.vertex_size);
1397
 
1398
	sna_vertex_wait__locked(&sna->render);
1399
 
1400
	bo = sna->render.vbo;
1401
	if (bo) {
1402
		DBG(("%s: reloc = %d\n", __FUNCTION__,
1403
		     sna->render.vertex_reloc[0]));
1404
 
1405
		if (sna->render.vertex_reloc[0]) {
1406
			sna->kgem.batch[sna->render.vertex_reloc[0]] =
1407
				kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
1408
					       bo, I915_GEM_DOMAIN_VERTEX << 16, 0);
1409
 
1410
			sna->render.vertex_reloc[0] = 0;
1411
		}
1412
		sna->render.vertex_used = 0;
1413
		sna->render.vertex_index = 0;
1414
		sna->render.vbo = NULL;
1415
 
1416
		kgem_bo_destroy(&sna->kgem, bo);
1417
	}
1418
 
1419
	sna->render.vertices = NULL;
1420
	sna->render.vbo = kgem_create_linear(&sna->kgem,
1421
					     256*1024, CREATE_GTT_MAP);
1422
	if (sna->render.vbo)
1423
		sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
1424
	if (sna->render.vertices == NULL) {
1425
		if (sna->render.vbo)
1426
			kgem_bo_destroy(&sna->kgem, sna->render.vbo);
1427
		sna->render.vbo = NULL;
1428
		return 0;
1429
	}
1430
	assert(sna->render.vbo->snoop == false);
1431
 
1432
	if (sna->render.vertex_used) {
1433
		memcpy(sna->render.vertices,
1434
		       sna->render.vertex_data,
1435
		       sizeof(float)*sna->render.vertex_used);
1436
	}
1437
	sna->render.vertex_size = 64 * 1024 - 1;
1438
	return sna->render.vertex_size - sna->render.vertex_used;
1439
}
1440
 
1441
static void gen3_vertex_close(struct sna *sna)
1442
{
1443
	struct kgem_bo *bo, *free_bo = NULL;
1444
	unsigned int delta = 0;
1445
 
1446
	assert(sna->render.vertex_offset == 0);
1447
	if (sna->render.vertex_reloc[0] == 0)
1448
		return;
1449
 
1450
	DBG(("%s: used=%d/%d, vbo active? %d\n",
1451
	     __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
1452
	     sna->render.vbo ? sna->render.vbo->handle : 0));
1453
 
1454
	bo = sna->render.vbo;
1455
	if (bo) {
1456
		if (sna->render.vertex_size - sna->render.vertex_used < 64) {
1457
			DBG(("%s: discarding full vbo\n", __FUNCTION__));
1458
			sna->render.vbo = NULL;
1459
			sna->render.vertices = sna->render.vertex_data;
1460
			sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
1461
			free_bo = bo;
4501 Serge 1462
		} else if (sna->render.vertices == MAP(bo->map__cpu)) {
4304 Serge 1463
			DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
1464
			sna->render.vertices = kgem_bo_map__gtt(&sna->kgem, bo);
1465
			if (sna->render.vertices == NULL) {
1466
				DBG(("%s: discarding non-mappable vertices\n",__FUNCTION__));
1467
				sna->render.vbo = NULL;
1468
				sna->render.vertices = sna->render.vertex_data;
1469
				sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
1470
				free_bo = bo;
1471
			}
1472
		}
1473
	} else {
1474
		if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
1475
			DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
1476
			     sna->render.vertex_used, sna->kgem.nbatch));
1477
			memcpy(sna->kgem.batch + sna->kgem.nbatch,
1478
			       sna->render.vertex_data,
1479
			       sna->render.vertex_used * 4);
1480
			delta = sna->kgem.nbatch * 4;
1481
			bo = NULL;
1482
			sna->kgem.nbatch += sna->render.vertex_used;
1483
		} else {
1484
			DBG(("%s: new vbo: %d\n", __FUNCTION__,
1485
			     sna->render.vertex_used));
1486
			bo = kgem_create_linear(&sna->kgem,
1487
						4*sna->render.vertex_used,
1488
						CREATE_NO_THROTTLE);
1489
			if (bo) {
1490
				assert(bo->snoop == false);
1491
				kgem_bo_write(&sna->kgem, bo,
1492
					      sna->render.vertex_data,
1493
					      4*sna->render.vertex_used);
1494
			}
1495
			free_bo = bo;
1496
		}
1497
	}
1498
 
1499
	DBG(("%s: reloc = %d\n", __FUNCTION__, sna->render.vertex_reloc[0]));
1500
	sna->kgem.batch[sna->render.vertex_reloc[0]] =
1501
		kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
1502
			       bo, I915_GEM_DOMAIN_VERTEX << 16, delta);
1503
	sna->render.vertex_reloc[0] = 0;
1504
 
1505
	if (sna->render.vbo == NULL) {
1506
		DBG(("%s: resetting vbo\n", __FUNCTION__));
1507
		sna->render.vertex_used = 0;
1508
		sna->render.vertex_index = 0;
1509
		assert(sna->render.vertices == sna->render.vertex_data);
1510
		assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data));
1511
	}
1512
 
1513
	if (free_bo)
1514
		kgem_bo_destroy(&sna->kgem, free_bo);
1515
}
1516
 
1517
static bool gen3_rectangle_begin(struct sna *sna,
1518
				 const struct sna_composite_op *op)
1519
{
1520
	struct gen3_render_state *state = &sna->render_state.gen3;
1521
	int ndwords, i1_cmd = 0, i1_len = 0;
1522
 
1523
	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
1524
		return true;
1525
 
1526
	ndwords = 2;
1527
	if (op->need_magic_ca_pass)
1528
		ndwords += 100;
1529
	if (sna->render.vertex_reloc[0] == 0)
1530
		i1_len++, i1_cmd |= I1_LOAD_S(0), ndwords++;
1531
	if (state->floats_per_vertex != op->floats_per_vertex)
1532
		i1_len++, i1_cmd |= I1_LOAD_S(1), ndwords++;
1533
 
1534
	if (!kgem_check_batch(&sna->kgem, ndwords+1))
1535
		return false;
1536
 
1537
	if (i1_cmd) {
1538
		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | i1_cmd | (i1_len - 1));
1539
		if (sna->render.vertex_reloc[0] == 0)
1540
			sna->render.vertex_reloc[0] = sna->kgem.nbatch++;
1541
		if (state->floats_per_vertex != op->floats_per_vertex) {
1542
			state->floats_per_vertex = op->floats_per_vertex;
1543
			OUT_BATCH(state->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT |
1544
				  state->floats_per_vertex << S1_VERTEX_PITCH_SHIFT);
1545
		}
1546
	}
1547
 
1548
	if (sna->kgem.nbatch == 2 + state->last_vertex_offset &&
1549
	    !op->need_magic_ca_pass) {
1550
		sna->render.vertex_offset = state->last_vertex_offset;
1551
	} else {
1552
		sna->render.vertex_offset = sna->kgem.nbatch;
1553
		OUT_BATCH(MI_NOOP); /* to be filled later */
1554
		OUT_BATCH(MI_NOOP);
1555
		sna->render.vertex_start = sna->render.vertex_index;
1556
		state->last_vertex_offset = sna->render.vertex_offset;
1557
	}
1558
 
1559
	return true;
1560
}
1561
 
1562
static int gen3_get_rectangles__flush(struct sna *sna,
1563
				      const struct sna_composite_op *op)
1564
{
1565
	/* Preventing discarding new vbo after lock contention */
1566
	if (sna_vertex_wait__locked(&sna->render)) {
1567
		int rem = vertex_space(sna);
1568
		if (rem > op->floats_per_rect)
1569
			return rem;
1570
	}
1571
 
1572
	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 105: 5))
1573
		return 0;
1574
	if (!kgem_check_reloc_and_exec(&sna->kgem, 1))
1575
		return 0;
1576
 
1577
	if (sna->render.vertex_offset) {
1578
		gen3_vertex_flush(sna);
1579
		if (gen3_magic_ca_pass(sna, op)) {
1580
			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
1581
			OUT_BATCH(gen3_get_blend_cntl(op->op,
1582
						      op->has_component_alpha,
1583
						      op->dst.format));
1584
			gen3_composite_emit_shader(sna, op, op->op);
1585
		}
1586
	}
1587
 
1588
	return gen3_vertex_finish(sna);
1589
}
1590
 
1591
inline static int gen3_get_rectangles(struct sna *sna,
1592
				      const struct sna_composite_op *op,
1593
				      int want)
1594
{
1595
	int rem;
1596
 
1597
	DBG(("%s: want=%d, rem=%d\n",
1598
	     __FUNCTION__, want*op->floats_per_rect, vertex_space(sna)));
1599
 
1600
	assert(want);
1601
	assert(sna->render.vertex_index * op->floats_per_vertex == sna->render.vertex_used);
1602
 
1603
start:
1604
	rem = vertex_space(sna);
1605
	if (unlikely(op->floats_per_rect > rem)) {
1606
		DBG(("flushing vbo for %s: %d < %d\n",
1607
		     __FUNCTION__, rem, op->floats_per_rect));
1608
		rem = gen3_get_rectangles__flush(sna, op);
1609
		if (unlikely(rem == 0))
1610
			goto flush;
1611
	}
1612
 
1613
	if (unlikely(sna->render.vertex_offset == 0)) {
1614
		if (!gen3_rectangle_begin(sna, op))
1615
			goto flush;
1616
		else
1617
			goto start;
1618
	}
1619
 
1620
	assert(rem <= vertex_space(sna));
1621
	assert(op->floats_per_rect <= rem);
1622
	if (want > 1 && want * op->floats_per_rect > rem)
1623
		want = rem / op->floats_per_rect;
1624
	sna->render.vertex_index += 3*want;
1625
 
1626
	assert(want);
1627
	assert(sna->render.vertex_index * op->floats_per_vertex <= sna->render.vertex_size);
1628
	return want;
1629
 
1630
flush:
1631
	DBG(("%s: flushing batch\n", __FUNCTION__));
1632
	if (sna->render.vertex_offset) {
1633
		gen3_vertex_flush(sna);
1634
		gen3_magic_ca_pass(sna, op);
1635
	}
1636
	sna_vertex_wait__locked(&sna->render);
1637
	_kgem_submit(&sna->kgem);
1638
	gen3_emit_composite_state(sna, op);
1639
	assert(sna->render.vertex_offset == 0);
1640
	assert(sna->render.vertex_reloc[0] == 0);
1641
	goto start;
1642
}
1643
 
1644
fastcall static void
1645
gen3_render_composite_blt(struct sna *sna,
1646
			  const struct sna_composite_op *op,
1647
			  const struct sna_composite_rectangles *r)
1648
{
1649
	DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", __FUNCTION__,
1650
	     r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
1651
	     r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
1652
	     r->dst.x, r->dst.y, op->dst.x, op->dst.y,
1653
	     r->width, r->height));
1654
 
1655
	gen3_get_rectangles(sna, op, 1);
1656
 
1657
	op->prim_emit(sna, op, r);
1658
}
1659
 
4501 Serge 1660
#if 0
1661
fastcall static void
1662
gen3_render_composite_box(struct sna *sna,
1663
			  const struct sna_composite_op *op,
1664
			  const BoxRec *box)
1665
{
1666
	struct sna_composite_rectangles r;
1667
 
1668
	DBG(("%s: src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
1669
	     __FUNCTION__,
1670
	     op->src.offset[0], op->src.offset[1],
1671
	     op->mask.offset[0], op->mask.offset[1],
1672
	     op->dst.x, op->dst.y));
1673
 
1674
	gen3_get_rectangles(sna, op, 1);
1675
 
1676
	r.dst.x  = box->x1;
1677
	r.dst.y  = box->y1;
1678
	r.width  = box->x2 - box->x1;
1679
	r.height = box->y2 - box->y1;
1680
	r.src = r.mask = r.dst;
1681
 
1682
	op->prim_emit(sna, op, &r);
1683
}
1684
 
4304 Serge 1685
static void
4501 Serge 1686
gen3_render_composite_boxes__blt(struct sna *sna,
1687
				 const struct sna_composite_op *op,
1688
				 const BoxRec *box, int nbox)
1689
{
1690
	DBG(("%s: nbox=%d, src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
1691
	     __FUNCTION__, nbox,
1692
	     op->src.offset[0], op->src.offset[1],
1693
	     op->mask.offset[0], op->mask.offset[1],
1694
	     op->dst.x, op->dst.y));
1695
 
1696
	do {
1697
		int nbox_this_time;
1698
 
1699
		nbox_this_time = gen3_get_rectangles(sna, op, nbox);
1700
		nbox -= nbox_this_time;
1701
 
1702
		do {
1703
			struct sna_composite_rectangles r;
1704
 
1705
			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
1706
			     box->x1, box->y1,
1707
			     box->x2 - box->x1,
1708
			     box->y2 - box->y1));
1709
 
1710
			r.dst.x  = box->x1; r.dst.y  = box->y1;
1711
			r.width = box->x2 - box->x1;
1712
			r.height = box->y2 - box->y1;
1713
			r.src = r.mask = r.dst;
1714
 
1715
			op->prim_emit(sna, op, &r);
1716
			box++;
1717
		} while (--nbox_this_time);
1718
	} while (nbox);
1719
}
1720
 
1721
static void
1722
gen3_render_composite_boxes(struct sna *sna,
1723
			    const struct sna_composite_op *op,
1724
			    const BoxRec *box, int nbox)
1725
{
1726
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1727
 
1728
	do {
1729
		int nbox_this_time;
1730
		float *v;
1731
 
1732
		nbox_this_time = gen3_get_rectangles(sna, op, nbox);
1733
		assert(nbox_this_time);
1734
		nbox -= nbox_this_time;
1735
 
1736
		v = sna->render.vertices + sna->render.vertex_used;
1737
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
1738
 
1739
		op->emit_boxes(op, box, nbox_this_time, v);
1740
		box += nbox_this_time;
1741
	} while (nbox);
1742
}
1743
 
1744
static void
1745
gen3_render_composite_boxes__thread(struct sna *sna,
1746
				    const struct sna_composite_op *op,
1747
				    const BoxRec *box, int nbox)
1748
{
1749
	DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
1750
 
1751
	sna_vertex_lock(&sna->render);
1752
	do {
1753
		int nbox_this_time;
1754
		float *v;
1755
 
1756
		nbox_this_time = gen3_get_rectangles(sna, op, nbox);
1757
		assert(nbox_this_time);
1758
		nbox -= nbox_this_time;
1759
 
1760
		v = sna->render.vertices + sna->render.vertex_used;
1761
		sna->render.vertex_used += nbox_this_time * op->floats_per_rect;
1762
 
1763
		sna_vertex_acquire__locked(&sna->render);
1764
		sna_vertex_unlock(&sna->render);
1765
 
1766
		op->emit_boxes(op, box, nbox_this_time, v);
1767
		box += nbox_this_time;
1768
 
1769
		sna_vertex_lock(&sna->render);
1770
		sna_vertex_release__locked(&sna->render);
1771
	} while (nbox);
1772
	sna_vertex_unlock(&sna->render);
1773
}
1774
#endif
1775
 
1776
static void
4304 Serge 1777
gen3_render_composite_done(struct sna *sna,
1778
			   const struct sna_composite_op *op)
1779
{
1780
	DBG(("%s()\n", __FUNCTION__));
1781
 
1782
	if (sna->render.vertex_offset) {
1783
		gen3_vertex_flush(sna);
1784
		gen3_magic_ca_pass(sna, op);
1785
	}
1786
 
1787
}
1788
 
1789
static void
1790
discard_vbo(struct sna *sna)
1791
{
1792
	kgem_bo_destroy(&sna->kgem, sna->render.vbo);
1793
	sna->render.vbo = NULL;
1794
	sna->render.vertices = sna->render.vertex_data;
1795
	sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
1796
	sna->render.vertex_used = 0;
1797
	sna->render.vertex_index = 0;
1798
}
1799
 
1800
static void
1801
gen3_render_reset(struct sna *sna)
1802
{
1803
	struct gen3_render_state *state = &sna->render_state.gen3;
1804
 
1805
	state->need_invariant = true;
1806
	state->current_dst = 0;
1807
	state->tex_count = 0;
1808
	state->last_drawrect_limit = ~0U;
1809
	state->last_target = 0;
1810
	state->last_blend = 0;
1811
	state->last_constants = 0;
1812
	state->last_sampler = 0;
1813
	state->last_shader = 0x7fffffff;
1814
	state->last_diffuse = 0xcc00ffee;
1815
	state->last_specular = 0xcc00ffee;
1816
 
1817
	state->floats_per_vertex = 0;
1818
	state->last_floats_per_vertex = 0;
1819
	state->last_vertex_offset = 0;
1820
 
4501 Serge 1821
	if (sna->render.vbo && !kgem_bo_can_map(&sna->kgem, sna->render.vbo)) {
4304 Serge 1822
		DBG(("%s: discarding vbo as next access will stall: %d\n",
1823
		     __FUNCTION__, sna->render.vbo->presumed_offset));
1824
		discard_vbo(sna);
1825
	}
1826
 
1827
	sna->render.vertex_reloc[0] = 0;
1828
	sna->render.vertex_offset = 0;
1829
}
1830
 
1831
static void
1832
gen3_render_retire(struct kgem *kgem)
1833
{
1834
	struct sna *sna;
1835
 
1836
	sna = container_of(kgem, struct sna, kgem);
1837
	if (sna->render.vertex_reloc[0] == 0 &&
1838
	    sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
1839
		DBG(("%s: resetting idle vbo\n", __FUNCTION__));
1840
		sna->render.vertex_used = 0;
1841
		sna->render.vertex_index = 0;
1842
	}
1843
}
1844
 
1845
static void
1846
gen3_render_expire(struct kgem *kgem)
1847
{
1848
	struct sna *sna;
1849
 
1850
	sna = container_of(kgem, struct sna, kgem);
1851
	if (sna->render.vbo && !sna->render.vertex_used) {
1852
		DBG(("%s: discarding vbo\n", __FUNCTION__));
1853
		discard_vbo(sna);
1854
	}
1855
}
1856
 
1857
static bool gen3_composite_channel_set_format(struct sna_composite_channel *channel,
1858
					      CARD32 format)
1859
{
1860
	unsigned int i;
1861
 
1862
	for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
1863
		if (gen3_tex_formats[i].fmt == format) {
1864
			channel->card_format = gen3_tex_formats[i].card_fmt;
1865
			channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
1866
			return true;
1867
		}
1868
	}
1869
	return false;
1870
}
1871
 
1872
#if 0
4501 Serge 1873
/*
 * Report whether sampling the rectangle (x, y, width, height) stays inside
 * the picture.
 *
 * A picture with repeat set and a repeat type other than RepeatNone always
 * covers the request; a picture without a drawable never does.  Otherwise
 * the rectangle, passed through pixman_transform_bounds() when the picture
 * has a transform, must lie within the drawable's width and height.
 */
static bool source_is_covered(PicturePtr picture,
			      int x, int y,
			      int width, int height)
{
	int x1 = x, y1 = y;
	int x2 = x + width, y2 = y + height;

	if (picture->repeat && picture->repeatType != RepeatNone)
		return true;

	if (picture->pDrawable == NULL)
		return false;

	if (picture->transform) {
		pixman_box16_t sample;

		sample.x1 = x1;
		sample.y1 = y1;
		sample.x2 = x2;
		sample.y2 = y2;

		pixman_transform_bounds(picture->transform, &sample);

		x1 = sample.x1;
		x2 = sample.x2;
		y1 = sample.y1;
		y2 = sample.y2;
	}

	if (x1 < 0 || y1 < 0)
		return false;

	return x2 <= picture->pDrawable->width &&
	       y2 <= picture->pDrawable->height;
}
1911
 
1912
/*
 * Try to select a card format for an alpha-less picture format by matching
 * the xfmt field of gen3_tex_formats.
 *
 * This only succeeds when the picture format has no alpha bits, the area
 * is non-empty and source_is_covered() accepts it.  On success the card
 * format and rb_reversed flag are copied into channel, alpha_fixup is set
 * and true is returned; otherwise false.
 */
static bool gen3_composite_channel_set_xformat(PicturePtr picture,
					       struct sna_composite_channel *channel,
					       int x, int y,
					       int width, int height)
{
	unsigned int idx;

	if (PICT_FORMAT_A(picture->format) != 0 ||
	    width == 0 || height == 0 ||
	    !source_is_covered(picture, x, y, width, height))
		return false;

	for (idx = 0; idx < ARRAY_SIZE(gen3_tex_formats); idx++) {
		if (gen3_tex_formats[idx].xfmt != picture->format)
			continue;

		channel->card_format = gen3_tex_formats[idx].card_fmt;
		channel->rb_reversed = gen3_tex_formats[idx].rb_reversed;
		channel->alpha_fixup = true;
		return true;
	}

	return false;
}
1939
 
4304 Serge 1940
static int
4501 Serge 1941
gen3_init_solid(struct sna_composite_channel *channel, uint32_t color)
1942
{
1943
	channel->u.gen3.mode = color;
1944
	channel->u.gen3.type = SHADER_CONSTANT;
1945
	if (color == 0)
1946
		channel->u.gen3.type = SHADER_ZERO;
1947
	else if (color == 0xff000000)
1948
		channel->u.gen3.type = SHADER_BLACK;
1949
	else if (color == 0xffffffff)
1950
		channel->u.gen3.type = SHADER_WHITE;
1951
 
1952
	channel->bo = NULL;
1953
	channel->is_opaque = (color >> 24) == 0xff;
1954
	channel->is_affine = 1;
1955
	channel->alpha_fixup = 0;
1956
	channel->rb_reversed = 0;
1957
 
1958
	DBG(("%s: color=%08x, is_opaque=%d, type=%d\n",
1959
	     __FUNCTION__, color, channel->is_opaque, channel->u.gen3.type));
1960
 
1961
	/* for consistency */
1962
	channel->repeat = RepeatNormal;
1963
	channel->filter = PictFilterNearest;
1964
	channel->pict_format = PICT_a8r8g8b8;
1965
	channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
1966
 
1967
	return 1;
1968
}
1969
 
1970
static void gen3_composite_channel_convert(struct sna_composite_channel *channel)
1971
{
1972
	if (channel->u.gen3.type == SHADER_TEXTURE)
1973
		channel->repeat = gen3_texture_repeat(channel->repeat);
1974
	else
1975
		channel->repeat = gen3_gradient_repeat(channel->repeat);
1976
 
1977
	channel->filter = gen3_filter(channel->filter);
1978
	if (channel->card_format == 0)
1979
		gen3_composite_channel_set_format(channel, channel->pict_format);
1980
	assert(channel->card_format);
1981
}
1982
 
1983
static bool gen3_gradient_setup(struct sna *sna,
1984
				PicturePtr picture,
1985
				struct sna_composite_channel *channel,
1986
				int16_t ox, int16_t oy)
1987
{
1988
	int16_t dx, dy;
1989
 
1990
	if (picture->repeat == 0) {
1991
		channel->repeat = RepeatNone;
1992
	} else switch (picture->repeatType) {
1993
	case RepeatNone:
1994
	case RepeatNormal:
1995
	case RepeatPad:
1996
	case RepeatReflect:
1997
		channel->repeat = picture->repeatType;
1998
		break;
1999
	default:
2000
		return false;
2001
	}
2002
 
2003
	channel->bo =
2004
		sna_render_get_gradient(sna,
2005
					(PictGradient *)picture->pSourcePict);
2006
	if (channel->bo == NULL)
2007
		return false;
2008
 
2009
	channel->pict_format = PICT_a8r8g8b8;
2010
	channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;
2011
	channel->filter = PictFilterNearest;
2012
	channel->is_affine = sna_transform_is_affine(picture->transform);
2013
	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
2014
		DBG(("%s: integer translation (%d, %d), removing\n",
2015
		     __FUNCTION__, dx, dy));
2016
		ox += dx;
2017
		oy += dy;
2018
		channel->transform = NULL;
2019
	} else
2020
		channel->transform = picture->transform;
2021
	channel->width  = channel->bo->pitch / 4;
2022
	channel->height = 1;
2023
	channel->offset[0] = ox;
2024
	channel->offset[1] = oy;
2025
	channel->scale[0] = channel->scale[1] = 1;
2026
	return true;
2027
}
2028
 
2029
static int
2030
gen3_init_linear(struct sna *sna,
2031
		 PicturePtr picture,
2032
		 struct sna_composite_op *op,
2033
		 struct sna_composite_channel *channel,
2034
		 int ox, int oy)
2035
{
2036
	PictLinearGradient *linear =
2037
		(PictLinearGradient *)picture->pSourcePict;
2038
	float x0, y0, sf;
2039
	float dx, dy, offset;
2040
	int n;
2041
 
2042
	DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n",
2043
	     __FUNCTION__,
2044
	     xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y),
2045
	     xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y)));
2046
 
2047
	if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
2048
		return 0;
2049
 
2050
	dx = xFixedToDouble(linear->p2.x - linear->p1.x);
2051
	dy = xFixedToDouble(linear->p2.y - linear->p1.y);
2052
	sf = dx*dx + dy*dy;
2053
	dx /= sf;
2054
	dy /= sf;
2055
 
2056
	x0 = xFixedToDouble(linear->p1.x);
2057
	y0 = xFixedToDouble(linear->p1.y);
2058
	offset = dx*x0 + dy*y0;
2059
 
2060
	n = op->u.gen3.num_constants;
2061
	channel->u.gen3.constants = FS_C0 + n / 4;
2062
	op->u.gen3.constants[n++] = dx;
2063
	op->u.gen3.constants[n++] = dy;
2064
	op->u.gen3.constants[n++] = -offset;
2065
	op->u.gen3.constants[n++] = 0;
2066
 
2067
	if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
2068
		return -1;
2069
 
2070
	channel->u.gen3.type = SHADER_LINEAR;
2071
	op->u.gen3.num_constants = n;
2072
 
2073
	DBG(("%s: dx=%f, dy=%f, offset=%f, constants=%d\n",
2074
	     __FUNCTION__, dx, dy, -offset, channel->u.gen3.constants - FS_C0));
2075
	return 1;
2076
}
2077
 
2078
static int
2079
gen3_init_radial(struct sna *sna,
2080
		 PicturePtr picture,
2081
		 struct sna_composite_op *op,
2082
		 struct sna_composite_channel *channel,
2083
		 int ox, int oy)
2084
{
2085
	PictRadialGradient *radial = (PictRadialGradient *)picture->pSourcePict;
2086
	double dx, dy, dr, r1;
2087
	int n;
2088
 
2089
	dx = xFixedToDouble(radial->c2.x - radial->c1.x);
2090
	dy = xFixedToDouble(radial->c2.y - radial->c1.y);
2091
	dr = xFixedToDouble(radial->c2.radius - radial->c1.radius);
2092
 
2093
	r1 = xFixedToDouble(radial->c1.radius);
2094
 
2095
	n = op->u.gen3.num_constants;
2096
	channel->u.gen3.constants = FS_C0 + n / 4;
2097
	if (radial->c2.x == radial->c1.x && radial->c2.y == radial->c1.y) {
2098
		if (radial->c2.radius == radial->c1.radius) {
2099
			channel->u.gen3.type = SHADER_ZERO;
2100
			return 1;
2101
		}
2102
 
2103
		op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.x) / dr;
2104
		op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.y) / dr;
2105
		op->u.gen3.constants[n++] = 1. / dr;
2106
		op->u.gen3.constants[n++] = -r1 / dr;
2107
 
2108
		channel->u.gen3.mode = RADIAL_ONE;
2109
	} else {
2110
		op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.x);
2111
		op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.y);
2112
		op->u.gen3.constants[n++] = r1;
2113
		op->u.gen3.constants[n++] = -4 * (dx*dx + dy*dy - dr*dr);
2114
 
2115
		op->u.gen3.constants[n++] = -2 * dx;
2116
		op->u.gen3.constants[n++] = -2 * dy;
2117
		op->u.gen3.constants[n++] = -2 * r1 * dr;
2118
		op->u.gen3.constants[n++] = 1 / (2 * (dx*dx + dy*dy - dr*dr));
2119
 
2120
		channel->u.gen3.mode = RADIAL_TWO;
2121
	}
2122
 
2123
	if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
2124
		return -1;
2125
 
2126
	channel->u.gen3.type = SHADER_RADIAL;
2127
	op->u.gen3.num_constants = n;
2128
	return 1;
2129
}
2130
 
2131
static bool
2132
sna_picture_is_clear(PicturePtr picture,
2133
		     int x, int y, int w, int h,
2134
		     uint32_t *color)
2135
{
2136
	struct sna_pixmap *priv;
2137
 
2138
	if (!picture->pDrawable)
2139
		return false;
2140
 
2141
	priv = sna_pixmap(get_drawable_pixmap(picture->pDrawable));
2142
	if (priv == NULL || !priv->clear)
2143
		return false;
2144
 
2145
	if (!source_is_covered(picture, x, y, w, h))
2146
		return false;
2147
 
2148
	*color = priv->clear_color;
2149
	return true;
2150
}
2151
 
2152
static int
4304 Serge 2153
gen3_composite_picture(struct sna *sna,
2154
		       PicturePtr picture,
2155
		       struct sna_composite_op *op,
2156
		       struct sna_composite_channel *channel,
2157
		       int16_t x, int16_t y,
2158
		       int16_t w, int16_t h,
2159
		       int16_t dst_x, int16_t dst_y,
2160
		       bool precise)
2161
{
2162
	PixmapPtr pixmap;
2163
	uint32_t color;
2164
	int16_t dx, dy;
2165
 
2166
	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
2167
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));
2168
 
2169
	channel->card_format = 0;
2170
 
2171
	if (picture->pDrawable == NULL) {
2172
		SourcePict *source = picture->pSourcePict;
2173
		int ret = -1;
2174
 
2175
		switch (source->type) {
2176
		case SourcePictTypeSolidFill:
2177
			DBG(("%s: solid fill [%08x], format %08x\n",
2178
			     __FUNCTION__,
2179
			     (unsigned)source->solidFill.color,
2180
			     (unsigned)picture->format));
2181
			ret = gen3_init_solid(channel, source->solidFill.color);
2182
			break;
2183
 
2184
		case SourcePictTypeLinear:
2185
			ret = gen3_init_linear(sna, picture, op, channel,
2186
					       x - dst_x, y - dst_y);
2187
			break;
2188
 
2189
		case SourcePictTypeRadial:
2190
			ret = gen3_init_radial(sna, picture, op, channel,
2191
					       x - dst_x, y - dst_y);
2192
			break;
2193
		}
2194
 
2195
		if (ret == -1) {
2196
			if (!precise)
2197
				ret = sna_render_picture_approximate_gradient(sna, picture, channel,
2198
									      x, y, w, h, dst_x, dst_y);
2199
			if (ret == -1)
2200
				ret = sna_render_picture_fixup(sna, picture, channel,
2201
							       x, y, w, h, dst_x, dst_y);
2202
		}
2203
		return ret;
2204
	}
2205
 
2206
	if (picture->alphaMap) {
2207
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
2208
		return sna_render_picture_fixup(sna, picture, channel,
2209
						x, y, w, h, dst_x, dst_y);
2210
	}
2211
 
2212
	if (sna_picture_is_solid(picture, &color)) {
2213
		DBG(("%s: solid drawable [%08x]\n", __FUNCTION__, color));
2214
		return gen3_init_solid(channel, color);
2215
	}
2216
 
2217
	if (sna_picture_is_clear(picture, x, y, w, h, &color)) {
2218
		DBG(("%s: clear drawable [%08x]\n", __FUNCTION__, color));
2219
		return gen3_init_solid(channel, color_convert(color, picture->format, PICT_a8r8g8b8));
2220
	}
2221
 
2222
	if (!gen3_check_repeat(picture))
2223
		return sna_render_picture_fixup(sna, picture, channel,
2224
						x, y, w, h, dst_x, dst_y);
2225
 
2226
	if (!gen3_check_filter(picture))
2227
		return sna_render_picture_fixup(sna, picture, channel,
2228
						x, y, w, h, dst_x, dst_y);
2229
 
2230
	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
2231
	channel->filter = picture->filter;
2232
	channel->pict_format = picture->format;
2233
 
2234
	pixmap = get_drawable_pixmap(picture->pDrawable);
2235
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);
2236
 
2237
	x += dx + picture->pDrawable->x;
2238
	y += dy + picture->pDrawable->y;
2239
 
2240
	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
2241
		DBG(("%s: integer translation (%d, %d), removing\n",
2242
		     __FUNCTION__, dx, dy));
2243
		x += dx;
2244
		y += dy;
2245
		channel->transform = NULL;
2246
		channel->filter = PictFilterNearest;
2247
	} else {
2248
		channel->transform = picture->transform;
2249
		channel->is_affine = sna_transform_is_affine(picture->transform);
2250
	}
2251
 
2252
	if (!gen3_composite_channel_set_format(channel, picture->format) &&
2253
	    !gen3_composite_channel_set_xformat(picture, channel, x, y, w, h))
2254
		return sna_render_picture_convert(sna, picture, channel, pixmap,
2255
						  x, y, w, h, dst_x, dst_y,
2256
						  false);
2257
	assert(channel->card_format);
2258
 
2259
	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
2260
		DBG(("%s: pixmap too large (%dx%d), extracting (%d, %d)x(%d,%d)\n",
2261
		     __FUNCTION__,
2262
		     pixmap->drawable.width, pixmap->drawable.height,
2263
		     x, y, w, h));
2264
		return sna_render_picture_extract(sna, picture, channel,
2265
						  x, y, w, h, dst_x, dst_y);
2266
	}
2267
 
2268
	return sna_render_pixmap_bo(sna, channel, pixmap,
2269
				    x, y, w, h, dst_x, dst_y);
2270
}
4501 Serge 2271
#endif
4304 Serge 2272
 
4501 Serge 2273
static void
2274
gen3_align_vertex(struct sna *sna,
2275
		  const struct sna_composite_op *op)
4304 Serge 2276
{
4501 Serge 2277
	int vertex_index;
4304 Serge 2278
 
4501 Serge 2279
	if (op->floats_per_vertex == sna->render_state.gen3.last_floats_per_vertex)
2280
		return;
4304 Serge 2281
 
4501 Serge 2282
	DBG(("aligning vertex: was %d, now %d floats per vertex\n",
2283
	     sna->render_state.gen3.last_floats_per_vertex,
2284
	     op->floats_per_vertex));
4304 Serge 2285
 
4501 Serge 2286
	assert(op->floats_per_rect == 3*op->floats_per_vertex);
4304 Serge 2287
 
4501 Serge 2288
	vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
2289
	if ((int)sna->render.vertex_size - vertex_index * op->floats_per_vertex < 2*op->floats_per_rect) {
2290
		DBG(("%s: flushing vertex buffer: new index=%d, max=%d\n",
2291
		     __FUNCTION__, vertex_index, sna->render.vertex_size / op->floats_per_vertex));
2292
		if (gen3_vertex_finish(sna) < op->floats_per_vertex)
2293
			kgem_submit(&sna->kgem);
4304 Serge 2294
 
4501 Serge 2295
		vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
4304 Serge 2296
	}
2297
 
4501 Serge 2298
	sna->render.vertex_index = vertex_index;
2299
	sna->render.vertex_used = vertex_index * op->floats_per_vertex;
4304 Serge 2300
}
2301
 
2302
static inline bool is_constant_ps(uint32_t type)
2303
{
2304
	switch (type) {
2305
	case SHADER_NONE: /* be warned! */
2306
	case SHADER_ZERO:
2307
	case SHADER_BLACK:
2308
	case SHADER_WHITE:
2309
	case SHADER_CONSTANT:
2310
		return true;
2311
	default:
2312
		return false;
2313
	}
2314
}
2315
 
2316
#if 0
2317
static bool
2318
gen3_composite_fallback(struct sna *sna,
2319
			uint8_t op,
2320
			PicturePtr src,
2321
			PicturePtr mask,
2322
			PicturePtr dst)
2323
{
2324
	PixmapPtr src_pixmap;
2325
	PixmapPtr mask_pixmap;
2326
	PixmapPtr dst_pixmap;
2327
	bool src_fallback, mask_fallback;
2328
 
2329
	if (!gen3_check_dst_format(dst->format)) {
2330
		DBG(("%s: unknown destination format: %d\n",
2331
		     __FUNCTION__, dst->format));
2332
		return true;
2333
	}
2334
 
2335
	dst_pixmap = get_drawable_pixmap(dst->pDrawable);
2336
 
2337
	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
2338
	src_fallback = source_fallback(src, src_pixmap,
2339
				       dst->polyMode == PolyModePrecise);
2340
 
2341
	if (mask) {
2342
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
2343
		mask_fallback = source_fallback(mask, mask_pixmap,
2344
						dst->polyMode == PolyModePrecise);
2345
	} else {
2346
		mask_pixmap = NULL;
2347
		mask_fallback = false;
2348
	}
2349
 
2350
	/* If we are using the destination as a source and need to
2351
	 * readback in order to upload the source, do it all
2352
	 * on the cpu.
2353
	 */
2354
	if (src_pixmap == dst_pixmap && src_fallback) {
2355
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
2356
		return true;
2357
	}
2358
	if (mask_pixmap == dst_pixmap && mask_fallback) {
2359
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
2360
		return true;
2361
	}
2362
 
2363
	if (mask &&
2364
	    mask->componentAlpha && PICT_FORMAT_RGB(mask->format) &&
2365
	    gen3_blend_op[op].src_alpha &&
2366
	    gen3_blend_op[op].src_blend != BLENDFACT_ZERO &&
2367
	    op != PictOpOver) {
2368
		DBG(("%s: component-alpha mask with op=%d, should fallback\n",
2369
		     __FUNCTION__, op));
2370
		return true;
2371
	}
2372
 
2373
	/* If anything is on the GPU, push everything out to the GPU */
2374
	if (dst_use_gpu(dst_pixmap)) {
2375
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
2376
		     __FUNCTION__));
2377
		return false;
2378
	}
2379
 
2380
	if (src_pixmap && !src_fallback) {
2381
		DBG(("%s: src is already on the GPU, try to use GPU\n",
2382
		     __FUNCTION__));
2383
		return false;
2384
	}
2385
	if (mask_pixmap && !mask_fallback) {
2386
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
2387
		     __FUNCTION__));
2388
		return false;
2389
	}
2390
 
2391
	/* However if the dst is not on the GPU and we need to
2392
	 * render one of the sources using the CPU, we may
2393
	 * as well do the entire operation in place onthe CPU.
2394
	 */
2395
	if (src_fallback) {
2396
		DBG(("%s: dst is on the CPU and src will fallback\n",
2397
		     __FUNCTION__));
2398
		return true;
2399
	}
2400
 
2401
	if (mask && mask_fallback) {
2402
		DBG(("%s: dst is on the CPU and mask will fallback\n",
2403
		     __FUNCTION__));
2404
		return true;
2405
	}
2406
 
2407
	if (too_large(dst_pixmap->drawable.width,
2408
		      dst_pixmap->drawable.height) &&
2409
	    dst_is_cpu(dst_pixmap)) {
2410
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
2411
		return true;
2412
	}
2413
 
2414
	DBG(("%s: dst is not on the GPU and the operation should not fallback: use-cpu? %d\n",
2415
	     __FUNCTION__, dst_use_cpu(dst_pixmap)));
2416
	return dst_use_cpu(dst_pixmap);
2417
}
2418
 
4501 Serge 2419
static int
2420
reuse_source(struct sna *sna,
2421
	     PicturePtr src, struct sna_composite_channel *sc, int src_x, int src_y,
2422
	     PicturePtr mask, struct sna_composite_channel *mc, int msk_x, int msk_y)
2423
{
2424
	if (src_x != msk_x || src_y != msk_y)
2425
		return false;
2426
 
2427
	if (mask == src) {
2428
		*mc = *sc;
2429
		if (mc->bo)
2430
			kgem_bo_reference(mc->bo);
2431
		return true;
2432
	}
2433
 
2434
	if ((src->pDrawable == NULL || mask->pDrawable != src->pDrawable))
2435
		return false;
2436
 
2437
	if (sc->is_solid)
2438
		return false;
2439
 
2440
	DBG(("%s: mask reuses source drawable\n", __FUNCTION__));
2441
 
2442
	if (!sna_transform_equal(src->transform, mask->transform))
2443
		return false;
2444
 
2445
	if (!sna_picture_alphamap_equal(src, mask))
2446
		return false;
2447
 
2448
	if (!gen3_check_repeat(mask))
2449
		return false;
2450
 
2451
	if (!gen3_check_filter(mask))
2452
		return false;
2453
 
2454
	if (!gen3_check_format(mask))
2455
		return false;
2456
 
2457
	DBG(("%s: reusing source channel for mask with a twist\n",
2458
	     __FUNCTION__));
2459
 
2460
	*mc = *sc;
2461
	mc->repeat = gen3_texture_repeat(mask->repeat ? mask->repeatType : RepeatNone);
2462
	mc->filter = gen3_filter(mask->filter);
2463
	mc->pict_format = mask->format;
2464
	gen3_composite_channel_set_format(mc, mask->format);
2465
	assert(mc->card_format);
2466
	if (mc->bo)
2467
		kgem_bo_reference(mc->bo);
2468
	return true;
2469
}
2470
 
4304 Serge 2471
static bool
2472
gen3_render_composite(struct sna *sna,
2473
		      uint8_t op,
2474
		      PicturePtr src,
2475
		      PicturePtr mask,
2476
		      PicturePtr dst,
2477
		      int16_t src_x,  int16_t src_y,
2478
		      int16_t mask_x, int16_t mask_y,
2479
		      int16_t dst_x,  int16_t dst_y,
2480
		      int16_t width,  int16_t height,
2481
		      struct sna_composite_op *tmp)
2482
{
2483
	DBG(("%s()\n", __FUNCTION__));
2484
 
2485
	if (op >= ARRAY_SIZE(gen3_blend_op)) {
2486
		DBG(("%s: fallback due to unhandled blend op: %d\n",
2487
		     __FUNCTION__, op));
2488
		return false;
2489
	}
2490
 
2491
	/* Try to use the BLT engine unless it implies a
2492
	 * 3D -> 2D context switch.
2493
	 */
2494
	if (mask == NULL &&
2495
	    sna_blt_composite(sna,
2496
			      op, src, dst,
2497
			      src_x, src_y,
2498
			      dst_x, dst_y,
2499
			      width, height,
2500
			      tmp, false))
2501
		return true;
2502
 
2503
	if (gen3_composite_fallback(sna, op, src, mask, dst))
4501 Serge 2504
		goto fallback;
4304 Serge 2505
 
2506
	if (need_tiling(sna, width, height))
2507
		return sna_tiling_composite(op, src, mask, dst,
2508
					    src_x,  src_y,
2509
					    mask_x, mask_y,
2510
					    dst_x,  dst_y,
2511
					    width,  height,
2512
					    tmp);
2513
 
2514
	if (!gen3_composite_set_target(sna, tmp, dst,
2515
				       dst_x, dst_y, width, height)) {
2516
		DBG(("%s: unable to set render target\n",
2517
		     __FUNCTION__));
2518
		return false;
2519
	}
2520
 
2521
	tmp->op = op;
2522
	tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format);
2523
	if (too_large(tmp->dst.width, tmp->dst.height) ||
2524
	    !gen3_check_pitch_3d(tmp->dst.bo)) {
2525
		if (!sna_render_composite_redirect(sna, tmp,
2526
						   dst_x, dst_y, width, height,
2527
						   op > PictOpSrc || dst->pCompositeClip->data))
4501 Serge 2528
			goto fallback;
4304 Serge 2529
	}
2530
 
2531
	tmp->u.gen3.num_constants = 0;
2532
	tmp->src.u.gen3.type = SHADER_TEXTURE;
2533
	tmp->src.is_affine = true;
2534
	DBG(("%s: preparing source\n", __FUNCTION__));
2535
	switch (gen3_composite_picture(sna, src, tmp, &tmp->src,
2536
				       src_x, src_y,
2537
				       width, height,
2538
				       dst_x, dst_y,
2539
				       dst->polyMode == PolyModePrecise)) {
2540
	case -1:
2541
		goto cleanup_dst;
2542
	case 0:
2543
		tmp->src.u.gen3.type = SHADER_ZERO;
2544
		break;
2545
	case 1:
2546
		if (mask == NULL && tmp->src.bo &&
2547
		    sna_blt_composite__convert(sna,
2548
					       dst_x, dst_y, width, height,
2549
					       tmp))
2550
			return true;
2551
 
2552
		gen3_composite_channel_convert(&tmp->src);
2553
		break;
2554
	}
2555
	DBG(("%s: source type=%d\n", __FUNCTION__, tmp->src.u.gen3.type));
2556
 
2557
	tmp->mask.u.gen3.type = SHADER_NONE;
2558
	tmp->mask.is_affine = true;
2559
	tmp->need_magic_ca_pass = false;
2560
	tmp->has_component_alpha = false;
2561
	if (mask && tmp->src.u.gen3.type != SHADER_ZERO) {
2562
		if (!reuse_source(sna,
2563
				  src, &tmp->src, src_x, src_y,
2564
				  mask, &tmp->mask, mask_x, mask_y)) {
2565
			tmp->mask.u.gen3.type = SHADER_TEXTURE;
2566
			DBG(("%s: preparing mask\n", __FUNCTION__));
2567
			switch (gen3_composite_picture(sna, mask, tmp, &tmp->mask,
2568
						       mask_x, mask_y,
2569
						       width,  height,
2570
						       dst_x,  dst_y,
2571
						       dst->polyMode == PolyModePrecise)) {
2572
			case -1:
2573
				goto cleanup_src;
2574
			case 0:
2575
				tmp->mask.u.gen3.type = SHADER_ZERO;
2576
				break;
2577
			case 1:
2578
				gen3_composite_channel_convert(&tmp->mask);
2579
				break;
2580
			}
2581
		}
2582
		DBG(("%s: mask type=%d\n", __FUNCTION__, tmp->mask.u.gen3.type));
2583
		if (tmp->mask.u.gen3.type == SHADER_ZERO) {
2584
			if (tmp->src.bo) {
2585
				kgem_bo_destroy(&sna->kgem,
2586
						tmp->src.bo);
2587
				tmp->src.bo = NULL;
2588
			}
2589
			tmp->src.u.gen3.type = SHADER_ZERO;
2590
			tmp->mask.u.gen3.type = SHADER_NONE;
2591
		}
2592
 
2593
		if (tmp->mask.u.gen3.type != SHADER_NONE) {
2594
			if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
2595
				/* Check if it's component alpha that relies on a source alpha
2596
				 * and on the source value.  We can only get one of those
2597
				 * into the single source value that we get to blend with.
2598
				 */
2599
				DBG(("%s: component-alpha mask: %d\n",
2600
				     __FUNCTION__, tmp->mask.u.gen3.type));
2601
				tmp->has_component_alpha = true;
2602
				if (tmp->mask.u.gen3.type == SHADER_WHITE) {
2603
					tmp->mask.u.gen3.type = SHADER_NONE;
2604
					tmp->has_component_alpha = false;
2605
				} else if (gen3_blend_op[op].src_alpha &&
2606
					   gen3_blend_op[op].src_blend != BLENDFACT_ZERO) {
2607
					if (op != PictOpOver)
2608
						goto cleanup_mask;
2609
 
2610
					tmp->need_magic_ca_pass = true;
2611
					tmp->op = PictOpOutReverse;
2612
				}
2613
			} else {
2614
				if (tmp->mask.is_opaque) {
2615
					tmp->mask.u.gen3.type = SHADER_NONE;
2616
				} else if (is_constant_ps(tmp->src.u.gen3.type) &&
2617
					   is_constant_ps(tmp->mask.u.gen3.type)) {
2618
					uint32_t v;
2619
 
2620
					v = multa(tmp->src.u.gen3.mode,
2621
						  tmp->mask.u.gen3.mode,
2622
						  24);
2623
					v |= multa(tmp->src.u.gen3.mode,
2624
						   tmp->mask.u.gen3.mode,
2625
						   16);
2626
					v |= multa(tmp->src.u.gen3.mode,
2627
						   tmp->mask.u.gen3.mode,
2628
						   8);
2629
					v |= multa(tmp->src.u.gen3.mode,
2630
						   tmp->mask.u.gen3.mode,
2631
						   0);
2632
 
2633
					DBG(("%s: combining constant source/mask: %x x %x -> %x\n",
2634
					     __FUNCTION__,
2635
					     tmp->src.u.gen3.mode,
2636
					     tmp->mask.u.gen3.mode,
2637
					     v));
2638
 
2639
					tmp->src.u.gen3.type = SHADER_CONSTANT;
2640
					tmp->src.u.gen3.mode = v;
2641
					tmp->src.is_opaque = false;
2642
 
2643
					tmp->mask.u.gen3.type = SHADER_NONE;
2644
				}
2645
			}
2646
		}
2647
	}
2648
	DBG(("%s: final src/mask type=%d/%d, affine=%d/%d\n", __FUNCTION__,
2649
	     tmp->src.u.gen3.type, tmp->mask.u.gen3.type,
2650
	     tmp->src.is_affine, tmp->mask.is_affine));
2651
 
2652
	tmp->prim_emit = gen3_emit_composite_primitive;
2653
	if (is_constant_ps(tmp->mask.u.gen3.type)) {
2654
		switch (tmp->src.u.gen3.type) {
2655
		case SHADER_NONE:
2656
		case SHADER_ZERO:
2657
		case SHADER_BLACK:
2658
		case SHADER_WHITE:
2659
		case SHADER_CONSTANT:
2660
#if defined(sse2) && !defined(__x86_64__)
2661
			if (sna->cpu_features & SSE2) {
2662
				tmp->prim_emit = gen3_emit_composite_primitive_constant__sse2;
2663
				tmp->emit_boxes = gen3_emit_composite_boxes_constant__sse2;
2664
			} else
2665
#endif
2666
			{
2667
				tmp->prim_emit = gen3_emit_composite_primitive_constant;
2668
				tmp->emit_boxes = gen3_emit_composite_boxes_constant;
2669
			}
2670
 
2671
			break;
2672
		case SHADER_LINEAR:
2673
		case SHADER_RADIAL:
2674
			if (tmp->src.transform == NULL) {
2675
#if defined(sse2) && !defined(__x86_64__)
2676
				if (sna->cpu_features & SSE2) {
2677
					tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient__sse2;
2678
					tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient__sse2;
2679
				} else
2680
#endif
2681
				{
2682
					tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient;
2683
					tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient;
2684
				}
2685
			} else if (tmp->src.is_affine) {
2686
				tmp->src.scale[1] = tmp->src.scale[0] = 1. / tmp->src.transform->matrix[2][2];
2687
#if defined(sse2) && !defined(__x86_64__)
2688
				if (sna->cpu_features & SSE2) {
2689
					tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient__sse2;
2690
					tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient__sse2;
2691
				} else
2692
#endif
2693
				{
2694
					tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient;
2695
					tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient;
2696
				}
2697
			}
2698
			break;
2699
		case SHADER_TEXTURE:
2700
			if (tmp->src.transform == NULL) {
2701
				if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
2702
#if defined(sse2) && !defined(__x86_64__)
2703
					if (sna->cpu_features & SSE2) {
2704
						tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset__sse2;
2705
						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset__sse2;
2706
					} else
2707
#endif
2708
					{
2709
						tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset;
2710
						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset;
2711
					}
2712
				} else {
2713
#if defined(sse2) && !defined(__x86_64__)
2714
					if (sna->cpu_features & SSE2) {
2715
						tmp->prim_emit = gen3_emit_composite_primitive_identity_source__sse2;
2716
						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source__sse2;
2717
					} else
2718
#endif
2719
					{
2720
						tmp->prim_emit = gen3_emit_composite_primitive_identity_source;
2721
						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source;
2722
					}
2723
				}
2724
			} else if (tmp->src.is_affine) {
2725
				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
2726
				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
2727
#if defined(sse2) && !defined(__x86_64__)
2728
				if (sna->cpu_features & SSE2) {
2729
					tmp->prim_emit = gen3_emit_composite_primitive_affine_source__sse2;
2730
					tmp->emit_boxes = gen3_emit_composite_boxes_affine_source__sse2;
2731
				} else
2732
#endif
2733
				{
2734
					tmp->prim_emit = gen3_emit_composite_primitive_affine_source;
2735
					tmp->emit_boxes = gen3_emit_composite_boxes_affine_source;
2736
				}
2737
			}
2738
			break;
2739
		}
2740
	} else if (tmp->mask.u.gen3.type == SHADER_TEXTURE) {
2741
		if (tmp->mask.transform == NULL) {
2742
			if (is_constant_ps(tmp->src.u.gen3.type)) {
2743
				if ((tmp->mask.offset[0]|tmp->mask.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
2744
#if defined(sse2) && !defined(__x86_64__)
2745
					if (sna->cpu_features & SSE2) {
2746
						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2;
2747
					} else
2748
#endif
2749
					{
2750
						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset;
2751
					}
2752
				} else {
2753
#if defined(sse2) && !defined(__x86_64__)
2754
					if (sna->cpu_features & SSE2) {
2755
						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask__sse2;
2756
					} else
2757
#endif
2758
					{
2759
						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask;
2760
					}
2761
				}
2762
			} else if (tmp->src.transform == NULL) {
2763
#if defined(sse2) && !defined(__x86_64__)
2764
				if (sna->cpu_features & SSE2) {
2765
					tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask__sse2;
2766
				} else
2767
#endif
2768
				{
2769
					tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;
2770
				}
2771
			} else if (tmp->src.is_affine) {
2772
				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
2773
				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
2774
#if defined(sse2) && !defined(__x86_64__)
2775
				if (sna->cpu_features & SSE2) {
2776
					tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask__sse2;
2777
				} else
2778
#endif
2779
				{
2780
					tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask;
2781
				}
2782
			}
2783
		}
2784
	}
2785
 
2786
	tmp->floats_per_vertex = 2;
2787
	if (!is_constant_ps(tmp->src.u.gen3.type))
2788
		tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4;
2789
	if (!is_constant_ps(tmp->mask.u.gen3.type))
2790
		tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4;
2791
	DBG(("%s: floats_per_vertex = 2 + %d + %d = %d [specialised emitter? %d]\n", __FUNCTION__,
2792
	     !is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0,
2793
	     !is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 2 : 4 : 0,
2794
	     tmp->floats_per_vertex,
2795
	     tmp->prim_emit != gen3_emit_composite_primitive));
2796
	tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
2797
 
2798
	tmp->blt   = gen3_render_composite_blt;
2799
	tmp->box   = gen3_render_composite_box;
2800
	tmp->boxes = gen3_render_composite_boxes__blt;
2801
	if (tmp->emit_boxes) {
2802
		tmp->boxes = gen3_render_composite_boxes;
2803
		tmp->thread_boxes = gen3_render_composite_boxes__thread;
2804
	}
2805
	tmp->done  = gen3_render_composite_done;
2806
 
2807
	if (!kgem_check_bo(&sna->kgem,
2808
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
2809
			   NULL)) {
2810
		kgem_submit(&sna->kgem);
2811
		if (!kgem_check_bo(&sna->kgem,
2812
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
2813
				   NULL))
2814
			goto cleanup_mask;
2815
	}
2816
 
4501 Serge 2817
	gen3_align_vertex(sna, tmp);
4304 Serge 2818
	gen3_emit_composite_state(sna, tmp);
2819
	return true;
2820
 
2821
cleanup_mask:
2822
	if (tmp->mask.bo)
2823
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
2824
cleanup_src:
2825
	if (tmp->src.bo)
2826
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
2827
cleanup_dst:
2828
	if (tmp->redirect.real_bo)
2829
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
4501 Serge 2830
fallback:
2831
	return (mask == NULL &&
2832
		sna_blt_composite(sna,
2833
				  op, src, dst,
2834
				  src_x, src_y,
2835
				  dst_x, dst_y,
2836
				  width, height,
2837
				  tmp, true));
4304 Serge 2838
}
2839
 
4501 Serge 2840
static void
2841
gen3_emit_composite_spans_vertex(struct sna *sna,
2842
				 const struct sna_composite_spans_op *op,
2843
				 int16_t x, int16_t y,
2844
				 float opacity)
2845
{
2846
	gen3_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y);
2847
	gen3_emit_composite_texcoord(sna, &op->base.src, x, y);
2848
	OUT_VERTEX(opacity);
2849
}
4304 Serge 2850
 
4501 Serge 2851
fastcall static void
2852
gen3_emit_composite_spans_primitive_zero(struct sna *sna,
2853
					 const struct sna_composite_spans_op *op,
2854
					 const BoxRec *box,
2855
					 float opacity)
2856
{
2857
	float *v = sna->render.vertices + sna->render.vertex_used;
2858
	sna->render.vertex_used += 6;
4304 Serge 2859
 
4501 Serge 2860
	v[0] = op->base.dst.x + box->x2;
2861
	v[1] = op->base.dst.y + box->y2;
4304 Serge 2862
 
4501 Serge 2863
	v[2] = op->base.dst.x + box->x1;
2864
	v[3] = v[1];
4304 Serge 2865
 
4501 Serge 2866
	v[4] = v[2];
2867
	v[5] = op->base.dst.x + box->y1;
2868
}
4304 Serge 2869
 
4501 Serge 2870
fastcall static void
2871
gen3_emit_composite_spans_primitive_zero__boxes(const struct sna_composite_spans_op *op,
2872
						const struct sna_opacity_box *b,
2873
						int nbox, float *v)
2874
{
2875
	do {
2876
		v[0] = op->base.dst.x + b->box.x2;
2877
		v[1] = op->base.dst.y + b->box.y2;
4304 Serge 2878
 
4501 Serge 2879
		v[2] = op->base.dst.x + b->box.x1;
2880
		v[3] = v[1];
4304 Serge 2881
 
4501 Serge 2882
		v[4] = v[2];
2883
		v[5] = op->base.dst.x + b->box.y1;
4304 Serge 2884
 
4501 Serge 2885
		v += 6;
2886
		b++;
2887
	} while (--nbox);
2888
}
4304 Serge 2889
 
4501 Serge 2890
fastcall static void
2891
gen3_emit_composite_spans_primitive_zero_no_offset(struct sna *sna,
2892
						   const struct sna_composite_spans_op *op,
2893
						   const BoxRec *box,
2894
						   float opacity)
2895
{
2896
	float *v = sna->render.vertices + sna->render.vertex_used;
2897
	sna->render.vertex_used += 6;
4304 Serge 2898
 
4501 Serge 2899
	v[0] = box->x2;
2900
	v[3] = v[1] = box->y2;
2901
	v[4] = v[2] = box->x1;
2902
	v[5] = box->y1;
2903
}
4304 Serge 2904
 
4501 Serge 2905
fastcall static void
2906
gen3_emit_composite_spans_primitive_zero_no_offset__boxes(const struct sna_composite_spans_op *op,
2907
							  const struct sna_opacity_box *b,
2908
							  int nbox, float *v)
2909
{
2910
	do {
2911
		v[0] = b->box.x2;
2912
		v[3] = v[1] = b->box.y2;
2913
		v[4] = v[2] = b->box.x1;
2914
		v[5] = b->box.y1;
4304 Serge 2915
 
4501 Serge 2916
		b++;
2917
		v += 6;
2918
	} while (--nbox);
2919
}
4304 Serge 2920
 
4501 Serge 2921
fastcall static void
2922
gen3_emit_composite_spans_primitive_constant(struct sna *sna,
2923
					     const struct sna_composite_spans_op *op,
2924
					     const BoxRec *box,
2925
					     float opacity)
2926
{
2927
	float *v = sna->render.vertices + sna->render.vertex_used;
2928
	sna->render.vertex_used += 9;
4304 Serge 2929
 
4501 Serge 2930
	v[0] = op->base.dst.x + box->x2;
2931
	v[6] = v[3] = op->base.dst.x + box->x1;
2932
	v[4] = v[1] = op->base.dst.y + box->y2;
2933
	v[7] = op->base.dst.y + box->y1;
2934
	v[8] = v[5] = v[2] = opacity;
2935
}
4304 Serge 2936
 
4501 Serge 2937
fastcall static void
2938
gen3_emit_composite_spans_primitive_constant__boxes(const struct sna_composite_spans_op *op,
2939
						    const struct sna_opacity_box *b,
2940
						    int nbox,
2941
						    float *v)
2942
{
2943
	do {
2944
		v[0] = op->base.dst.x + b->box.x2;
2945
		v[6] = v[3] = op->base.dst.x + b->box.x1;
2946
		v[4] = v[1] = op->base.dst.y + b->box.y2;
2947
		v[7] = op->base.dst.y + b->box.y1;
2948
		v[8] = v[5] = v[2] = b->alpha;
4304 Serge 2949
 
4501 Serge 2950
		v += 9;
2951
		b++;
2952
	} while (--nbox);
2953
}
4304 Serge 2954
 
4501 Serge 2955
fastcall static void
2956
gen3_emit_composite_spans_primitive_constant_no_offset(struct sna *sna,
2957
						       const struct sna_composite_spans_op *op,
2958
						       const BoxRec *box,
2959
						       float opacity)
2960
{
2961
	float *v = sna->render.vertices + sna->render.vertex_used;
2962
	sna->render.vertex_used += 9;
4304 Serge 2963
 
4501 Serge 2964
	v[0] = box->x2;
2965
	v[6] = v[3] = box->x1;
2966
	v[4] = v[1] = box->y2;
2967
	v[7] = box->y1;
2968
	v[8] = v[5] = v[2] = opacity;
2969
}
4304 Serge 2970
 
4501 Serge 2971
fastcall static void
2972
gen3_emit_composite_spans_primitive_constant_no_offset__boxes(const struct sna_composite_spans_op *op,
2973
							      const struct sna_opacity_box *b,
2974
							      int nbox, float *v)
2975
{
2976
	do {
2977
		v[0] = b->box.x2;
2978
		v[6] = v[3] = b->box.x1;
2979
		v[4] = v[1] = b->box.y2;
2980
		v[7] = b->box.y1;
2981
		v[8] = v[5] = v[2] = b->alpha;
4304 Serge 2982
 
4501 Serge 2983
		v += 9;
2984
		b++;
2985
	} while (--nbox);
2986
}
4304 Serge 2987
 
4501 Serge 2988
fastcall static void
2989
gen3_emit_composite_spans_primitive_identity_source(struct sna *sna,
2990
						    const struct sna_composite_spans_op *op,
2991
						    const BoxRec *box,
2992
						    float opacity)
2993
{
2994
	float *v = sna->render.vertices + sna->render.vertex_used;
2995
	sna->render.vertex_used += 15;
4304 Serge 2996
 
4501 Serge 2997
	v[0] = op->base.dst.x + box->x2;
2998
	v[1] = op->base.dst.y + box->y2;
2999
	v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
3000
	v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];
3001
	v[4] = opacity;
4304 Serge 3002
 
4501 Serge 3003
	v[5] = op->base.dst.x + box->x1;
3004
	v[6] = v[1];
3005
	v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
3006
	v[8] = v[3];
3007
	v[9] = opacity;
4304 Serge 3008
 
4501 Serge 3009
	v[10] = v[5];
3010
	v[11] = op->base.dst.y + box->y1;
3011
	v[12] = v[7];
3012
	v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
3013
	v[14] = opacity;
3014
}
4304 Serge 3015
 
4501 Serge 3016
fastcall static void
3017
gen3_emit_composite_spans_primitive_identity_source__boxes(const struct sna_composite_spans_op *op,
3018
							   const struct sna_opacity_box *b,
3019
							   int nbox,
3020
							   float *v)
3021
{
3022
	do {
3023
		v[0] = op->base.dst.x + b->box.x2;
3024
		v[1] = op->base.dst.y + b->box.y2;
3025
		v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0];
3026
		v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1];
3027
		v[4] = b->alpha;
4304 Serge 3028
 
4501 Serge 3029
		v[5] = op->base.dst.x + b->box.x1;
3030
		v[6] = v[1];
3031
		v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0];
3032
		v[8] = v[3];
3033
		v[9] = b->alpha;
4304 Serge 3034
 
4501 Serge 3035
		v[10] = v[5];
3036
		v[11] = op->base.dst.y + b->box.y1;
3037
		v[12] = v[7];
3038
		v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1];
3039
		v[14] = b->alpha;
4304 Serge 3040
 
4501 Serge 3041
		v += 15;
3042
		b++;
3043
	} while (--nbox);
3044
}
4304 Serge 3045
 
4501 Serge 3046
fastcall static void
3047
gen3_emit_composite_spans_primitive_affine_source(struct sna *sna,
3048
						  const struct sna_composite_spans_op *op,
3049
						  const BoxRec *box,
3050
						  float opacity)
3051
{
3052
	PictTransform *transform = op->base.src.transform;
3053
	float *v;
4304 Serge 3054
 
4501 Serge 3055
	v = sna->render.vertices + sna->render.vertex_used;
3056
	sna->render.vertex_used += 15;
4304 Serge 3057
 
4501 Serge 3058
	v[0]  = op->base.dst.x + box->x2;
3059
	v[6]  = v[1] = op->base.dst.y + box->y2;
3060
	v[10] = v[5] = op->base.dst.x + box->x1;
3061
	v[11] = op->base.dst.y + box->y1;
3062
	v[14] = v[9] = v[4]  = opacity;
4304 Serge 3063
 
4501 Serge 3064
	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
3065
				    (int)op->base.src.offset[1] + box->y2,
3066
				    transform, op->base.src.scale,
3067
				    &v[2], &v[3]);
4304 Serge 3068
 
4501 Serge 3069
	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
3070
				    (int)op->base.src.offset[1] + box->y2,
3071
				    transform, op->base.src.scale,
3072
				    &v[7], &v[8]);
4304 Serge 3073
 
4501 Serge 3074
	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
3075
				    (int)op->base.src.offset[1] + box->y1,
3076
				    transform, op->base.src.scale,
3077
				    &v[12], &v[13]);
3078
}
4304 Serge 3079
 
4501 Serge 3080
fastcall static void
3081
gen3_emit_composite_spans_primitive_affine_source__boxes(const struct sna_composite_spans_op *op,
3082
							 const struct sna_opacity_box *b,
3083
							 int nbox,
3084
							 float *v)
3085
{
3086
	PictTransform *transform = op->base.src.transform;
4304 Serge 3087
 
4501 Serge 3088
	do {
3089
		v[0]  = op->base.dst.x + b->box.x2;
3090
		v[6]  = v[1] = op->base.dst.y + b->box.y2;
3091
		v[10] = v[5] = op->base.dst.x + b->box.x1;
3092
		v[11] = op->base.dst.y + b->box.y1;
3093
		v[14] = v[9] = v[4]  = b->alpha;
4304 Serge 3094
 
4501 Serge 3095
		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2,
3096
					    (int)op->base.src.offset[1] + b->box.y2,
3097
					    transform, op->base.src.scale,
3098
					    &v[2], &v[3]);
4304 Serge 3099
 
4501 Serge 3100
		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
3101
					    (int)op->base.src.offset[1] + b->box.y2,
3102
					    transform, op->base.src.scale,
3103
					    &v[7], &v[8]);
4304 Serge 3104
 
4501 Serge 3105
		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
3106
					    (int)op->base.src.offset[1] + b->box.y1,
3107
					    transform, op->base.src.scale,
3108
					    &v[12], &v[13]);
3109
		v += 15;
3110
		b++;
3111
	} while (--nbox);
3112
}
4304 Serge 3113
 
4501 Serge 3114
fastcall static void
3115
gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna,
3116
						      const struct sna_composite_spans_op *op,
3117
						      const BoxRec *box,
3118
						      float opacity)
3119
{
3120
	float *v = sna->render.vertices + sna->render.vertex_used;
3121
	sna->render.vertex_used += 15;
4304 Serge 3122
 
4501 Serge 3123
	v[0] = op->base.dst.x + box->x2;
3124
	v[1] = op->base.dst.y + box->y2;
3125
	v[2] = op->base.src.offset[0] + box->x2;
3126
	v[3] = op->base.src.offset[1] + box->y2;
3127
	v[4] = opacity;
4304 Serge 3128
 
4501 Serge 3129
	v[5] = op->base.dst.x + box->x1;
3130
	v[6] = v[1];
3131
	v[7] = op->base.src.offset[0] + box->x1;
3132
	v[8] = v[3];
3133
	v[9] = opacity;
4304 Serge 3134
 
4501 Serge 3135
	v[10] = v[5];
3136
	v[11] = op->base.dst.y + box->y1;
3137
	v[12] = v[7];
3138
	v[13] = op->base.src.offset[1] + box->y1;
3139
	v[14] = opacity;
3140
}
4304 Serge 3141
 
4501 Serge 3142
fastcall static void
3143
gen3_emit_composite_spans_primitive_identity_gradient__boxes(const struct sna_composite_spans_op *op,
3144
							     const struct sna_opacity_box *b,
3145
							     int nbox,
3146
							     float *v)
3147
{
3148
	do {
3149
		v[0] = op->base.dst.x + b->box.x2;
3150
		v[1] = op->base.dst.y + b->box.y2;
3151
		v[2] = op->base.src.offset[0] + b->box.x2;
3152
		v[3] = op->base.src.offset[1] + b->box.y2;
3153
		v[4] = b->alpha;
4304 Serge 3154
 
4501 Serge 3155
		v[5] = op->base.dst.x + b->box.x1;
3156
		v[6] = v[1];
3157
		v[7] = op->base.src.offset[0] + b->box.x1;
3158
		v[8] = v[3];
3159
		v[9] = b->alpha;
4304 Serge 3160
 
4501 Serge 3161
		v[10] = v[5];
3162
		v[11] = op->base.dst.y + b->box.y1;
3163
		v[12] = v[7];
3164
		v[13] = op->base.src.offset[1] + b->box.y1;
3165
		v[14] = b->alpha;
4304 Serge 3166
 
4501 Serge 3167
		v += 15;
3168
		b++;
3169
	} while (--nbox);
3170
}
4304 Serge 3171
 
4501 Serge 3172
#if defined(sse2) && !defined(__x86_64__)
3173
sse2 fastcall static void
3174
gen3_emit_composite_spans_primitive_constant__sse2(struct sna *sna,
3175
						   const struct sna_composite_spans_op *op,
3176
						   const BoxRec *box,
3177
						   float opacity)
3178
{
3179
	float *v = sna->render.vertices + sna->render.vertex_used;
3180
	sna->render.vertex_used += 9;
4304 Serge 3181
 
4501 Serge 3182
	v[0] = op->base.dst.x + box->x2;
3183
	v[6] = v[3] = op->base.dst.x + box->x1;
3184
	v[4] = v[1] = op->base.dst.y + box->y2;
3185
	v[7] = op->base.dst.y + box->y1;
3186
	v[8] = v[5] = v[2] = opacity;
3187
}
4304 Serge 3188
 
4501 Serge 3189
sse2 fastcall static void
3190
gen3_emit_composite_spans_primitive_constant__sse2__boxes(const struct sna_composite_spans_op *op,
3191
							  const struct sna_opacity_box *b,
3192
							  int nbox,
3193
							  float *v)
3194
{
3195
	do {
3196
		v[0] = op->base.dst.x + b->box.x2;
3197
		v[6] = v[3] = op->base.dst.x + b->box.x1;
3198
		v[4] = v[1] = op->base.dst.y + b->box.y2;
3199
		v[7] = op->base.dst.y + b->box.y1;
3200
		v[8] = v[5] = v[2] = b->alpha;
4304 Serge 3201
 
4501 Serge 3202
		v += 9;
3203
		b++;
3204
	} while (--nbox);
3205
}
4304 Serge 3206
 
4501 Serge 3207
sse2 fastcall static void
3208
gen3_render_composite_spans_constant_box__sse2(struct sna *sna,
3209
					       const struct sna_composite_spans_op *op,
3210
					       const BoxRec *box, float opacity)
3211
{
3212
	float *v;
3213
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
3214
	     __FUNCTION__,
3215
	     op->base.src.offset[0], op->base.src.offset[1],
3216
	     opacity,
3217
	     op->base.dst.x, op->base.dst.y,
3218
	     box->x1, box->y1,
3219
	     box->x2 - box->x1,
3220
	     box->y2 - box->y1));
4304 Serge 3221
 
4501 Serge 3222
	gen3_get_rectangles(sna, &op->base, 1);
4304 Serge 3223
 
4501 Serge 3224
	v = sna->render.vertices + sna->render.vertex_used;
3225
	sna->render.vertex_used += 9;
4304 Serge 3226
 
4501 Serge 3227
	v[0] = box->x2;
3228
	v[6] = v[3] = box->x1;
3229
	v[4] = v[1] = box->y2;
3230
	v[7] = box->y1;
3231
	v[8] = v[5] = v[2] = opacity;
3232
}
4304 Serge 3233
 
4501 Serge 3234
sse2 fastcall static void
3235
gen3_render_composite_spans_constant_thread__sse2__boxes(struct sna *sna,
3236
							 const struct sna_composite_spans_op *op,
3237
							 const struct sna_opacity_box *box,
3238
							 int nbox)
3239
{
3240
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
3241
	     __FUNCTION__, nbox,
3242
	     op->base.src.offset[0], op->base.src.offset[1],
3243
	     op->base.dst.x, op->base.dst.y));
4304 Serge 3244
 
4501 Serge 3245
	sna_vertex_lock(&sna->render);
3246
	do {
3247
		int nbox_this_time;
3248
		float *v;
4304 Serge 3249
 
4501 Serge 3250
		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
3251
		assert(nbox_this_time);
3252
		nbox -= nbox_this_time;
4304 Serge 3253
 
4501 Serge 3254
		v = sna->render.vertices + sna->render.vertex_used;
3255
		sna->render.vertex_used += nbox_this_time * 9;
4304 Serge 3256
 
4501 Serge 3257
		sna_vertex_acquire__locked(&sna->render);
3258
		sna_vertex_unlock(&sna->render);
4304 Serge 3259
 
4501 Serge 3260
		do {
3261
			v[0] = box->box.x2;
3262
			v[6] = v[3] = box->box.x1;
3263
			v[4] = v[1] = box->box.y2;
3264
			v[7] = box->box.y1;
3265
			v[8] = v[5] = v[2] = box->alpha;
3266
			v += 9;
3267
			box++;
3268
		} while (--nbox_this_time);
4304 Serge 3269
 
4501 Serge 3270
		sna_vertex_lock(&sna->render);
3271
		sna_vertex_release__locked(&sna->render);
3272
	} while (nbox);
3273
	sna_vertex_unlock(&sna->render);
3274
}
4304 Serge 3275
 
4501 Serge 3276
sse2 fastcall static void
3277
gen3_emit_composite_spans_primitive_constant__sse2__no_offset(struct sna *sna,
3278
							      const struct sna_composite_spans_op *op,
3279
							      const BoxRec *box,
3280
							      float opacity)
3281
{
3282
	float *v = sna->render.vertices + sna->render.vertex_used;
3283
	sna->render.vertex_used += 9;
4304 Serge 3284
 
4501 Serge 3285
	v[0] = box->x2;
3286
	v[6] = v[3] = box->x1;
3287
	v[4] = v[1] = box->y2;
3288
	v[7] = box->y1;
3289
	v[8] = v[5] = v[2] = opacity;
3290
}
4304 Serge 3291
 
4501 Serge 3292
sse2 fastcall static void
3293
gen3_emit_composite_spans_primitive_constant__sse2__no_offset__boxes(const struct sna_composite_spans_op *op,
3294
								     const struct sna_opacity_box *b,
3295
								     int nbox, float *v)
3296
{
3297
	do {
3298
		v[0] = b->box.x2;
3299
		v[6] = v[3] = b->box.x1;
3300
		v[4] = v[1] = b->box.y2;
3301
		v[7] = b->box.y1;
3302
		v[8] = v[5] = v[2] = b->alpha;
4304 Serge 3303
 
4501 Serge 3304
		v += 9;
3305
		b++;
3306
	} while (--nbox);
3307
}
4304 Serge 3308
 
4501 Serge 3309
sse2 fastcall static void
3310
gen3_emit_composite_spans_primitive_identity_source__sse2(struct sna *sna,
3311
							  const struct sna_composite_spans_op *op,
3312
							  const BoxRec *box,
3313
							  float opacity)
3314
{
3315
	float *v = sna->render.vertices + sna->render.vertex_used;
3316
	sna->render.vertex_used += 15;
4304 Serge 3317
 
4501 Serge 3318
	v[0] = op->base.dst.x + box->x2;
3319
	v[1] = op->base.dst.y + box->y2;
3320
	v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0];
3321
	v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1];
3322
	v[4] = opacity;
4304 Serge 3323
 
4501 Serge 3324
	v[5] = op->base.dst.x + box->x1;
3325
	v[6] = v[1];
3326
	v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0];
3327
	v[8] = v[3];
3328
	v[9] = opacity;
4304 Serge 3329
 
4501 Serge 3330
	v[10] = v[5];
3331
	v[11] = op->base.dst.y + box->y1;
3332
	v[12] = v[7];
3333
	v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1];
3334
	v[14] = opacity;
3335
}
4304 Serge 3336
 
4501 Serge 3337
sse2 fastcall static void
3338
gen3_emit_composite_spans_primitive_identity_source__sse2__boxes(const struct sna_composite_spans_op *op,
3339
								 const struct sna_opacity_box *b,
3340
								 int nbox,
3341
								 float *v)
3342
{
3343
	do {
3344
		v[0] = op->base.dst.x + b->box.x2;
3345
		v[1] = op->base.dst.y + b->box.y2;
3346
		v[2] = (op->base.src.offset[0] + b->box.x2) * op->base.src.scale[0];
3347
		v[3] = (op->base.src.offset[1] + b->box.y2) * op->base.src.scale[1];
3348
		v[4] = b->alpha;
4304 Serge 3349
 
4501 Serge 3350
		v[5] = op->base.dst.x + b->box.x1;
3351
		v[6] = v[1];
3352
		v[7] = (op->base.src.offset[0] + b->box.x1) * op->base.src.scale[0];
3353
		v[8] = v[3];
3354
		v[9] = b->alpha;
4304 Serge 3355
 
4501 Serge 3356
		v[10] = v[5];
3357
		v[11] = op->base.dst.y + b->box.y1;
3358
		v[12] = v[7];
3359
		v[13] = (op->base.src.offset[1] + b->box.y1) * op->base.src.scale[1];
3360
		v[14] = b->alpha;
4304 Serge 3361
 
4501 Serge 3362
		v += 15;
3363
		b++;
3364
	} while (--nbox);
3365
}
3366
sse2 fastcall static void
3367
gen3_emit_composite_spans_primitive_affine_source__sse2(struct sna *sna,
3368
							const struct sna_composite_spans_op *op,
3369
							const BoxRec *box,
3370
							float opacity)
3371
{
3372
	PictTransform *transform = op->base.src.transform;
3373
	float *v;
4304 Serge 3374
 
4501 Serge 3375
	v = sna->render.vertices + sna->render.vertex_used;
3376
	sna->render.vertex_used += 15;
4304 Serge 3377
 
4501 Serge 3378
	v[0]  = op->base.dst.x + box->x2;
3379
	v[6]  = v[1] = op->base.dst.y + box->y2;
3380
	v[10] = v[5] = op->base.dst.x + box->x1;
3381
	v[11] = op->base.dst.y + box->y1;
3382
	v[14] = v[9] = v[4]  = opacity;
4304 Serge 3383
 
4501 Serge 3384
	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x2,
3385
				    (int)op->base.src.offset[1] + box->y2,
3386
				    transform, op->base.src.scale,
3387
				    &v[2], &v[3]);
4304 Serge 3388
 
4501 Serge 3389
	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
3390
				    (int)op->base.src.offset[1] + box->y2,
3391
				    transform, op->base.src.scale,
3392
				    &v[7], &v[8]);
4304 Serge 3393
 
4501 Serge 3394
	_sna_get_transformed_scaled((int)op->base.src.offset[0] + box->x1,
3395
				    (int)op->base.src.offset[1] + box->y1,
3396
				    transform, op->base.src.scale,
3397
				    &v[12], &v[13]);
3398
}
4304 Serge 3399
 
4501 Serge 3400
sse2 fastcall static void
3401
gen3_emit_composite_spans_primitive_affine_source__sse2__boxes(const struct sna_composite_spans_op *op,
3402
							       const struct sna_opacity_box *b,
3403
							       int nbox,
3404
							       float *v)
3405
{
3406
	PictTransform *transform = op->base.src.transform;
4304 Serge 3407
 
4501 Serge 3408
	do {
3409
		v[0]  = op->base.dst.x + b->box.x2;
3410
		v[6]  = v[1] = op->base.dst.y + b->box.y2;
3411
		v[10] = v[5] = op->base.dst.x + b->box.x1;
3412
		v[11] = op->base.dst.y + b->box.y1;
3413
		v[14] = v[9] = v[4]  = b->alpha;
4304 Serge 3414
 
4501 Serge 3415
		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x2,
3416
					    (int)op->base.src.offset[1] + b->box.y2,
3417
					    transform, op->base.src.scale,
3418
					    &v[2], &v[3]);
4304 Serge 3419
 
4501 Serge 3420
		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
3421
					    (int)op->base.src.offset[1] + b->box.y2,
3422
					    transform, op->base.src.scale,
3423
					    &v[7], &v[8]);
4304 Serge 3424
 
4501 Serge 3425
		_sna_get_transformed_scaled((int)op->base.src.offset[0] + b->box.x1,
3426
					    (int)op->base.src.offset[1] + b->box.y1,
3427
					    transform, op->base.src.scale,
3428
					    &v[12], &v[13]);
3429
		v += 15;
3430
		b++;
3431
	} while (--nbox);
3432
}
4304 Serge 3433
 
4501 Serge 3434
sse2 fastcall static void
3435
gen3_emit_composite_spans_primitive_identity_gradient__sse2(struct sna *sna,
3436
							    const struct sna_composite_spans_op *op,
3437
							    const BoxRec *box,
3438
							    float opacity)
3439
{
3440
	float *v = sna->render.vertices + sna->render.vertex_used;
3441
	sna->render.vertex_used += 15;
4304 Serge 3442
 
4501 Serge 3443
	v[0] = op->base.dst.x + box->x2;
3444
	v[1] = op->base.dst.y + box->y2;
3445
	v[2] = op->base.src.offset[0] + box->x2;
3446
	v[3] = op->base.src.offset[1] + box->y2;
3447
	v[4] = opacity;
4304 Serge 3448
 
4501 Serge 3449
	v[5] = op->base.dst.x + box->x1;
3450
	v[6] = v[1];
3451
	v[7] = op->base.src.offset[0] + box->x1;
3452
	v[8] = v[3];
3453
	v[9] = opacity;
4304 Serge 3454
 
4501 Serge 3455
	v[10] = v[5];
3456
	v[11] = op->base.dst.y + box->y1;
3457
	v[12] = v[7];
3458
	v[13] = op->base.src.offset[1] + box->y1;
3459
	v[14] = opacity;
3460
}
4304 Serge 3461
 
4501 Serge 3462
sse2 fastcall static void
3463
gen3_emit_composite_spans_primitive_identity_gradient__sse2__boxes(const struct sna_composite_spans_op *op,
3464
								   const struct sna_opacity_box *b,
3465
								   int nbox,
3466
								   float *v)
3467
{
3468
	do {
3469
		v[0] = op->base.dst.x + b->box.x2;
3470
		v[1] = op->base.dst.y + b->box.y2;
3471
		v[2] = op->base.src.offset[0] + b->box.x2;
3472
		v[3] = op->base.src.offset[1] + b->box.y2;
3473
		v[4] = b->alpha;
4304 Serge 3474
 
4501 Serge 3475
		v[5] = op->base.dst.x + b->box.x1;
3476
		v[6] = v[1];
3477
		v[7] = op->base.src.offset[0] + b->box.x1;
3478
		v[8] = v[3];
3479
		v[9] = b->alpha;
4304 Serge 3480
 
4501 Serge 3481
		v[10] = v[5];
3482
		v[11] = op->base.dst.y + b->box.y1;
3483
		v[12] = v[7];
3484
		v[13] = op->base.src.offset[1] + b->box.y1;
3485
		v[14] = b->alpha;
4304 Serge 3486
 
4501 Serge 3487
		v += 15;
3488
		b++;
3489
	} while (--nbox);
3490
}
4304 Serge 3491
 
4501 Serge 3492
sse2 fastcall static void
3493
gen3_emit_composite_spans_primitive_affine_gradient__sse2(struct sna *sna,
3494
							  const struct sna_composite_spans_op *op,
3495
							  const BoxRec *box,
3496
							  float opacity)
3497
{
3498
	PictTransform *transform = op->base.src.transform;
3499
	float *v = sna->render.vertices + sna->render.vertex_used;
3500
	sna->render.vertex_used += 15;
4304 Serge 3501
 
4501 Serge 3502
	v[0] = op->base.dst.x + box->x2;
3503
	v[1] = op->base.dst.y + box->y2;
3504
	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x2,
3505
				    op->base.src.offset[1] + box->y2,
3506
				    transform, op->base.src.scale,
3507
				    &v[2], &v[3]);
3508
	v[4] = opacity;
4304 Serge 3509
 
4501 Serge 3510
	v[5] = op->base.dst.x + box->x1;
3511
	v[6] = v[1];
3512
	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
3513
				    op->base.src.offset[1] + box->y2,
3514
				    transform, op->base.src.scale,
3515
				    &v[7], &v[8]);
3516
	v[9] = opacity;
4304 Serge 3517
 
4501 Serge 3518
	v[10] = v[5];
3519
	v[11] = op->base.dst.y + box->y1;
3520
	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
3521
				    op->base.src.offset[1] + box->y1,
3522
				    transform, op->base.src.scale,
3523
				    &v[12], &v[13]);
3524
	v[14] = opacity;
3525
}
4304 Serge 3526
 
4501 Serge 3527
sse2 fastcall static void
3528
gen3_emit_composite_spans_primitive_affine_gradient__sse2__boxes(const struct sna_composite_spans_op *op,
3529
								 const struct sna_opacity_box *b,
3530
								 int nbox,
3531
								 float *v)
3532
{
3533
	PictTransform *transform = op->base.src.transform;
4304 Serge 3534
 
4501 Serge 3535
	do {
3536
		v[0] = op->base.dst.x + b->box.x2;
3537
		v[1] = op->base.dst.y + b->box.y2;
3538
		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2,
3539
					    op->base.src.offset[1] + b->box.y2,
3540
					    transform, op->base.src.scale,
3541
					    &v[2], &v[3]);
3542
		v[4] = b->alpha;
4304 Serge 3543
 
4501 Serge 3544
		v[5] = op->base.dst.x + b->box.x1;
3545
		v[6] = v[1];
3546
		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
3547
					    op->base.src.offset[1] + b->box.y2,
3548
					    transform, op->base.src.scale,
3549
					    &v[7], &v[8]);
3550
		v[9] = b->alpha;
4304 Serge 3551
 
4501 Serge 3552
		v[10] = v[5];
3553
		v[11] = op->base.dst.y + b->box.y1;
3554
		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
3555
					    op->base.src.offset[1] + b->box.y1,
3556
					    transform, op->base.src.scale,
3557
					    &v[12], &v[13]);
3558
		v[14] = b->alpha;
3559
		v += 15;
3560
		b++;
3561
	} while (--nbox);
3562
}
3563
#endif
4304 Serge 3564
 
4501 Serge 3565
fastcall static void
3566
gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna,
3567
						    const struct sna_composite_spans_op *op,
3568
						    const BoxRec *box,
3569
						    float opacity)
3570
{
3571
	PictTransform *transform = op->base.src.transform;
3572
	float *v = sna->render.vertices + sna->render.vertex_used;
3573
	sna->render.vertex_used += 15;
4304 Serge 3574
 
4501 Serge 3575
	v[0] = op->base.dst.x + box->x2;
3576
	v[1] = op->base.dst.y + box->y2;
3577
	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x2,
3578
				    op->base.src.offset[1] + box->y2,
3579
				    transform, op->base.src.scale,
3580
				    &v[2], &v[3]);
3581
	v[4] = opacity;
4304 Serge 3582
 
4501 Serge 3583
	v[5] = op->base.dst.x + box->x1;
3584
	v[6] = v[1];
3585
	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
3586
				    op->base.src.offset[1] + box->y2,
3587
				    transform, op->base.src.scale,
3588
				    &v[7], &v[8]);
3589
	v[9] = opacity;
4304 Serge 3590
 
4501 Serge 3591
	v[10] = v[5];
3592
	v[11] = op->base.dst.y + box->y1;
3593
	_sna_get_transformed_scaled(op->base.src.offset[0] + box->x1,
3594
				    op->base.src.offset[1] + box->y1,
3595
				    transform, op->base.src.scale,
3596
				    &v[12], &v[13]);
3597
	v[14] = opacity;
3598
}
4304 Serge 3599
 
4501 Serge 3600
fastcall static void
3601
gen3_emit_composite_spans_primitive_affine_gradient__boxes(const struct sna_composite_spans_op *op,
3602
							   const struct sna_opacity_box *b,
3603
							   int nbox,
3604
							   float *v)
3605
{
3606
	PictTransform *transform = op->base.src.transform;
4304 Serge 3607
 
4501 Serge 3608
	do {
3609
		v[0] = op->base.dst.x + b->box.x2;
3610
		v[1] = op->base.dst.y + b->box.y2;
3611
		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2,
3612
					    op->base.src.offset[1] + b->box.y2,
3613
					    transform, op->base.src.scale,
3614
					    &v[2], &v[3]);
3615
		v[4] = b->alpha;
4304 Serge 3616
 
4501 Serge 3617
		v[5] = op->base.dst.x + b->box.x1;
3618
		v[6] = v[1];
3619
		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
3620
					    op->base.src.offset[1] + b->box.y2,
3621
					    transform, op->base.src.scale,
3622
					    &v[7], &v[8]);
3623
		v[9] = b->alpha;
4304 Serge 3624
 
4501 Serge 3625
		v[10] = v[5];
3626
		v[11] = op->base.dst.y + b->box.y1;
3627
		_sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1,
3628
					    op->base.src.offset[1] + b->box.y1,
3629
					    transform, op->base.src.scale,
3630
					    &v[12], &v[13]);
3631
		v[14] = b->alpha;
3632
		v += 15;
3633
		b++;
3634
	} while (--nbox);
3635
}
4304 Serge 3636
 
4501 Serge 3637
fastcall static void
3638
gen3_emit_composite_spans_primitive(struct sna *sna,
3639
				    const struct sna_composite_spans_op *op,
3640
				    const BoxRec *box,
3641
				    float opacity)
3642
{
3643
	gen3_emit_composite_spans_vertex(sna, op,
3644
					 box->x2, box->y2,
3645
					 opacity);
3646
	gen3_emit_composite_spans_vertex(sna, op,
3647
					 box->x1, box->y2,
3648
					 opacity);
3649
	gen3_emit_composite_spans_vertex(sna, op,
3650
					 box->x1, box->y1,
3651
					 opacity);
3652
}
4304 Serge 3653
 
4501 Serge 3654
fastcall static void
3655
gen3_render_composite_spans_constant_box(struct sna *sna,
3656
					 const struct sna_composite_spans_op *op,
3657
					 const BoxRec *box, float opacity)
3658
{
3659
	float *v;
3660
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
3661
	     __FUNCTION__,
3662
	     op->base.src.offset[0], op->base.src.offset[1],
3663
	     opacity,
3664
	     op->base.dst.x, op->base.dst.y,
3665
	     box->x1, box->y1,
3666
	     box->x2 - box->x1,
3667
	     box->y2 - box->y1));
4304 Serge 3668
 
4501 Serge 3669
	gen3_get_rectangles(sna, &op->base, 1);
4304 Serge 3670
 
4501 Serge 3671
	v = sna->render.vertices + sna->render.vertex_used;
3672
	sna->render.vertex_used += 9;
4304 Serge 3673
 
4501 Serge 3674
	v[0] = box->x2;
3675
	v[6] = v[3] = box->x1;
3676
	v[4] = v[1] = box->y2;
3677
	v[7] = box->y1;
3678
	v[8] = v[5] = v[2] = opacity;
3679
}
4304 Serge 3680
 
4501 Serge 3681
fastcall static void
3682
gen3_render_composite_spans_constant_thread_boxes(struct sna *sna,
3683
						  const struct sna_composite_spans_op *op,
3684
						  const struct sna_opacity_box *box,
3685
						  int nbox)
3686
{
3687
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
3688
	     __FUNCTION__, nbox,
3689
	     op->base.src.offset[0], op->base.src.offset[1],
3690
	     op->base.dst.x, op->base.dst.y));
4304 Serge 3691
 
4501 Serge 3692
	sna_vertex_lock(&sna->render);
3693
	do {
3694
		int nbox_this_time;
3695
		float *v;
4304 Serge 3696
 
4501 Serge 3697
		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
3698
		assert(nbox_this_time);
3699
		nbox -= nbox_this_time;
4304 Serge 3700
 
4501 Serge 3701
		v = sna->render.vertices + sna->render.vertex_used;
3702
		sna->render.vertex_used += nbox_this_time * 9;
4304 Serge 3703
 
4501 Serge 3704
		sna_vertex_acquire__locked(&sna->render);
3705
		sna_vertex_unlock(&sna->render);
4304 Serge 3706
 
4501 Serge 3707
		do {
3708
			v[0] = box->box.x2;
3709
			v[6] = v[3] = box->box.x1;
3710
			v[4] = v[1] = box->box.y2;
3711
			v[7] = box->box.y1;
3712
			v[8] = v[5] = v[2] = box->alpha;
3713
			v += 9;
3714
			box++;
3715
		} while (--nbox_this_time);
4304 Serge 3716
 
4501 Serge 3717
		sna_vertex_lock(&sna->render);
3718
		sna_vertex_release__locked(&sna->render);
3719
	} while (nbox);
3720
	sna_vertex_unlock(&sna->render);
3721
}
4304 Serge 3722
 
4501 Serge 3723
fastcall static void
3724
gen3_render_composite_spans_box(struct sna *sna,
3725
				const struct sna_composite_spans_op *op,
3726
				const BoxRec *box, float opacity)
3727
{
3728
	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
3729
	     __FUNCTION__,
3730
	     op->base.src.offset[0], op->base.src.offset[1],
3731
	     opacity,
3732
	     op->base.dst.x, op->base.dst.y,
3733
	     box->x1, box->y1,
3734
	     box->x2 - box->x1,
3735
	     box->y2 - box->y1));
4304 Serge 3736
 
4501 Serge 3737
	gen3_get_rectangles(sna, &op->base, 1);
3738
	op->prim_emit(sna, op, box, opacity);
3739
}
4304 Serge 3740
 
4501 Serge 3741
static void
3742
gen3_render_composite_spans_boxes(struct sna *sna,
3743
				  const struct sna_composite_spans_op *op,
3744
				  const BoxRec *box, int nbox,
3745
				  float opacity)
3746
{
3747
	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n",
3748
	     __FUNCTION__, nbox,
3749
	     op->base.src.offset[0], op->base.src.offset[1],
3750
	     opacity,
3751
	     op->base.dst.x, op->base.dst.y));
4304 Serge 3752
 
4501 Serge 3753
	do {
3754
		int nbox_this_time;
4304 Serge 3755
 
4501 Serge 3756
		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
3757
		nbox -= nbox_this_time;
4304 Serge 3758
 
4501 Serge 3759
		do {
3760
			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
3761
			     box->x1, box->y1,
3762
			     box->x2 - box->x1,
3763
			     box->y2 - box->y1));
4304 Serge 3764
 
4501 Serge 3765
			op->prim_emit(sna, op, box++, opacity);
3766
		} while (--nbox_this_time);
3767
	} while (nbox);
3768
}
4304 Serge 3769
 
4501 Serge 3770
fastcall static void
3771
gen3_render_composite_spans_boxes__thread(struct sna *sna,
3772
					  const struct sna_composite_spans_op *op,
3773
					  const struct sna_opacity_box *box,
3774
					  int nbox)
3775
{
3776
	DBG(("%s: nbox=%d, src=+(%d, %d), dst=+(%d, %d)\n",
3777
	     __FUNCTION__, nbox,
3778
	     op->base.src.offset[0], op->base.src.offset[1],
3779
	     op->base.dst.x, op->base.dst.y));
4304 Serge 3780
 
4501 Serge 3781
	sna_vertex_lock(&sna->render);
3782
	do {
3783
		int nbox_this_time;
3784
		float *v;
4304 Serge 3785
 
4501 Serge 3786
		nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
3787
		assert(nbox_this_time);
3788
		nbox -= nbox_this_time;
4304 Serge 3789
 
4501 Serge 3790
		v = sna->render.vertices + sna->render.vertex_used;
3791
		sna->render.vertex_used += nbox_this_time * op->base.floats_per_rect;
4304 Serge 3792
 
4501 Serge 3793
		sna_vertex_acquire__locked(&sna->render);
3794
		sna_vertex_unlock(&sna->render);
4304 Serge 3795
 
4501 Serge 3796
		op->emit_boxes(op, box, nbox_this_time, v);
3797
		box += nbox_this_time;
4304 Serge 3798
 
4501 Serge 3799
		sna_vertex_lock(&sna->render);
3800
		sna_vertex_release__locked(&sna->render);
3801
	} while (nbox);
3802
	sna_vertex_unlock(&sna->render);
3803
}
4304 Serge 3804
 
4501 Serge 3805
fastcall static void
3806
gen3_render_composite_spans_done(struct sna *sna,
3807
				 const struct sna_composite_spans_op *op)
3808
{
3809
	if (sna->render.vertex_offset)
3810
		gen3_vertex_flush(sna);
4304 Serge 3811
 
4501 Serge 3812
	DBG(("%s()\n", __FUNCTION__));
4304 Serge 3813
 
4501 Serge 3814
	if (op->base.src.bo)
3815
		kgem_bo_destroy(&sna->kgem, op->base.src.bo);
4304 Serge 3816
 
4501 Serge 3817
	sna_render_composite_redirect_done(sna, &op->base);
3818
}
4304 Serge 3819
 
4501 Serge 3820
static bool
3821
gen3_check_composite_spans(struct sna *sna,
3822
			   uint8_t op, PicturePtr src, PicturePtr dst,
3823
			   int16_t width, int16_t height, unsigned flags)
3824
{
3825
	if (op >= ARRAY_SIZE(gen3_blend_op))
3826
		return false;
4304 Serge 3827
 
4501 Serge 3828
	if (gen3_composite_fallback(sna, op, src, NULL, dst))
3829
		return false;
4304 Serge 3830
 
4501 Serge 3831
	if (need_tiling(sna, width, height) &&
3832
	    !is_gpu(sna, dst->pDrawable, PREFER_GPU_SPANS)) {
3833
		DBG(("%s: fallback, tiled operation not on GPU\n",
3834
		     __FUNCTION__));
3835
		return false;
3836
	}
4304 Serge 3837
 
4501 Serge 3838
	return true;
3839
}
4304 Serge 3840
 
4501 Serge 3841
static bool
3842
gen3_render_composite_spans(struct sna *sna,
3843
			    uint8_t op,
3844
			    PicturePtr src,
3845
			    PicturePtr dst,
3846
			    int16_t src_x,  int16_t src_y,
3847
			    int16_t dst_x,  int16_t dst_y,
3848
			    int16_t width,  int16_t height,
3849
			    unsigned flags,
3850
			    struct sna_composite_spans_op *tmp)
3851
{
3852
	bool no_offset;
4304 Serge 3853
 
4501 Serge 3854
	DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__,
3855
	     src_x, src_y, dst_x, dst_y, width, height));
4304 Serge 3856
 
4501 Serge 3857
	assert(gen3_check_composite_spans(sna, op, src, dst, width, height, flags));
4304 Serge 3858
 
4501 Serge 3859
	if (need_tiling(sna, width, height)) {
3860
		DBG(("%s: tiling, operation (%dx%d) too wide for pipeline\n",
3861
		     __FUNCTION__, width, height));
3862
		return sna_tiling_composite_spans(op, src, dst,
3863
						  src_x, src_y, dst_x, dst_y,
3864
						  width, height, flags, tmp);
3865
	}
4304 Serge 3866
 
4501 Serge 3867
	if (!gen3_composite_set_target(sna, &tmp->base, dst,
3868
				       dst_x, dst_y, width, height)) {
3869
		DBG(("%s: unable to set render target\n",
3870
		     __FUNCTION__));
3871
		return false;
3872
	}
4304 Serge 3873
 
4501 Serge 3874
	tmp->base.op = op;
3875
	tmp->base.rb_reversed = gen3_dst_rb_reversed(tmp->base.dst.format);
3876
	if (too_large(tmp->base.dst.width, tmp->base.dst.height) ||
3877
	    !gen3_check_pitch_3d(tmp->base.dst.bo)) {
3878
		if (!sna_render_composite_redirect(sna, &tmp->base,
3879
						   dst_x, dst_y, width, height,
3880
						   true))
3881
			return false;
3882
	}
4304 Serge 3883
 
4501 Serge 3884
	tmp->base.src.u.gen3.type = SHADER_TEXTURE;
3885
	tmp->base.src.is_affine = true;
3886
	DBG(("%s: preparing source\n", __FUNCTION__));
3887
	switch (gen3_composite_picture(sna, src, &tmp->base, &tmp->base.src,
3888
				       src_x, src_y,
3889
				       width, height,
3890
				       dst_x, dst_y,
3891
				       dst->polyMode == PolyModePrecise)) {
3892
	case -1:
3893
		goto cleanup_dst;
3894
	case 0:
3895
		tmp->base.src.u.gen3.type = SHADER_ZERO;
3896
		break;
3897
	case 1:
3898
		gen3_composite_channel_convert(&tmp->base.src);
3899
		break;
3900
	}
3901
	DBG(("%s: source type=%d\n", __FUNCTION__, tmp->base.src.u.gen3.type));
4304 Serge 3902
 
4501 Serge 3903
	if (tmp->base.src.u.gen3.type != SHADER_ZERO)
3904
		tmp->base.mask.u.gen3.type = SHADER_OPACITY;
4304 Serge 3905
 
4501 Serge 3906
	no_offset = tmp->base.dst.x == 0 && tmp->base.dst.y == 0;
3907
	tmp->box   = gen3_render_composite_spans_box;
3908
	tmp->boxes = gen3_render_composite_spans_boxes;
3909
	tmp->thread_boxes = gen3_render_composite_spans_boxes__thread;
3910
	tmp->done  = gen3_render_composite_spans_done;
3911
	tmp->prim_emit = gen3_emit_composite_spans_primitive;
3912
	switch (tmp->base.src.u.gen3.type) {
3913
	case SHADER_NONE:
3914
		assert(0);
3915
	case SHADER_ZERO:
3916
		if (no_offset) {
3917
			tmp->prim_emit = gen3_emit_composite_spans_primitive_zero_no_offset;
3918
			tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero_no_offset__boxes;
3919
		} else {
3920
			tmp->prim_emit = gen3_emit_composite_spans_primitive_zero;
3921
			tmp->emit_boxes = gen3_emit_composite_spans_primitive_zero__boxes;
3922
		}
3923
		break;
3924
	case SHADER_BLACK:
3925
	case SHADER_WHITE:
3926
	case SHADER_CONSTANT:
3927
		if (no_offset) {
3928
#if defined(sse2) && !defined(__x86_64__)
3929
			if (sna->cpu_features & SSE2) {
3930
				tmp->box = gen3_render_composite_spans_constant_box__sse2;
3931
				tmp->thread_boxes = gen3_render_composite_spans_constant_thread__sse2__boxes;
3932
				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant__sse2__no_offset;
3933
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__sse2__no_offset__boxes;
3934
			} else
3935
#endif
3936
			{
3937
				tmp->box = gen3_render_composite_spans_constant_box;
3938
				tmp->thread_boxes = gen3_render_composite_spans_constant_thread_boxes;
3939
				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant_no_offset;
3940
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant_no_offset__boxes;
3941
			}
3942
		} else {
3943
#if defined(sse2) && !defined(__x86_64__)
3944
			if (sna->cpu_features & SSE2) {
3945
				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant__sse2;
3946
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__sse2__boxes;
3947
			} else
3948
#endif
3949
			{
3950
				tmp->prim_emit = gen3_emit_composite_spans_primitive_constant;
3951
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_constant__boxes;
3952
			}
3953
		}
3954
		break;
3955
	case SHADER_LINEAR:
3956
	case SHADER_RADIAL:
3957
		if (tmp->base.src.transform == NULL) {
3958
#if defined(sse2) && !defined(__x86_64__)
3959
			if (sna->cpu_features & SSE2) {
3960
				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient__sse2;
3961
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__sse2__boxes;
3962
			} else
3963
#endif
3964
			{
3965
				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient;
3966
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_gradient__boxes;
3967
			}
3968
		} else if (tmp->base.src.is_affine) {
3969
			tmp->base.src.scale[1] = tmp->base.src.scale[0] = 1. / tmp->base.src.transform->matrix[2][2];
3970
#if defined(sse2) && !defined(__x86_64__)
3971
			if (sna->cpu_features & SSE2) {
3972
				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient__sse2;
3973
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__sse2__boxes;
3974
			} else
3975
#endif
3976
			{
3977
				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_gradient;
3978
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_gradient__boxes;
3979
			}
3980
		}
3981
		break;
3982
	case SHADER_TEXTURE:
3983
		if (tmp->base.src.transform == NULL) {
3984
#if defined(sse2) && !defined(__x86_64__)
3985
			if (sna->cpu_features & SSE2) {
3986
				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source__sse2;
3987
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__sse2__boxes;
3988
			} else
3989
#endif
3990
			{
3991
				tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source;
3992
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_identity_source__boxes;
3993
			}
3994
		} else if (tmp->base.src.is_affine) {
3995
			tmp->base.src.scale[0] /= tmp->base.src.transform->matrix[2][2];
3996
			tmp->base.src.scale[1] /= tmp->base.src.transform->matrix[2][2];
3997
#if defined(sse2) && !defined(__x86_64__)
3998
			if (sna->cpu_features & SSE2) {
3999
				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source__sse2;
4000
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__sse2__boxes;
4001
			} else
4002
#endif
4003
			{
4004
				tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source;
4005
				tmp->emit_boxes = gen3_emit_composite_spans_primitive_affine_source__boxes;
4006
			}
4007
		}
4008
		break;
4009
	}
4010
	if (tmp->emit_boxes == NULL)
4011
		tmp->thread_boxes = NULL;
4304 Serge 4012
 
4501 Serge 4013
	tmp->base.mask.bo = NULL;
4304 Serge 4014
 
4501 Serge 4015
	tmp->base.floats_per_vertex = 2;
4016
	if (!is_constant_ps(tmp->base.src.u.gen3.type))
4017
		tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3;
4018
	tmp->base.floats_per_vertex +=
4019
		tmp->base.mask.u.gen3.type == SHADER_OPACITY;
4020
	tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex;
4304 Serge 4021
 
4501 Serge 4022
	if (!kgem_check_bo(&sna->kgem,
4023
			   tmp->base.dst.bo, tmp->base.src.bo,
4024
			   NULL)) {
4025
		kgem_submit(&sna->kgem);
4026
		if (!kgem_check_bo(&sna->kgem,
4027
				   tmp->base.dst.bo, tmp->base.src.bo,
4028
				   NULL))
4029
			goto cleanup_src;
4030
	}
4304 Serge 4031
 
4501 Serge 4032
	gen3_align_vertex(sna, &tmp->base);
4033
	gen3_emit_composite_state(sna, &tmp->base);
4034
	return true;
4304 Serge 4035
 
4501 Serge 4036
cleanup_src:
4037
	if (tmp->base.src.bo)
4038
		kgem_bo_destroy(&sna->kgem, tmp->base.src.bo);
4039
cleanup_dst:
4040
	if (tmp->base.redirect.real_bo)
4041
		kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo);
4042
	return false;
4043
}
4304 Serge 4044
 
4501 Serge 4045
static void
4046
gen3_emit_video_state(struct sna *sna,
4047
		      struct sna_video *video,
4048
		      struct sna_video_frame *frame,
4049
		      PixmapPtr pixmap,
4050
		      struct kgem_bo *dst_bo,
4051
		      int width, int height,
4052
		      bool bilinear)
4053
{
4054
	struct gen3_render_state *state = &sna->render_state.gen3;
4055
	uint32_t id, ms3, rewind;
4304 Serge 4056
 
4501 Serge 4057
	gen3_emit_target(sna, dst_bo, width, height,
4058
			 sna_format_for_depth(pixmap->drawable.depth));
4304 Serge 4059
 
4501 Serge 4060
	/* XXX share with composite? Is it worth the effort? */
4061
	if ((state->last_shader & (1<<31)) == 0) {
4062
		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
4063
			  I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) |
4064
			  2);
4065
		OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
4066
		OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
4067
			  S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
4068
			  S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
4069
			  S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
4070
			  S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
4071
			  S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
4072
			  S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
4073
			  S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
4074
		OUT_BATCH((2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
4075
			  (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) |
4076
			  S6_COLOR_WRITE_ENABLE);
4304 Serge 4077
 
4501 Serge 4078
		state->last_blend = 0;
4079
		state->floats_per_vertex = 4;
4080
	}
4304 Serge 4081
 
4501 Serge 4082
	if (!is_planar_fourcc(frame->id)) {
4083
		rewind = sna->kgem.nbatch;
4084
		OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4);
4085
		OUT_BATCH(0x0000001);	/* constant 0 */
4086
		/* constant 0: brightness/contrast */
4087
		OUT_BATCH_F(video->brightness / 128.0);
4088
		OUT_BATCH_F(video->contrast / 255.0);
4089
		OUT_BATCH_F(0.0);
4090
		OUT_BATCH_F(0.0);
4091
		if (state->last_constants &&
4092
		    memcmp(&sna->kgem.batch[state->last_constants],
4093
			   &sna->kgem.batch[rewind],
4094
			   6*sizeof(uint32_t)) == 0)
4095
			sna->kgem.nbatch = rewind;
4096
		else
4097
			state->last_constants = rewind;
4304 Serge 4098
 
4501 Serge 4099
		rewind = sna->kgem.nbatch;
4100
		OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3);
4101
		OUT_BATCH(0x00000001);
4102
		OUT_BATCH(SS2_COLORSPACE_CONVERSION |
4103
			  (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
4104
			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
4105
		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
4106
			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
4107
			  (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
4108
			  SS3_NORMALIZED_COORDS);
4109
		OUT_BATCH(0x00000000);
4110
		if (state->last_sampler &&
4111
		    memcmp(&sna->kgem.batch[state->last_sampler],
4112
			   &sna->kgem.batch[rewind],
4113
			   5*sizeof(uint32_t)) == 0)
4114
			sna->kgem.nbatch = rewind;
4115
		else
4116
			state->last_sampler = rewind;
4304 Serge 4117
 
4501 Serge 4118
		OUT_BATCH(_3DSTATE_MAP_STATE | 3);
4119
		OUT_BATCH(0x00000001);	/* texture map #1 */
4120
		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
4121
					 frame->bo,
4122
					 I915_GEM_DOMAIN_SAMPLER << 16,
4123
					 0));
4304 Serge 4124
 
4501 Serge 4125
		ms3 = MAPSURF_422;
4126
		switch (frame->id) {
4127
		case FOURCC_YUY2:
4128
			ms3 |= MT_422_YCRCB_NORMAL;
4129
			break;
4130
		case FOURCC_UYVY:
4131
			ms3 |= MT_422_YCRCB_SWAPY;
4132
			break;
4133
		}
4134
		ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
4135
		ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
4136
		OUT_BATCH(ms3);
4137
		OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
4304 Serge 4138
 
4501 Serge 4139
		id = 1<<31 | 1<<1 | !!video->brightness;
4140
		if (state->last_shader != id) {
4141
			state->last_shader = id;
4142
			id = sna->kgem.nbatch++;
4304 Serge 4143
 
4501 Serge 4144
			gen3_fs_dcl(FS_S0);
4145
			gen3_fs_dcl(FS_T0);
4146
			gen3_fs_texld(FS_OC, FS_S0, FS_T0);
4147
			if (video->brightness != 0) {
4148
				gen3_fs_add(FS_OC,
4149
					    gen3_fs_operand_reg(FS_OC),
4150
					    gen3_fs_operand(FS_C0, X, X, X, ZERO));
4151
			}
4304 Serge 4152
 
4501 Serge 4153
			sna->kgem.batch[id] =
4154
				_3DSTATE_PIXEL_SHADER_PROGRAM |
4155
				(sna->kgem.nbatch - id - 2);
4156
		}
4157
	} else {
4158
		/* For the planar formats, we set up three samplers --
4159
		 * one for each plane, in a Y8 format.  Because I
4160
		 * couldn't get the special PLANAR_TO_PACKED
4161
		 * shader setup to work, I did the manual pixel shader:
4162
		 *
4163
		 * y' = y - .0625
4164
		 * u' = u - .5
4165
		 * v' = v - .5;
4166
		 *
4167
		 * r = 1.1643 * y' + 0.0     * u' + 1.5958  * v'
4168
		 * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v'
4169
		 * b = 1.1643 * y' + 2.017   * u' + 0.0     * v'
4170
		 *
4171
		 * register assignment:
4172
		 * r0 = (y',u',v',0)
4173
		 * r1 = (y,y,y,y)
4174
		 * r2 = (u,u,u,u)
4175
		 * r3 = (v,v,v,v)
4176
		 * OC = (r,g,b,1)
4177
		 */
4178
		rewind = sna->kgem.nbatch;
4179
		OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2));
4180
		OUT_BATCH(0x000001f);	/* constants 0-4 */
4181
		/* constant 0: normalization offsets */
4182
		OUT_BATCH_F(-0.0625);
4183
		OUT_BATCH_F(-0.5);
4184
		OUT_BATCH_F(-0.5);
4185
		OUT_BATCH_F(0.0);
4186
		/* constant 1: r coefficients */
4187
		OUT_BATCH_F(1.1643);
4188
		OUT_BATCH_F(0.0);
4189
		OUT_BATCH_F(1.5958);
4190
		OUT_BATCH_F(0.0);
4191
		/* constant 2: g coefficients */
4192
		OUT_BATCH_F(1.1643);
4193
		OUT_BATCH_F(-0.39173);
4194
		OUT_BATCH_F(-0.81290);
4195
		OUT_BATCH_F(0.0);
4196
		/* constant 3: b coefficients */
4197
		OUT_BATCH_F(1.1643);
4198
		OUT_BATCH_F(2.017);
4199
		OUT_BATCH_F(0.0);
4200
		OUT_BATCH_F(0.0);
4201
		/* constant 4: brightness/contrast */
4202
		OUT_BATCH_F(video->brightness / 128.0);
4203
		OUT_BATCH_F(video->contrast / 255.0);
4204
		OUT_BATCH_F(0.0);
4205
		OUT_BATCH_F(0.0);
4206
		if (state->last_constants &&
4207
		    memcmp(&sna->kgem.batch[state->last_constants],
4208
			   &sna->kgem.batch[rewind],
4209
			   22*sizeof(uint32_t)) == 0)
4210
			sna->kgem.nbatch = rewind;
4211
		else
4212
			state->last_constants = rewind;
4304 Serge 4213
 
4501 Serge 4214
		rewind = sna->kgem.nbatch;
4215
		OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9);
4216
		OUT_BATCH(0x00000007);
4217
		/* sampler 0 */
4218
		OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
4219
			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
4220
		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
4221
			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
4222
			  (0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
4223
			  SS3_NORMALIZED_COORDS);
4224
		OUT_BATCH(0x00000000);
4225
		/* sampler 1 */
4226
		OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
4227
			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
4228
		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
4229
			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
4230
			  (1 << SS3_TEXTUREMAP_INDEX_SHIFT) |
4231
			  SS3_NORMALIZED_COORDS);
4232
		OUT_BATCH(0x00000000);
4233
		/* sampler 2 */
4234
		OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) |
4235
			  (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT));
4236
		OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << SS3_TCX_ADDR_MODE_SHIFT) |
4237
			  (TEXCOORDMODE_CLAMP_EDGE << SS3_TCY_ADDR_MODE_SHIFT) |
4238
			  (2 << SS3_TEXTUREMAP_INDEX_SHIFT) |
4239
			  SS3_NORMALIZED_COORDS);
4240
		OUT_BATCH(0x00000000);
4241
		if (state->last_sampler &&
4242
		    memcmp(&sna->kgem.batch[state->last_sampler],
4243
			   &sna->kgem.batch[rewind],
4244
			   11*sizeof(uint32_t)) == 0)
4245
			sna->kgem.nbatch = rewind;
4246
		else
4247
			state->last_sampler = rewind;
4304 Serge 4248
 
4501 Serge 4249
		OUT_BATCH(_3DSTATE_MAP_STATE | 9);
4250
		OUT_BATCH(0x00000007);
4304 Serge 4251
 
4501 Serge 4252
		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
4253
					 frame->bo,
4254
					 I915_GEM_DOMAIN_SAMPLER << 16,
4255
					 0));
4304 Serge 4256
 
4501 Serge 4257
		ms3 = MAPSURF_8BIT | MT_8BIT_I8;
4258
		ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT;
4259
		ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT;
4260
		OUT_BATCH(ms3);
4261
		/* check to see if Y has special pitch than normal
4262
		 * double u/v pitch, e.g i915 XvMC hw requires at
4263
		 * least 1K alignment, so Y pitch might
4264
		 * be same as U/V's.*/
4265
		if (frame->pitch[1])
4266
			OUT_BATCH(((frame->pitch[1] / 4) - 1) << MS4_PITCH_SHIFT);
4267
		else
4268
			OUT_BATCH(((frame->pitch[0] * 2 / 4) - 1) << MS4_PITCH_SHIFT);
4304 Serge 4269
 
4501 Serge 4270
		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
4271
					 frame->bo,
4272
					 I915_GEM_DOMAIN_SAMPLER << 16,
4273
					 frame->UBufOffset));
4304 Serge 4274
 
4501 Serge 4275
		ms3 = MAPSURF_8BIT | MT_8BIT_I8;
4276
		ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
4277
		ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
4278
		OUT_BATCH(ms3);
4279
		OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
4304 Serge 4280
 
4501 Serge 4281
		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
4282
					 frame->bo,
4283
					 I915_GEM_DOMAIN_SAMPLER << 16,
4284
					 frame->VBufOffset));
4304 Serge 4285
 
4501 Serge 4286
		ms3 = MAPSURF_8BIT | MT_8BIT_I8;
4287
		ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT;
4288
		ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT;
4289
		OUT_BATCH(ms3);
4290
		OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
4304 Serge 4291
 
4501 Serge 4292
		id = 1<<31 | 2<<1 | !!video->brightness;
4293
		if (state->last_shader != id) {
4294
			state->last_shader = id;
4295
			id = sna->kgem.nbatch++;
4304 Serge 4296
 
4501 Serge 4297
			/* Declare samplers */
4298
			gen3_fs_dcl(FS_S0);	/* Y */
4299
			gen3_fs_dcl(FS_S1);	/* U */
4300
			gen3_fs_dcl(FS_S2);	/* V */
4301
			gen3_fs_dcl(FS_T0);	/* normalized coords */
4304 Serge 4302
 
4501 Serge 4303
			/* Load samplers to temporaries. */
4304
			gen3_fs_texld(FS_R1, FS_S0, FS_T0);
4305
			gen3_fs_texld(FS_R2, FS_S1, FS_T0);
4306
			gen3_fs_texld(FS_R3, FS_S2, FS_T0);
4304 Serge 4307
 
4501 Serge 4308
			/* Move the sampled YUV data in R[123] to the first
4309
			 * 3 channels of R0.
4310
			 */
4311
			gen3_fs_mov_masked(FS_R0, MASK_X,
4312
					   gen3_fs_operand_reg(FS_R1));
4313
			gen3_fs_mov_masked(FS_R0, MASK_Y,
4314
					   gen3_fs_operand_reg(FS_R2));
4315
			gen3_fs_mov_masked(FS_R0, MASK_Z,
4316
					   gen3_fs_operand_reg(FS_R3));
4304 Serge 4317
 
4501 Serge 4318
			/* Normalize the YUV data */
4319
			gen3_fs_add(FS_R0, gen3_fs_operand_reg(FS_R0),
4320
				    gen3_fs_operand_reg(FS_C0));
4321
			/* dot-product the YUV data in R0 by the vectors of
4322
			 * coefficients for calculating R, G, and B, storing
4323
			 * the results in the R, G, or B channels of the output
4324
			 * color.  The OC results are implicitly clamped
4325
			 * at the end of the program.
4326
			 */
4327
			gen3_fs_dp3(FS_OC, MASK_X,
4328
				    gen3_fs_operand_reg(FS_R0),
4329
				    gen3_fs_operand_reg(FS_C1));
4330
			gen3_fs_dp3(FS_OC, MASK_Y,
4331
				    gen3_fs_operand_reg(FS_R0),
4332
				    gen3_fs_operand_reg(FS_C2));
4333
			gen3_fs_dp3(FS_OC, MASK_Z,
4334
				    gen3_fs_operand_reg(FS_R0),
4335
				    gen3_fs_operand_reg(FS_C3));
4336
			/* Set alpha of the output to 1.0, by wiring W to 1
4337
			 * and not actually using the source.
4338
			 */
4339
			gen3_fs_mov_masked(FS_OC, MASK_W,
4340
					   gen3_fs_operand_one());
4304 Serge 4341
 
4501 Serge 4342
			if (video->brightness != 0) {
4343
				gen3_fs_add(FS_OC,
4344
					    gen3_fs_operand_reg(FS_OC),
4345
					    gen3_fs_operand(FS_C4, X, X, X, ZERO));
4346
			}
4304 Serge 4347
 
4501 Serge 4348
			sna->kgem.batch[id] =
4349
				_3DSTATE_PIXEL_SHADER_PROGRAM |
4350
				(sna->kgem.nbatch - id - 2);
4351
		}
4352
	}
4353
}
4304 Serge 4354
 
4501 Serge 4355
static void
4356
gen3_video_get_batch(struct sna *sna, struct kgem_bo *bo)
4357
{
4358
	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
4304 Serge 4359
 
4501 Serge 4360
	if (!kgem_check_batch(&sna->kgem, 120) ||
4361
	    !kgem_check_reloc(&sna->kgem, 4) ||
4362
	    !kgem_check_exec(&sna->kgem, 2)) {
4363
		_kgem_submit(&sna->kgem);
4364
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
4365
	}
4304 Serge 4366
 
4501 Serge 4367
	if (sna->render_state.gen3.need_invariant)
4368
		gen3_emit_invariant(sna);
4369
}
4304 Serge 4370
 
4501 Serge 4371
/*
 * Clamp a request for `want` inline rectangles to what the batch holds.
 *
 * A rectangle takes three vertices of floats_per_vertex floats each.
 * The budget is batch_space(sna) minus one dword.  Returns `want` when
 * it fits, otherwise the largest whole number of rectangles that does.
 */
static int
gen3_get_inline_rectangles(struct sna *sna, int want, int floats_per_vertex)
{
	const int floats_per_rect = floats_per_vertex * 3;
	const int budget = batch_space(sna) - 1;

	return floats_per_rect * want > budget ? budget / floats_per_rect
					       : want;
}
4304 Serge 4382
 
4501 Serge 4383
static bool
4384
gen3_render_video(struct sna *sna,
4385
		  struct sna_video *video,
4386
		  struct sna_video_frame *frame,
4387
		  RegionPtr dstRegion,
4388
		  PixmapPtr pixmap)
4389
{
4390
	struct sna_pixmap *priv = sna_pixmap(pixmap);
4391
	BoxPtr pbox = REGION_RECTS(dstRegion);
4392
	int nbox = REGION_NUM_RECTS(dstRegion);
4393
	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
4394
	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
4395
	int src_width = frame->src.x2 - frame->src.x1;
4396
	int src_height = frame->src.y2 - frame->src.y1;
4397
	float src_offset_x, src_offset_y;
4398
	float src_scale_x, src_scale_y;
4399
	int pix_xoff, pix_yoff;
4400
	struct kgem_bo *dst_bo;
4401
	bool bilinear;
4402
	int copy = 0;
4304 Serge 4403
 
4501 Serge 4404
	DBG(("%s: src:%dx%d (frame:%dx%d) -> dst:%dx%d\n", __FUNCTION__,
4405
	     src_width, src_height, frame->width, frame->height, dst_width, dst_height));
4304 Serge 4406
 
4501 Serge 4407
	dst_bo = priv->gpu_bo;
4408
	if (dst_bo == NULL)
4409
		return false;
4304 Serge 4410
 
4501 Serge 4411
	bilinear = src_width != dst_width || src_height != dst_height;
4304 Serge 4412
 
4501 Serge 4413
	src_scale_x = (float)src_width / dst_width / frame->width;
4414
	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
4304 Serge 4415
 
4501 Serge 4416
	src_scale_y = (float)src_height / dst_height / frame->height;
4417
	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
4418
	DBG(("%s: src offset (%f, %f), scale (%f, %f)\n",
4419
	     __FUNCTION__, src_offset_x, src_offset_y, src_scale_x, src_scale_y));
4304 Serge 4420
 
4501 Serge 4421
	if (too_large(pixmap->drawable.width, pixmap->drawable.height) ||
4422
	    !gen3_check_pitch_3d(dst_bo)) {
4423
		int bpp = pixmap->drawable.bitsPerPixel;
4304 Serge 4424
 
4501 Serge 4425
		if (too_large(dst_width, dst_height))
4426
			return false;
4304 Serge 4427
 
4501 Serge 4428
		dst_bo = kgem_create_2d(&sna->kgem,
4429
					dst_width, dst_height, bpp,
4430
					kgem_choose_tiling(&sna->kgem,
4431
							   I915_TILING_X,
4432
							   dst_width, dst_height, bpp),
4433
					0);
4434
		if (!dst_bo)
4435
			return false;
4304 Serge 4436
 
4501 Serge 4437
		pix_xoff = -dstRegion->extents.x1;
4438
		pix_yoff = -dstRegion->extents.y1;
4439
		copy = 1;
4440
	} else {
4441
		/* Set up the offset for translating from the given region
4442
		 * (in screen coordinates) to the backing pixmap.
4443
		 */
4444
#ifdef COMPOSITE
4445
		pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
4446
		pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
4447
#else
4448
		pix_xoff = 0;
4449
		pix_yoff = 0;
4450
#endif
4304 Serge 4451
 
4501 Serge 4452
		dst_width  = pixmap->drawable.width;
4453
		dst_height = pixmap->drawable.height;
4454
	}
4304 Serge 4455
 
4501 Serge 4456
	gen3_video_get_batch(sna, dst_bo);
4457
	gen3_emit_video_state(sna, video, frame, pixmap,
4458
			      dst_bo, dst_width, dst_height, bilinear);
4459
	do {
4460
		int nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
4461
		if (nbox_this_time == 0) {
4462
			gen3_video_get_batch(sna, dst_bo);
4463
			gen3_emit_video_state(sna, video, frame, pixmap,
4464
					      dst_bo, dst_width, dst_height, bilinear);
4465
			nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
4466
			assert(nbox_this_time);
4467
		}
4468
		nbox -= nbox_this_time;
4304 Serge 4469
 
4501 Serge 4470
		OUT_BATCH(PRIM3D_RECTLIST | (12 * nbox_this_time - 1));
4471
		do {
4472
			int box_x1 = pbox->x1;
4473
			int box_y1 = pbox->y1;
4474
			int box_x2 = pbox->x2;
4475
			int box_y2 = pbox->y2;
4304 Serge 4476
 
4501 Serge 4477
			pbox++;
4304 Serge 4478
 
4501 Serge 4479
			DBG(("%s: dst (%d, %d), (%d, %d) + (%d, %d); src (%f, %f), (%f, %f)\n",
4480
			     __FUNCTION__, box_x1, box_y1, box_x2, box_y2, pix_xoff, pix_yoff,
4481
			     box_x1 * src_scale_x + src_offset_x,
4482
			     box_y1 * src_scale_y + src_offset_y,
4483
			     box_x2 * src_scale_x + src_offset_x,
4484
			     box_y2 * src_scale_y + src_offset_y));
4304 Serge 4485
 
4501 Serge 4486
			/* bottom right */
4487
			OUT_BATCH_F(box_x2 + pix_xoff);
4488
			OUT_BATCH_F(box_y2 + pix_yoff);
4489
			OUT_BATCH_F(box_x2 * src_scale_x + src_offset_x);
4490
			OUT_BATCH_F(box_y2 * src_scale_y + src_offset_y);
4304 Serge 4491
 
4501 Serge 4492
			/* bottom left */
4493
			OUT_BATCH_F(box_x1 + pix_xoff);
4494
			OUT_BATCH_F(box_y2 + pix_yoff);
4495
			OUT_BATCH_F(box_x1 * src_scale_x + src_offset_x);
4496
			OUT_BATCH_F(box_y2 * src_scale_y + src_offset_y);
4304 Serge 4497
 
4501 Serge 4498
			/* top left */
4499
			OUT_BATCH_F(box_x1 + pix_xoff);
4500
			OUT_BATCH_F(box_y1 + pix_yoff);
4501
			OUT_BATCH_F(box_x1 * src_scale_x + src_offset_x);
4502
			OUT_BATCH_F(box_y1 * src_scale_y + src_offset_y);
4503
		} while (--nbox_this_time);
4504
	} while (nbox);
4304 Serge 4505
 
4501 Serge 4506
	if (copy) {
4507
#ifdef COMPOSITE
4508
		pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
4509
		pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
4510
#else
4511
		pix_xoff = 0;
4512
		pix_yoff = 0;
4513
#endif
4514
		sna_blt_copy_boxes(sna, GXcopy,
4515
				   dst_bo, -dstRegion->extents.x1, -dstRegion->extents.y1,
4516
				   priv->gpu_bo, pix_xoff, pix_yoff,
4517
				   pixmap->drawable.bitsPerPixel,
4518
				   REGION_RECTS(dstRegion),
4519
				   REGION_NUM_RECTS(dstRegion));
4304 Serge 4520
 
4501 Serge 4521
		kgem_bo_destroy(&sna->kgem, dst_bo);
4522
	}
4304 Serge 4523
 
4501 Serge 4524
	if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
4525
		if ((pix_xoff | pix_yoff) == 0) {
4526
			sna_damage_add(&priv->gpu_damage, dstRegion);
4527
			sna_damage_subtract(&priv->cpu_damage, dstRegion);
4528
		} else {
4529
			sna_damage_add_boxes(&priv->gpu_damage,
4530
					     REGION_RECTS(dstRegion),
4531
					     REGION_NUM_RECTS(dstRegion),
4532
					     pix_xoff, pix_yoff);
4533
			sna_damage_subtract_boxes(&priv->cpu_damage,
4534
						  REGION_RECTS(dstRegion),
4535
						  REGION_NUM_RECTS(dstRegion),
4536
						  pix_xoff, pix_yoff);
4537
		}
4538
	}
4304 Serge 4539
 
4501 Serge 4540
	return true;
4541
}
4304 Serge 4542
 
4501 Serge 4543
#endif
4304 Serge 4544
 
4545
 
4546
 
4547
 
4548
 
4549
 
4550
 
4551
 
4552
 
4553
 
4554
 
4555
 
4556
 
4557
 
4558
 
4559
 
4560
 
4561
 
4562
 
4563
 
4564
 
4565
 
4566
 
4567
 
4501 Serge 4568
 
4569
 
4570
 
4571
 
4572
 
4573
 
4574
 
4575
 
4576
 
4577
 
4578
 
4579
 
4580
 
4581
 
4582
 
4583
 
4584
 
4585
 
4586
 
4587
 
4588
 
4589
 
4590
 
4591
 
4592
 
4593
 
4594
 
4595
 
4596
 
4597
 
4598
 
4599
 
4600
 
4601
 
4602
 
4603
 
4604
 
4605
 
4606
 
4607
 
4608
 
4609
 
4610
 
4611
 
4612
 
4613
 
4614
 
4615
 
4616
 
4617
 
4618
 
4619
 
4620
 
4621
 
4622
 
4623
 
4624
 
4625
 
4626
#if 0
4627
static bool
4628
gen3_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
4629
		     uint32_t color,
4630
		     int16_t x1, int16_t y1,
4631
		     int16_t x2, int16_t y2,
4632
		     uint8_t alu)
4633
{
4634
	struct sna_composite_op tmp;
4635
 
4636
#if NO_FILL_ONE
4637
	return gen3_render_fill_one_try_blt(sna, dst, bo, color,
4638
					    x1, y1, x2, y2, alu);
4639
#endif
4640
 
4641
	/* Prefer to use the BLT if already engaged */
4642
	if (prefer_fill_blt(sna) &&
4643
	    gen3_render_fill_one_try_blt(sna, dst, bo, color,
4644
					 x1, y1, x2, y2, alu))
4645
		return true;
4646
 
4647
	/* Must use the BLT if we can't RENDER... */
4648
	if (!(alu == GXcopy || alu == GXclear) ||
4649
	    too_large(dst->drawable.width, dst->drawable.height) ||
4650
	    bo->pitch > MAX_3D_PITCH)
4651
		return gen3_render_fill_one_try_blt(sna, dst, bo, color,
4652
						    x1, y1, x2, y2, alu);
4653
 
4654
	if (alu == GXclear)
4655
		color = 0;
4656
 
4657
	tmp.op = color == 0 ? PictOpClear : PictOpSrc;
4658
	tmp.dst.pixmap = dst;
4659
	tmp.dst.width = dst->drawable.width;
4660
	tmp.dst.height = dst->drawable.height;
4661
	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
4662
	tmp.dst.bo = bo;
4663
	tmp.floats_per_vertex = 2;
4664
	tmp.floats_per_rect = 6;
4665
	tmp.need_magic_ca_pass = 0;
4666
	tmp.has_component_alpha = 0;
4667
	tmp.rb_reversed = 0;
4668
 
4669
	gen3_init_solid(&tmp.src,
4670
			sna_rgba_for_color(color, dst->drawable.depth));
4671
	tmp.mask.bo = NULL;
4672
	tmp.mask.u.gen3.type = SHADER_NONE;
4673
	tmp.u.gen3.num_constants = 0;
4674
 
4675
	if (!kgem_check_bo(&sna->kgem, bo, NULL)) {
4676
		kgem_submit(&sna->kgem);
4677
 
4678
		if (gen3_render_fill_one_try_blt(sna, dst, bo, color,
4679
						 x1, y1, x2, y2, alu))
4680
			return true;
4681
 
4682
		if (!kgem_check_bo(&sna->kgem, bo, NULL))
4683
			return false;
4684
	}
4685
 
4686
	gen3_align_vertex(sna, &tmp);
4687
	gen3_emit_composite_state(sna, &tmp);
4688
	gen3_get_rectangles(sna, &tmp, 1);
4689
	DBG(("	(%d, %d), (%d, %d): %x\n", x1, y1, x2, y2, color));
4690
	OUT_VERTEX(x2);
4691
	OUT_VERTEX(y2);
4692
	OUT_VERTEX(x1);
4693
	OUT_VERTEX(y2);
4694
	OUT_VERTEX(x1);
4695
	OUT_VERTEX(y1);
4696
	gen3_vertex_flush(sna);
4697
 
4698
	return true;
4699
}
4700
#endif
4701
 
4304 Serge 4702
static void gen3_render_flush(struct sna *sna)
4703
{
4704
	gen3_vertex_close(sna);
4705
 
4706
	assert(sna->render.vertex_reloc[0] == 0);
4707
	assert(sna->render.vertex_offset == 0);
4708
}
4709
 
4710
/*
 * Teardown hook installed as render->fini by gen3_render_init().
 * Intentionally empty: this function performs no work.
 */
static void
gen3_render_fini(struct sna *sna)
{
	(void)sna;
}
4714
 
4715
const char *gen3_render_init(struct sna *sna, const char *backend)
4716
{
4717
	struct sna_render *render = &sna->render;
4718
 
4719
#if 0
4720
#if !NO_COMPOSITE
4721
	render->composite = gen3_render_composite;
4722
	render->prefer_gpu |= PREFER_GPU_RENDER;
4723
#endif
4724
#if !NO_COMPOSITE_SPANS
4725
	render->check_composite_spans = gen3_check_composite_spans;
4726
	render->composite_spans = gen3_render_composite_spans;
4727
	render->prefer_gpu |= PREFER_GPU_SPANS;
4728
#endif
4729
 
4730
	render->video = gen3_render_video;
4731
 
4732
	render->copy_boxes = gen3_render_copy_boxes;
4733
	render->copy = gen3_render_copy;
4734
 
4735
	render->fill_boxes = gen3_render_fill_boxes;
4736
	render->fill = gen3_render_fill;
4737
	render->fill_one = gen3_render_fill_one;
4738
#endif
4739
 
4740
    render->blit_tex = gen3_blit_tex;
4741
    render->caps = HW_BIT_BLIT | HW_TEX_BLIT;
4742
 
4743
	render->reset = gen3_render_reset;
4744
	render->flush = gen3_render_flush;
4745
	render->fini = gen3_render_fini;
4746
 
4747
	render->max_3d_size = MAX_3D_SIZE;
4748
	render->max_3d_pitch = MAX_3D_PITCH;
4749
 
4750
	sna->kgem.retire = gen3_render_retire;
4751
	sna->kgem.expire = gen3_render_expire;
4752
	return "Alviso (gen3)";
4753
}
4754
 
4755
static bool
4756
gen3_blit_tex(struct sna *sna,
4757
              uint8_t op, bool scale,
4758
		      PixmapPtr src, struct kgem_bo *src_bo,
4759
		      PixmapPtr mask,struct kgem_bo *mask_bo,
4760
		      PixmapPtr dst, struct kgem_bo *dst_bo,
4761
              int32_t src_x, int32_t src_y,
4762
              int32_t msk_x, int32_t msk_y,
4763
              int32_t dst_x, int32_t dst_y,
4764
              int32_t width, int32_t height,
4765
              struct sna_composite_op *tmp)
4766
{
4767
 
4768
    DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
4769
         width, height, sna->kgem.ring));
4770
 
4771
    tmp->op = PictOpSrc;
4772
 
4773
    tmp->dst.pixmap = dst;
4774
    tmp->dst.bo     = dst_bo;
4775
    tmp->dst.width  = dst->drawable.width;
4776
    tmp->dst.height = dst->drawable.height;
4777
    tmp->dst.format = PICT_x8r8g8b8;
4778
 
4779
	tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format);
4780
 
4781
	tmp->u.gen3.num_constants = 0;
4782
	tmp->src.u.gen3.type = SHADER_TEXTURE;
4783
	tmp->src.is_affine = true;
4784
 
4785
 
4786
	tmp->src.repeat = RepeatNone;
4787
	tmp->src.filter = PictFilterNearest;
4788
 
4789
    tmp->src.bo = src_bo;
4790
	tmp->src.pict_format = PICT_x8r8g8b8;
4791
 
4792
	gen3_composite_channel_set_format(&tmp->src, tmp->src.pict_format);
4793
 
4794
    tmp->src.width  = src->drawable.width;
4795
    tmp->src.height = src->drawable.height;
4796
 
4797
	tmp->mask.u.gen3.type = SHADER_TEXTURE;
4798
	tmp->mask.is_affine = true;
4799
	tmp->need_magic_ca_pass = false;
4800
	tmp->has_component_alpha = false;
4801
 
4802
 
4803
 	tmp->mask.repeat = RepeatNone;
4804
	tmp->mask.filter = PictFilterNearest;
4805
    tmp->mask.is_affine = true;
4806
 
4807
    tmp->mask.bo = mask_bo;
4808
    tmp->mask.pict_format = PIXMAN_a8;
4809
	gen3_composite_channel_set_format(&tmp->mask, tmp->mask.pict_format);
4810
    tmp->mask.width  = mask->drawable.width;
4811
    tmp->mask.height = mask->drawable.height;
4812
 
4813
    if( scale )
4814
    {
4815
        tmp->src.scale[0] = 1.f/width;
4816
        tmp->src.scale[1] = 1.f/height;
4817
    }
4818
    else
4819
    {
4820
        tmp->src.scale[0] = 1.f/src->drawable.width;
4821
        tmp->src.scale[1] = 1.f/src->drawable.height;
4822
    }
4823
 
4824
    tmp->mask.scale[0] = 1.f/mask->drawable.width;
4825
    tmp->mask.scale[1] = 1.f/mask->drawable.height;
4826
 
4827
	tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;
4828
 
4829
 
4830
	tmp->floats_per_vertex = 2;
4831
	if (!is_constant_ps(tmp->src.u.gen3.type))
4832
		tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4;
4833
	if (!is_constant_ps(tmp->mask.u.gen3.type))
4834
		tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4;
4835
//	DBG(("%s: floats_per_vertex = 2 + %d + %d = %d [specialised emitter? %d]\n", __FUNCTION__,
4836
//	     !is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0,
4837
//	     !is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 2 : 4 : 0,
4838
//	     tmp->floats_per_vertex,
4839
//	     tmp->prim_emit != gen3_emit_composite_primitive));
4840
	tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
4841
 
4842
	tmp->blt   = gen3_render_composite_blt;
4843
 
4844
	tmp->done  = gen3_render_composite_done;
4845
 
4846
	if (!kgem_check_bo(&sna->kgem,
4847
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
4848
			   NULL)) {
4849
		kgem_submit(&sna->kgem);
4850
	}
4851
 
4501 Serge 4852
	gen3_align_vertex(sna, tmp);
4304 Serge 4853
	gen3_emit_composite_state(sna, tmp);
4854
	return true;
4855
}