Subversion Repositories Kolibri OS, Rev 4245
/*
 * Copyright © 2010-2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "sna.h"
#include "sna_render.h"
#include "sna_render_inline.h"
#include "sna_reg.h"
//#include "sna_video.h"

#include "gen3_render.h"

#define NO_COMPOSITE 0
#define NO_COMPOSITE_SPANS 0
#define NO_COPY 0
#define NO_COPY_BOXES 0
#define NO_FILL 0
#define NO_FILL_ONE 0
#define NO_FILL_BOXES 0

#define PREFER_BLT_FILL 1

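/* Classification of a composite channel source: fixed colours, a constant,
 * gradient shaders, a bound texture, or a per-span opacity ramp. The value
 * feeds the shader-cache key built in gen3_composite_emit_shader(). */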
enum {
	SHADER_NONE = 0,
	SHADER_ZERO,
	SHADER_BLACK,
	SHADER_WHITE,
	SHADER_CONSTANT,
	SHADER_LINEAR,
	SHADER_RADIAL,
	SHADER_TEXTURE,
	SHADER_OPACITY,
};

#define MAX_3D_SIZE 2048
#define MAX_3D_PITCH 8192

#define OUT_BATCH(v) batch_emit(sna, v)
#define OUT_BATCH_F(v) batch_emit_float(sna, v)
#define OUT_VERTEX(v) vertex_emit(sna, v)

enum gen3_radial_mode {
	RADIAL_ONE,
	RADIAL_TWO
};

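/* Source/destination blend factors for each Render PictOp; dst_alpha and
 * src_alpha record whether the factors read the destination or source
 * alpha channel, so the setup code can substitute constants when the
 * target format carries no alpha. */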
static const struct blendinfo {
	bool dst_alpha;
	bool src_alpha;
	uint32_t src_blend;
	uint32_t dst_blend;
} gen3_blend_op[] = {
	/* Clear */	{0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO},
	/* Src */	{0, 0, BLENDFACT_ONE, BLENDFACT_ZERO},
	/* Dst */	{0, 0, BLENDFACT_ZERO, BLENDFACT_ONE},
	/* Over */	{0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA},
	/* OverReverse */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE},
	/* In */	{1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO},
	/* InReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA},
	/* Out */	{1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO},
	/* OutReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA},
	/* Atop */	{1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
	/* AtopReverse */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA},
	/* Xor */	{1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA},
	/* Add */	{0, 0, BLENDFACT_ONE, BLENDFACT_ONE},
};

#define S6_COLOR_WRITE_ONLY \
	(S6_COLOR_WRITE_ENABLE | \
	 BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT | \
	 BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT | \
	 BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT)

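/* Translation from Render pixel formats to gen3 texture-map formats; xfmt
 * is the alpha-less variant that shares the same hardware layout, and
 * rb_reversed marks formats that need a red/blue swizzle in the shader. */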
static const struct formatinfo {
	unsigned int fmt, xfmt;
	uint32_t card_fmt;
	bool rb_reversed;
} gen3_tex_formats[] = {
	{PICT_a8, 0, MAPSURF_8BIT | MT_8BIT_A8, false},
	{PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, false},
	{PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, false},
	{PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, false},
	{PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, false},
	{PICT_a2r10g10b10, PICT_x2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010, false},
	{PICT_a2b10g10r10, PICT_x2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010, false},
	{PICT_r5g6b5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, false},
	{PICT_b5g6r5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, true},
	{PICT_a1r5g5b5, PICT_x1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555, false},
	{PICT_a1b5g5r5, PICT_x1b5g5r5, MAPSURF_16BIT | MT_16BIT_ARGB1555, true},
	{PICT_a4r4g4b4, PICT_x4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444, false},
	{PICT_a4b4g4r4, PICT_x4b4g4r4, MAPSURF_16BIT | MT_16BIT_ARGB4444, true},
};

#define xFixedToDouble(f) pixman_fixed_to_double(f)

static inline bool too_large(int width, int height)
{
	return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
}

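/* Note: the case fall-through below is deliberate; Y-tiled surfaces set
 * both the tile-walk and tiled-surface bits, X-tiled only the latter. */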
static inline uint32_t gen3_buf_tiling(uint32_t tiling)
{
	uint32_t v = 0;
	switch (tiling) {
	case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y;
	case I915_TILING_X: v |= BUF_3D_TILED_SURFACE;
	case I915_TILING_NONE: break;
	}
	return v;
}

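/* In this port the blend control is hard-wired to the PictOpOver factors
 * (ONE, INV_SRC_ALPHA); the generic per-op selection and the dst-alpha
 * and component-alpha fix-ups are kept below under #if 0. */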
static uint32_t gen3_get_blend_cntl(int op,
				    bool has_component_alpha,
				    uint32_t dst_format)
{
	uint32_t sblend;
	uint32_t dblend;

	sblend = BLENDFACT_ONE;
	dblend = BLENDFACT_INV_SRC_ALPHA;

#if 0
	if (op <= PictOpSrc) /* for clear and src disable blending */
		return S6_COLOR_WRITE_ONLY;

	/* If there's no dst alpha channel, adjust the blend op so that we'll
	 * treat it as always 1.
	 */
	if (gen3_blend_op[op].dst_alpha) {
		if (PICT_FORMAT_A(dst_format) == 0) {
			if (sblend == BLENDFACT_DST_ALPHA)
				sblend = BLENDFACT_ONE;
			else if (sblend == BLENDFACT_INV_DST_ALPHA)
				sblend = BLENDFACT_ZERO;
		}

		/* gen3 engine reads 8bit color buffer into green channel
		 * in cases like color buffer blending etc., and also writes
		 * back green channel.  So with dst_alpha blend we should use
		 * color factor. See spec on "8-bit rendering".
		 */
		if (dst_format == PICT_a8) {
			if (sblend == BLENDFACT_DST_ALPHA)
				sblend = BLENDFACT_DST_COLR;
			else if (sblend == BLENDFACT_INV_DST_ALPHA)
				sblend = BLENDFACT_INV_DST_COLR;
		}
	}

	/* If the source alpha is being used, then we should only be in a case
	 * where the source blend factor is 0, and the source blend value is the
	 * mask channels multiplied by the source picture's alpha.
	 */
	if (has_component_alpha && gen3_blend_op[op].src_alpha) {
		if (dblend == BLENDFACT_SRC_ALPHA)
			dblend = BLENDFACT_SRC_COLR;
		else if (dblend == BLENDFACT_INV_SRC_ALPHA)
			dblend = BLENDFACT_INV_SRC_COLR;
	}
#endif

	return (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
		BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT |
		sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT |
		dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
}

static bool gen3_dst_rb_reversed(uint32_t format)
{
	switch (format) {
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
	case PICT_r5g6b5:
	case PICT_a1r5g5b5:
	case PICT_x1r5g5b5:
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
	case PICT_a8:
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
		return false;
	default:
		return true;
	}
}

#define DSTORG_HORT_BIAS(x)             ((x)<<20)
#define DSTORG_VERT_BIAS(x)             ((x)<<16)

static uint32_t gen3_get_dst_format(uint32_t format)
{
#define BIAS (DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8))
	switch (format) {
	default:
	case PICT_a8r8g8b8:
	case PICT_x8r8g8b8:
	case PICT_a8b8g8r8:
	case PICT_x8b8g8r8:
		return BIAS | COLR_BUF_ARGB8888;
	case PICT_r5g6b5:
	case PICT_b5g6r5:
		return BIAS | COLR_BUF_RGB565;
	case PICT_a1r5g5b5:
	case PICT_x1r5g5b5:
	case PICT_a1b5g5r5:
	case PICT_x1b5g5r5:
		return BIAS | COLR_BUF_ARGB1555;
	case PICT_a2r10g10b10:
	case PICT_x2r10g10b10:
	case PICT_a2b10g10r10:
	case PICT_x2b10g10r10:
		return BIAS | COLR_BUF_ARGB2AAA;
	case PICT_a8:
		return BIAS | COLR_BUF_8BIT;
	case PICT_a4r4g4b4:
	case PICT_x4r4g4b4:
	case PICT_a4b4g4r4:
	case PICT_x4b4g4r4:
		return BIAS | COLR_BUF_ARGB4444;
	}
#undef BIAS
}

#if 0
static bool gen3_check_repeat(PicturePtr p)
{
	if (!p->repeat)
		return true;

	switch (p->repeatType) {
	case RepeatNone:
	case RepeatNormal:
	case RepeatPad:
	case RepeatReflect:
		return true;
	default:
		return false;
	}
}

static uint32_t gen3_filter(uint32_t filter)
{
	switch (filter) {
	default:
		assert(0);
	case PictFilterNearest:
		return (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT |
			FILTER_NEAREST << SS2_MIN_FILTER_SHIFT |
			MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
	case PictFilterBilinear:
		return (FILTER_LINEAR  << SS2_MAG_FILTER_SHIFT |
			FILTER_LINEAR  << SS2_MIN_FILTER_SHIFT |
			MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT);
	}
}

static bool gen3_check_filter(PicturePtr p)
{
	switch (p->filter) {
	case PictFilterNearest:
	case PictFilterBilinear:
		return true;
	default:
		return false;
	}
}

fastcall static void
gen3_emit_composite_primitive_identity_gradient(struct sna *sna,
						const struct sna_composite_op *op,
						const struct sna_composite_rectangles *r)
{
	int16_t dst_x, dst_y;
	int16_t src_x, src_y;

	dst_x = r->dst.x + op->dst.x;
	dst_y = r->dst.y + op->dst.y;
	src_x = r->src.x + op->src.offset[0];
	src_y = r->src.y + op->src.offset[1];

	gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height);
	OUT_VERTEX(src_x + r->width);
	OUT_VERTEX(src_y + r->height);

	gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height);
	OUT_VERTEX(src_x);
	OUT_VERTEX(src_y + r->height);

	gen3_emit_composite_dstcoord(sna, dst_x, dst_y);
	OUT_VERTEX(src_x);
	OUT_VERTEX(src_y);
}

fastcall static void
gen3_emit_composite_boxes_identity_gradient(const struct sna_composite_op *op,
					    const BoxRec *box, int nbox,
					    float *v)
{
	do {
		v[0] = box->x2;
		v[1] = box->y2;
		v[2] = box->x2 + op->src.offset[0];
		v[3] = box->y2 + op->src.offset[1];

		v[4] = box->x1;
		v[5] = box->y2;
		v[6] = box->x1 + op->src.offset[0];
		v[7] = box->y2 + op->src.offset[1];

		v[8] = box->x1;
		v[9] = box->y1;
		v[10] = box->x1 + op->src.offset[0];
		v[11] = box->y1 + op->src.offset[1];

		v += 12;
		box++;
	} while (--nbox);
}

fastcall static void
gen3_emit_composite_boxes_affine_gradient(const struct sna_composite_op *op,
					  const BoxRec *box, int nbox,
					  float *v)
{
	const PictTransform *transform = op->src.transform;

	do {
		v[0] = box->x2;
		v[1] = box->y2;
		_sna_get_transformed_scaled(box->x2 + op->src.offset[0],
					    box->y2 + op->src.offset[1],
					    transform, op->src.scale,
					    &v[2], &v[3]);

		v[4] = box->x1;
		v[5] = box->y2;
		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
					    box->y2 + op->src.offset[1],
					    transform, op->src.scale,
					    &v[6], &v[7]);

		v[8] = box->x1;
		v[9] = box->y1;
		_sna_get_transformed_scaled(box->x1 + op->src.offset[0],
					    box->y1 + op->src.offset[1],
					    transform, op->src.scale,
					    &v[10], &v[11]);

		box++;
		v += 12;
	} while (--nbox);
}

fastcall static void
gen3_emit_composite_primitive_identity_source(struct sna *sna,
					      const struct sna_composite_op *op,
					      const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + w;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
	v[2] = v[6] + w * op->src.scale[0];

	v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
	v[7] = v[3] = v[11] + h * op->src.scale[1];
}

fastcall static void
gen3_emit_composite_boxes_identity_source(const struct sna_composite_op *op,
					  const BoxRec *box, int nbox,
					  float *v)
{
	do {
		v[0] = box->x2 + op->dst.x;
		v[8] = v[4] = box->x1 + op->dst.x;
		v[5] = v[1] = box->y2 + op->dst.y;
		v[9] = box->y1 + op->dst.y;

		v[10] = v[6] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
		v[2] = (box->x2 + op->src.offset[0]) * op->src.scale[0];

		v[11] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
		v[7] = v[3] = (box->y2 + op->src.offset[1]) * op->src.scale[1];

		v += 12;
		box++;
	} while (--nbox);
}

fastcall static void
gen3_emit_composite_primitive_identity_source_no_offset(struct sna *sna,
							const struct sna_composite_op *op,
							const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;

	v[8] = v[4] = r->dst.x;
	v[9] = r->dst.y;

	v[0] = v[4] + w;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = r->src.x * op->src.scale[0];
	v[11] = r->src.y * op->src.scale[1];

	v[2] = v[6] + w * op->src.scale[0];
	v[7] = v[3] = v[11] + h * op->src.scale[1];
}

fastcall static void
gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna,
						     const struct sna_composite_op *op,
						     const struct sna_composite_rectangles *r)
{
	float w = r->width;
	float h = r->height;
	float *v;

	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;

	v[8] = v[4] = r->dst.x + op->dst.x;
	v[0] = v[4] + w;

	v[9] = r->dst.y + op->dst.y;
	v[5] = v[1] = v[9] + h;

	v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0];
	v[2] = v[6] + w * op->mask.scale[0];

	v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1];
	v[7] = v[3] = v[11] + h * op->mask.scale[1];
}
#endif

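/* Emits one rectangle as three vertices (the hardware infers the fourth),
 * each carrying the dst x/y plus normalised source and mask texture
 * coordinates: 6 floats per vertex, 18 per rectangle. */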
fastcall static void
gen3_emit_composite_primitive_identity_source_mask(struct sna *sna,
						   const struct sna_composite_op *op,
						   const struct sna_composite_rectangles *r)
{
	float dst_x, dst_y;
	float src_x, src_y;
	float msk_x, msk_y;
	float w, h;
	float *v;

	dst_x = r->dst.x + op->dst.x;
	dst_y = r->dst.y + op->dst.y;
	src_x = r->src.x + op->src.offset[0];
	src_y = r->src.y + op->src.offset[1];
	msk_x = r->mask.x + op->mask.offset[0];
	msk_y = r->mask.y + op->mask.offset[1];
	w = r->width;
	h = r->height;

	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 18;

	v[0] = dst_x + w;
	v[1] = dst_y + h;
	v[2] = (src_x + w) * op->src.scale[0];
	v[3] = (src_y + h) * op->src.scale[1];
	v[4] = (msk_x + w) * op->mask.scale[0];
	v[5] = (msk_y + h) * op->mask.scale[1];

	v[6] = dst_x;
	v[7] = v[1];
	v[8] = src_x * op->src.scale[0];
	v[9] = v[3];
	v[10] = msk_x * op->mask.scale[0];
	v[11] = v[5];

	v[12] = v[6];
	v[13] = dst_y;
	v[14] = v[8];
	v[15] = src_y * op->src.scale[1];
	v[16] = v[10];
	v[17] = msk_y * op->mask.scale[1];
}
static inline void
gen3_2d_perspective(struct sna *sna, int in, int out)
{
	gen3_fs_rcp(out, 0, gen3_fs_operand(in, W, W, W, W));
	gen3_fs_mul(out,
		    gen3_fs_operand(in, X, Y, ZERO, ONE),
		    gen3_fs_operand_reg(out));
}

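/* Projects the interpolated texture coordinate onto the gradient axis
 * stored in the channel's shader constants: out.x = dot((x, y, 1), c). */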
static inline void
gen3_linear_coord(struct sna *sna,
		  const struct sna_composite_channel *channel,
		  int in, int out)
{
	int c = channel->u.gen3.constants;

	if (!channel->is_affine) {
		gen3_2d_perspective(sna, in, FS_U0);
		in = FS_U0;
	}

	gen3_fs_mov(out, gen3_fs_operand_zero());
	gen3_fs_dp3(out, MASK_X,
		    gen3_fs_operand(in, X, Y, ONE, ZERO),
		    gen3_fs_operand_reg(c));
}

static void
gen3_radial_coord(struct sna *sna,
		  const struct sna_composite_channel *channel,
		  int in, int out)
{
	int c = channel->u.gen3.constants;

	if (!channel->is_affine) {
		gen3_2d_perspective(sna, in, FS_U0);
		in = FS_U0;
	}

	switch (channel->u.gen3.mode) {
	case RADIAL_ONE:
		/*
		   pdx = (x - c1x) / dr, pdy = (y - c1y) / dr;
		   r² = pdx*pdx + pdy*pdy
		   t = r²/sqrt(r²) - r1/dr;
		   */
		gen3_fs_mad(FS_U0, MASK_X | MASK_Y,
			    gen3_fs_operand(in, X, Y, ZERO, ZERO),
			    gen3_fs_operand(c, Z, Z, ZERO, ZERO),
			    gen3_fs_operand(c, NEG_X, NEG_Y, ZERO, ZERO));
		gen3_fs_dp2add(FS_U0, MASK_X,
			       gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
			       gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO),
			       gen3_fs_operand_zero());
		gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U0, X, X, X, X));
		gen3_fs_mad(out, 0,
			    gen3_fs_operand(FS_U0, X, ZERO, ZERO, ZERO),
			    gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
			    gen3_fs_operand(c, W, ZERO, ZERO, ZERO));
		break;

	case RADIAL_TWO:
		/*
		   pdx = x - c1x, pdy = y - c1y;
		   A = dx² + dy² - dr²
		   B = -2*(pdx*dx + pdy*dy + r1*dr);
		   C = pdx² + pdy² - r1²;
		   det = B*B - 4*A*C;
		   t = (-B + sqrt (det)) / (2 * A)
		   */

		/* u0.x = pdx, u0.y = pdy, u[0].z = r1; */
		gen3_fs_add(FS_U0,
			    gen3_fs_operand(in, X, Y, ZERO, ZERO),
			    gen3_fs_operand(c, X, Y, Z, ZERO));
		/* u0.x = pdx, u0.y = pdy, u[0].z = r1, u[0].w = B; */
		gen3_fs_dp3(FS_U0, MASK_W,
			    gen3_fs_operand(FS_U0, X, Y, ONE, ZERO),
			    gen3_fs_operand(c+1, X, Y, Z, ZERO));
		/* u1.x = pdx² + pdy² - r1²; [C] */
		gen3_fs_dp3(FS_U1, MASK_X,
			    gen3_fs_operand(FS_U0, X, Y, Z, ZERO),
			    gen3_fs_operand(FS_U0, X, Y, NEG_Z, ZERO));
		/* u1.x = C, u1.y = B, u1.z = -4*A; */
		gen3_fs_mov_masked(FS_U1, MASK_Y, gen3_fs_operand(FS_U0, W, W, W, W));
		gen3_fs_mov_masked(FS_U1, MASK_Z, gen3_fs_operand(c, W, W, W, W));
		/* u1.x = B² - 4*A*C */
		gen3_fs_dp2add(FS_U1, MASK_X,
			       gen3_fs_operand(FS_U1, X, Y, ZERO, ZERO),
			       gen3_fs_operand(FS_U1, Z, Y, ZERO, ZERO),
			       gen3_fs_operand_zero());
		/* out.x = -B + sqrt (B² - 4*A*C), */
		gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U1, X, X, X, X));
		gen3_fs_mad(out, MASK_X,
			    gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
			    gen3_fs_operand(FS_U1, X, ZERO, ZERO, ZERO),
			    gen3_fs_operand(FS_U0, NEG_W, ZERO, ZERO, ZERO));
		/* out.x = (-B + sqrt (B² - 4*A*C)) / (2 * A), */
		gen3_fs_mul(out,
			    gen3_fs_operand(out, X, ZERO, ZERO, ZERO),
			    gen3_fs_operand(c+1, W, ZERO, ZERO, ZERO));
		break;
	}
}

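/* Builds the pixel-shader program for the current source/mask combination.
 * The operands are packed into a single id so an unchanged program is not
 * re-emitted; the batch dword at shader_offset is reserved up front and
 * patched afterwards with _3DSTATE_PIXEL_SHADER_PROGRAM and the length. */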
static void
gen3_composite_emit_shader(struct sna *sna,
			   const struct sna_composite_op *op,
			   uint8_t blend)
{
	bool dst_is_alpha = PIXMAN_FORMAT_RGB(op->dst.format) == 0;
	const struct sna_composite_channel *src, *mask;
	struct gen3_render_state *state = &sna->render_state.gen3;
	uint32_t shader_offset, id;
	int src_reg, mask_reg;
	int t, length;

	src = &op->src;
	mask = &op->mask;
	if (mask->u.gen3.type == SHADER_NONE)
		mask = NULL;

	id = (src->u.gen3.type |
	      src->is_affine << 4 |
	      src->alpha_fixup << 5 |
	      src->rb_reversed << 6);
	if (mask) {
		id |= (mask->u.gen3.type << 8 |
		       mask->is_affine << 12 |
		       gen3_blend_op[blend].src_alpha << 13 |
		       op->has_component_alpha << 14 |
		       mask->alpha_fixup << 15 |
		       mask->rb_reversed << 16);
	}
	id |= dst_is_alpha << 24;
	id |= op->rb_reversed << 25;

	if (id == state->last_shader)
		return;

	state->last_shader = id;

	shader_offset = sna->kgem.nbatch++;
	t = 0;
	switch (src->u.gen3.type) {
	case SHADER_NONE:
	case SHADER_OPACITY:
		assert(0);
	case SHADER_ZERO:
	case SHADER_BLACK:
	case SHADER_WHITE:
		break;
	case SHADER_CONSTANT:
		gen3_fs_dcl(FS_T8);
		src_reg = FS_T8;
		break;
	case SHADER_TEXTURE:
	case SHADER_RADIAL:
	case SHADER_LINEAR:
		gen3_fs_dcl(FS_S0);
		gen3_fs_dcl(FS_T0);
		t++;
		break;
	}

	if (mask == NULL) {
		switch (src->u.gen3.type) {
		case SHADER_ZERO:
			gen3_fs_mov(FS_OC, gen3_fs_operand_zero());
			goto done;
		case SHADER_BLACK:
			if (dst_is_alpha)
				gen3_fs_mov(FS_OC, gen3_fs_operand_one());
			else
				gen3_fs_mov(FS_OC, gen3_fs_operand(FS_R0, ZERO, ZERO, ZERO, ONE));
			goto done;
		case SHADER_WHITE:
			gen3_fs_mov(FS_OC, gen3_fs_operand_one());
			goto done;
		}
		if (src->alpha_fixup && dst_is_alpha) {
			gen3_fs_mov(FS_OC, gen3_fs_operand_one());
			goto done;
		}
		/* No mask, so load directly to output color */
		if (src->u.gen3.type != SHADER_CONSTANT) {
			if (dst_is_alpha || src->rb_reversed ^ op->rb_reversed)
				src_reg = FS_R0;
			else
				src_reg = FS_OC;
		}
		switch (src->u.gen3.type) {
		case SHADER_LINEAR:
			gen3_linear_coord(sna, src, FS_T0, FS_R0);
			gen3_fs_texld(src_reg, FS_S0, FS_R0);
			break;

		case SHADER_RADIAL:
			gen3_radial_coord(sna, src, FS_T0, FS_R0);
			gen3_fs_texld(src_reg, FS_S0, FS_R0);
			break;

		case SHADER_TEXTURE:
			if (src->is_affine)
				gen3_fs_texld(src_reg, FS_S0, FS_T0);
			else
				gen3_fs_texldp(src_reg, FS_S0, FS_T0);
			break;

		case SHADER_NONE:
		case SHADER_WHITE:
		case SHADER_BLACK:
		case SHADER_ZERO:
			assert(0);
		case SHADER_CONSTANT:
			break;
		}

		if (src_reg != FS_OC) {
			if (src->alpha_fixup)
				gen3_fs_mov(FS_OC,
					    src->rb_reversed ^ op->rb_reversed ?
					    gen3_fs_operand(src_reg, Z, Y, X, ONE) :
					    gen3_fs_operand(src_reg, X, Y, Z, ONE));
			else if (dst_is_alpha)
				gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, W, W, W, W));
			else if (src->rb_reversed ^ op->rb_reversed)
				gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, Z, Y, X, W));
			else
				gen3_fs_mov(FS_OC, gen3_fs_operand_reg(src_reg));
		} else if (src->alpha_fixup)
			gen3_fs_mov_masked(FS_OC, MASK_W, gen3_fs_operand_one());
	} else {
		int out_reg = FS_OC;
		if (op->rb_reversed)
			out_reg = FS_U0;

		switch (mask->u.gen3.type) {
		case SHADER_CONSTANT:
			gen3_fs_dcl(FS_T9);
			mask_reg = FS_T9;
			break;
		case SHADER_TEXTURE:
		case SHADER_LINEAR:
		case SHADER_RADIAL:
			gen3_fs_dcl(FS_S0 + t);
			/* fall through */
		case SHADER_OPACITY:
			gen3_fs_dcl(FS_T0 + t);
			break;
		case SHADER_ZERO:
		case SHADER_BLACK:
			assert(0);
		case SHADER_NONE:
		case SHADER_WHITE:
			break;
		}

		t = 0;
		switch (src->u.gen3.type) {
		case SHADER_LINEAR:
			gen3_linear_coord(sna, src, FS_T0, FS_R0);
			gen3_fs_texld(FS_R0, FS_S0, FS_R0);
			src_reg = FS_R0;
			t++;
			break;

		case SHADER_RADIAL:
			gen3_radial_coord(sna, src, FS_T0, FS_R0);
			gen3_fs_texld(FS_R0, FS_S0, FS_R0);
			src_reg = FS_R0;
			t++;
			break;

		case SHADER_TEXTURE:
			if (src->is_affine)
				gen3_fs_texld(FS_R0, FS_S0, FS_T0);
			else
				gen3_fs_texldp(FS_R0, FS_S0, FS_T0);
			src_reg = FS_R0;
			t++;
			break;

		case SHADER_CONSTANT:
		case SHADER_NONE:
		case SHADER_ZERO:
		case SHADER_BLACK:
		case SHADER_WHITE:
			break;
		}
		if (src->alpha_fixup)
			gen3_fs_mov_masked(src_reg, MASK_W, gen3_fs_operand_one());
		if (src->rb_reversed)
			gen3_fs_mov(src_reg, gen3_fs_operand(src_reg, Z, Y, X, W));

		switch (mask->u.gen3.type) {
		case SHADER_LINEAR:
			gen3_linear_coord(sna, mask, FS_T0 + t, FS_R1);
			gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
			mask_reg = FS_R1;
			break;

		case SHADER_RADIAL:
			gen3_radial_coord(sna, mask, FS_T0 + t, FS_R1);
			gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1);
			mask_reg = FS_R1;
			break;

		case SHADER_TEXTURE:
			if (mask->is_affine)
				gen3_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t);
			else
				gen3_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t);
			mask_reg = FS_R1;
			break;

		case SHADER_OPACITY:
			switch (src->u.gen3.type) {
			case SHADER_BLACK:
			case SHADER_WHITE:
				if (dst_is_alpha || src->u.gen3.type == SHADER_WHITE) {
					gen3_fs_mov(out_reg,
						    gen3_fs_operand(FS_T0 + t, X, X, X, X));
				} else {
					gen3_fs_mov(out_reg,
						    gen3_fs_operand(FS_T0 + t, ZERO, ZERO, ZERO, X));
				}
				break;
			default:
				if (dst_is_alpha) {
					gen3_fs_mul(out_reg,
						    gen3_fs_operand(src_reg, W, W, W, W),
						    gen3_fs_operand(FS_T0 + t, X, X, X, X));
				} else {
					gen3_fs_mul(out_reg,
						    gen3_fs_operand(src_reg, X, Y, Z, W),
						    gen3_fs_operand(FS_T0 + t, X, X, X, X));
				}
			}
			goto mask_done;

		case SHADER_CONSTANT:
		case SHADER_ZERO:
		case SHADER_BLACK:
		case SHADER_WHITE:
		case SHADER_NONE:
			break;
		}
		if (mask->alpha_fixup)
			gen3_fs_mov_masked(mask_reg, MASK_W, gen3_fs_operand_one());
		if (mask->rb_reversed)
			gen3_fs_mov(mask_reg, gen3_fs_operand(mask_reg, Z, Y, X, W));

		if (dst_is_alpha) {
			switch (src->u.gen3.type) {
			case SHADER_BLACK:
			case SHADER_WHITE:
				gen3_fs_mov(out_reg,
					    gen3_fs_operand(mask_reg, W, W, W, W));
				break;
			default:
				gen3_fs_mul(out_reg,
					    gen3_fs_operand(src_reg, W, W, W, W),
					    gen3_fs_operand(mask_reg, W, W, W, W));
				break;
			}
		} else {
			/* If component alpha is active in the mask and the blend
			 * operation uses the source alpha, then we know we don't
			 * need the source value (otherwise we would have hit a
			 * fallback earlier), so we provide the source alpha (src.A *
			 * mask.X) as output color.
			 * Conversely, if CA is set and we don't need the source alpha,
			 * then we produce the source value (src.X * mask.X) and the
			 * source alpha is unused.  Otherwise, we provide the non-CA
			 * source value (src.X * mask.A).
			 */
			if (op->has_component_alpha) {
				switch (src->u.gen3.type) {
				case SHADER_BLACK:
					if (gen3_blend_op[blend].src_alpha)
						gen3_fs_mov(out_reg,
							    gen3_fs_operand_reg(mask_reg));
					else
						gen3_fs_mov(out_reg,
							    gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
					break;
				case SHADER_WHITE:
					gen3_fs_mov(out_reg,
						    gen3_fs_operand_reg(mask_reg));
					break;
				default:
					if (gen3_blend_op[blend].src_alpha)
						gen3_fs_mul(out_reg,
							    gen3_fs_operand(src_reg, W, W, W, W),
							    gen3_fs_operand_reg(mask_reg));
					else
						gen3_fs_mul(out_reg,
							    gen3_fs_operand_reg(src_reg),
							    gen3_fs_operand_reg(mask_reg));
					break;
				}
			} else {
				switch (src->u.gen3.type) {
				case SHADER_WHITE:
					gen3_fs_mov(out_reg,
						    gen3_fs_operand(mask_reg, W, W, W, W));
					break;
				case SHADER_BLACK:
					gen3_fs_mov(out_reg,
						    gen3_fs_operand(mask_reg, ZERO, ZERO, ZERO, W));
					break;
				default:
					gen3_fs_mul(out_reg,
						    gen3_fs_operand_reg(src_reg),
						    gen3_fs_operand(mask_reg, W, W, W, W));
					break;
				}
			}
		}
mask_done:
		if (op->rb_reversed)
			gen3_fs_mov(FS_OC, gen3_fs_operand(FS_U0, Z, Y, X, W));
	}

done:
	length = sna->kgem.nbatch - shader_offset;
	sna->kgem.batch[shader_offset] =
		_3DSTATE_PIXEL_SHADER_PROGRAM | (length - 2);
}

static uint32_t gen3_ms_tiling(uint32_t tiling)
{
	uint32_t v = 0;
	switch (tiling) {
	case I915_TILING_Y: v |= MS3_TILE_WALK;
	case I915_TILING_X: v |= MS3_TILED_SURFACE;
	case I915_TILING_NONE: break;
	}
	return v;
}

static void gen3_emit_invariant(struct sna *sna)
{
	/* Disable independent alpha blend */
	OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | IAB_MODIFY_ENABLE |
		  IAB_MODIFY_FUNC | BLENDFUNC_ADD << IAB_FUNC_SHIFT |
		  IAB_MODIFY_SRC_FACTOR | BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT |
		  IAB_MODIFY_DST_FACTOR | BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT);

	OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS |
		  CSB_TCB(0, 0) |
		  CSB_TCB(1, 1) |
		  CSB_TCB(2, 2) |
		  CSB_TCB(3, 3) |
		  CSB_TCB(4, 4) |
		  CSB_TCB(5, 5) |
		  CSB_TCB(6, 6) |
		  CSB_TCB(7, 7));

	OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 3);
	OUT_BATCH(0); /* Disable texture coordinate wrap-shortest */
	OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) |
		  S4_LINE_WIDTH_ONE |
		  S4_CULLMODE_NONE |
		  S4_VFMT_XY);
	OUT_BATCH(0); /* Disable fog/stencil. *Enable* write mask. */
	OUT_BATCH(S6_COLOR_WRITE_ONLY); /* Disable blending, depth */

	OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
	OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);

	OUT_BATCH(_3DSTATE_LOAD_INDIRECT);
	OUT_BATCH(0x00000000);

	OUT_BATCH(_3DSTATE_STIPPLE);
	OUT_BATCH(0x00000000);

	sna->render_state.gen3.need_invariant = false;
}

#define MAX_OBJECTS 3 /* worst case: dst + src + mask  */

static void
gen3_get_batch(struct sna *sna, const struct sna_composite_op *op)
{
	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);

	if (!kgem_check_batch(&sna->kgem, 200)) {
		DBG(("%s: flushing batch: size %d > %d\n",
		     __FUNCTION__, 200,
		     sna->kgem.surface-sna->kgem.nbatch));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (!kgem_check_reloc(&sna->kgem, MAX_OBJECTS)) {
		DBG(("%s: flushing batch: reloc %d >= %d\n",
		     __FUNCTION__,
		     sna->kgem.nreloc,
		     (int)KGEM_RELOC_SIZE(&sna->kgem) - MAX_OBJECTS));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (!kgem_check_exec(&sna->kgem, MAX_OBJECTS)) {
		DBG(("%s: flushing batch: exec %d >= %d\n",
		     __FUNCTION__,
		     sna->kgem.nexec,
		     (int)KGEM_EXEC_SIZE(&sna->kgem) - MAX_OBJECTS - 1));
		kgem_submit(&sna->kgem);
		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
	}

	if (sna->render_state.gen3.need_invariant)
		gen3_emit_invariant(sna);
#undef MAX_OBJECTS
}

static void gen3_emit_target(struct sna *sna,
			     struct kgem_bo *bo,
			     int width,
			     int height,
			     int format)
{
	struct gen3_render_state *state = &sna->render_state.gen3;

	assert(!too_large(width, height));

	/* BUF_INFO is an implicit flush, so skip if the target is unchanged. */
	assert(bo->unique_id != 0);
	if (bo->unique_id != state->current_dst) {
		uint32_t v;

		DBG(("%s: setting new target id=%d, handle=%d\n",
		     __FUNCTION__, bo->unique_id, bo->handle));

		OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
		OUT_BATCH(BUF_3D_ID_COLOR_BACK |
			  gen3_buf_tiling(bo->tiling) |
			  bo->pitch);
		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
					 bo,
					 I915_GEM_DOMAIN_RENDER << 16 |
					 I915_GEM_DOMAIN_RENDER,
					 0));

		OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
		OUT_BATCH(gen3_get_dst_format(format));

		v = DRAW_YMAX(height - 1) | DRAW_XMAX(width - 1);
		if (v != state->last_drawrect_limit) {
			OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
			OUT_BATCH(0); /* XXX dither origin? */
			OUT_BATCH(0);
			OUT_BATCH(v);
			OUT_BATCH(0);
			state->last_drawrect_limit = v;
		}

		state->current_dst = bo->unique_id;
	}
	assert(bo->exec);
	kgem_bo_mark_dirty(bo);
}

static void gen3_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	struct gen3_render_state *state = &sna->render_state.gen3;
	uint32_t map[4];
	uint32_t sampler[4];
	struct kgem_bo *bo[2];
	unsigned int tex_count, n;
	uint32_t ss2;

	gen3_get_batch(sna, op);

	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
		if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
			OUT_BATCH(MI_FLUSH | MI_INVALIDATE_MAP_CACHE);
		else
			OUT_BATCH(_3DSTATE_MODES_5_CMD |
				  PIPELINE_FLUSH_RENDER_CACHE |
				  PIPELINE_FLUSH_TEXTURE_CACHE);
		kgem_clear_dirty(&sna->kgem);
	}

	gen3_emit_target(sna,
			 op->dst.bo,
			 op->dst.width,
			 op->dst.height,
			 op->dst.format);

	ss2 = ~0;
	tex_count = 0;
	switch (op->src.u.gen3.type) {
	case SHADER_OPACITY:
	case SHADER_NONE:
		assert(0);
	case SHADER_ZERO:
	case SHADER_BLACK:
	case SHADER_WHITE:
		break;
	case SHADER_CONSTANT:
		if (op->src.u.gen3.mode != state->last_diffuse) {
			OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
			OUT_BATCH(op->src.u.gen3.mode);
			state->last_diffuse = op->src.u.gen3.mode;
		}
		break;
	case SHADER_LINEAR:
	case SHADER_RADIAL:
	case SHADER_TEXTURE:
		ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
		ss2 |= S2_TEXCOORD_FMT(tex_count,
				       op->src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
		assert(op->src.card_format);
		map[tex_count * 2 + 0] =
			op->src.card_format |
			gen3_ms_tiling(op->src.bo->tiling) |
			(op->src.height - 1) << MS3_HEIGHT_SHIFT |
			(op->src.width - 1) << MS3_WIDTH_SHIFT;
		map[tex_count * 2 + 1] =
			(op->src.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;

		sampler[tex_count * 2 + 0] = op->src.filter;
		sampler[tex_count * 2 + 1] =
			op->src.repeat |
			tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
		bo[tex_count] = op->src.bo;
		tex_count++;
		break;
	}
	switch (op->mask.u.gen3.type) {
	case SHADER_NONE:
	case SHADER_ZERO:
	case SHADER_BLACK:
	case SHADER_WHITE:
		break;
	case SHADER_CONSTANT:
		if (op->mask.u.gen3.mode != state->last_specular) {
			OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
			OUT_BATCH(op->mask.u.gen3.mode);
			state->last_specular = op->mask.u.gen3.mode;
		}
		break;
	case SHADER_LINEAR:
	case SHADER_RADIAL:
	case SHADER_TEXTURE:
		ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
		ss2 |= S2_TEXCOORD_FMT(tex_count,
				       op->mask.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
		assert(op->mask.card_format);
		map[tex_count * 2 + 0] =
			op->mask.card_format |
			gen3_ms_tiling(op->mask.bo->tiling) |
			(op->mask.height - 1) << MS3_HEIGHT_SHIFT |
			(op->mask.width - 1) << MS3_WIDTH_SHIFT;
		map[tex_count * 2 + 1] =
			(op->mask.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;

		sampler[tex_count * 2 + 0] = op->mask.filter;
		sampler[tex_count * 2 + 1] =
			op->mask.repeat |
			tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
		bo[tex_count] = op->mask.bo;
		tex_count++;
		break;
	case SHADER_OPACITY:
		ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
		ss2 |= S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_1D);
		break;
	}

	{
		uint32_t blend_offset = sna->kgem.nbatch;

		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
		OUT_BATCH(ss2);
		OUT_BATCH(gen3_get_blend_cntl(op->op,
					      op->has_component_alpha,
					      op->dst.format));

		if (memcmp(sna->kgem.batch + state->last_blend + 1,
			   sna->kgem.batch + blend_offset + 1,
			   2 * 4) == 0)
			sna->kgem.nbatch = blend_offset;
		else
			state->last_blend = blend_offset;
	}

	if (op->u.gen3.num_constants) {
		int count = op->u.gen3.num_constants;
		if (state->last_constants) {
			int last = sna->kgem.batch[state->last_constants+1];
			if (last == (1 << (count >> 2)) - 1 &&
			    memcmp(&sna->kgem.batch[state->last_constants+2],
				   op->u.gen3.constants,
				   count * sizeof(uint32_t)) == 0)
				count = 0;
		}
		if (count) {
			state->last_constants = sna->kgem.nbatch;
			OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | count);
			OUT_BATCH((1 << (count >> 2)) - 1);

			memcpy(sna->kgem.batch + sna->kgem.nbatch,
			       op->u.gen3.constants,
			       count * sizeof(uint32_t));
			sna->kgem.nbatch += count;
		}
	}

	if (tex_count != 0) {
		uint32_t rewind;

		n = 0;
		if (tex_count == state->tex_count) {
			for (; n < tex_count; n++) {
				if (map[2*n+0] != state->tex_map[2*n+0] ||
				    map[2*n+1] != state->tex_map[2*n+1] ||
				    state->tex_handle[n] != bo[n]->handle ||
				    state->tex_delta[n] != bo[n]->delta)
					break;
			}
		}
		if (n < tex_count) {
			OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
			OUT_BATCH((1 << tex_count) - 1);
			for (n = 0; n < tex_count; n++) {
				OUT_BATCH(kgem_add_reloc(&sna->kgem,
							 sna->kgem.nbatch,
							 bo[n],
							 I915_GEM_DOMAIN_SAMPLER << 16,
							 0));
				OUT_BATCH(map[2*n + 0]);
				OUT_BATCH(map[2*n + 1]);

				state->tex_map[2*n+0] = map[2*n+0];
				state->tex_map[2*n+1] = map[2*n+1];
				state->tex_handle[n] = bo[n]->handle;
				state->tex_delta[n] = bo[n]->delta;
			}
			state->tex_count = n;
		}

		rewind = sna->kgem.nbatch;
		OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count));
		OUT_BATCH((1 << tex_count) - 1);
		for (n = 0; n < tex_count; n++) {
			OUT_BATCH(sampler[2*n + 0]);
			OUT_BATCH(sampler[2*n + 1]);
			OUT_BATCH(0);
		}
		if (state->last_sampler &&
		    memcmp(&sna->kgem.batch[state->last_sampler+1],
			   &sna->kgem.batch[rewind + 1],
			   (3*tex_count + 1)*sizeof(uint32_t)) == 0)
			sna->kgem.nbatch = rewind;
		else
			state->last_sampler = rewind;
	}

	gen3_composite_emit_shader(sna, op, op->op);
}

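/* Component-alpha compositing with an op that needs src.A is split into
 * two passes on gen3: the vertices already emitted are replayed here with
 * a PictOpAdd blend state and a matching shader for the second pass. */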
static bool gen3_magic_ca_pass(struct sna *sna,
			       const struct sna_composite_op *op)
{
	if (!op->need_magic_ca_pass)
		return false;

	DBG(("%s(%d)\n", __FUNCTION__,
	     sna->render.vertex_index - sna->render.vertex_start));

	OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
	OUT_BATCH(gen3_get_blend_cntl(PictOpAdd, true, op->dst.format));
	gen3_composite_emit_shader(sna, op, PictOpAdd);

	OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
		  (sna->render.vertex_index - sna->render.vertex_start));
	OUT_BATCH(sna->render.vertex_start);

	sna->render_state.gen3.last_blend = 0;
	return true;
}

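/* The PRIM3D command is emitted as two MI_NOOP placeholders by
 * gen3_rectangle_begin(); once the number of vertices is known they are
 * rewritten here with the real primitive header and start index. */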
static void gen3_vertex_flush(struct sna *sna)
{
	assert(sna->render.vertex_offset);

	DBG(("%s[%x] = %d\n", __FUNCTION__,
	     4*sna->render.vertex_offset,
	     sna->render.vertex_index - sna->render.vertex_start));

	sna->kgem.batch[sna->render.vertex_offset] =
		PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
		(sna->render.vertex_index - sna->render.vertex_start);
	sna->kgem.batch[sna->render.vertex_offset + 1] =
		sna->render.vertex_start;

	sna->render.vertex_offset = 0;
}

static int gen3_vertex_finish(struct sna *sna)
{
	struct kgem_bo *bo;

	DBG(("%s: used=%d/%d, vbo active? %d\n",
	     __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
	     sna->render.vbo ? sna->render.vbo->handle : 0));
	assert(sna->render.vertex_offset == 0);
	assert(sna->render.vertex_used);
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	sna_vertex_wait__locked(&sna->render);

	bo = sna->render.vbo;
	if (bo) {
		DBG(("%s: reloc = %d\n", __FUNCTION__,
		     sna->render.vertex_reloc[0]));

		if (sna->render.vertex_reloc[0]) {
			sna->kgem.batch[sna->render.vertex_reloc[0]] =
				kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
					       bo, I915_GEM_DOMAIN_VERTEX << 16, 0);

			sna->render.vertex_reloc[0] = 0;
		}
		sna->render.vertex_used = 0;
		sna->render.vertex_index = 0;
		sna->render.vbo = NULL;

		kgem_bo_destroy(&sna->kgem, bo);
	}

	sna->render.vertices = NULL;
	sna->render.vbo = kgem_create_linear(&sna->kgem,
					     256*1024, CREATE_GTT_MAP);
	if (sna->render.vbo)
		sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
	if (sna->render.vertices == NULL) {
		if (sna->render.vbo)
			kgem_bo_destroy(&sna->kgem, sna->render.vbo);
		sna->render.vbo = NULL;
		return 0;
	}
	assert(sna->render.vbo->snoop == false);

	if (sna->render.vertex_used) {
		memcpy(sna->render.vertices,
		       sna->render.vertex_data,
		       sizeof(float)*sna->render.vertex_used);
	}
	sna->render.vertex_size = 64 * 1024 - 1;
	return sna->render.vertex_size - sna->render.vertex_used;
}

static void gen3_vertex_close(struct sna *sna)
{
	struct kgem_bo *bo, *free_bo = NULL;
	unsigned int delta = 0;

	assert(sna->render.vertex_offset == 0);
	if (sna->render.vertex_reloc[0] == 0)
		return;

	DBG(("%s: used=%d/%d, vbo active? %d\n",
	     __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size,
	     sna->render.vbo ? sna->render.vbo->handle : 0));

	bo = sna->render.vbo;
	if (bo) {
		if (sna->render.vertex_size - sna->render.vertex_used < 64) {
			DBG(("%s: discarding full vbo\n", __FUNCTION__));
			sna->render.vbo = NULL;
			sna->render.vertices = sna->render.vertex_data;
			sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
			free_bo = bo;
		} else if (IS_CPU_MAP(bo->map)) {
			DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
			sna->render.vertices = kgem_bo_map__gtt(&sna->kgem, bo);
			if (sna->render.vertices == NULL) {
				DBG(("%s: discarding non-mappable vertices\n", __FUNCTION__));
				sna->render.vbo = NULL;
				sna->render.vertices = sna->render.vertex_data;
				sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
				free_bo = bo;
			}
		}
	} else {
		if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
			DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
			     sna->render.vertex_used, sna->kgem.nbatch));
			memcpy(sna->kgem.batch + sna->kgem.nbatch,
			       sna->render.vertex_data,
			       sna->render.vertex_used * 4);
			delta = sna->kgem.nbatch * 4;
			bo = NULL;
			sna->kgem.nbatch += sna->render.vertex_used;
		} else {
			DBG(("%s: new vbo: %d\n", __FUNCTION__,
			     sna->render.vertex_used));
			bo = kgem_create_linear(&sna->kgem,
						4*sna->render.vertex_used,
						CREATE_NO_THROTTLE);
			if (bo) {
				assert(bo->snoop == false);
				kgem_bo_write(&sna->kgem, bo,
					      sna->render.vertex_data,
					      4*sna->render.vertex_used);
			}
			free_bo = bo;
		}
	}

	DBG(("%s: reloc = %d\n", __FUNCTION__, sna->render.vertex_reloc[0]));
	sna->kgem.batch[sna->render.vertex_reloc[0]] =
		kgem_add_reloc(&sna->kgem, sna->render.vertex_reloc[0],
			       bo, I915_GEM_DOMAIN_VERTEX << 16, delta);
	sna->render.vertex_reloc[0] = 0;

	if (sna->render.vbo == NULL) {
		DBG(("%s: resetting vbo\n", __FUNCTION__));
		sna->render.vertex_used = 0;
		sna->render.vertex_index = 0;
		assert(sna->render.vertices == sna->render.vertex_data);
		assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data));
	}

	if (free_bo)
		kgem_bo_destroy(&sna->kgem, free_bo);
}

static bool gen3_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	struct gen3_render_state *state = &sna->render_state.gen3;
	int ndwords, i1_cmd = 0, i1_len = 0;

	if (sna_vertex_wait__locked(&sna->render) && sna->render.vertex_offset)
		return true;

	ndwords = 2;
	if (op->need_magic_ca_pass)
		ndwords += 100;
	if (sna->render.vertex_reloc[0] == 0)
		i1_len++, i1_cmd |= I1_LOAD_S(0), ndwords++;
	if (state->floats_per_vertex != op->floats_per_vertex)
		i1_len++, i1_cmd |= I1_LOAD_S(1), ndwords++;

	if (!kgem_check_batch(&sna->kgem, ndwords+1))
		return false;

	if (i1_cmd) {
		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | i1_cmd | (i1_len - 1));
		if (sna->render.vertex_reloc[0] == 0)
			sna->render.vertex_reloc[0] = sna->kgem.nbatch++;
		if (state->floats_per_vertex != op->floats_per_vertex) {
			state->floats_per_vertex = op->floats_per_vertex;
			OUT_BATCH(state->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT |
				  state->floats_per_vertex << S1_VERTEX_PITCH_SHIFT);
		}
	}

	if (sna->kgem.nbatch == 2 + state->last_vertex_offset &&
	    !op->need_magic_ca_pass) {
		sna->render.vertex_offset = state->last_vertex_offset;
	} else {
		sna->render.vertex_offset = sna->kgem.nbatch;
		OUT_BATCH(MI_NOOP); /* to be filled later */
		OUT_BATCH(MI_NOOP);
		sna->render.vertex_start = sna->render.vertex_index;
		state->last_vertex_offset = sna->render.vertex_offset;
	}

	return true;
}

static int gen3_get_rectangles__flush(struct sna *sna,
				      const struct sna_composite_op *op)
{
	/* Prevent discarding a new vbo after lock contention */
	if (sna_vertex_wait__locked(&sna->render)) {
		int rem = vertex_space(sna);
		if (rem > op->floats_per_rect)
			return rem;
	}

	if (!kgem_check_batch(&sna->kgem, op->need_magic_ca_pass ? 105 : 5))
		return 0;
	if (!kgem_check_reloc_and_exec(&sna->kgem, 1))
		return 0;

	if (sna->render.vertex_offset) {
		gen3_vertex_flush(sna);
		if (gen3_magic_ca_pass(sna, op)) {
			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
			OUT_BATCH(gen3_get_blend_cntl(op->op,
						      op->has_component_alpha,
						      op->dst.format));
			gen3_composite_emit_shader(sna, op, op->op);
		}
	}

	return gen3_vertex_finish(sna);
}

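/* Reserves vertex space for up to 'want' rectangles, flushing the vbo or
 * submitting the whole batch when full, and returns how many rectangles
 * were actually granted; the caller then writes 3 vertices for each. */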
inline static int gen3_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want)
{
	int rem;

	DBG(("%s: want=%d, rem=%d\n",
	     __FUNCTION__, want*op->floats_per_rect, vertex_space(sna)));

	assert(want);
	assert(sna->render.vertex_index * op->floats_per_vertex == sna->render.vertex_used);

start:
	rem = vertex_space(sna);
	if (unlikely(op->floats_per_rect > rem)) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, op->floats_per_rect));
		rem = gen3_get_rectangles__flush(sna, op);
		if (unlikely(rem == 0))
			goto flush;
	}

	if (unlikely(sna->render.vertex_offset == 0)) {
		if (!gen3_rectangle_begin(sna, op))
			goto flush;
		else
			goto start;
	}

	assert(rem <= vertex_space(sna));
	assert(op->floats_per_rect <= rem);
	if (want > 1 && want * op->floats_per_rect > rem)
		want = rem / op->floats_per_rect;
	sna->render.vertex_index += 3*want;

	assert(want);
	assert(sna->render.vertex_index * op->floats_per_vertex <= sna->render.vertex_size);
	return want;

flush:
	DBG(("%s: flushing batch\n", __FUNCTION__));
	if (sna->render.vertex_offset) {
		gen3_vertex_flush(sna);
		gen3_magic_ca_pass(sna, op);
	}
	sna_vertex_wait__locked(&sna->render);
	_kgem_submit(&sna->kgem);
	gen3_emit_composite_state(sna, op);
	assert(sna->render.vertex_offset == 0);
	assert(sna->render.vertex_reloc[0] == 0);
	goto start;
}

fastcall static void
gen3_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", __FUNCTION__,
	     r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
	     r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
	     r->dst.x, r->dst.y, op->dst.x, op->dst.y,
	     r->width, r->height));

	gen3_get_rectangles(sna, op, 1);

	op->prim_emit(sna, op, r);
}

static void
gen3_render_composite_done(struct sna *sna,
			   const struct sna_composite_op *op)
{
	DBG(("%s()\n", __FUNCTION__));

	if (sna->render.vertex_offset) {
		gen3_vertex_flush(sna);
		gen3_magic_ca_pass(sna, op);
	}
}

static void
discard_vbo(struct sna *sna)
{
	kgem_bo_destroy(&sna->kgem, sna->render.vbo);
	sna->render.vbo = NULL;
	sna->render.vertices = sna->render.vertex_data;
	sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
	sna->render.vertex_used = 0;
	sna->render.vertex_index = 0;
}

static void
gen3_render_reset(struct sna *sna)
{
	struct gen3_render_state *state = &sna->render_state.gen3;

	state->need_invariant = true;
	state->current_dst = 0;
	state->tex_count = 0;
	state->last_drawrect_limit = ~0U;
	state->last_target = 0;
	state->last_blend = 0;
	state->last_constants = 0;
	state->last_sampler = 0;
	state->last_shader = 0x7fffffff;
	state->last_diffuse = 0xcc00ffee;
	state->last_specular = 0xcc00ffee;

	state->floats_per_vertex = 0;
	state->last_floats_per_vertex = 0;
	state->last_vertex_offset = 0;

	if (sna->render.vbo != NULL &&
	    !kgem_bo_is_mappable(&sna->kgem, sna->render.vbo)) {
		DBG(("%s: discarding vbo as next access will stall: %d\n",
		     __FUNCTION__, sna->render.vbo->presumed_offset));
		discard_vbo(sna);
	}

	sna->render.vertex_reloc[0] = 0;
	sna->render.vertex_offset = 0;
}

static void
gen3_render_retire(struct kgem *kgem)
{
	struct sna *sna;

	sna = container_of(kgem, struct sna, kgem);
	if (sna->render.vertex_reloc[0] == 0 &&
	    sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
		DBG(("%s: resetting idle vbo\n", __FUNCTION__));
		sna->render.vertex_used = 0;
		sna->render.vertex_index = 0;
	}
}

static void
gen3_render_expire(struct kgem *kgem)
{
	struct sna *sna;

	sna = container_of(kgem, struct sna, kgem);
	if (sna->render.vbo && !sna->render.vertex_used) {
		DBG(("%s: discarding vbo\n", __FUNCTION__));
		discard_vbo(sna);
	}
}

static bool gen3_composite_channel_set_format(struct sna_composite_channel *channel,
					      CARD32 format)
{
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
		if (gen3_tex_formats[i].fmt == format) {
			channel->card_format = gen3_tex_formats[i].card_fmt;
			channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
			return true;
		}
	}
	return false;
}

#if 0
static int
gen3_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_op *op,
		       struct sna_composite_channel *channel,
		       int16_t x, int16_t y,
		       int16_t w, int16_t h,
		       int16_t dst_x, int16_t dst_y,
		       bool precise)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->card_format = 0;

	if (picture->pDrawable == NULL) {
		SourcePict *source = picture->pSourcePict;
		int ret = -1;

		switch (source->type) {
		case SourcePictTypeSolidFill:
			DBG(("%s: solid fill [%08x], format %08x\n",
			     __FUNCTION__,
			     (unsigned)source->solidFill.color,
			     (unsigned)picture->format));
			ret = gen3_init_solid(channel, source->solidFill.color);
			break;

		case SourcePictTypeLinear:
			ret = gen3_init_linear(sna, picture, op, channel,
					       x - dst_x, y - dst_y);
			break;

		case SourcePictTypeRadial:
			ret = gen3_init_radial(sna, picture, op, channel,
					       x - dst_x, y - dst_y);
			break;
		}

		if (ret == -1) {
			if (!precise)
				ret = sna_render_picture_approximate_gradient(sna, picture, channel,
									      x, y, w, h, dst_x, dst_y);
			if (ret == -1)
				ret = sna_render_picture_fixup(sna, picture, channel,
							       x, y, w, h, dst_x, dst_y);
		}
		return ret;
	}

	if (picture->alphaMap) {
		DBG(("%s -- fallback, alphamap\n", __FUNCTION__));
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);
	}

	if (sna_picture_is_solid(picture, &color)) {
		DBG(("%s: solid drawable [%08x]\n", __FUNCTION__, color));
		return gen3_init_solid(channel, color);
	}

	if (sna_picture_is_clear(picture, x, y, w, h, &color)) {
		DBG(("%s: clear drawable [%08x]\n", __FUNCTION__, color));
		return gen3_init_solid(channel, color_convert(color, picture->format, PICT_a8r8g8b8));
	}

	if (!gen3_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen3_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;
	channel->pict_format = picture->format;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;
	} else {
		channel->transform = picture->transform;
		channel->is_affine = sna_transform_is_affine(picture->transform);
	}

	if (!gen3_composite_channel_set_format(channel, picture->format) &&
	    !gen3_composite_channel_set_xformat(picture, channel, x, y, w, h))
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y,
						  false);
	assert(channel->card_format);

	if (too_large(pixmap->drawable.width, pixmap->drawable.height)) {
		DBG(("%s: pixmap too large (%dx%d), extracting (%d, %d)x(%d,%d)\n",
		     __FUNCTION__,
		     pixmap->drawable.width, pixmap->drawable.height,
		     x, y, w, h));
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);
	}

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}

static inline bool
source_use_blt(struct sna *sna, PicturePtr picture)
{
	/* If it is a solid, try to use the BLT paths */
	if (!picture->pDrawable)
		return picture->pSourcePict->type == SourcePictTypeSolidFill;

	if (picture->pDrawable->width  == 1 &&
	    picture->pDrawable->height == 1 &&
	    picture->repeat)
		return true;

	if (too_large(picture->pDrawable->width, picture->pDrawable->height))
		return true;

	return !is_gpu(sna, picture->pDrawable, PREFER_GPU_RENDER);
}

static bool
try_blt(struct sna *sna,
	PicturePtr dst,
	PicturePtr src,
	int width, int height)
{
	if (sna->kgem.mode != KGEM_RENDER) {
		DBG(("%s: already performing BLT\n", __FUNCTION__));
		return true;
	}

	if (too_large(width, height)) {
		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
		     __FUNCTION__, width, height));
		return true;
	}
	}
3299 Serge 1912
 
4251 Serge 1913
	if (too_large(dst->pDrawable->width, dst->pDrawable->height)) {
1914
		DBG(("%s: target too large for 3D pipe (%d, %d)\n",
1915
		     __FUNCTION__,
1916
		     dst->pDrawable->width, dst->pDrawable->height));
1917
		return true;
1918
	}
3299 Serge 1919
 
4251 Serge 1920
	/* is the source picture only in cpu memory e.g. a shm pixmap? */
1921
	return source_use_blt(sna, src);
1922
}
1923
#endif
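
/* Note on the convention above: gen3_composite_picture() returns -1 on
 * failure (the caller must clean up), 0 when the source reduces to clear
 * (SHADER_ZERO), and 1 when the channel has been bound to a bo.  The
 * switch statements in gen3_render_composite() below rely on exactly
 * these three values.
 */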

static void
gen3_align_vertex(struct sna *sna,
		  const struct sna_composite_op *op)
{
	if (op->floats_per_vertex != sna->render_state.gen3.last_floats_per_vertex) {
		if (sna->render.vertex_size - sna->render.vertex_used < 2*op->floats_per_rect)
			gen3_vertex_finish(sna);

		DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
		     sna->render_state.gen3.last_floats_per_vertex,
		     op->floats_per_vertex,
		     sna->render.vertex_index,
		     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
		sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
		sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
		assert(sna->render.vertex_used < sna->render.vertex_size - op->floats_per_rect);
		sna->render_state.gen3.last_floats_per_vertex = op->floats_per_vertex;
	}
}
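
/* Worked example of the realignment above: with vertex_used = 10 and a
 * new floats_per_vertex of 4, vertex_index becomes (10 + 4 - 1) / 4 = 3
 * and vertex_used is rounded up to 3 * 4 = 12, so the next vertex starts
 * on a whole-vertex boundary in the vbo.
 */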

static inline bool is_constant_ps(uint32_t type)
{
	switch (type) {
	case SHADER_NONE: /* be warned! treated as a constant (no per-vertex data) */
	case SHADER_ZERO:
	case SHADER_BLACK:
	case SHADER_WHITE:
	case SHADER_CONSTANT:
		return true;
	default:
		return false;
	}
}
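
/* Constant shaders contribute no per-vertex data: floats_per_vertex
 * starts at 2 (x, y), and each non-constant channel adds 2 floats when
 * affine or 4 when projective, as computed in the setup paths below.
 */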

#if 0
static bool
gen3_composite_fallback(struct sna *sna,
			uint8_t op,
			PicturePtr src,
			PicturePtr mask,
			PicturePtr dst)
{
	PixmapPtr src_pixmap;
	PixmapPtr mask_pixmap;
	PixmapPtr dst_pixmap;
	bool src_fallback, mask_fallback;

	if (!gen3_check_dst_format(dst->format)) {
		DBG(("%s: unknown destination format: %d\n",
		     __FUNCTION__, dst->format));
		return true;
	}

	dst_pixmap = get_drawable_pixmap(dst->pDrawable);

	src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL;
	src_fallback = source_fallback(src, src_pixmap,
				       dst->polyMode == PolyModePrecise);

	if (mask) {
		mask_pixmap = mask->pDrawable ? get_drawable_pixmap(mask->pDrawable) : NULL;
		mask_fallback = source_fallback(mask, mask_pixmap,
						dst->polyMode == PolyModePrecise);
	} else {
		mask_pixmap = NULL;
		mask_fallback = false;
	}

	/* If we are using the destination as a source and need to
	 * readback in order to upload the source, do it all
	 * on the cpu.
	 */
	if (src_pixmap == dst_pixmap && src_fallback) {
		DBG(("%s: src is dst and will fallback\n",__FUNCTION__));
		return true;
	}
	if (mask_pixmap == dst_pixmap && mask_fallback) {
		DBG(("%s: mask is dst and will fallback\n",__FUNCTION__));
		return true;
	}

	if (mask &&
	    mask->componentAlpha && PICT_FORMAT_RGB(mask->format) &&
	    gen3_blend_op[op].src_alpha &&
	    gen3_blend_op[op].src_blend != BLENDFACT_ZERO &&
	    op != PictOpOver) {
		DBG(("%s: component-alpha mask with op=%d, should fallback\n",
		     __FUNCTION__, op));
		return true;
	}

	/* If anything is on the GPU, push everything out to the GPU */
	if (dst_use_gpu(dst_pixmap)) {
		DBG(("%s: dst is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	if (src_pixmap && !src_fallback) {
		DBG(("%s: src is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}
	if (mask_pixmap && !mask_fallback) {
		DBG(("%s: mask is already on the GPU, try to use GPU\n",
		     __FUNCTION__));
		return false;
	}

	/* However if the dst is not on the GPU and we need to
2035
	 * render one of the sources using the CPU, we may
2036
	 * as well do the entire operation in place onthe CPU.
2037
	 */
2038
	if (src_fallback) {
2039
		DBG(("%s: dst is on the CPU and src will fallback\n",
2040
		     __FUNCTION__));
2041
		return true;
2042
	}
3299 Serge 2043
 
4251 Serge 2044
	if (mask && mask_fallback) {
2045
		DBG(("%s: dst is on the CPU and mask will fallback\n",
2046
		     __FUNCTION__));
2047
		return true;
2048
	}
3299 Serge 2049
 
4251 Serge 2050
	if (too_large(dst_pixmap->drawable.width,
2051
		      dst_pixmap->drawable.height) &&
2052
	    dst_is_cpu(dst_pixmap)) {
2053
		DBG(("%s: dst is on the CPU and too large\n", __FUNCTION__));
2054
		return true;
2055
	}
3299 Serge 2056
 
4251 Serge 2057
	DBG(("%s: dst is not on the GPU and the operation should not fallback: use-cpu? %d\n",
2058
	     __FUNCTION__, dst_use_cpu(dst_pixmap)));
2059
	return dst_use_cpu(dst_pixmap);
2060
}
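
/* In short: fall back to the CPU whenever a source must be read back
 * anyway, or when nothing involved is resident on the GPU; prefer the
 * GPU as soon as any operand already lives there.
 */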

static bool
gen3_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x,  int16_t src_y,
		      int16_t mask_x, int16_t mask_y,
		      int16_t dst_x,  int16_t dst_y,
		      int16_t width,  int16_t height,
		      struct sna_composite_op *tmp)
{
	DBG(("%s()\n", __FUNCTION__));

	if (op >= ARRAY_SIZE(gen3_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return false;
	}

	/* Try to use the BLT engine unless it implies a
	 * 3D -> 2D context switch.
	 */
	if (mask == NULL &&
	    try_blt(sna, dst, src, width, height) &&
	    sna_blt_composite(sna,
			      op, src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      tmp, false))
		return true;

	if (gen3_composite_fallback(sna, op, src, mask, dst))
		return false;

	if (need_tiling(sna, width, height))
		return sna_tiling_composite(op, src, mask, dst,
					    src_x,  src_y,
					    mask_x, mask_y,
					    dst_x,  dst_y,
					    width,  height,
					    tmp);

	if (!gen3_composite_set_target(sna, tmp, dst,
				       dst_x, dst_y, width, height)) {
		DBG(("%s: unable to set render target\n",
		     __FUNCTION__));
		return false;
	}

	tmp->op = op;
	tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format);
	if (too_large(tmp->dst.width, tmp->dst.height) ||
	    !gen3_check_pitch_3d(tmp->dst.bo)) {
		if (!sna_render_composite_redirect(sna, tmp,
						   dst_x, dst_y, width, height,
						   op > PictOpSrc || dst->pCompositeClip->data))
			return false;
	}

	tmp->u.gen3.num_constants = 0;
	tmp->src.u.gen3.type = SHADER_TEXTURE;
	tmp->src.is_affine = true;
	DBG(("%s: preparing source\n", __FUNCTION__));
	switch (gen3_composite_picture(sna, src, tmp, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y,
				       dst->polyMode == PolyModePrecise)) {
	case -1:
		goto cleanup_dst;
	case 0:
		tmp->src.u.gen3.type = SHADER_ZERO;
		break;
	case 1:
		if (mask == NULL && tmp->src.bo &&
		    sna_blt_composite__convert(sna,
					       dst_x, dst_y, width, height,
					       tmp))
			return true;

		gen3_composite_channel_convert(&tmp->src);
		break;
	}
	DBG(("%s: source type=%d\n", __FUNCTION__, tmp->src.u.gen3.type));

	tmp->mask.u.gen3.type = SHADER_NONE;
	tmp->mask.is_affine = true;
	tmp->need_magic_ca_pass = false;
	tmp->has_component_alpha = false;
	if (mask && tmp->src.u.gen3.type != SHADER_ZERO) {
		if (!reuse_source(sna,
				  src, &tmp->src, src_x, src_y,
				  mask, &tmp->mask, mask_x, mask_y)) {
			tmp->mask.u.gen3.type = SHADER_TEXTURE;
			DBG(("%s: preparing mask\n", __FUNCTION__));
			switch (gen3_composite_picture(sna, mask, tmp, &tmp->mask,
						       mask_x, mask_y,
						       width,  height,
						       dst_x,  dst_y,
						       dst->polyMode == PolyModePrecise)) {
			case -1:
				goto cleanup_src;
			case 0:
				tmp->mask.u.gen3.type = SHADER_ZERO;
				break;
			case 1:
				gen3_composite_channel_convert(&tmp->mask);
				break;
			}
		}
		DBG(("%s: mask type=%d\n", __FUNCTION__, tmp->mask.u.gen3.type));
		if (tmp->mask.u.gen3.type == SHADER_ZERO) {
			if (tmp->src.bo) {
				kgem_bo_destroy(&sna->kgem,
						tmp->src.bo);
				tmp->src.bo = NULL;
			}
			tmp->src.u.gen3.type = SHADER_ZERO;
			tmp->mask.u.gen3.type = SHADER_NONE;
		}

		if (tmp->mask.u.gen3.type != SHADER_NONE) {
			if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
				/* Check if it's component alpha that relies on a source alpha
				 * and on the source value.  We can only get one of those
				 * into the single source value that we get to blend with.
				 */
				DBG(("%s: component-alpha mask: %d\n",
				     __FUNCTION__, tmp->mask.u.gen3.type));
				tmp->has_component_alpha = true;
				if (tmp->mask.u.gen3.type == SHADER_WHITE) {
					tmp->mask.u.gen3.type = SHADER_NONE;
					tmp->has_component_alpha = false;
				} else if (gen3_blend_op[op].src_alpha &&
					   gen3_blend_op[op].src_blend != BLENDFACT_ZERO) {
					if (op != PictOpOver)
						goto cleanup_mask;

					tmp->need_magic_ca_pass = true;
					tmp->op = PictOpOutReverse;
				}
			} else {
				if (tmp->mask.is_opaque) {
					tmp->mask.u.gen3.type = SHADER_NONE;
				} else if (is_constant_ps(tmp->src.u.gen3.type) &&
					   is_constant_ps(tmp->mask.u.gen3.type)) {
					uint32_t v;

					v = multa(tmp->src.u.gen3.mode,
						  tmp->mask.u.gen3.mode,
						  24);
					v |= multa(tmp->src.u.gen3.mode,
						   tmp->mask.u.gen3.mode,
						   16);
					v |= multa(tmp->src.u.gen3.mode,
						   tmp->mask.u.gen3.mode,
						   8);
					v |= multa(tmp->src.u.gen3.mode,
						   tmp->mask.u.gen3.mode,
						   0);

					DBG(("%s: combining constant source/mask: %x x %x -> %x\n",
					     __FUNCTION__,
					     tmp->src.u.gen3.mode,
					     tmp->mask.u.gen3.mode,
					     v));

					tmp->src.u.gen3.type = SHADER_CONSTANT;
					tmp->src.u.gen3.mode = v;
					tmp->src.is_opaque = false;

					tmp->mask.u.gen3.type = SHADER_NONE;
				}
			}
		}
	}
	DBG(("%s: final src/mask type=%d/%d, affine=%d/%d\n", __FUNCTION__,
	     tmp->src.u.gen3.type, tmp->mask.u.gen3.type,
	     tmp->src.is_affine, tmp->mask.is_affine));

	tmp->prim_emit = gen3_emit_composite_primitive;
	if (is_constant_ps(tmp->mask.u.gen3.type)) {
		switch (tmp->src.u.gen3.type) {
		case SHADER_NONE:
		case SHADER_ZERO:
		case SHADER_BLACK:
		case SHADER_WHITE:
		case SHADER_CONSTANT:
#if defined(sse2) && !defined(__x86_64__)
			if (sna->cpu_features & SSE2) {
				tmp->prim_emit = gen3_emit_composite_primitive_constant__sse2;
				tmp->emit_boxes = gen3_emit_composite_boxes_constant__sse2;
			} else
#endif
			{
				tmp->prim_emit = gen3_emit_composite_primitive_constant;
				tmp->emit_boxes = gen3_emit_composite_boxes_constant;
			}

			break;
		case SHADER_LINEAR:
		case SHADER_RADIAL:
			if (tmp->src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
				if (sna->cpu_features & SSE2) {
					tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient__sse2;
					tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient__sse2;
				} else
#endif
				{
					tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient;
					tmp->emit_boxes = gen3_emit_composite_boxes_identity_gradient;
				}
			} else if (tmp->src.is_affine) {
				tmp->src.scale[1] = tmp->src.scale[0] = 1. / tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
				if (sna->cpu_features & SSE2) {
					tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient__sse2;
					tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient__sse2;
				} else
#endif
				{
					tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient;
					tmp->emit_boxes = gen3_emit_composite_boxes_affine_gradient;
				}
			}
			break;
		case SHADER_TEXTURE:
			if (tmp->src.transform == NULL) {
				if ((tmp->src.offset[0]|tmp->src.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
#if defined(sse2) && !defined(__x86_64__)
					if (sna->cpu_features & SSE2) {
						tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset__sse2;
						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset__sse2;
					} else
#endif
					{
						tmp->prim_emit = gen3_emit_composite_primitive_identity_source_no_offset;
						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source_no_offset;
					}
				} else {
#if defined(sse2) && !defined(__x86_64__)
					if (sna->cpu_features & SSE2) {
						tmp->prim_emit = gen3_emit_composite_primitive_identity_source__sse2;
						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source__sse2;
					} else
#endif
					{
						tmp->prim_emit = gen3_emit_composite_primitive_identity_source;
						tmp->emit_boxes = gen3_emit_composite_boxes_identity_source;
					}
				}
			} else if (tmp->src.is_affine) {
				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
				if (sna->cpu_features & SSE2) {
					tmp->prim_emit = gen3_emit_composite_primitive_affine_source__sse2;
					tmp->emit_boxes = gen3_emit_composite_boxes_affine_source__sse2;
				} else
#endif
				{
					tmp->prim_emit = gen3_emit_composite_primitive_affine_source;
					tmp->emit_boxes = gen3_emit_composite_boxes_affine_source;
				}
			}
			break;
		}
	} else if (tmp->mask.u.gen3.type == SHADER_TEXTURE) {
		if (tmp->mask.transform == NULL) {
			if (is_constant_ps(tmp->src.u.gen3.type)) {
				if ((tmp->mask.offset[0]|tmp->mask.offset[1]|tmp->dst.x|tmp->dst.y) == 0) {
#if defined(sse2) && !defined(__x86_64__)
					if (sna->cpu_features & SSE2) {
						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset__sse2;
					} else
#endif
					{
						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask_no_offset;
					}
				} else {
#if defined(sse2) && !defined(__x86_64__)
					if (sna->cpu_features & SSE2) {
						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask__sse2;
					} else
#endif
					{
						tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask;
					}
				}
			} else if (tmp->src.transform == NULL) {
#if defined(sse2) && !defined(__x86_64__)
				if (sna->cpu_features & SSE2) {
					tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask__sse2;
				} else
#endif
				{
					tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;
				}
			} else if (tmp->src.is_affine) {
				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
#if defined(sse2) && !defined(__x86_64__)
				if (sna->cpu_features & SSE2) {
					tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask__sse2;
				} else
#endif
				{
					tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask;
				}
			}
		}
	}

	tmp->floats_per_vertex = 2;
	if (!is_constant_ps(tmp->src.u.gen3.type))
		tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4;
	if (!is_constant_ps(tmp->mask.u.gen3.type))
		tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4;
	DBG(("%s: floats_per_vertex = 2 + %d + %d = %d [specialised emitter? %d]\n", __FUNCTION__,
	     !is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0,
	     !is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 2 : 4 : 0,
	     tmp->floats_per_vertex,
	     tmp->prim_emit != gen3_emit_composite_primitive));
	tmp->floats_per_rect = 3 * tmp->floats_per_vertex;

	tmp->blt   = gen3_render_composite_blt;
	tmp->box   = gen3_render_composite_box;
	tmp->boxes = gen3_render_composite_boxes__blt;
	if (tmp->emit_boxes) {
		tmp->boxes = gen3_render_composite_boxes;
		tmp->thread_boxes = gen3_render_composite_boxes__thread;
	}
	tmp->done  = gen3_render_composite_done;

	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
		if (!kgem_check_bo(&sna->kgem,
				   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
				   NULL))
			goto cleanup_mask;
	}

	gen3_emit_composite_state(sna, tmp);
	gen3_align_vertex(sna, tmp);
	return true;

cleanup_mask:
	if (tmp->mask.bo)
		kgem_bo_destroy(&sna->kgem, tmp->mask.bo);
cleanup_src:
	if (tmp->src.bo)
		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
cleanup_dst:
	if (tmp->redirect.real_bo)
		kgem_bo_destroy(&sna->kgem, tmp->dst.bo);
	return false;
}
#endif
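
/* A sketch of how a prepared composite op is driven (illustrative only;
 * it assumes a caller shaped like sna_composite() and uses made-up box
 * coordinates):
 */
#if 0
static void example_drive_op(struct sna *sna, struct sna_composite_op *op)
{
	BoxRec box = { .x1 = 0, .y1 = 0, .x2 = 16, .y2 = 16 };

	op->box(sna, op, &box);	/* emit one rectangle via prim_emit */
	op->done(sna, op);	/* flush vertices and release the op */
}
#endif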

static void gen3_render_flush(struct sna *sna)
{
	gen3_vertex_close(sna);

	assert(sna->render.vertex_reloc[0] == 0);
	assert(sna->render.vertex_offset == 0);
}

static void
gen3_render_fini(struct sna *sna)
{
}

const char *gen3_render_init(struct sna *sna, const char *backend)
{
	struct sna_render *render = &sna->render;

#if 0
#if !NO_COMPOSITE
	render->composite = gen3_render_composite;
	render->prefer_gpu |= PREFER_GPU_RENDER;
#endif
#if !NO_COMPOSITE_SPANS
	render->check_composite_spans = gen3_check_composite_spans;
	render->composite_spans = gen3_render_composite_spans;
	render->prefer_gpu |= PREFER_GPU_SPANS;
#endif

	render->video = gen3_render_video;

	render->copy_boxes = gen3_render_copy_boxes;
	render->copy = gen3_render_copy;

	render->fill_boxes = gen3_render_fill_boxes;
	render->fill = gen3_render_fill;
	render->fill_one = gen3_render_fill_one;
#endif

	render->blit_tex = gen3_blit_tex;
	render->caps = HW_BIT_BLIT | HW_TEX_BLIT;

	render->reset = gen3_render_reset;
	render->flush = gen3_render_flush;
	render->fini = gen3_render_fini;

	render->max_3d_size = MAX_3D_SIZE;
	render->max_3d_pitch = MAX_3D_PITCH;

	sna->kgem.retire = gen3_render_retire;
	sna->kgem.expire = gen3_render_expire;
	return "Alviso (gen3)";
}
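
/* On this KolibriOS port only the blit_tex path is live; composite,
 * spans, video, copy and fill remain compiled out under #if 0 above,
 * which is why caps advertises just HW_BIT_BLIT | HW_TEX_BLIT.
 */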

static bool
gen3_blit_tex(struct sna *sna,
	      uint8_t op, bool scale,
	      PixmapPtr src, struct kgem_bo *src_bo,
	      PixmapPtr mask, struct kgem_bo *mask_bo,
	      PixmapPtr dst, struct kgem_bo *dst_bo,
	      int32_t src_x, int32_t src_y,
	      int32_t msk_x, int32_t msk_y,
	      int32_t dst_x, int32_t dst_y,
	      int32_t width, int32_t height,
	      struct sna_composite_op *tmp)
{
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.ring));

	tmp->op = PictOpSrc;

	tmp->dst.pixmap = dst;
	tmp->dst.bo     = dst_bo;
	tmp->dst.width  = dst->drawable.width;
	tmp->dst.height = dst->drawable.height;
	tmp->dst.format = PICT_x8r8g8b8;

	tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format);

	tmp->u.gen3.num_constants = 0;
	tmp->src.u.gen3.type = SHADER_TEXTURE;
	tmp->src.is_affine = true;

	tmp->src.repeat = RepeatNone;
	tmp->src.filter = PictFilterNearest;

	tmp->src.bo = src_bo;
	tmp->src.pict_format = PICT_x8r8g8b8;

	gen3_composite_channel_set_format(&tmp->src, tmp->src.pict_format);

	tmp->src.width  = src->drawable.width;
	tmp->src.height = src->drawable.height;

	tmp->mask.u.gen3.type = SHADER_TEXTURE;
	tmp->mask.is_affine = true;
	tmp->need_magic_ca_pass = false;
	tmp->has_component_alpha = false;

	tmp->mask.repeat = RepeatNone;
	tmp->mask.filter = PictFilterNearest;

	tmp->mask.bo = mask_bo;
	tmp->mask.pict_format = PIXMAN_a8;
	gen3_composite_channel_set_format(&tmp->mask, tmp->mask.pict_format);
	tmp->mask.width  = mask->drawable.width;
	tmp->mask.height = mask->drawable.height;

	if (scale) {
		tmp->src.scale[0] = 1.f/width;
		tmp->src.scale[1] = 1.f/height;
	} else {
		tmp->src.scale[0] = 1.f/src->drawable.width;
		tmp->src.scale[1] = 1.f/src->drawable.height;
	}
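
	/* The scale factors turn pixel positions into normalised [0, 1]
	 * texture coordinates for the emitter; with scale == true the
	 * source is stretched across the full width x height of the
	 * destination rectangle instead of being sampled 1:1.
	 */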

	tmp->mask.scale[0] = 1.f/mask->drawable.width;
	tmp->mask.scale[1] = 1.f/mask->drawable.height;

	tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;

	tmp->floats_per_vertex = 2;
	if (!is_constant_ps(tmp->src.u.gen3.type))
		tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 4;
	if (!is_constant_ps(tmp->mask.u.gen3.type))
		tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 4;
//	DBG(("%s: floats_per_vertex = 2 + %d + %d = %d [specialised emitter? %d]\n", __FUNCTION__,
//	     !is_constant_ps(tmp->src.u.gen3.type) ? tmp->src.is_affine ? 2 : 4 : 0,
//	     !is_constant_ps(tmp->mask.u.gen3.type) ? tmp->mask.is_affine ? 2 : 4 : 0,
//	     tmp->floats_per_vertex,
//	     tmp->prim_emit != gen3_emit_composite_primitive));
	tmp->floats_per_rect = 3 * tmp->floats_per_vertex;

	tmp->blt   = gen3_render_composite_blt;

	tmp->done  = gen3_render_composite_done;

	/* Unlike gen3_render_composite() we do not re-check after the
	 * submit; the freshly flushed batch is assumed to have room.
	 */
	if (!kgem_check_bo(&sna->kgem,
			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
			   NULL)) {
		kgem_submit(&sna->kgem);
	}

	gen3_emit_composite_state(sna, tmp);
	gen3_align_vertex(sna, tmp);
	return true;
}