Subversion Repositories Kolibri OS

Rev

Rev 4359 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
4304 Serge 1
/*
2
 * Copyright © 2012 Intel Corporation
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
 * SOFTWARE.
22
 *
23
 * Authors:
24
 *    Chris Wilson 
25
 *
26
 */
27
 
28
#ifdef HAVE_CONFIG_H
29
#include "config.h"
30
#endif
31
 
32
#include "sna.h"
33
#include "sna_render.h"
34
#include "sna_render_inline.h"
35
#include "gen4_vertex.h"
36
 
37
#ifndef sse2
38
#define sse2
39
#endif
40
 
4501 Serge 41
void gen4_vertex_align(struct sna *sna, const struct sna_composite_op *op)
42
{
43
	int vertex_index;
44
 
45
	assert(op->floats_per_rect == 3*op->floats_per_vertex);
46
 
47
	vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
48
	if ((int)sna->render.vertex_size - vertex_index * op->floats_per_vertex < 2*op->floats_per_rect) {
49
		DBG(("%s: flushing vertex buffer: new index=%d, max=%d\n",
50
		     __FUNCTION__, vertex_index, sna->render.vertex_size / op->floats_per_vertex));
51
		if (gen4_vertex_finish(sna) < op->floats_per_rect) {
52
			kgem_submit(&sna->kgem);
53
			_kgem_set_mode(&sna->kgem, KGEM_RENDER);
54
		}
55
 
56
		vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
57
		assert(vertex_index * op->floats_per_vertex <= sna->render.vertex_size);
58
	}
59
 
60
	sna->render.vertex_index = vertex_index;
61
	sna->render.vertex_used = vertex_index * op->floats_per_vertex;
62
}
63
 
4304 Serge 64
void gen4_vertex_flush(struct sna *sna)
65
{
66
    DBG(("%s[%x] = %d\n", __FUNCTION__,
67
         4*sna->render.vertex_offset,
68
         sna->render.vertex_index - sna->render.vertex_start));
69
 
70
    assert(sna->render.vertex_offset);
4501 Serge 71
	assert(sna->render.vertex_offset <= sna->kgem.nbatch);
4304 Serge 72
    assert(sna->render.vertex_index > sna->render.vertex_start);
4501 Serge 73
	assert(sna->render.vertex_used <= sna->render.vertex_size);
4304 Serge 74
 
75
    sna->kgem.batch[sna->render.vertex_offset] =
76
        sna->render.vertex_index - sna->render.vertex_start;
77
    sna->render.vertex_offset = 0;
78
}
79
 
80
int gen4_vertex_finish(struct sna *sna)
81
{
82
    struct kgem_bo *bo;
83
    unsigned int i;
84
    unsigned hint, size;
85
 
86
    DBG(("%s: used=%d / %d\n", __FUNCTION__,
87
         sna->render.vertex_used, sna->render.vertex_size));
88
    assert(sna->render.vertex_offset == 0);
89
    assert(sna->render.vertex_used);
4501 Serge 90
	assert(sna->render.vertex_used <= sna->render.vertex_size);
4304 Serge 91
 
92
	sna_vertex_wait__locked(&sna->render);
93
 
94
    /* Note: we only need dword alignment (currently) */
95
 
4501 Serge 96
	hint = CREATE_GTT_MAP;
97
 
4304 Serge 98
    bo = sna->render.vbo;
99
    if (bo) {
100
        for (i = 0; i < sna->render.nvertex_reloc; i++) {
101
            DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
102
                 i, sna->render.vertex_reloc[i]));
103
 
104
            sna->kgem.batch[sna->render.vertex_reloc[i]] =
105
                kgem_add_reloc(&sna->kgem,
106
                           sna->render.vertex_reloc[i], bo,
107
                           I915_GEM_DOMAIN_VERTEX << 16,
108
                           0);
109
        }
110
 
111
        assert(!sna->render.active);
112
        sna->render.nvertex_reloc = 0;
113
        sna->render.vertex_used = 0;
114
        sna->render.vertex_index = 0;
115
        sna->render.vbo = NULL;
116
        sna->render.vb_id = 0;
117
 
118
        kgem_bo_destroy(&sna->kgem, bo);
4501 Serge 119
		hint |= CREATE_CACHED | CREATE_NO_THROTTLE;
120
	} else {
121
		if (kgem_is_idle(&sna->kgem)) {
122
			sna->render.vertices = sna->render.vertex_data;
123
			sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
124
			return 0;
125
		}
4304 Serge 126
    }
127
 
128
 
129
    size = 256*1024;
130
    assert(!sna->render.active);
131
    sna->render.vertices = NULL;
132
    sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
133
    while (sna->render.vbo == NULL && size > 16*1024) {
134
        size /= 2;
135
        sna->render.vbo = kgem_create_linear(&sna->kgem, size, hint);
136
    }
137
    if (sna->render.vbo == NULL)
138
        sna->render.vbo = kgem_create_linear(&sna->kgem,
139
                             256*1024, CREATE_GTT_MAP);
140
    if (sna->render.vbo)
141
        sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
142
    if (sna->render.vertices == NULL) {
143
        if (sna->render.vbo) {
144
            kgem_bo_destroy(&sna->kgem, sna->render.vbo);
145
            sna->render.vbo = NULL;
146
        }
147
        sna->render.vertices = sna->render.vertex_data;
148
        sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
149
        return 0;
150
    }
151
 
152
    if (sna->render.vertex_used) {
153
        DBG(("%s: copying initial buffer x %d to handle=%d\n",
154
             __FUNCTION__,
155
             sna->render.vertex_used,
156
             sna->render.vbo->handle));
157
        assert(sizeof(float)*sna->render.vertex_used <=
158
               __kgem_bo_size(sna->render.vbo));
159
        memcpy(sna->render.vertices,
160
               sna->render.vertex_data,
161
               sizeof(float)*sna->render.vertex_used);
162
    }
163
 
164
    size = __kgem_bo_size(sna->render.vbo)/4;
165
    if (size >= UINT16_MAX)
166
        size = UINT16_MAX - 1;
167
 
168
    DBG(("%s: create vbo handle=%d, size=%d\n",
169
         __FUNCTION__, sna->render.vbo->handle, size));
170
 
171
    sna->render.vertex_size = size;
172
    return sna->render.vertex_size - sna->render.vertex_used;
173
}
174
 
175
void gen4_vertex_close(struct sna *sna)
176
{
177
    struct kgem_bo *bo, *free_bo = NULL;
178
    unsigned int i, delta = 0;
179
 
180
    assert(sna->render.vertex_offset == 0);
181
    if (!sna->render.vb_id)
182
        return;
183
 
184
    DBG(("%s: used=%d, vbo active? %d, vb=%x, nreloc=%d\n",
185
         __FUNCTION__, sna->render.vertex_used, sna->render.vbo ? sna->render.vbo->handle : 0,
186
         sna->render.vb_id, sna->render.nvertex_reloc));
187
 
188
    assert(!sna->render.active);
189
 
190
    bo = sna->render.vbo;
191
    if (bo) {
192
        if (sna->render.vertex_size - sna->render.vertex_used < 64) {
193
            DBG(("%s: discarding vbo (full), handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
194
            sna->render.vbo = NULL;
195
            sna->render.vertices = sna->render.vertex_data;
196
            sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
197
            free_bo = bo;
4501 Serge 198
		} else if (!sna->kgem.has_llc && sna->render.vertices == MAP(bo->map__cpu)) {
4304 Serge 199
            DBG(("%s: converting CPU map to GTT\n", __FUNCTION__));
200
            sna->render.vertices =
201
                kgem_bo_map__gtt(&sna->kgem, sna->render.vbo);
202
            if (sna->render.vertices == NULL) {
203
                sna->render.vbo = NULL;
204
                sna->render.vertices = sna->render.vertex_data;
205
                sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
206
                free_bo = bo;
207
            }
208
 
209
        }
210
    } else {
4501 Serge 211
		int size;
212
 
213
		size  = sna->kgem.nbatch;
214
		size += sna->kgem.batch_size - sna->kgem.surface;
215
		size += sna->render.vertex_used;
216
 
217
		if (size <= 1024) {
4304 Serge 218
            DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
219
                 sna->render.vertex_used, sna->kgem.nbatch));
4501 Serge 220
			assert(sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface);
4304 Serge 221
            memcpy(sna->kgem.batch + sna->kgem.nbatch,
222
                   sna->render.vertex_data,
223
                   sna->render.vertex_used * 4);
224
            delta = sna->kgem.nbatch * 4;
225
            bo = NULL;
226
            sna->kgem.nbatch += sna->render.vertex_used;
227
        } else {
4501 Serge 228
			size = 256 * 1024;
229
			do {
230
				bo = kgem_create_linear(&sna->kgem, size,
231
							CREATE_GTT_MAP | CREATE_NO_RETIRE | CREATE_NO_THROTTLE | CREATE_CACHED);
232
			} while (bo == NULL && (size>>=1) > sizeof(float)*sna->render.vertex_used);
233
 
234
			sna->render.vertices = NULL;
235
			if (bo)
236
				sna->render.vertices = kgem_bo_map(&sna->kgem, bo);
237
			if (sna->render.vertices != NULL) {
238
				DBG(("%s: new vbo: %d / %d\n", __FUNCTION__,
239
				     sna->render.vertex_used, __kgem_bo_size(bo)/4));
240
 
241
				assert(sizeof(float)*sna->render.vertex_used <= __kgem_bo_size(bo));
242
				memcpy(sna->render.vertices,
243
				       sna->render.vertex_data,
244
				       sizeof(float)*sna->render.vertex_used);
245
 
246
				size = __kgem_bo_size(bo)/4;
247
				if (size >= UINT16_MAX)
248
					size = UINT16_MAX - 1;
249
 
250
				sna->render.vbo = bo;
251
				sna->render.vertex_size = size;
252
			} else {
253
				DBG(("%s: tmp vbo: %d\n", __FUNCTION__,
254
				     sna->render.vertex_used));
255
 
256
				if (bo)
257
					kgem_bo_destroy(&sna->kgem, bo);
258
 
4304 Serge 259
            bo = kgem_create_linear(&sna->kgem,
260
                        4*sna->render.vertex_used,
261
                        CREATE_NO_THROTTLE);
262
            if (bo && !kgem_bo_write(&sna->kgem, bo,
263
                         sna->render.vertex_data,
264
                         4*sna->render.vertex_used)) {
265
                kgem_bo_destroy(&sna->kgem, bo);
266
                bo = NULL;
267
            }
4501 Serge 268
 
269
				assert(sna->render.vbo == NULL);
270
				sna->render.vertices = sna->render.vertex_data;
271
				sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
4304 Serge 272
            free_bo = bo;
273
        }
274
    }
4501 Serge 275
	}
4304 Serge 276
 
277
    assert(sna->render.nvertex_reloc);
278
    for (i = 0; i < sna->render.nvertex_reloc; i++) {
279
        DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
280
             i, sna->render.vertex_reloc[i]));
281
 
282
        sna->kgem.batch[sna->render.vertex_reloc[i]] =
283
            kgem_add_reloc(&sna->kgem,
284
                       sna->render.vertex_reloc[i], bo,
285
                       I915_GEM_DOMAIN_VERTEX << 16,
286
                       delta);
287
    }
288
    sna->render.nvertex_reloc = 0;
289
    sna->render.vb_id = 0;
290
 
291
    if (sna->render.vbo == NULL) {
292
        assert(!sna->render.active);
293
        sna->render.vertex_used = 0;
294
        sna->render.vertex_index = 0;
295
        assert(sna->render.vertices == sna->render.vertex_data);
296
        assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data));
297
    }
298
 
299
    if (free_bo)
300
        kgem_bo_destroy(&sna->kgem, free_bo);
301
}
302
 
303
/* specialised vertex emission routines */
304
 
305
#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) /* XXX assert(!too_large(x, y)); */
306
#define OUT_VERTEX_F(v) vertex_emit(sna, v)
307
 
308
force_inline static float
309
compute_linear(const struct sna_composite_channel *channel,
310
	       int16_t x, int16_t y)
311
{
312
	return ((x+channel->offset[0]) * channel->u.linear.dx +
313
		(y+channel->offset[1]) * channel->u.linear.dy +
314
		channel->u.linear.offset);
315
}
316
 
317
sse2 inline static void
318
emit_texcoord(struct sna *sna,
319
	      const struct sna_composite_channel *channel,
320
	      int16_t x, int16_t y)
321
{
322
	if (channel->is_solid) {
323
		OUT_VERTEX_F(x);
324
		return;
325
	}
326
 
327
	x += channel->offset[0];
328
	y += channel->offset[1];
329
 
330
	if (channel->is_affine) {
331
		float s, t;
332
 
333
		sna_get_transformed_coordinates(x, y,
334
						channel->transform,
335
						&s, &t);
336
		OUT_VERTEX_F(s * channel->scale[0]);
337
		OUT_VERTEX_F(t * channel->scale[1]);
338
	} else {
339
		float s, t, w;
340
 
341
		sna_get_transformed_coordinates_3d(x, y,
342
						   channel->transform,
343
						   &s, &t, &w);
344
		OUT_VERTEX_F(s * channel->scale[0]);
345
		OUT_VERTEX_F(t * channel->scale[1]);
346
		OUT_VERTEX_F(w);
347
	}
348
}
349
 
350
sse2 force_inline static void
351
emit_vertex(struct sna *sna,
352
	    const struct sna_composite_op *op,
353
	    int16_t srcX, int16_t srcY,
354
	    int16_t mskX, int16_t mskY,
355
	    int16_t dstX, int16_t dstY)
356
{
357
	OUT_VERTEX(dstX, dstY);
358
	emit_texcoord(sna, &op->src, srcX, srcY);
359
}
360
 
361
sse2 fastcall static void
362
emit_primitive(struct sna *sna,
363
	       const struct sna_composite_op *op,
364
	       const struct sna_composite_rectangles *r)
365
{
366
	emit_vertex(sna, op,
367
		    r->src.x + r->width,  r->src.y + r->height,
368
		    r->mask.x + r->width, r->mask.y + r->height,
369
		    r->dst.x + r->width, r->dst.y + r->height);
370
	emit_vertex(sna, op,
371
		    r->src.x,  r->src.y + r->height,
372
		    r->mask.x, r->mask.y + r->height,
373
		    r->dst.x,  r->dst.y + r->height);
374
	emit_vertex(sna, op,
375
		    r->src.x,  r->src.y,
376
		    r->mask.x, r->mask.y,
377
		    r->dst.x,  r->dst.y);
378
}
379
 
380
sse2 force_inline static void
381
emit_vertex_mask(struct sna *sna,
382
		 const struct sna_composite_op *op,
383
		 int16_t srcX, int16_t srcY,
384
		 int16_t mskX, int16_t mskY,
385
		 int16_t dstX, int16_t dstY)
386
{
387
	OUT_VERTEX(dstX, dstY);
388
	emit_texcoord(sna, &op->src, srcX, srcY);
389
	emit_texcoord(sna, &op->mask, mskX, mskY);
390
}
391
 
392
sse2 fastcall static void
393
emit_primitive_mask(struct sna *sna,
394
		    const struct sna_composite_op *op,
395
		    const struct sna_composite_rectangles *r)
396
{
397
	emit_vertex_mask(sna, op,
398
			 r->src.x + r->width,  r->src.y + r->height,
399
			 r->mask.x + r->width, r->mask.y + r->height,
400
			 r->dst.x + r->width, r->dst.y + r->height);
401
	emit_vertex_mask(sna, op,
402
			 r->src.x,  r->src.y + r->height,
403
			 r->mask.x, r->mask.y + r->height,
404
			 r->dst.x,  r->dst.y + r->height);
405
	emit_vertex_mask(sna, op,
406
			 r->src.x,  r->src.y,
407
			 r->mask.x, r->mask.y,
408
			 r->dst.x,  r->dst.y);
409
}
410
 
411
sse2 fastcall static void
412
emit_primitive_solid(struct sna *sna,
413
		     const struct sna_composite_op *op,
414
		     const struct sna_composite_rectangles *r)
415
{
416
	float *v;
417
	union {
418
		struct sna_coordinate p;
419
		float f;
420
	} dst;
421
 
422
	assert(op->floats_per_rect == 6);
423
	assert((sna->render.vertex_used % 2) == 0);
424
	v = sna->render.vertices + sna->render.vertex_used;
425
	sna->render.vertex_used += 6;
426
	assert(sna->render.vertex_used <= sna->render.vertex_size);
427
 
428
	dst.p.x = r->dst.x + r->width;
429
	dst.p.y = r->dst.y + r->height;
430
	v[0] = dst.f;
431
	dst.p.x = r->dst.x;
432
	v[2] = dst.f;
433
	dst.p.y = r->dst.y;
434
	v[4] = dst.f;
435
 
436
	v[5] = v[3] = v[1] = .5;
437
}
438
 
439
sse2 fastcall static void
440
emit_boxes_solid(const struct sna_composite_op *op,
441
		 const BoxRec *box, int nbox,
442
		 float *v)
443
{
444
	do {
445
		union {
446
			struct sna_coordinate p;
447
			float f;
448
		} dst;
449
 
450
		dst.p.x = box->x2;
451
		dst.p.y = box->y2;
452
		v[0] = dst.f;
453
		dst.p.x = box->x1;
454
		v[2] = dst.f;
455
		dst.p.y = box->y1;
456
		v[4] = dst.f;
457
 
458
		v[5] = v[3] = v[1] = .5;
459
		box++;
460
		v += 6;
461
	} while (--nbox);
462
}
463
 
464
sse2 fastcall static void
465
emit_primitive_linear(struct sna *sna,
466
		      const struct sna_composite_op *op,
467
		      const struct sna_composite_rectangles *r)
468
{
469
	float *v;
470
	union {
471
		struct sna_coordinate p;
472
		float f;
473
	} dst;
474
 
475
	assert(op->floats_per_rect == 6);
476
	assert((sna->render.vertex_used % 2) == 0);
477
	v = sna->render.vertices + sna->render.vertex_used;
478
	sna->render.vertex_used += 6;
479
	assert(sna->render.vertex_used <= sna->render.vertex_size);
480
 
481
	dst.p.x = r->dst.x + r->width;
482
	dst.p.y = r->dst.y + r->height;
483
	v[0] = dst.f;
484
	dst.p.x = r->dst.x;
485
	v[2] = dst.f;
486
	dst.p.y = r->dst.y;
487
	v[4] = dst.f;
488
 
489
	v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
490
	v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
491
	v[5] = compute_linear(&op->src, r->src.x, r->src.y);
492
}
493
 
494
sse2 fastcall static void
495
emit_boxes_linear(const struct sna_composite_op *op,
496
		  const BoxRec *box, int nbox,
497
		  float *v)
498
{
499
	union {
500
		struct sna_coordinate p;
501
		float f;
502
	} dst;
503
 
504
	do {
505
		dst.p.x = box->x2;
506
		dst.p.y = box->y2;
507
		v[0] = dst.f;
508
		dst.p.x = box->x1;
509
		v[2] = dst.f;
510
		dst.p.y = box->y1;
511
		v[4] = dst.f;
512
 
513
		v[1] = compute_linear(&op->src, box->x2, box->y2);
514
		v[3] = compute_linear(&op->src, box->x1, box->y2);
515
		v[5] = compute_linear(&op->src, box->x1, box->y1);
516
 
517
		v += 6;
518
		box++;
519
	} while (--nbox);
520
}
521
 
522
sse2 fastcall static void
523
emit_primitive_identity_source(struct sna *sna,
524
			       const struct sna_composite_op *op,
525
			       const struct sna_composite_rectangles *r)
526
{
527
	union {
528
		struct sna_coordinate p;
529
		float f;
530
	} dst;
531
	float *v;
532
 
533
	assert(op->floats_per_rect == 9);
534
	assert((sna->render.vertex_used % 3) == 0);
535
	v = sna->render.vertices + sna->render.vertex_used;
536
	sna->render.vertex_used += 9;
537
 
538
	dst.p.x = r->dst.x + r->width;
539
	dst.p.y = r->dst.y + r->height;
540
	v[0] = dst.f;
541
	dst.p.x = r->dst.x;
542
	v[3] = dst.f;
543
	dst.p.y = r->dst.y;
544
	v[6] = dst.f;
545
 
546
	v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
547
	v[1] = v[4] + r->width * op->src.scale[0];
548
 
549
	v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
550
	v[5] = v[2] = v[8] + r->height * op->src.scale[1];
551
}
552
 
553
sse2 fastcall static void
554
emit_boxes_identity_source(const struct sna_composite_op *op,
555
			   const BoxRec *box, int nbox,
556
			   float *v)
557
{
558
	do {
559
		union {
560
			struct sna_coordinate p;
561
			float f;
562
		} dst;
563
 
564
		dst.p.x = box->x2;
565
		dst.p.y = box->y2;
566
		v[0] = dst.f;
567
		dst.p.x = box->x1;
568
		v[3] = dst.f;
569
		dst.p.y = box->y1;
570
		v[6] = dst.f;
571
 
572
		v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
573
		v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
574
 
575
		v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
576
		v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
577
 
578
		v += 9;
579
		box++;
580
	} while (--nbox);
581
}
582
 
583
sse2 fastcall static void
584
emit_primitive_simple_source(struct sna *sna,
585
			     const struct sna_composite_op *op,
586
			     const struct sna_composite_rectangles *r)
587
{
588
	float *v;
589
	union {
590
		struct sna_coordinate p;
591
		float f;
592
	} dst;
593
 
594
	float xx = op->src.transform->matrix[0][0];
595
	float x0 = op->src.transform->matrix[0][2];
596
	float yy = op->src.transform->matrix[1][1];
597
	float y0 = op->src.transform->matrix[1][2];
598
	float sx = op->src.scale[0];
599
	float sy = op->src.scale[1];
600
	int16_t tx = op->src.offset[0];
601
	int16_t ty = op->src.offset[1];
602
 
603
	assert(op->floats_per_rect == 9);
604
	assert((sna->render.vertex_used % 3) == 0);
605
	v = sna->render.vertices + sna->render.vertex_used;
606
	sna->render.vertex_used += 3*3;
607
 
608
	dst.p.x = r->dst.x + r->width;
609
	dst.p.y = r->dst.y + r->height;
610
	v[0] = dst.f;
611
	v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
612
	v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
613
 
614
	dst.p.x = r->dst.x;
615
	v[3] = dst.f;
616
	v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;
617
 
618
	dst.p.y = r->dst.y;
619
	v[6] = dst.f;
620
	v[8] = ((r->src.y + ty) * yy + y0) * sy;
621
}
622
 
623
sse2 fastcall static void
624
emit_boxes_simple_source(const struct sna_composite_op *op,
625
			 const BoxRec *box, int nbox,
626
			 float *v)
627
{
628
	float xx = op->src.transform->matrix[0][0];
629
	float x0 = op->src.transform->matrix[0][2];
630
	float yy = op->src.transform->matrix[1][1];
631
	float y0 = op->src.transform->matrix[1][2];
632
	float sx = op->src.scale[0];
633
	float sy = op->src.scale[1];
634
	int16_t tx = op->src.offset[0];
635
	int16_t ty = op->src.offset[1];
636
 
637
	do {
638
		union {
639
			struct sna_coordinate p;
640
			float f;
641
		} dst;
642
 
643
		dst.p.x = box->x2;
644
		dst.p.y = box->y2;
645
		v[0] = dst.f;
646
		v[1] = ((box->x2 + tx) * xx + x0) * sx;
647
		v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
648
 
649
		dst.p.x = box->x1;
650
		v[3] = dst.f;
651
		v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;
652
 
653
		dst.p.y = box->y1;
654
		v[6] = dst.f;
655
		v[8] = ((box->y1 + ty) * yy + y0) * sy;
656
 
657
		v += 9;
658
		box++;
659
	} while (--nbox);
660
}
661
 
662
sse2 fastcall static void
663
emit_primitive_affine_source(struct sna *sna,
664
			     const struct sna_composite_op *op,
665
			     const struct sna_composite_rectangles *r)
666
{
667
	union {
668
		struct sna_coordinate p;
669
		float f;
670
	} dst;
671
	float *v;
672
 
673
	assert(op->floats_per_rect == 9);
674
	assert((sna->render.vertex_used % 3) == 0);
675
	v = sna->render.vertices + sna->render.vertex_used;
676
	sna->render.vertex_used += 9;
677
 
678
	dst.p.x = r->dst.x + r->width;
679
	dst.p.y = r->dst.y + r->height;
680
	v[0] = dst.f;
681
	_sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width,
682
				    op->src.offset[1] + r->src.y + r->height,
683
				    op->src.transform, op->src.scale,
684
				    &v[1], &v[2]);
685
 
686
	dst.p.x = r->dst.x;
687
	v[3] = dst.f;
688
	_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
689
				    op->src.offset[1] + r->src.y + r->height,
690
				    op->src.transform, op->src.scale,
691
				    &v[4], &v[5]);
692
 
693
	dst.p.y = r->dst.y;
694
	v[6] = dst.f;
695
	_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
696
				    op->src.offset[1] + r->src.y,
697
				    op->src.transform, op->src.scale,
698
				    &v[7], &v[8]);
699
}
700
 
701
sse2 fastcall static void
702
emit_boxes_affine_source(const struct sna_composite_op *op,
703
			 const BoxRec *box, int nbox,
704
			 float *v)
705
{
706
	do {
707
		union {
708
			struct sna_coordinate p;
709
			float f;
710
		} dst;
711
 
712
		dst.p.x = box->x2;
713
		dst.p.y = box->y2;
714
		v[0] = dst.f;
715
		_sna_get_transformed_scaled(op->src.offset[0] + box->x2,
716
					    op->src.offset[1] + box->y2,
717
					    op->src.transform, op->src.scale,
718
					    &v[1], &v[2]);
719
 
720
		dst.p.x = box->x1;
721
		v[3] = dst.f;
722
		_sna_get_transformed_scaled(op->src.offset[0] + box->x1,
723
					    op->src.offset[1] + box->y2,
724
					    op->src.transform, op->src.scale,
725
					    &v[4], &v[5]);
726
 
727
		dst.p.y = box->y1;
728
		v[6] = dst.f;
729
		_sna_get_transformed_scaled(op->src.offset[0] + box->x1,
730
					    op->src.offset[1] + box->y1,
731
					    op->src.transform, op->src.scale,
732
					    &v[7], &v[8]);
733
		box++;
734
		v += 9;
735
	} while (--nbox);
736
}
737
 
738
sse2 fastcall static void
739
emit_primitive_identity_mask(struct sna *sna,
740
			     const struct sna_composite_op *op,
741
			     const struct sna_composite_rectangles *r)
742
{
743
	union {
744
		struct sna_coordinate p;
745
		float f;
746
	} dst;
747
	float msk_x, msk_y;
748
	float w, h;
749
	float *v;
750
 
751
	msk_x = r->mask.x + op->mask.offset[0];
752
	msk_y = r->mask.y + op->mask.offset[1];
753
	w = r->width;
754
	h = r->height;
755
 
756
	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
757
	     __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
758
 
759
	assert(op->floats_per_rect == 12);
760
	assert((sna->render.vertex_used % 4) == 0);
761
	v = sna->render.vertices + sna->render.vertex_used;
762
	sna->render.vertex_used += 12;
763
 
764
	dst.p.x = r->dst.x + r->width;
765
	dst.p.y = r->dst.y + r->height;
766
	v[0] = dst.f;
767
	v[2] = (msk_x + w) * op->mask.scale[0];
768
	v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
769
 
770
	dst.p.x = r->dst.x;
771
	v[4] = dst.f;
772
	v[10] = v[6] = msk_x * op->mask.scale[0];
773
 
774
	dst.p.y = r->dst.y;
775
	v[8] = dst.f;
776
	v[11] = msk_y * op->mask.scale[1];
777
 
778
	v[9] = v[5] = v[1] = .5;
779
}
780
 
781
sse2 fastcall static void
782
emit_boxes_identity_mask(const struct sna_composite_op *op,
783
			 const BoxRec *box, int nbox,
784
			 float *v)
785
{
786
	float msk_x = op->mask.offset[0];
787
	float msk_y = op->mask.offset[1];
788
 
789
	do {
790
		union {
791
			struct sna_coordinate p;
792
			float f;
793
		} dst;
794
 
795
		dst.p.x = box->x2;
796
		dst.p.y = box->y2;
797
		v[0] = dst.f;
798
		v[2] = (msk_x + box->x2) * op->mask.scale[0];
799
		v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
800
 
801
		dst.p.x = box->x1;
802
		v[4] = dst.f;
803
		v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
804
 
805
		dst.p.y = box->y1;
806
		v[8] = dst.f;
807
		v[11] = (msk_y + box->y1) * op->mask.scale[1];
808
 
809
		v[9] = v[5] = v[1] = .5;
810
		v += 12;
811
		box++;
812
	} while (--nbox);
813
}
814
 
815
sse2 fastcall static void
816
emit_primitive_linear_identity_mask(struct sna *sna,
817
				    const struct sna_composite_op *op,
818
				    const struct sna_composite_rectangles *r)
819
{
820
	union {
821
		struct sna_coordinate p;
822
		float f;
823
	} dst;
824
	float msk_x, msk_y;
825
	float w, h;
826
	float *v;
827
 
828
	msk_x = r->mask.x + op->mask.offset[0];
829
	msk_y = r->mask.y + op->mask.offset[1];
830
	w = r->width;
831
	h = r->height;
832
 
833
	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
834
	     __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
835
 
836
	assert(op->floats_per_rect == 12);
837
	assert((sna->render.vertex_used % 4) == 0);
838
	v = sna->render.vertices + sna->render.vertex_used;
839
	sna->render.vertex_used += 12;
840
 
841
	dst.p.x = r->dst.x + r->width;
842
	dst.p.y = r->dst.y + r->height;
843
	v[0] = dst.f;
844
	v[2] = (msk_x + w) * op->mask.scale[0];
845
	v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
846
 
847
	dst.p.x = r->dst.x;
848
	v[4] = dst.f;
849
	v[10] = v[6] = msk_x * op->mask.scale[0];
850
 
851
	dst.p.y = r->dst.y;
852
	v[8] = dst.f;
853
	v[11] = msk_y * op->mask.scale[1];
854
 
855
	v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
856
	v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
857
	v[9] = compute_linear(&op->src, r->src.x, r->src.y);
858
}
859
 
860
sse2 fastcall static void
861
emit_boxes_linear_identity_mask(const struct sna_composite_op *op,
862
				const BoxRec *box, int nbox,
863
				float *v)
864
{
865
	float msk_x = op->mask.offset[0];
866
	float msk_y = op->mask.offset[1];
867
 
868
	do {
869
		union {
870
			struct sna_coordinate p;
871
			float f;
872
		} dst;
873
 
874
		dst.p.x = box->x2;
875
		dst.p.y = box->y2;
876
		v[0] = dst.f;
877
		v[2] = (msk_x + box->x2) * op->mask.scale[0];
878
		v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
879
 
880
		dst.p.x = box->x1;
881
		v[4] = dst.f;
882
		v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
883
 
884
		dst.p.y = box->y1;
885
		v[8] = dst.f;
886
		v[11] = (msk_y + box->y1) * op->mask.scale[1];
887
 
888
		v[1] = compute_linear(&op->src, box->x2, box->y2);
889
		v[5] = compute_linear(&op->src, box->x1, box->y2);
890
		v[9] = compute_linear(&op->src, box->x1, box->y1);
891
 
892
		v += 12;
893
		box++;
894
	} while (--nbox);
895
}
896
 
897
sse2 fastcall static void
898
emit_primitive_identity_source_mask(struct sna *sna,
899
				    const struct sna_composite_op *op,
900
				    const struct sna_composite_rectangles *r)
901
{
902
	union {
903
		struct sna_coordinate p;
904
		float f;
905
	} dst;
906
	float src_x, src_y;
907
	float msk_x, msk_y;
908
	float w, h;
909
	float *v;
910
 
911
	src_x = r->src.x + op->src.offset[0];
912
	src_y = r->src.y + op->src.offset[1];
913
	msk_x = r->mask.x + op->mask.offset[0];
914
	msk_y = r->mask.y + op->mask.offset[1];
915
	w = r->width;
916
	h = r->height;
917
 
918
	assert(op->floats_per_rect == 15);
919
	assert((sna->render.vertex_used % 5) == 0);
920
	v = sna->render.vertices + sna->render.vertex_used;
921
	sna->render.vertex_used += 15;
922
 
923
	dst.p.x = r->dst.x + r->width;
924
	dst.p.y = r->dst.y + r->height;
925
	v[0] = dst.f;
926
	v[1] = (src_x + w) * op->src.scale[0];
927
	v[2] = (src_y + h) * op->src.scale[1];
928
	v[3] = (msk_x + w) * op->mask.scale[0];
929
	v[4] = (msk_y + h) * op->mask.scale[1];
930
 
931
	dst.p.x = r->dst.x;
932
	v[5] = dst.f;
933
	v[6] = src_x * op->src.scale[0];
934
	v[7] = v[2];
935
	v[8] = msk_x * op->mask.scale[0];
936
	v[9] = v[4];
937
 
938
	dst.p.y = r->dst.y;
939
	v[10] = dst.f;
940
	v[11] = v[6];
941
	v[12] = src_y * op->src.scale[1];
942
	v[13] = v[8];
943
	v[14] = msk_y * op->mask.scale[1];
944
}
945
 
946
sse2 fastcall static void
947
emit_primitive_simple_source_identity(struct sna *sna,
948
				      const struct sna_composite_op *op,
949
				      const struct sna_composite_rectangles *r)
950
{
951
	float *v;
952
	union {
953
		struct sna_coordinate p;
954
		float f;
955
	} dst;
956
 
957
	float xx = op->src.transform->matrix[0][0];
958
	float x0 = op->src.transform->matrix[0][2];
959
	float yy = op->src.transform->matrix[1][1];
960
	float y0 = op->src.transform->matrix[1][2];
961
	float sx = op->src.scale[0];
962
	float sy = op->src.scale[1];
963
	int16_t tx = op->src.offset[0];
964
	int16_t ty = op->src.offset[1];
965
	float msk_x = r->mask.x + op->mask.offset[0];
966
	float msk_y = r->mask.y + op->mask.offset[1];
967
	float w = r->width, h = r->height;
968
 
969
	assert(op->floats_per_rect == 15);
970
	assert((sna->render.vertex_used % 5) == 0);
971
	v = sna->render.vertices + sna->render.vertex_used;
972
	sna->render.vertex_used += 3*5;
973
 
974
	dst.p.x = r->dst.x + r->width;
975
	dst.p.y = r->dst.y + r->height;
976
	v[0] = dst.f;
977
	v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
978
	v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
979
	v[3] = (msk_x + w) * op->mask.scale[0];
980
	v[4] = (msk_y + h) * op->mask.scale[1];
981
 
982
	dst.p.x = r->dst.x;
983
	v[5] = dst.f;
984
	v[6] = ((r->src.x + tx) * xx + x0) * sx;
985
	v[7] = v[2];
986
	v[8] = msk_x * op->mask.scale[0];
987
	v[9] = v[4];
988
 
989
	dst.p.y = r->dst.y;
990
	v[10] = dst.f;
991
	v[11] = v[6];
992
	v[12] = ((r->src.y + ty) * yy + y0) * sy;
993
	v[13] = v[8];
994
	v[14] = msk_y * op->mask.scale[1];
995
}
996
 
997
sse2 fastcall static void
998
emit_primitive_affine_source_identity(struct sna *sna,
999
				      const struct sna_composite_op *op,
1000
				      const struct sna_composite_rectangles *r)
1001
{
1002
	float *v;
1003
	union {
1004
		struct sna_coordinate p;
1005
		float f;
1006
	} dst;
1007
	float msk_x = r->mask.x + op->mask.offset[0];
1008
	float msk_y = r->mask.y + op->mask.offset[1];
1009
	float w = r->width, h = r->height;
1010
 
1011
	assert(op->floats_per_rect == 15);
1012
	assert((sna->render.vertex_used % 5) == 0);
1013
	v = sna->render.vertices + sna->render.vertex_used;
1014
	sna->render.vertex_used += 3*5;
1015
 
1016
	dst.p.x = r->dst.x + r->width;
1017
	dst.p.y = r->dst.y + r->height;
1018
	v[0] = dst.f;
1019
	_sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width,
1020
				    op->src.offset[1] + r->src.y + r->height,
1021
				    op->src.transform, op->src.scale,
1022
				    &v[1], &v[2]);
1023
	v[3] = (msk_x + w) * op->mask.scale[0];
1024
	v[4] = (msk_y + h) * op->mask.scale[1];
1025
 
1026
	dst.p.x = r->dst.x;
1027
	v[5] = dst.f;
1028
	_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
1029
				    op->src.offset[1] + r->src.y + r->height,
1030
				    op->src.transform, op->src.scale,
1031
				    &v[6], &v[7]);
1032
	v[8] = msk_x * op->mask.scale[0];
1033
	v[9] = v[4];
1034
 
1035
	dst.p.y = r->dst.y;
1036
	v[10] = dst.f;
1037
	_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
1038
				    op->src.offset[1] + r->src.y,
1039
				    op->src.transform, op->src.scale,
1040
				    &v[11], &v[12]);
1041
	v[13] = v[8];
1042
	v[14] = msk_y * op->mask.scale[1];
1043
}
 
/* SSE4_2 */
#if defined(sse4_2)
 
/* SSE4.2-compiled variant: emit one rectangle (3 vertices) for a linear
 * gradient source without a mask.  2 floats per vertex: packed dst coord
 * plus the scalar gradient position from compute_linear().
 */
sse4_2 fastcall static void
emit_primitive_linear__sse4_2(struct sna *sna,
			      const struct sna_composite_op *op,
			      const struct sna_composite_rectangles *r)
{
	float *v;
	union {
		/* reinterpret the coordinate pair as a single float */
		struct sna_coordinate p;
		float f;
	} dst;

	assert(op->floats_per_rect == 6);
	assert((sna->render.vertex_used % 2) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* dst coords for corners (x2,y2), (x1,y2), (x1,y1) */
	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	dst.p.x = r->dst.x;
	v[2] = dst.f;
	dst.p.y = r->dst.y;
	v[4] = dst.f;

	v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
	v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
	v[5] = compute_linear(&op->src, r->src.x, r->src.y);
}
1077
 
1078
/* Batch variant: emit nbox boxes with a linear-gradient source straight
 * into v (6 floats per box).  Caller must pass nbox >= 1 (do/while) and
 * have reserved 6*nbox floats.
 */
sse4_2 fastcall static void
emit_boxes_linear__sse4_2(const struct sna_composite_op *op,
			  const BoxRec *box, int nbox,
			  float *v)
{
	union {
		/* reinterpret the coordinate pair as a single float */
		struct sna_coordinate p;
		float f;
	} dst;

	do {
		/* dst coords for corners (x2,y2), (x1,y2), (x1,y1) */
		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		dst.p.x = box->x1;
		v[2] = dst.f;
		dst.p.y = box->y1;
		v[4] = dst.f;

		v[1] = compute_linear(&op->src, box->x2, box->y2);
		v[3] = compute_linear(&op->src, box->x1, box->y2);
		v[5] = compute_linear(&op->src, box->x1, box->y1);

		v += 6;
		box++;
	} while (--nbox);
}
1105
 
1106
/* SSE4.2-compiled variant: one rectangle with an untransformed (identity)
 * texture source, no mask.  3 floats per vertex: packed dst coord plus
 * normalised src (u,v) = (coord + offset) * scale.
 */
sse4_2 fastcall static void
emit_primitive_identity_source__sse4_2(struct sna *sna,
				       const struct sna_composite_op *op,
				       const struct sna_composite_rectangles *r)
{
	union {
		/* reinterpret the coordinate pair as a single float */
		struct sna_coordinate p;
		float f;
	} dst;
	float *v;

	assert(op->floats_per_rect == 9);
	assert((sna->render.vertex_used % 3) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 9;

	/* dst coords for corners (x2,y2), (x1,y2), (x1,y1) */
	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	dst.p.x = r->dst.x;
	v[3] = dst.f;
	dst.p.y = r->dst.y;
	v[6] = dst.f;

	/* u is shared by the two x1 corners, v by the two y2 corners */
	v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
	v[1] = v[4] + r->width * op->src.scale[0];

	v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
	v[5] = v[2] = v[8] + r->height * op->src.scale[1];
}
1136
 
1137
/* Batch variant of identity-source emission: nbox boxes, 9 floats each.
 * Caller must pass nbox >= 1 (do/while) and reserve 9*nbox floats.
 */
sse4_2 fastcall static void
emit_boxes_identity_source__sse4_2(const struct sna_composite_op *op,
				   const BoxRec *box, int nbox,
				   float *v)
{
	do {
		union {
			/* reinterpret the coordinate pair as a single float */
			struct sna_coordinate p;
			float f;
		} dst;

		/* dst coords for corners (x2,y2), (x1,y2), (x1,y1) */
		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		dst.p.x = box->x1;
		v[3] = dst.f;
		dst.p.y = box->y1;
		v[6] = dst.f;

		/* shared texcoords between corners with equal x or y */
		v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
		v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];

		v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
		v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];

		v += 9;
		box++;
	} while (--nbox);
}
1166
 
1167
/* SSE4.2-compiled variant: one rectangle for a "simple" affine source —
 * scale + translate only (no rotation), so only the diagonal and
 * translation terms of the transform matrix are used.  3 floats/vertex.
 */
sse4_2 fastcall static void
emit_primitive_simple_source__sse4_2(struct sna *sna,
				     const struct sna_composite_op *op,
				     const struct sna_composite_rectangles *r)
{
	float *v;
	union {
		/* reinterpret the coordinate pair as a single float */
		struct sna_coordinate p;
		float f;
	} dst;

	/* hoisted transform terms: u = ((x + tx)*xx + x0)*sx, likewise for v */
	float xx = op->src.transform->matrix[0][0];
	float x0 = op->src.transform->matrix[0][2];
	float yy = op->src.transform->matrix[1][1];
	float y0 = op->src.transform->matrix[1][2];
	float sx = op->src.scale[0];
	float sy = op->src.scale[1];
	int16_t tx = op->src.offset[0];
	int16_t ty = op->src.offset[1];

	assert(op->floats_per_rect == 9);
	assert((sna->render.vertex_used % 3) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 3*3;

	/* corners (x2,y2), (x1,y2), (x1,y1); equal coords share texcoords */
	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
	v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;

	dst.p.x = r->dst.x;
	v[3] = dst.f;
	v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;

	dst.p.y = r->dst.y;
	v[6] = dst.f;
	v[8] = ((r->src.y + ty) * yy + y0) * sy;
}
1206
 
1207
/* Batch variant of simple-source emission (scale + translate transform):
 * nbox boxes, 9 floats each.  nbox must be >= 1 (do/while).
 */
sse4_2 fastcall static void
emit_boxes_simple_source__sse4_2(const struct sna_composite_op *op,
				 const BoxRec *box, int nbox,
				 float *v)
{
	/* hoisted loop-invariant transform terms */
	float xx = op->src.transform->matrix[0][0];
	float x0 = op->src.transform->matrix[0][2];
	float yy = op->src.transform->matrix[1][1];
	float y0 = op->src.transform->matrix[1][2];
	float sx = op->src.scale[0];
	float sy = op->src.scale[1];
	int16_t tx = op->src.offset[0];
	int16_t ty = op->src.offset[1];

	do {
		union {
			/* reinterpret the coordinate pair as a single float */
			struct sna_coordinate p;
			float f;
		} dst;

		/* corners (x2,y2), (x1,y2), (x1,y1) */
		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		v[1] = ((box->x2 + tx) * xx + x0) * sx;
		v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;

		dst.p.x = box->x1;
		v[3] = dst.f;
		v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;

		dst.p.y = box->y1;
		v[6] = dst.f;
		v[8] = ((box->y1 + ty) * yy + y0) * sy;

		v += 9;
		box++;
	} while (--nbox);
}
1245
 
1246
/* SSE4.2-compiled variant: one rectangle for a solid source modulated by
 * an identity (untransformed) mask.  4 floats/vertex: packed dst coord,
 * constant source texcoord (.5), mask (u,v).
 */
sse4_2 fastcall static void
emit_primitive_identity_mask__sse4_2(struct sna *sna,
				     const struct sna_composite_op *op,
				     const struct sna_composite_rectangles *r)
{
	union {
		/* reinterpret the coordinate pair as a single float */
		struct sna_coordinate p;
		float f;
	} dst;
	float msk_x, msk_y;
	float w, h;
	float *v;

	msk_x = r->mask.x + op->mask.offset[0];
	msk_y = r->mask.y + op->mask.offset[1];
	w = r->width;
	h = r->height;

	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
	     __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));

	assert(op->floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;

	/* corners (x2,y2), (x1,y2), (x1,y1); equal coords share mask texcoords */
	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	v[2] = (msk_x + w) * op->mask.scale[0];
	v[7] = v[3] = (msk_y + h) * op->mask.scale[1];

	dst.p.x = r->dst.x;
	v[4] = dst.f;
	v[10] = v[6] = msk_x * op->mask.scale[0];

	dst.p.y = r->dst.y;
	v[8] = dst.f;
	v[11] = msk_y * op->mask.scale[1];

	/* solid source: same constant texcoord at every vertex */
	v[9] = v[5] = v[1] = .5;
}
1288
 
1289
/* Batch variant: solid source + identity mask for nbox boxes
 * (12 floats per box).  nbox must be >= 1 (do/while).
 */
sse4_2 fastcall static void
emit_boxes_identity_mask__sse4_2(const struct sna_composite_op *op,
				 const BoxRec *box, int nbox,
				 float *v)
{
	float msk_x = op->mask.offset[0];
	float msk_y = op->mask.offset[1];

	do {
		union {
			/* reinterpret the coordinate pair as a single float */
			struct sna_coordinate p;
			float f;
		} dst;

		/* corners (x2,y2), (x1,y2), (x1,y1) */
		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		v[2] = (msk_x + box->x2) * op->mask.scale[0];
		v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];

		dst.p.x = box->x1;
		v[4] = dst.f;
		v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];

		dst.p.y = box->y1;
		v[8] = dst.f;
		v[11] = (msk_y + box->y1) * op->mask.scale[1];

		/* solid source: same constant texcoord at every vertex */
		v[9] = v[5] = v[1] = .5;
		v += 12;
		box++;
	} while (--nbox);
}
1322
 
1323
/* SSE4.2-compiled variant: one rectangle for a linear-gradient source
 * modulated by an identity mask.  4 floats/vertex: packed dst coord,
 * gradient position, mask (u,v).
 */
sse4_2 fastcall static void
emit_primitive_linear_identity_mask__sse4_2(struct sna *sna,
					    const struct sna_composite_op *op,
					    const struct sna_composite_rectangles *r)
{
	union {
		/* reinterpret the coordinate pair as a single float */
		struct sna_coordinate p;
		float f;
	} dst;
	float msk_x, msk_y;
	float w, h;
	float *v;

	msk_x = r->mask.x + op->mask.offset[0];
	msk_y = r->mask.y + op->mask.offset[1];
	w = r->width;
	h = r->height;

	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
	     __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));

	assert(op->floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;

	/* corners (x2,y2), (x1,y2), (x1,y1) */
	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	v[2] = (msk_x + w) * op->mask.scale[0];
	v[7] = v[3] = (msk_y + h) * op->mask.scale[1];

	dst.p.x = r->dst.x;
	v[4] = dst.f;
	v[10] = v[6] = msk_x * op->mask.scale[0];

	dst.p.y = r->dst.y;
	v[8] = dst.f;
	v[11] = msk_y * op->mask.scale[1];

	v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
	v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
	v[9] = compute_linear(&op->src, r->src.x, r->src.y);
}
1367
 
1368
/* Batch variant: linear-gradient source + identity mask for nbox boxes
 * (12 floats per box).  nbox must be >= 1 (do/while).
 */
sse4_2 fastcall static void
emit_boxes_linear_identity_mask__sse4_2(const struct sna_composite_op *op,
					const BoxRec *box, int nbox,
					float *v)
{
	float msk_x = op->mask.offset[0];
	float msk_y = op->mask.offset[1];

	do {
		union {
			/* reinterpret the coordinate pair as a single float */
			struct sna_coordinate p;
			float f;
		} dst;

		/* corners (x2,y2), (x1,y2), (x1,y1) */
		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		v[2] = (msk_x + box->x2) * op->mask.scale[0];
		v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];

		dst.p.x = box->x1;
		v[4] = dst.f;
		v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];

		dst.p.y = box->y1;
		v[8] = dst.f;
		v[11] = (msk_y + box->y1) * op->mask.scale[1];

		v[1] = compute_linear(&op->src, box->x2, box->y2);
		v[5] = compute_linear(&op->src, box->x1, box->y2);
		v[9] = compute_linear(&op->src, box->x1, box->y1);

		v += 12;
		box++;
	} while (--nbox);
}
 
#endif
 
/* AVX2 */
#if defined(avx2)
 
/* AVX2-compiled variant of emit_primitive_linear: one rectangle for a
 * linear-gradient source, no mask (2 floats per vertex).
 */
avx2 fastcall static void
emit_primitive_linear__avx2(struct sna *sna,
			    const struct sna_composite_op *op,
			    const struct sna_composite_rectangles *r)
{
	float *v;
	union {
		/* reinterpret the coordinate pair as a single float */
		struct sna_coordinate p;
		float f;
	} dst;

	assert(op->floats_per_rect == 6);
	assert((sna->render.vertex_used % 2) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 6;
	assert(sna->render.vertex_used <= sna->render.vertex_size);

	/* dst coords for corners (x2,y2), (x1,y2), (x1,y1) */
	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	dst.p.x = r->dst.x;
	v[2] = dst.f;
	dst.p.y = r->dst.y;
	v[4] = dst.f;

	v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
	v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
	v[5] = compute_linear(&op->src, r->src.x, r->src.y);
}
1439
 
1440
/* AVX2-compiled batch variant: linear-gradient source, nbox boxes,
 * 6 floats each.  nbox must be >= 1 (do/while).
 */
avx2 fastcall static void
emit_boxes_linear__avx2(const struct sna_composite_op *op,
			const BoxRec *box, int nbox,
			float *v)
{
	union {
		/* reinterpret the coordinate pair as a single float */
		struct sna_coordinate p;
		float f;
	} dst;

	do {
		/* dst coords for corners (x2,y2), (x1,y2), (x1,y1) */
		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		dst.p.x = box->x1;
		v[2] = dst.f;
		dst.p.y = box->y1;
		v[4] = dst.f;

		v[1] = compute_linear(&op->src, box->x2, box->y2);
		v[3] = compute_linear(&op->src, box->x1, box->y2);
		v[5] = compute_linear(&op->src, box->x1, box->y1);

		v += 6;
		box++;
	} while (--nbox);
}
1467
 
1468
/* AVX2-compiled variant: one rectangle with an untransformed source,
 * no mask (3 floats per vertex).
 */
avx2 fastcall static void
emit_primitive_identity_source__avx2(struct sna *sna,
				     const struct sna_composite_op *op,
				     const struct sna_composite_rectangles *r)
{
	union {
		/* reinterpret the coordinate pair as a single float */
		struct sna_coordinate p;
		float f;
	} dst;
	float *v;

	assert(op->floats_per_rect == 9);
	assert((sna->render.vertex_used % 3) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 9;

	/* dst coords for corners (x2,y2), (x1,y2), (x1,y1) */
	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	dst.p.x = r->dst.x;
	v[3] = dst.f;
	dst.p.y = r->dst.y;
	v[6] = dst.f;

	/* u shared by the two x1 corners, v by the two y2 corners */
	v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
	v[1] = v[4] + r->width * op->src.scale[0];

	v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
	v[5] = v[2] = v[8] + r->height * op->src.scale[1];
}
1498
 
1499
/* AVX2-compiled batch variant: identity source, nbox boxes, 9 floats
 * each.  nbox must be >= 1 (do/while).
 */
avx2 fastcall static void
emit_boxes_identity_source__avx2(const struct sna_composite_op *op,
				 const BoxRec *box, int nbox,
				 float *v)
{
	do {
		union {
			/* reinterpret the coordinate pair as a single float */
			struct sna_coordinate p;
			float f;
		} dst;

		/* dst coords for corners (x2,y2), (x1,y2), (x1,y1) */
		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		dst.p.x = box->x1;
		v[3] = dst.f;
		dst.p.y = box->y1;
		v[6] = dst.f;

		/* shared texcoords between corners with equal x or y */
		v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
		v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];

		v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
		v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];

		v += 9;
		box++;
	} while (--nbox);
}
1528
 
1529
/* AVX2-compiled variant: one rectangle for a "simple" affine source
 * (scale + translate only, no rotation), no mask.  3 floats/vertex.
 */
avx2 fastcall static void
emit_primitive_simple_source__avx2(struct sna *sna,
				   const struct sna_composite_op *op,
				   const struct sna_composite_rectangles *r)
{
	float *v;
	union {
		/* reinterpret the coordinate pair as a single float */
		struct sna_coordinate p;
		float f;
	} dst;

	/* hoisted transform terms: u = ((x + tx)*xx + x0)*sx, likewise for v */
	float xx = op->src.transform->matrix[0][0];
	float x0 = op->src.transform->matrix[0][2];
	float yy = op->src.transform->matrix[1][1];
	float y0 = op->src.transform->matrix[1][2];
	float sx = op->src.scale[0];
	float sy = op->src.scale[1];
	int16_t tx = op->src.offset[0];
	int16_t ty = op->src.offset[1];

	assert(op->floats_per_rect == 9);
	assert((sna->render.vertex_used % 3) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 3*3;

	/* corners (x2,y2), (x1,y2), (x1,y1); equal coords share texcoords */
	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
	v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;

	dst.p.x = r->dst.x;
	v[3] = dst.f;
	v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;

	dst.p.y = r->dst.y;
	v[6] = dst.f;
	v[8] = ((r->src.y + ty) * yy + y0) * sy;
}
1568
 
1569
/* AVX2-compiled batch variant of simple-source emission: nbox boxes,
 * 9 floats each.  nbox must be >= 1 (do/while).
 */
avx2 fastcall static void
emit_boxes_simple_source__avx2(const struct sna_composite_op *op,
			       const BoxRec *box, int nbox,
			       float *v)
{
	/* hoisted loop-invariant transform terms */
	float xx = op->src.transform->matrix[0][0];
	float x0 = op->src.transform->matrix[0][2];
	float yy = op->src.transform->matrix[1][1];
	float y0 = op->src.transform->matrix[1][2];
	float sx = op->src.scale[0];
	float sy = op->src.scale[1];
	int16_t tx = op->src.offset[0];
	int16_t ty = op->src.offset[1];

	do {
		union {
			/* reinterpret the coordinate pair as a single float */
			struct sna_coordinate p;
			float f;
		} dst;

		/* corners (x2,y2), (x1,y2), (x1,y1) */
		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		v[1] = ((box->x2 + tx) * xx + x0) * sx;
		v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;

		dst.p.x = box->x1;
		v[3] = dst.f;
		v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;

		dst.p.y = box->y1;
		v[6] = dst.f;
		v[8] = ((box->y1 + ty) * yy + y0) * sy;

		v += 9;
		box++;
	} while (--nbox);
}
1607
 
1608
/* AVX2-compiled variant: one rectangle for a solid source modulated by
 * an identity mask (4 floats per vertex).
 */
avx2 fastcall static void
emit_primitive_identity_mask__avx2(struct sna *sna,
				   const struct sna_composite_op *op,
				   const struct sna_composite_rectangles *r)
{
	union {
		/* reinterpret the coordinate pair as a single float */
		struct sna_coordinate p;
		float f;
	} dst;
	float msk_x, msk_y;
	float w, h;
	float *v;

	msk_x = r->mask.x + op->mask.offset[0];
	msk_y = r->mask.y + op->mask.offset[1];
	w = r->width;
	h = r->height;

	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
	     __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));

	assert(op->floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;

	/* corners (x2,y2), (x1,y2), (x1,y1) */
	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	v[2] = (msk_x + w) * op->mask.scale[0];
	v[7] = v[3] = (msk_y + h) * op->mask.scale[1];

	dst.p.x = r->dst.x;
	v[4] = dst.f;
	v[10] = v[6] = msk_x * op->mask.scale[0];

	dst.p.y = r->dst.y;
	v[8] = dst.f;
	v[11] = msk_y * op->mask.scale[1];

	/* solid source: same constant texcoord at every vertex */
	v[9] = v[5] = v[1] = .5;
}
1650
 
1651
/* AVX2-compiled batch variant: solid source + identity mask, nbox boxes,
 * 12 floats each.  nbox must be >= 1 (do/while).
 */
avx2 fastcall static void
emit_boxes_identity_mask__avx2(const struct sna_composite_op *op,
			       const BoxRec *box, int nbox,
			       float *v)
{
	float msk_x = op->mask.offset[0];
	float msk_y = op->mask.offset[1];

	do {
		union {
			/* reinterpret the coordinate pair as a single float */
			struct sna_coordinate p;
			float f;
		} dst;

		/* corners (x2,y2), (x1,y2), (x1,y1) */
		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		v[2] = (msk_x + box->x2) * op->mask.scale[0];
		v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];

		dst.p.x = box->x1;
		v[4] = dst.f;
		v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];

		dst.p.y = box->y1;
		v[8] = dst.f;
		v[11] = (msk_y + box->y1) * op->mask.scale[1];

		/* solid source: same constant texcoord at every vertex */
		v[9] = v[5] = v[1] = .5;
		v += 12;
		box++;
	} while (--nbox);
}
1684
 
1685
/* AVX2-compiled variant: one rectangle for a linear-gradient source
 * modulated by an identity mask (4 floats per vertex).
 */
avx2 fastcall static void
emit_primitive_linear_identity_mask__avx2(struct sna *sna,
					  const struct sna_composite_op *op,
					  const struct sna_composite_rectangles *r)
{
	union {
		/* reinterpret the coordinate pair as a single float */
		struct sna_coordinate p;
		float f;
	} dst;
	float msk_x, msk_y;
	float w, h;
	float *v;

	msk_x = r->mask.x + op->mask.offset[0];
	msk_y = r->mask.y + op->mask.offset[1];
	w = r->width;
	h = r->height;

	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
	     __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));

	assert(op->floats_per_rect == 12);
	assert((sna->render.vertex_used % 4) == 0);
	v = sna->render.vertices + sna->render.vertex_used;
	sna->render.vertex_used += 12;

	/* corners (x2,y2), (x1,y2), (x1,y1) */
	dst.p.x = r->dst.x + r->width;
	dst.p.y = r->dst.y + r->height;
	v[0] = dst.f;
	v[2] = (msk_x + w) * op->mask.scale[0];
	v[7] = v[3] = (msk_y + h) * op->mask.scale[1];

	dst.p.x = r->dst.x;
	v[4] = dst.f;
	v[10] = v[6] = msk_x * op->mask.scale[0];

	dst.p.y = r->dst.y;
	v[8] = dst.f;
	v[11] = msk_y * op->mask.scale[1];

	v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
	v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
	v[9] = compute_linear(&op->src, r->src.x, r->src.y);
}
1729
 
1730
/* AVX2-compiled batch variant: linear-gradient source + identity mask,
 * nbox boxes, 12 floats each.  nbox must be >= 1 (do/while).
 */
avx2 fastcall static void
emit_boxes_linear_identity_mask__avx2(const struct sna_composite_op *op,
				      const BoxRec *box, int nbox,
				      float *v)
{
	float msk_x = op->mask.offset[0];
	float msk_y = op->mask.offset[1];

	do {
		union {
			/* reinterpret the coordinate pair as a single float */
			struct sna_coordinate p;
			float f;
		} dst;

		/* corners (x2,y2), (x1,y2), (x1,y1) */
		dst.p.x = box->x2;
		dst.p.y = box->y2;
		v[0] = dst.f;
		v[2] = (msk_x + box->x2) * op->mask.scale[0];
		v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];

		dst.p.x = box->x1;
		v[4] = dst.f;
		v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];

		dst.p.y = box->y1;
		v[8] = dst.f;
		v[11] = (msk_y + box->y1) * op->mask.scale[1];

		v[1] = compute_linear(&op->src, box->x2, box->y2);
		v[5] = compute_linear(&op->src, box->x1, box->y2);
		v[9] = compute_linear(&op->src, box->x1, box->y1);

		v += 12;
		box++;
	} while (--nbox);
}
 
#endif
 
unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op *tmp)
1770
{
1771
	unsigned vb;
1772
 
1773
	if (tmp->mask.bo) {
1774
		if (tmp->mask.transform == NULL) {
1775
			if (tmp->src.is_solid) {
1776
				DBG(("%s: solid, identity mask\n", __FUNCTION__));
1777
#if defined(avx2)
1778
				if (sna->cpu_features & AVX2) {
1779
					tmp->prim_emit = emit_primitive_identity_mask__avx2;
1780
				} else
1781
#endif
1782
#if defined(sse4_2)
1783
				if (sna->cpu_features & SSE4_2) {
1784
					tmp->prim_emit = emit_primitive_identity_mask__sse4_2;
1785
				} else
1786
#endif
1787
				{
1788
					tmp->prim_emit = emit_primitive_identity_mask;
1789
				}
1790
				tmp->floats_per_vertex = 4;
1791
				vb = 1 | 2 << 2;
1792
			} else if (tmp->src.is_linear) {
1793
				DBG(("%s: linear, identity mask\n", __FUNCTION__));
1794
#if defined(avx2)
1795
				if (sna->cpu_features & AVX2) {
1796
					tmp->prim_emit = emit_primitive_linear_identity_mask__avx2;
1797
				} else
1798
#endif
1799
#if defined(sse4_2)
1800
				if (sna->cpu_features & SSE4_2) {
1801
					tmp->prim_emit = emit_primitive_linear_identity_mask__sse4_2;
1802
				} else
1803
#endif
1804
				{
1805
					tmp->prim_emit = emit_primitive_linear_identity_mask;
1806
				}
1807
				tmp->floats_per_vertex = 4;
1808
				vb = 1 | 2 << 2;
1809
			} else if (tmp->src.transform == NULL) {
1810
				DBG(("%s: identity source, identity mask\n", __FUNCTION__));
1811
				tmp->prim_emit = emit_primitive_identity_source_mask;
1812
				tmp->floats_per_vertex = 5;
1813
				vb = 2 << 2 | 2;
1814
			} else if (tmp->src.is_affine) {
1815
				tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
1816
				tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
1817
				if (!sna_affine_transform_is_rotation(tmp->src.transform)) {
1818
					DBG(("%s: simple src, identity mask\n", __FUNCTION__));
1819
					tmp->prim_emit = emit_primitive_simple_source_identity;
1820
				} else {
1821
					DBG(("%s: affine src, identity mask\n", __FUNCTION__));
1822
					tmp->prim_emit = emit_primitive_affine_source_identity;
1823
				}
1824
				tmp->floats_per_vertex = 5;
1825
				vb = 2 << 2 | 2;
1826
			} else {
1827
				DBG(("%s: projective source, identity mask\n", __FUNCTION__));
1828
				tmp->prim_emit = emit_primitive_mask;
1829
				tmp->floats_per_vertex = 6;
1830
				vb = 2 << 2 | 3;
1831
			}
1832
		} else {
1833
			tmp->prim_emit = emit_primitive_mask;
1834
			tmp->floats_per_vertex = 1;
1835
			vb = 0;
1836
			if (tmp->mask.is_solid) {
1837
				tmp->floats_per_vertex += 1;
1838
				vb |= 1 << 2;
1839
			} else if (tmp->mask.is_affine) {
1840
				tmp->floats_per_vertex += 2;
1841
				vb |= 2 << 2;
1842
			}else {
1843
				tmp->floats_per_vertex += 3;
1844
				vb |= 3 << 2;
1845
			}
1846
			if (tmp->src.is_solid) {
1847
				tmp->floats_per_vertex += 1;
1848
				vb |= 1;
1849
			} else if (tmp->src.is_affine) {
1850
				tmp->floats_per_vertex += 2;
1851
				vb |= 2 ;
1852
			}else {
1853
				tmp->floats_per_vertex += 3;
1854
				vb |= 3;
1855
			}
1856
			DBG(("%s: general mask: floats-per-vertex=%d, vb=%x\n",
1857
			     __FUNCTION__,tmp->floats_per_vertex, vb));
1858
		}
1859
	} else {
1860
#if 0
1861
		if (tmp->src.is_solid) {
1862
			DBG(("%s: solid, no mask\n", __FUNCTION__));
1863
			tmp->prim_emit = emit_primitive_solid;
1864
			if (tmp->src.is_opaque && tmp->op == PictOpOver)
1865
				tmp->op = PictOpSrc;
1866
			tmp->floats_per_vertex = 2;
1867
			vb = 1;
1868
		} else if (tmp->src.is_linear) {
1869
			DBG(("%s: linear, no mask\n", __FUNCTION__));
1870
#if defined(avx2)
1871
			if (sna->cpu_features & AVX2) {
1872
				tmp->prim_emit = emit_primitive_linear__avx2;
1873
			} else
1874
#endif
1875
#if defined(sse4_2)
1876
			if (sna->cpu_features & SSE4_2) {
1877
				tmp->prim_emit = emit_primitive_linear__sse4_2;
1878
			} else
1879
#endif
1880
			{
1881
				tmp->prim_emit = emit_primitive_linear;
1882
			}
1883
			tmp->floats_per_vertex = 2;
1884
			vb = 1;
1885
		} else if (tmp->src.transform == NULL) {
1886
			DBG(("%s: identity src, no mask\n", __FUNCTION__));
1887
#if defined(avx2)
1888
			if (sna->cpu_features & AVX2) {
1889
				tmp->prim_emit = emit_primitive_identity_source__avx2;
1890
			} else
1891
#endif
1892
#if defined(sse4_2)
1893
			if (sna->cpu_features & SSE4_2) {
1894
				tmp->prim_emit = emit_primitive_identity_source__sse4_2;
1895
			} else
1896
#endif
1897
			{
1898
				tmp->prim_emit = emit_primitive_identity_source;
1899
			}
1900
			tmp->floats_per_vertex = 3;
1901
			vb = 2;
1902
		} else if (tmp->src.is_affine) {
1903
			tmp->src.scale[0] /= tmp->src.transform->matrix[2][2];
1904
			tmp->src.scale[1] /= tmp->src.transform->matrix[2][2];
1905
			if (!sna_affine_transform_is_rotation(tmp->src.transform)) {
1906
				DBG(("%s: simple src, no mask\n", __FUNCTION__));
1907
#if defined(avx2)
1908
				if (sna->cpu_features & AVX2) {
1909
					tmp->prim_emit = emit_primitive_simple_source__avx2;
1910
				} else
1911
#endif
1912
#if defined(sse4_2)
1913
				if (sna->cpu_features & SSE4_2) {
1914
					tmp->prim_emit = emit_primitive_simple_source__sse4_2;
1915
				} else
1916
#endif
1917
				{
1918
					tmp->prim_emit = emit_primitive_simple_source;
1919
				}
1920
			} else {
1921
				DBG(("%s: affine src, no mask\n", __FUNCTION__));
1922
				tmp->prim_emit = emit_primitive_affine_source;
1923
			}
1924
			tmp->floats_per_vertex = 3;
1925
			vb = 2;
1926
		} else {
1927
			DBG(("%s: projective src, no mask\n", __FUNCTION__));
1928
			assert(!tmp->src.is_solid);
1929
			tmp->prim_emit = emit_primitive;
1930
			tmp->floats_per_vertex = 4;
1931
			vb = 3;
1932
		}
1933
#endif
1934
	}
1935
	tmp->floats_per_rect = 3 * tmp->floats_per_vertex;
1936
 
1937
	return vb;
1938
}
1939
 
1940