Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
1891 serge 1
/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2
/*
3
 * Copyright © 2000 SuSE, Inc.
4
 * Copyright © 2007 Red Hat, Inc.
5
 *
6
 * Permission to use, copy, modify, distribute, and sell this software and its
7
 * documentation for any purpose is hereby granted without fee, provided that
8
 * the above copyright notice appear in all copies and that both that
9
 * copyright notice and this permission notice appear in supporting
10
 * documentation, and that the name of SuSE not be used in advertising or
11
 * publicity pertaining to distribution of the software without specific,
12
 * written prior permission.  SuSE makes no representations about the
13
 * suitability of this software for any purpose.  It is provided "as is"
14
 * without express or implied warranty.
15
 *
16
 * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18
 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22
 *
23
 * Author:  Keith Packard, SuSE, Inc.
24
 */
25
 
26
#ifdef HAVE_CONFIG_H
27
#include <config.h>
28
#endif
29
#include <string.h>
30
#include <stdlib.h>
31
#include "pixman-private.h"
32
#include "pixman-combine32.h"
33
#include "pixman-fast-path.h"
34
 
35
/*
 * Read one packed 24-bit pixel from the three bytes starting at a.
 *
 * The value is assembled one byte at a time, so the result does not depend
 * on the alignment of a and the buffer is never accessed through a wider
 * pointer type.  With WORDS_BIGENDIAN defined a[0] supplies bits 16-23 of
 * the result; otherwise a[0] supplies bits 0-7.  Bits 24-31 of the returned
 * value are always zero.
 */
static force_inline uint32_t
fetch_24 (uint8_t *a)
{
#ifdef WORDS_BIGENDIAN
    return ((uint32_t)a[0] << 16) | ((uint32_t)a[1] << 8) | (uint32_t)a[2];
#else
    return (uint32_t)a[0] | ((uint32_t)a[1] << 8) | ((uint32_t)a[2] << 16);
#endif
}
55
 
56
/*
 * Write the low 24 bits of v as a packed pixel into the three bytes
 * starting at a.  Bits 24-31 of v are discarded.
 *
 * The bytes are stored individually, so the routine works for any
 * alignment of a and never writes through a wider pointer type.  With
 * WORDS_BIGENDIAN defined bits 16-23 of v go to a[0]; otherwise bits 0-7
 * of v go to a[0].  This is the exact inverse of fetch_24 ().
 */
static force_inline void
store_24 (uint8_t *a,
          uint32_t v)
{
#ifdef WORDS_BIGENDIAN
    a[0] = (uint8_t) (v >> 16);
    a[1] = (uint8_t) (v >> 8);
    a[2] = (uint8_t) v;
#else
    a[0] = (uint8_t) v;
    a[1] = (uint8_t) (v >> 8);
    a[2] = (uint8_t) (v >> 16);
#endif
}
81
 
82
static force_inline uint32_t
83
over (uint32_t src,
84
      uint32_t dest)
85
{
86
    uint32_t a = ~src >> 24;
87
 
88
    UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);
89
 
90
    return dest;
91
}
92
 
93
/*
 * Apply UN8x4_MUL_UN8 to the pixel x with the 8-bit factor y and return
 * the updated pixel.  Going by the macro's name, each channel of x is
 * multiplied by y -- see pixman-combine32.h for the exact arithmetic.
 */
static uint32_t
in (uint32_t x,
    uint8_t  y)
{
    uint16_t factor = y;
    uint32_t scaled = x;

    UN8x4_MUL_UN8 (scaled, factor);

    return scaled;
}
103
 
104
/*
105
 * Naming convention:
106
 *
107
 *  op_src_mask_dest
108
 */
109
static void
110
fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
111
                                 pixman_op_t              op,
112
                                 pixman_image_t *         src_image,
113
                                 pixman_image_t *         mask_image,
114
                                 pixman_image_t *         dst_image,
115
                                 int32_t                  src_x,
116
                                 int32_t                  src_y,
117
                                 int32_t                  mask_x,
118
                                 int32_t                  mask_y,
119
                                 int32_t                  dest_x,
120
                                 int32_t                  dest_y,
121
                                 int32_t                  width,
122
                                 int32_t                  height)
123
{
124
    uint32_t    *src, *src_line;
125
    uint32_t    *dst, *dst_line;
126
    uint8_t     *mask, *mask_line;
127
    int src_stride, mask_stride, dst_stride;
128
    uint8_t m;
129
    uint32_t s, d;
130
    int32_t w;
131
 
132
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
133
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
134
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
135
 
136
    while (height--)
137
    {
138
	src = src_line;
139
	src_line += src_stride;
140
	dst = dst_line;
141
	dst_line += dst_stride;
142
	mask = mask_line;
143
	mask_line += mask_stride;
144
 
145
	w = width;
146
	while (w--)
147
	{
148
	    m = *mask++;
149
	    if (m)
150
	    {
151
		s = *src | 0xff000000;
152
 
153
		if (m == 0xff)
154
		{
155
		    *dst = s;
156
		}
157
		else
158
		{
159
		    d = in (s, m);
160
		    *dst = over (d, *dst);
161
		}
162
	    }
163
	    src++;
164
	    dst++;
165
	}
166
    }
167
}
168
 
169
static void
170
fast_composite_in_n_8_8 (pixman_implementation_t *imp,
171
                         pixman_op_t              op,
172
                         pixman_image_t *         src_image,
173
                         pixman_image_t *         mask_image,
174
                         pixman_image_t *         dest_image,
175
                         int32_t                  src_x,
176
                         int32_t                  src_y,
177
                         int32_t                  mask_x,
178
                         int32_t                  mask_y,
179
                         int32_t                  dest_x,
180
                         int32_t                  dest_y,
181
                         int32_t                  width,
182
                         int32_t                  height)
183
{
184
    uint32_t src, srca;
185
    uint8_t     *dst_line, *dst;
186
    uint8_t     *mask_line, *mask, m;
187
    int dst_stride, mask_stride;
188
    int32_t w;
189
    uint16_t t;
190
 
191
    src = _pixman_image_get_solid (src_image, dest_image->bits.format);
192
 
193
    srca = src >> 24;
194
 
195
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
196
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
197
 
198
    if (srca == 0xff)
199
    {
200
	while (height--)
201
	{
202
	    dst = dst_line;
203
	    dst_line += dst_stride;
204
	    mask = mask_line;
205
	    mask_line += mask_stride;
206
	    w = width;
207
 
208
	    while (w--)
209
	    {
210
		m = *mask++;
211
 
212
		if (m == 0)
213
		    *dst = 0;
214
		else if (m != 0xff)
215
		    *dst = MUL_UN8 (m, *dst, t);
216
 
217
		dst++;
218
	    }
219
	}
220
    }
221
    else
222
    {
223
	while (height--)
224
	{
225
	    dst = dst_line;
226
	    dst_line += dst_stride;
227
	    mask = mask_line;
228
	    mask_line += mask_stride;
229
	    w = width;
230
 
231
	    while (w--)
232
	    {
233
		m = *mask++;
234
		m = MUL_UN8 (m, srca, t);
235
 
236
		if (m == 0)
237
		    *dst = 0;
238
		else if (m != 0xff)
239
		    *dst = MUL_UN8 (m, *dst, t);
240
 
241
		dst++;
242
	    }
243
	}
244
    }
245
}
246
 
247
static void
248
fast_composite_in_8_8 (pixman_implementation_t *imp,
249
                       pixman_op_t              op,
250
                       pixman_image_t *         src_image,
251
                       pixman_image_t *         mask_image,
252
                       pixman_image_t *         dest_image,
253
                       int32_t                  src_x,
254
                       int32_t                  src_y,
255
                       int32_t                  mask_x,
256
                       int32_t                  mask_y,
257
                       int32_t                  dest_x,
258
                       int32_t                  dest_y,
259
                       int32_t                  width,
260
                       int32_t                  height)
261
{
262
    uint8_t     *dst_line, *dst;
263
    uint8_t     *src_line, *src;
264
    int dst_stride, src_stride;
265
    int32_t w;
266
    uint8_t s;
267
    uint16_t t;
268
 
269
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
270
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
271
 
272
    while (height--)
273
    {
274
	dst = dst_line;
275
	dst_line += dst_stride;
276
	src = src_line;
277
	src_line += src_stride;
278
	w = width;
279
 
280
	while (w--)
281
	{
282
	    s = *src++;
283
 
284
	    if (s == 0)
285
		*dst = 0;
286
	    else if (s != 0xff)
287
		*dst = MUL_UN8 (s, *dst, t);
288
 
289
	    dst++;
290
	}
291
    }
292
}
293
 
294
static void
295
fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
296
                              pixman_op_t              op,
297
                              pixman_image_t *         src_image,
298
                              pixman_image_t *         mask_image,
299
                              pixman_image_t *         dst_image,
300
                              int32_t                  src_x,
301
                              int32_t                  src_y,
302
                              int32_t                  mask_x,
303
                              int32_t                  mask_y,
304
                              int32_t                  dest_x,
305
                              int32_t                  dest_y,
306
                              int32_t                  width,
307
                              int32_t                  height)
308
{
309
    uint32_t src, srca;
310
    uint32_t    *dst_line, *dst, d;
311
    uint8_t     *mask_line, *mask, m;
312
    int dst_stride, mask_stride;
313
    int32_t w;
314
 
315
    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
316
 
317
    srca = src >> 24;
318
    if (src == 0)
319
	return;
320
 
321
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
322
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
323
 
324
    while (height--)
325
    {
326
	dst = dst_line;
327
	dst_line += dst_stride;
328
	mask = mask_line;
329
	mask_line += mask_stride;
330
	w = width;
331
 
332
	while (w--)
333
	{
334
	    m = *mask++;
335
	    if (m == 0xff)
336
	    {
337
		if (srca == 0xff)
338
		    *dst = src;
339
		else
340
		    *dst = over (src, *dst);
341
	    }
342
	    else if (m)
343
	    {
344
		d = in (src, m);
345
		*dst = over (d, *dst);
346
	    }
347
	    dst++;
348
	}
349
    }
350
}
351
 
352
static void
353
fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
354
				   pixman_op_t              op,
355
				   pixman_image_t *         src_image,
356
				   pixman_image_t *         mask_image,
357
				   pixman_image_t *         dst_image,
358
				   int32_t                  src_x,
359
				   int32_t                  src_y,
360
				   int32_t                  mask_x,
361
				   int32_t                  mask_y,
362
				   int32_t                  dest_x,
363
				   int32_t                  dest_y,
364
				   int32_t                  width,
365
				   int32_t                  height)
366
{
367
    uint32_t src, srca, s;
368
    uint32_t    *dst_line, *dst, d;
369
    uint32_t    *mask_line, *mask, ma;
370
    int dst_stride, mask_stride;
371
    int32_t w;
372
 
373
    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
374
 
375
    srca = src >> 24;
376
    if (src == 0)
377
	return;
378
 
379
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
380
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
381
 
382
    while (height--)
383
    {
384
	dst = dst_line;
385
	dst_line += dst_stride;
386
	mask = mask_line;
387
	mask_line += mask_stride;
388
	w = width;
389
 
390
	while (w--)
391
	{
392
	    ma = *mask++;
393
 
394
	    if (ma)
395
	    {
396
		d = *dst;
397
		s = src;
398
 
399
		UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);
400
 
401
		*dst = s;
402
	    }
403
 
404
	    dst++;
405
	}
406
    }
407
}
408
 
409
static void
410
fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
411
                                    pixman_op_t              op,
412
                                    pixman_image_t *         src_image,
413
                                    pixman_image_t *         mask_image,
414
                                    pixman_image_t *         dst_image,
415
                                    int32_t                  src_x,
416
                                    int32_t                  src_y,
417
                                    int32_t                  mask_x,
418
                                    int32_t                  mask_y,
419
                                    int32_t                  dest_x,
420
                                    int32_t                  dest_y,
421
                                    int32_t                  width,
422
                                    int32_t                  height)
423
{
424
    uint32_t src, srca, s;
425
    uint32_t    *dst_line, *dst, d;
426
    uint32_t    *mask_line, *mask, ma;
427
    int dst_stride, mask_stride;
428
    int32_t w;
429
 
430
    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
431
 
432
    srca = src >> 24;
433
    if (src == 0)
434
	return;
435
 
436
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
437
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
438
 
439
    while (height--)
440
    {
441
	dst = dst_line;
442
	dst_line += dst_stride;
443
	mask = mask_line;
444
	mask_line += mask_stride;
445
	w = width;
446
 
447
	while (w--)
448
	{
449
	    ma = *mask++;
450
	    if (ma == 0xffffffff)
451
	    {
452
		if (srca == 0xff)
453
		    *dst = src;
454
		else
455
		    *dst = over (src, *dst);
456
	    }
457
	    else if (ma)
458
	    {
459
		d = *dst;
460
		s = src;
461
 
462
		UN8x4_MUL_UN8x4 (s, ma);
463
		UN8x4_MUL_UN8 (ma, srca);
464
		ma = ~ma;
465
		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
466
 
467
		*dst = d;
468
	    }
469
 
470
	    dst++;
471
	}
472
    }
473
}
474
 
475
static void
476
fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
477
                              pixman_op_t              op,
478
                              pixman_image_t *         src_image,
479
                              pixman_image_t *         mask_image,
480
                              pixman_image_t *         dst_image,
481
                              int32_t                  src_x,
482
                              int32_t                  src_y,
483
                              int32_t                  mask_x,
484
                              int32_t                  mask_y,
485
                              int32_t                  dest_x,
486
                              int32_t                  dest_y,
487
                              int32_t                  width,
488
                              int32_t                  height)
489
{
490
    uint32_t src, srca;
491
    uint8_t     *dst_line, *dst;
492
    uint32_t d;
493
    uint8_t     *mask_line, *mask, m;
494
    int dst_stride, mask_stride;
495
    int32_t w;
496
 
497
    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
498
 
499
    srca = src >> 24;
500
    if (src == 0)
501
	return;
502
 
503
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
504
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
505
 
506
    while (height--)
507
    {
508
	dst = dst_line;
509
	dst_line += dst_stride;
510
	mask = mask_line;
511
	mask_line += mask_stride;
512
	w = width;
513
 
514
	while (w--)
515
	{
516
	    m = *mask++;
517
	    if (m == 0xff)
518
	    {
519
		if (srca == 0xff)
520
		{
521
		    d = src;
522
		}
523
		else
524
		{
525
		    d = fetch_24 (dst);
526
		    d = over (src, d);
527
		}
528
		store_24 (dst, d);
529
	    }
530
	    else if (m)
531
	    {
532
		d = over (in (src, m), fetch_24 (dst));
533
		store_24 (dst, d);
534
	    }
535
	    dst += 3;
536
	}
537
    }
538
}
539
 
540
static void
541
fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
542
                              pixman_op_t              op,
543
                              pixman_image_t *         src_image,
544
                              pixman_image_t *         mask_image,
545
                              pixman_image_t *         dst_image,
546
                              int32_t                  src_x,
547
                              int32_t                  src_y,
548
                              int32_t                  mask_x,
549
                              int32_t                  mask_y,
550
                              int32_t                  dest_x,
551
                              int32_t                  dest_y,
552
                              int32_t                  width,
553
                              int32_t                  height)
554
{
555
    uint32_t src, srca;
556
    uint16_t    *dst_line, *dst;
557
    uint32_t d;
558
    uint8_t     *mask_line, *mask, m;
559
    int dst_stride, mask_stride;
560
    int32_t w;
561
 
562
    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
563
 
564
    srca = src >> 24;
565
    if (src == 0)
566
	return;
567
 
568
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
569
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
570
 
571
    while (height--)
572
    {
573
	dst = dst_line;
574
	dst_line += dst_stride;
575
	mask = mask_line;
576
	mask_line += mask_stride;
577
	w = width;
578
 
579
	while (w--)
580
	{
581
	    m = *mask++;
582
	    if (m == 0xff)
583
	    {
584
		if (srca == 0xff)
585
		{
586
		    d = src;
587
		}
588
		else
589
		{
590
		    d = *dst;
591
		    d = over (src, CONVERT_0565_TO_0888 (d));
592
		}
593
		*dst = CONVERT_8888_TO_0565 (d);
594
	    }
595
	    else if (m)
596
	    {
597
		d = *dst;
598
		d = over (in (src, m), CONVERT_0565_TO_0888 (d));
599
		*dst = CONVERT_8888_TO_0565 (d);
600
	    }
601
	    dst++;
602
	}
603
    }
604
}
605
 
606
static void
607
fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
608
                                    pixman_op_t              op,
609
                                    pixman_image_t *         src_image,
610
                                    pixman_image_t *         mask_image,
611
                                    pixman_image_t *         dst_image,
612
                                    int32_t                  src_x,
613
                                    int32_t                  src_y,
614
                                    int32_t                  mask_x,
615
                                    int32_t                  mask_y,
616
                                    int32_t                  dest_x,
617
                                    int32_t                  dest_y,
618
                                    int32_t                  width,
619
                                    int32_t                  height)
620
{
621
    uint32_t  src, srca, s;
622
    uint16_t  src16;
623
    uint16_t *dst_line, *dst;
624
    uint32_t  d;
625
    uint32_t *mask_line, *mask, ma;
626
    int dst_stride, mask_stride;
627
    int32_t w;
628
 
629
    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
630
 
631
    srca = src >> 24;
632
    if (src == 0)
633
	return;
634
 
635
    src16 = CONVERT_8888_TO_0565 (src);
636
 
637
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
638
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
639
 
640
    while (height--)
641
    {
642
	dst = dst_line;
643
	dst_line += dst_stride;
644
	mask = mask_line;
645
	mask_line += mask_stride;
646
	w = width;
647
 
648
	while (w--)
649
	{
650
	    ma = *mask++;
651
	    if (ma == 0xffffffff)
652
	    {
653
		if (srca == 0xff)
654
		{
655
		    *dst = src16;
656
		}
657
		else
658
		{
659
		    d = *dst;
660
		    d = over (src, CONVERT_0565_TO_0888 (d));
661
		    *dst = CONVERT_8888_TO_0565 (d);
662
		}
663
	    }
664
	    else if (ma)
665
	    {
666
		d = *dst;
667
		d = CONVERT_0565_TO_0888 (d);
668
 
669
		s = src;
670
 
671
		UN8x4_MUL_UN8x4 (s, ma);
672
		UN8x4_MUL_UN8 (ma, srca);
673
		ma = ~ma;
674
		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
675
 
676
		*dst = CONVERT_8888_TO_0565 (d);
677
	    }
678
	    dst++;
679
	}
680
    }
681
}
682
 
683
static void
684
fast_composite_over_8888_8888 (pixman_implementation_t *imp,
685
                               pixman_op_t              op,
686
                               pixman_image_t *         src_image,
687
                               pixman_image_t *         mask_image,
688
                               pixman_image_t *         dst_image,
689
                               int32_t                  src_x,
690
                               int32_t                  src_y,
691
                               int32_t                  mask_x,
692
                               int32_t                  mask_y,
693
                               int32_t                  dest_x,
694
                               int32_t                  dest_y,
695
                               int32_t                  width,
696
                               int32_t                  height)
697
{
698
    uint32_t    *dst_line, *dst;
699
    uint32_t    *src_line, *src, s;
700
    int dst_stride, src_stride;
701
    uint8_t a;
702
    int32_t w;
703
 
704
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
705
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
706
 
707
    while (height--)
708
    {
709
	dst = dst_line;
710
	dst_line += dst_stride;
711
	src = src_line;
712
	src_line += src_stride;
713
	w = width;
714
 
715
	while (w--)
716
	{
717
	    s = *src++;
718
	    a = s >> 24;
719
	    if (a == 0xff)
720
		*dst = s;
721
	    else if (s)
722
		*dst = over (s, *dst);
723
	    dst++;
724
	}
725
    }
726
}
727
 
728
static void
729
fast_composite_src_x888_8888 (pixman_implementation_t *imp,
730
			      pixman_op_t              op,
731
			      pixman_image_t *         src_image,
732
			      pixman_image_t *         mask_image,
733
			      pixman_image_t *         dst_image,
734
			      int32_t                  src_x,
735
			      int32_t                  src_y,
736
			      int32_t                  mask_x,
737
			      int32_t                  mask_y,
738
			      int32_t                  dest_x,
739
			      int32_t                  dest_y,
740
			      int32_t                  width,
741
			      int32_t                  height)
742
{
743
    uint32_t    *dst_line, *dst;
744
    uint32_t    *src_line, *src;
745
    int dst_stride, src_stride;
746
    int32_t w;
747
 
748
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
749
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
750
 
751
    while (height--)
752
    {
753
	dst = dst_line;
754
	dst_line += dst_stride;
755
	src = src_line;
756
	src_line += src_stride;
757
	w = width;
758
 
759
	while (w--)
760
	    *dst++ = (*src++) | 0xff000000;
761
    }
762
}
763
 
764
#if 0
/*
 * Disabled by the surrounding #if 0.
 *
 * Composite a 32-bit source onto a packed 24-bit destination (three bytes
 * per pixel, accessed with fetch_24 () and store_24 ()).  Per pixel: a
 * source whose top byte is 0 leaves the destination alone, a top byte of
 * 0xff stores the source directly, and anything else stores
 * over (source, destination).
 *
 * imp, op, mask_image, mask_x and mask_y are not used by this routine.
 */
static void
fast_composite_over_8888_0888 (pixman_implementation_t *imp,
			       pixman_op_t              op,
			       pixman_image_t *         src_image,
			       pixman_image_t *         mask_image,
			       pixman_image_t *         dst_image,
			       int32_t                  src_x,
			       int32_t                  src_y,
			       int32_t                  mask_x,
			       int32_t                  mask_y,
			       int32_t                  dest_x,
			       int32_t                  dest_y,
			       int32_t                  width,
			       int32_t                  height)
{
    uint8_t  *dst_row;
    uint32_t *src_row;
    int dst_stride, src_stride;

    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_row, 3);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_row, 1);

    for (; height != 0; --height)
    {
	uint8_t  *dp = dst_row;
	uint32_t *sp = src_row;
	int32_t   left;

	dst_row += dst_stride;
	src_row += src_stride;

	/* The destination pointer advances three bytes per pixel. */
	for (left = width; left != 0; --left, dp += 3)
	{
	    uint32_t pixel = *sp++;
	    uint8_t  alpha = pixel >> 24;
	    uint32_t result;

	    if (alpha == 0)
		continue;

	    if (alpha == 0xff)
		result = pixel;
	    else
		result = over (pixel, fetch_24 (dp));

	    store_24 (dp, result);
	}
    }
}
#endif
816
 
817
static void
818
fast_composite_over_8888_0565 (pixman_implementation_t *imp,
819
                               pixman_op_t              op,
820
                               pixman_image_t *         src_image,
821
                               pixman_image_t *         mask_image,
822
                               pixman_image_t *         dst_image,
823
                               int32_t                  src_x,
824
                               int32_t                  src_y,
825
                               int32_t                  mask_x,
826
                               int32_t                  mask_y,
827
                               int32_t                  dest_x,
828
                               int32_t                  dest_y,
829
                               int32_t                  width,
830
                               int32_t                  height)
831
{
832
    uint16_t    *dst_line, *dst;
833
    uint32_t d;
834
    uint32_t    *src_line, *src, s;
835
    uint8_t a;
836
    int dst_stride, src_stride;
837
    int32_t w;
838
 
839
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
840
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
841
 
842
    while (height--)
843
    {
844
	dst = dst_line;
845
	dst_line += dst_stride;
846
	src = src_line;
847
	src_line += src_stride;
848
	w = width;
849
 
850
	while (w--)
851
	{
852
	    s = *src++;
853
	    a = s >> 24;
854
	    if (s)
855
	    {
856
		if (a == 0xff)
857
		{
858
		    d = s;
859
		}
860
		else
861
		{
862
		    d = *dst;
863
		    d = over (s, CONVERT_0565_TO_0888 (d));
864
		}
865
		*dst = CONVERT_8888_TO_0565 (d);
866
	    }
867
	    dst++;
868
	}
869
    }
870
}
871
 
872
static void
873
fast_composite_src_x888_0565 (pixman_implementation_t *imp,
874
                              pixman_op_t              op,
875
                              pixman_image_t *         src_image,
876
                              pixman_image_t *         mask_image,
877
                              pixman_image_t *         dst_image,
878
                              int32_t                  src_x,
879
                              int32_t                  src_y,
880
                              int32_t                  mask_x,
881
                              int32_t                  mask_y,
882
                              int32_t                  dest_x,
883
                              int32_t                  dest_y,
884
                              int32_t                  width,
885
                              int32_t                  height)
886
{
887
    uint16_t    *dst_line, *dst;
888
    uint32_t    *src_line, *src, s;
889
    int dst_stride, src_stride;
890
    int32_t w;
891
 
892
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
893
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
894
 
895
    while (height--)
896
    {
897
	dst = dst_line;
898
	dst_line += dst_stride;
899
	src = src_line;
900
	src_line += src_stride;
901
	w = width;
902
 
903
	while (w--)
904
	{
905
	    s = *src++;
906
	    *dst = CONVERT_8888_TO_0565 (s);
907
	    dst++;
908
	}
909
    }
910
}
911
 
912
static void
913
fast_composite_add_8_8 (pixman_implementation_t *imp,
914
			pixman_op_t              op,
915
			pixman_image_t *         src_image,
916
			pixman_image_t *         mask_image,
917
			pixman_image_t *         dst_image,
918
			int32_t                  src_x,
919
			int32_t                  src_y,
920
			int32_t                  mask_x,
921
			int32_t                  mask_y,
922
			int32_t                  dest_x,
923
			int32_t                  dest_y,
924
			int32_t                  width,
925
			int32_t                  height)
926
{
927
    uint8_t     *dst_line, *dst;
928
    uint8_t     *src_line, *src;
929
    int dst_stride, src_stride;
930
    int32_t w;
931
    uint8_t s, d;
932
    uint16_t t;
933
 
934
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
935
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
936
 
937
    while (height--)
938
    {
939
	dst = dst_line;
940
	dst_line += dst_stride;
941
	src = src_line;
942
	src_line += src_stride;
943
	w = width;
944
 
945
	while (w--)
946
	{
947
	    s = *src++;
948
	    if (s)
949
	    {
950
		if (s != 0xff)
951
		{
952
		    d = *dst;
953
		    t = d + s;
954
		    s = t | (0 - (t >> 8));
955
		}
956
		*dst = s;
957
	    }
958
	    dst++;
959
	}
960
    }
961
}
962
 
963
static void
964
fast_composite_add_8888_8888 (pixman_implementation_t *imp,
965
                              pixman_op_t              op,
966
                              pixman_image_t *         src_image,
967
                              pixman_image_t *         mask_image,
968
                              pixman_image_t *         dst_image,
969
                              int32_t                  src_x,
970
                              int32_t                  src_y,
971
                              int32_t                  mask_x,
972
                              int32_t                  mask_y,
973
                              int32_t                  dest_x,
974
                              int32_t                  dest_y,
975
                              int32_t                  width,
976
                              int32_t                  height)
977
{
978
    uint32_t    *dst_line, *dst;
979
    uint32_t    *src_line, *src;
980
    int dst_stride, src_stride;
981
    int32_t w;
982
    uint32_t s, d;
983
 
984
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
985
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
986
 
987
    while (height--)
988
    {
989
	dst = dst_line;
990
	dst_line += dst_stride;
991
	src = src_line;
992
	src_line += src_stride;
993
	w = width;
994
 
995
	while (w--)
996
	{
997
	    s = *src++;
998
	    if (s)
999
	    {
1000
		if (s != 0xffffffff)
1001
		{
1002
		    d = *dst;
1003
		    if (d)
1004
			UN8x4_ADD_UN8x4 (s, d);
1005
		}
1006
		*dst = s;
1007
	    }
1008
	    dst++;
1009
	}
1010
    }
1011
}
1012
 
1013
static void
1014
fast_composite_add_n_8_8 (pixman_implementation_t *imp,
1015
			  pixman_op_t              op,
1016
			  pixman_image_t *         src_image,
1017
			  pixman_image_t *         mask_image,
1018
			  pixman_image_t *         dst_image,
1019
			  int32_t                  src_x,
1020
			  int32_t                  src_y,
1021
			  int32_t                  mask_x,
1022
			  int32_t                  mask_y,
1023
			  int32_t                  dest_x,
1024
			  int32_t                  dest_y,
1025
			  int32_t                  width,
1026
			  int32_t                  height)
1027
{
1028
    uint8_t     *dst_line, *dst;
1029
    uint8_t     *mask_line, *mask;
1030
    int dst_stride, mask_stride;
1031
    int32_t w;
1032
    uint32_t src;
1033
    uint8_t sa;
1034
 
1035
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
1036
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
1037
    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
1038
    sa = (src >> 24);
1039
 
1040
    while (height--)
1041
    {
1042
	dst = dst_line;
1043
	dst_line += dst_stride;
1044
	mask = mask_line;
1045
	mask_line += mask_stride;
1046
	w = width;
1047
 
1048
	while (w--)
1049
	{
1050
	    uint16_t tmp;
1051
	    uint16_t a;
1052
	    uint32_t m, d;
1053
	    uint32_t r;
1054
 
1055
	    a = *mask++;
1056
	    d = *dst;
1057
 
1058
	    m = MUL_UN8 (sa, a, tmp);
1059
	    r = ADD_UN8 (m, d, tmp);
1060
 
1061
	    *dst++ = r;
1062
	}
1063
    }
1064
}
1065
 
1066
/*
 * Bit addressing helpers for 1 bpp data stored in arrays of uint32_t.
 *
 * CREATE_BITMASK(n)  - mask selecting bit n (0..31) of a word.  With
 *                      WORDS_BIGENDIAN bit 0 is the most significant bit,
 *                      otherwise it is the least significant one.  The
 *                      constants are unsigned so that n == 31 neither
 *                      overflows a signed int nor produces a negative value.
 * UPDATE_BITMASK(m)  - move mask m to the next bit.  When m is a uint32_t
 *                      the result becomes 0 once the last bit of the word
 *                      has been passed; callers use that to reload.
 * TEST_BIT(p, n)     - non-zero if bit n of the bit array at p is set.
 * SET_BIT(p, n)      - set bit n of the bit array at p.  Expands to a single
 *                      statement without a trailing semicolon, so it can be
 *                      used safely in an unbraced if/else.
 */
#ifdef WORDS_BIGENDIAN
#define CREATE_BITMASK(n) (0x80000000U >> (n))
#define UPDATE_BITMASK(n) ((n) >> 1)
#else
#define CREATE_BITMASK(n) (1U << (n))
#define UPDATE_BITMASK(n) ((n) << 1)
#endif

#define TEST_BIT(p, n)					\
    (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
#define SET_BIT(p, n)							\
    do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0)
1078
 
1079
static void
1080
fast_composite_add_1000_1000 (pixman_implementation_t *imp,
1081
                              pixman_op_t              op,
1082
                              pixman_image_t *         src_image,
1083
                              pixman_image_t *         mask_image,
1084
                              pixman_image_t *         dst_image,
1085
                              int32_t                  src_x,
1086
                              int32_t                  src_y,
1087
                              int32_t                  mask_x,
1088
                              int32_t                  mask_y,
1089
                              int32_t                  dest_x,
1090
                              int32_t                  dest_y,
1091
                              int32_t                  width,
1092
                              int32_t                  height)
1093
{
1094
    uint32_t     *dst_line, *dst;
1095
    uint32_t     *src_line, *src;
1096
    int           dst_stride, src_stride;
1097
    int32_t       w;
1098
 
1099
    PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
1100
                           src_stride, src_line, 1);
1101
    PIXMAN_IMAGE_GET_LINE (dst_image, 0, dest_y, uint32_t,
1102
                           dst_stride, dst_line, 1);
1103
 
1104
    while (height--)
1105
    {
1106
	dst = dst_line;
1107
	dst_line += dst_stride;
1108
	src = src_line;
1109
	src_line += src_stride;
1110
	w = width;
1111
 
1112
	while (w--)
1113
	{
1114
	    /*
1115
	     * TODO: improve performance by processing uint32_t data instead
1116
	     *       of individual bits
1117
	     */
1118
	    if (TEST_BIT (src, src_x + w))
1119
		SET_BIT (dst, dest_x + w);
1120
	}
1121
    }
1122
}
1123
 
1124
static void
1125
fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
1126
                              pixman_op_t              op,
1127
                              pixman_image_t *         src_image,
1128
                              pixman_image_t *         mask_image,
1129
                              pixman_image_t *         dst_image,
1130
                              int32_t                  src_x,
1131
                              int32_t                  src_y,
1132
                              int32_t                  mask_x,
1133
                              int32_t                  mask_y,
1134
                              int32_t                  dest_x,
1135
                              int32_t                  dest_y,
1136
                              int32_t                  width,
1137
                              int32_t                  height)
1138
{
1139
    uint32_t     src, srca;
1140
    uint32_t    *dst, *dst_line;
1141
    uint32_t    *mask, *mask_line;
1142
    int          mask_stride, dst_stride;
1143
    uint32_t     bitcache, bitmask;
1144
    int32_t      w;
1145
 
1146
    if (width <= 0)
1147
	return;
1148
 
1149
    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
1150
    srca = src >> 24;
1151
    if (src == 0)
1152
	return;
1153
 
1154
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t,
1155
                           dst_stride, dst_line, 1);
1156
    PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
1157
                           mask_stride, mask_line, 1);
1158
    mask_line += mask_x >> 5;
1159
 
1160
    if (srca == 0xff)
1161
    {
1162
	while (height--)
1163
	{
1164
	    dst = dst_line;
1165
	    dst_line += dst_stride;
1166
	    mask = mask_line;
1167
	    mask_line += mask_stride;
1168
	    w = width;
1169
 
1170
	    bitcache = *mask++;
1171
	    bitmask = CREATE_BITMASK (mask_x & 31);
1172
 
1173
	    while (w--)
1174
	    {
1175
		if (bitmask == 0)
1176
		{
1177
		    bitcache = *mask++;
1178
		    bitmask = CREATE_BITMASK (0);
1179
		}
1180
		if (bitcache & bitmask)
1181
		    *dst = src;
1182
		bitmask = UPDATE_BITMASK (bitmask);
1183
		dst++;
1184
	    }
1185
	}
1186
    }
1187
    else
1188
    {
1189
	while (height--)
1190
	{
1191
	    dst = dst_line;
1192
	    dst_line += dst_stride;
1193
	    mask = mask_line;
1194
	    mask_line += mask_stride;
1195
	    w = width;
1196
 
1197
	    bitcache = *mask++;
1198
	    bitmask = CREATE_BITMASK (mask_x & 31);
1199
 
1200
	    while (w--)
1201
	    {
1202
		if (bitmask == 0)
1203
		{
1204
		    bitcache = *mask++;
1205
		    bitmask = CREATE_BITMASK (0);
1206
		}
1207
		if (bitcache & bitmask)
1208
		    *dst = over (src, *dst);
1209
		bitmask = UPDATE_BITMASK (bitmask);
1210
		dst++;
1211
	    }
1212
	}
1213
    }
1214
}
1215
 
1216
static void
1217
fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
1218
                              pixman_op_t              op,
1219
                              pixman_image_t *         src_image,
1220
                              pixman_image_t *         mask_image,
1221
                              pixman_image_t *         dst_image,
1222
                              int32_t                  src_x,
1223
                              int32_t                  src_y,
1224
                              int32_t                  mask_x,
1225
                              int32_t                  mask_y,
1226
                              int32_t                  dest_x,
1227
                              int32_t                  dest_y,
1228
                              int32_t                  width,
1229
                              int32_t                  height)
1230
{
1231
    uint32_t     src, srca;
1232
    uint16_t    *dst, *dst_line;
1233
    uint32_t    *mask, *mask_line;
1234
    int          mask_stride, dst_stride;
1235
    uint32_t     bitcache, bitmask;
1236
    int32_t      w;
1237
    uint32_t     d;
1238
    uint16_t     src565;
1239
 
1240
    if (width <= 0)
1241
	return;
1242
 
1243
    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
1244
    srca = src >> 24;
1245
    if (src == 0)
1246
	return;
1247
 
1248
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t,
1249
                           dst_stride, dst_line, 1);
1250
    PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
1251
                           mask_stride, mask_line, 1);
1252
    mask_line += mask_x >> 5;
1253
 
1254
    if (srca == 0xff)
1255
    {
1256
	src565 = CONVERT_8888_TO_0565 (src);
1257
	while (height--)
1258
	{
1259
	    dst = dst_line;
1260
	    dst_line += dst_stride;
1261
	    mask = mask_line;
1262
	    mask_line += mask_stride;
1263
	    w = width;
1264
 
1265
	    bitcache = *mask++;
1266
	    bitmask = CREATE_BITMASK (mask_x & 31);
1267
 
1268
	    while (w--)
1269
	    {
1270
		if (bitmask == 0)
1271
		{
1272
		    bitcache = *mask++;
1273
		    bitmask = CREATE_BITMASK (0);
1274
		}
1275
		if (bitcache & bitmask)
1276
		    *dst = src565;
1277
		bitmask = UPDATE_BITMASK (bitmask);
1278
		dst++;
1279
	    }
1280
	}
1281
    }
1282
    else
1283
    {
1284
	while (height--)
1285
	{
1286
	    dst = dst_line;
1287
	    dst_line += dst_stride;
1288
	    mask = mask_line;
1289
	    mask_line += mask_stride;
1290
	    w = width;
1291
 
1292
	    bitcache = *mask++;
1293
	    bitmask = CREATE_BITMASK (mask_x & 31);
1294
 
1295
	    while (w--)
1296
	    {
1297
		if (bitmask == 0)
1298
		{
1299
		    bitcache = *mask++;
1300
		    bitmask = CREATE_BITMASK (0);
1301
		}
1302
		if (bitcache & bitmask)
1303
		{
1304
		    d = over (src, CONVERT_0565_TO_0888 (*dst));
1305
		    *dst = CONVERT_8888_TO_0565 (d);
1306
		}
1307
		bitmask = UPDATE_BITMASK (bitmask);
1308
		dst++;
1309
	    }
1310
	}
1311
    }
1312
}
1313
 
1314
/*
1315
 * Simple bitblt
1316
 */
1317
 
1318
static void
1319
fast_composite_solid_fill (pixman_implementation_t *imp,
1320
                           pixman_op_t              op,
1321
                           pixman_image_t *         src_image,
1322
                           pixman_image_t *         mask_image,
1323
                           pixman_image_t *         dst_image,
1324
                           int32_t                  src_x,
1325
                           int32_t                  src_y,
1326
                           int32_t                  mask_x,
1327
                           int32_t                  mask_y,
1328
                           int32_t                  dest_x,
1329
                           int32_t                  dest_y,
1330
                           int32_t                  width,
1331
                           int32_t                  height)
1332
{
1333
    uint32_t src;
1334
 
1335
    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
1336
 
1337
    if (dst_image->bits.format == PIXMAN_a8)
1338
    {
1339
	src = src >> 24;
1340
    }
1341
    else if (dst_image->bits.format == PIXMAN_r5g6b5 ||
1342
             dst_image->bits.format == PIXMAN_b5g6r5)
1343
    {
1344
	src = CONVERT_8888_TO_0565 (src);
1345
    }
1346
 
1347
    pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
1348
                 PIXMAN_FORMAT_BPP (dst_image->bits.format),
1349
                 dest_x, dest_y,
1350
                 width, height,
1351
                 src);
1352
}
1353
 
1354
static void
1355
fast_composite_src_memcpy (pixman_implementation_t *imp,
1356
			   pixman_op_t              op,
1357
			   pixman_image_t *         src_image,
1358
			   pixman_image_t *         mask_image,
1359
			   pixman_image_t *         dst_image,
1360
			   int32_t                  src_x,
1361
			   int32_t                  src_y,
1362
			   int32_t                  mask_x,
1363
			   int32_t                  mask_y,
1364
			   int32_t                  dest_x,
1365
			   int32_t                  dest_y,
1366
			   int32_t                  width,
1367
			   int32_t                  height)
1368
{
1369
    int bpp = PIXMAN_FORMAT_BPP (dst_image->bits.format) / 8;
1370
    uint32_t n_bytes = width * bpp;
1371
    int dst_stride, src_stride;
1372
    uint8_t    *dst;
1373
    uint8_t    *src;
1374
 
1375
    src_stride = src_image->bits.rowstride * 4;
1376
    dst_stride = dst_image->bits.rowstride * 4;
1377
 
1378
    src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
1379
    dst = (uint8_t *)dst_image->bits.bits + dest_y * dst_stride + dest_x * bpp;
1380
 
1381
    while (height--)
1382
    {
1383
	memcpy (dst, src, n_bytes);
1384
 
1385
	dst += dst_stride;
1386
	src += src_stride;
1387
    }
1388
}
1389
 
1390
FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER);
1391
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE);
1392
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD);
1393
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL);
1394
FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER);
1395
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE);
1396
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD);
1397
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL);
1398
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER);
1399
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE);
1400
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD);
1401
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL);
1402
FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL);
1403
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER);
1404
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE);
1405
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD);
1406
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL);
1407
 
1408
/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
1409
static force_inline void
1410
scaled_nearest_scanline_565_565_SRC (uint16_t *      dst,
1411
				     uint16_t *      src,
1412
				     int32_t         w,
1413
				     pixman_fixed_t  vx,
1414
				     pixman_fixed_t  unit_x,
1415
				     pixman_fixed_t  max_vx)
1416
{
1417
    uint16_t tmp1, tmp2, tmp3, tmp4;
1418
    while ((w -= 4) >= 0)
1419
    {
1420
	tmp1 = src[pixman_fixed_to_int (vx)];
1421
	vx += unit_x;
1422
	tmp2 = src[pixman_fixed_to_int (vx)];
1423
	vx += unit_x;
1424
	tmp3 = src[pixman_fixed_to_int (vx)];
1425
	vx += unit_x;
1426
	tmp4 = src[pixman_fixed_to_int (vx)];
1427
	vx += unit_x;
1428
	*dst++ = tmp1;
1429
	*dst++ = tmp2;
1430
	*dst++ = tmp3;
1431
	*dst++ = tmp4;
1432
    }
1433
    if (w & 2)
1434
    {
1435
	tmp1 = src[pixman_fixed_to_int (vx)];
1436
	vx += unit_x;
1437
	tmp2 = src[pixman_fixed_to_int (vx)];
1438
	vx += unit_x;
1439
	*dst++ = tmp1;
1440
	*dst++ = tmp2;
1441
    }
1442
    if (w & 1)
1443
	*dst++ = src[pixman_fixed_to_int (vx)];
1444
}
1445
 
1446
FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
1447
		       scaled_nearest_scanline_565_565_SRC,
1448
		       uint16_t, uint16_t, COVER);
1449
FAST_NEAREST_MAINLOOP (565_565_none_SRC,
1450
		       scaled_nearest_scanline_565_565_SRC,
1451
		       uint16_t, uint16_t, NONE);
1452
FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
1453
		       scaled_nearest_scanline_565_565_SRC,
1454
		       uint16_t, uint16_t, PAD);
1455
 
1456
static force_inline uint32_t
1457
fetch_nearest (pixman_repeat_t src_repeat,
1458
	       pixman_format_code_t format,
1459
	       uint32_t *src, int x, int src_width)
1460
{
1461
    if (repeat (src_repeat, &x, src_width))
1462
    {
1463
	if (format == PIXMAN_x8r8g8b8)
1464
	    return *(src + x) | 0xff000000;
1465
	else
1466
	    return *(src + x);
1467
    }
1468
    else
1469
    {
1470
	return 0;
1471
    }
1472
}
1473
 
1474
/*
 * OVER of a single source pixel s onto *dst.
 *
 *   - s == 0: *dst is left untouched;
 *   - inverse alpha (0xff - (s >> 24)) == 0: s simply replaces *dst;
 *   - otherwise: *dst is updated by UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s)
 *     (defined elsewhere; presumably *dst = *dst * ia + s per channel).
 */
static force_inline void
combine_over (uint32_t s, uint32_t *dst)
{
    uint8_t inverse_alpha;

    if (s == 0)
        return;

    inverse_alpha = 0xff - (s >> 24);

    if (inverse_alpha == 0)
    {
        *dst = s;
        return;
    }

    UN8x4_MUL_UN8_ADD_UN8x4 (*dst, inverse_alpha, s);
}
1487
 
1488
/* SRC of a single pixel: the destination pixel is overwritten with s. */
static force_inline void
combine_src (uint32_t s, uint32_t *dst)
{
    dst[0] = s;
}
1493
 
1494
static void
1495
fast_composite_scaled_nearest (pixman_implementation_t *imp,
1496
			       pixman_op_t              op,
1497
			       pixman_image_t *         src_image,
1498
			       pixman_image_t *         mask_image,
1499
			       pixman_image_t *         dst_image,
1500
			       int32_t                  src_x,
1501
			       int32_t                  src_y,
1502
			       int32_t                  mask_x,
1503
			       int32_t                  mask_y,
1504
			       int32_t                  dest_x,
1505
			       int32_t                  dest_y,
1506
			       int32_t                  width,
1507
			       int32_t                  height)
1508
{
1509
    uint32_t       *dst_line;
1510
    uint32_t       *src_line;
1511
    int             dst_stride, src_stride;
1512
    int		    src_width, src_height;
1513
    pixman_repeat_t src_repeat;
1514
    pixman_fixed_t unit_x, unit_y;
1515
    pixman_format_code_t src_format;
1516
    pixman_vector_t v;
1517
    pixman_fixed_t vy;
1518
 
1519
    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
1520
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be
1521
     * transformed from destination space to source space
1522
     */
1523
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);
1524
 
1525
    /* reference point is the center of the pixel */
1526
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
1527
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
1528
    v.vector[2] = pixman_fixed_1;
1529
 
1530
    if (!pixman_transform_point_3d (src_image->common.transform, &v))
1531
	return;
1532
 
1533
    unit_x = src_image->common.transform->matrix[0][0];
1534
    unit_y = src_image->common.transform->matrix[1][1];
1535
 
1536
    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
1537
    v.vector[0] -= pixman_fixed_e;
1538
    v.vector[1] -= pixman_fixed_e;
1539
 
1540
    src_height = src_image->bits.height;
1541
    src_width = src_image->bits.width;
1542
    src_repeat = src_image->common.repeat;
1543
    src_format = src_image->bits.format;
1544
 
1545
    vy = v.vector[1];
1546
    while (height--)
1547
    {
1548
        pixman_fixed_t vx = v.vector[0];
1549
	int y = pixman_fixed_to_int (vy);
1550
	uint32_t *dst = dst_line;
1551
 
1552
	dst_line += dst_stride;
1553
 
1554
        /* adjust the y location by a unit vector in the y direction
1555
         * this is equivalent to transforming y+1 of the destination point to source space */
1556
        vy += unit_y;
1557
 
1558
	if (!repeat (src_repeat, &y, src_height))
1559
	{
1560
	    if (op == PIXMAN_OP_SRC)
1561
		memset (dst, 0, sizeof (*dst) * width);
1562
	}
1563
	else
1564
	{
1565
	    int w = width;
1566
 
1567
	    uint32_t *src = src_line + y * src_stride;
1568
 
1569
	    while (w >= 2)
1570
	    {
1571
		uint32_t s1, s2;
1572
		int x1, x2;
1573
 
1574
		x1 = pixman_fixed_to_int (vx);
1575
		vx += unit_x;
1576
 
1577
		x2 = pixman_fixed_to_int (vx);
1578
		vx += unit_x;
1579
 
1580
		w -= 2;
1581
 
1582
		s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
1583
		s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);
1584
 
1585
		if (op == PIXMAN_OP_OVER)
1586
		{
1587
		    combine_over (s1, dst++);
1588
		    combine_over (s2, dst++);
1589
		}
1590
		else
1591
		{
1592
		    combine_src (s1, dst++);
1593
		    combine_src (s2, dst++);
1594
		}
1595
	    }
1596
 
1597
	    while (w--)
1598
	    {
1599
		uint32_t s;
1600
		int x;
1601
 
1602
		x = pixman_fixed_to_int (vx);
1603
		vx += unit_x;
1604
 
1605
		s = fetch_nearest (src_repeat, src_format, src, x, src_width);
1606
 
1607
		if (op == PIXMAN_OP_OVER)
1608
		    combine_over (s, dst++);
1609
		else
1610
		    combine_src (s, dst++);
1611
	    }
1612
	}
1613
    }
1614
}
1615
 
1616
static const pixman_fast_path_t c_fast_paths[] =
1617
{
1618
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
1619
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
1620
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
1621
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
1622
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
1623
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
1624
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
1625
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
1626
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
1627
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
1628
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
1629
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
1630
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5,   fast_composite_over_n_1_0565),
1631
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5,   fast_composite_over_n_1_0565),
1632
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
1633
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
1634
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
1635
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
1636
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
1637
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
1638
    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
1639
    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
1640
    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
1641
    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
1642
    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
1643
    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
1644
    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
1645
    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
1646
    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
1647
    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
1648
    PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
1649
    PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
1650
    PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
1651
    PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1000_1000),
1652
    PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
1653
    PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
1654
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
1655
    PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
1656
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
1657
    PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
1658
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
1659
    PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
1660
    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
1661
    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
1662
    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
1663
    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
1664
    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
1665
    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
1666
    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
1667
    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
1668
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
1669
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
1670
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
1671
    PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
1672
    PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
1673
    PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
1674
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
1675
    PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
1676
    PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
1677
    PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
1678
    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
1679
    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, r5g6b5, fast_composite_src_x888_0565),
1680
    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
1681
    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, b5g6r5, fast_composite_src_x888_0565),
1682
    PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
1683
    PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
1684
 
1685
    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
1686
    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
1687
    SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
1688
    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),
1689
 
1690
    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
1691
    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),
1692
 
1693
    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
1694
    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),
1695
 
1696
    SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
1697
 
1698
    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
1699
    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
1700
    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
1701
    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),
1702
 
1703
    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
1704
 
1705
#define NEAREST_FAST_PATH(op,s,d)		\
1706
    {   PIXMAN_OP_ ## op,			\
1707
	PIXMAN_ ## s, SCALED_NEAREST_FLAGS,	\
1708
	PIXMAN_null, 0,				\
1709
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,	\
1710
	fast_composite_scaled_nearest,		\
1711
    }
1712
 
1713
    NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
1714
    NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
1715
    NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
1716
    NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),
1717
 
1718
    NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
1719
    NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
1720
    NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
1721
    NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),
1722
 
1723
    NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
1724
    NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
1725
    NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
1726
    NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),
1727
 
1728
    NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
1729
    NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
1730
    NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
1731
    NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),
1732
 
1733
    {   PIXMAN_OP_NONE	},
1734
};
1735
 
1736
/*
 * Fill a width x height rectangle of an 8 bpp buffer with a constant byte.
 *
 * bits    start of the buffer
 * stride  row pitch in uint32_t units
 * x, y    top-left corner of the rectangle, in pixels
 * xor     fill value; only its low 8 bits are used
 *
 * Nothing is written when width <= 0.
 */
static void
pixman_fill8 (uint32_t *bits,
              int       stride,
              int       x,
              int       y,
              int       width,
              int       height,
              uint32_t xor)
{
    const int      row_bytes = stride * (int) sizeof (uint32_t);
    const uint8_t  value = xor & 0xff;
    uint8_t       *row = (uint8_t *) bits + y * row_bytes + x;

    for (; height--; row += row_bytes)
    {
        int col = 0;

        while (col < width)
            row[col++] = value;
    }
}
1760
 
1761
/*
 * Fill a width x height rectangle of a 16 bpp buffer with a constant value.
 *
 * bits    start of the buffer
 * stride  row pitch in uint32_t units (converted to uint16_t units here)
 * x, y    top-left corner of the rectangle, in pixels
 * xor     fill value; only its low 16 bits are used
 *
 * Nothing is written when width <= 0.
 */
static void
pixman_fill16 (uint32_t *bits,
               int       stride,
               int       x,
               int       y,
               int       width,
               int       height,
               uint32_t xor)
{
    const int       row_shorts =
        (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
    const uint16_t  value = xor & 0xffff;
    uint16_t       *row = (uint16_t *)bits + y * row_shorts + x;

    for (; height--; row += row_shorts)
    {
        int col = 0;

        while (col < width)
            row[col++] = value;
    }
}
1786
 
1787
/*
 * Fill a width x height rectangle of a 32 bpp buffer with a constant value.
 *
 * bits    start of the buffer
 * stride  row pitch in uint32_t units
 * x, y    top-left corner of the rectangle, in pixels
 * xor     fill value, stored as-is
 *
 * Nothing is written when width <= 0.
 */
static void
pixman_fill32 (uint32_t *bits,
               int       stride,
               int       x,
               int       y,
               int       width,
               int       height,
               uint32_t  xor)
{
    uint32_t *row = bits + y * stride + x;

    for (; height--; row += stride)
    {
        int col = 0;

        while (col < width)
            row[col++] = xor;
    }
}
1808
 
1809
static pixman_bool_t
1810
fast_path_fill (pixman_implementation_t *imp,
1811
                uint32_t *               bits,
1812
                int                      stride,
1813
                int                      bpp,
1814
                int                      x,
1815
                int                      y,
1816
                int                      width,
1817
                int                      height,
1818
                uint32_t		 xor)
1819
{
1820
    switch (bpp)
1821
    {
1822
    case 8:
1823
	pixman_fill8 (bits, stride, x, y, width, height, xor);
1824
	break;
1825
 
1826
    case 16:
1827
	pixman_fill16 (bits, stride, x, y, width, height, xor);
1828
	break;
1829
 
1830
    case 32:
1831
	pixman_fill32 (bits, stride, x, y, width, height, xor);
1832
	break;
1833
 
1834
    default:
1835
	return _pixman_implementation_fill (
1836
	    imp->delegate, bits, stride, bpp, x, y, width, height, xor);
1837
	break;
1838
    }
1839
 
1840
    return TRUE;
1841
}
1842
 
1843
pixman_implementation_t *
1844
_pixman_implementation_create_fast_path (void)
1845
{
1846
    pixman_implementation_t *general = _pixman_implementation_create_general ();
1847
    pixman_implementation_t *imp = _pixman_implementation_create (general, c_fast_paths);
1848
 
1849
    imp->fill = fast_path_fill;
1850
 
1851
    return imp;
1852
}