Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2
/*
3
 * Copyright © 2000 SuSE, Inc.
4
 * Copyright © 2007 Red Hat, Inc.
5
 *
6
 * Permission to use, copy, modify, distribute, and sell this software and its
7
 * documentation for any purpose is hereby granted without fee, provided that
8
 * the above copyright notice appear in all copies and that both that
9
 * copyright notice and this permission notice appear in supporting
10
 * documentation, and that the name of SuSE not be used in advertising or
11
 * publicity pertaining to distribution of the software without specific,
12
 * written prior permission.  SuSE makes no representations about the
13
 * suitability of this software for any purpose.  It is provided "as is"
14
 * without express or implied warranty.
15
 *
16
 * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18
 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22
 *
23
 * Author:  Keith Packard, SuSE, Inc.
24
 */
25
 
26
#ifndef PIXMAN_FAST_PATH_H__
27
#define PIXMAN_FAST_PATH_H__
28
 
29
#include "pixman-private.h"
30
 
31
/* Extra repeat_mode value for the fast path templates in this header, which
 * select it through PIXMAN_REPEAT_ ## repeat_mode.  The negative constant is
 * parenthesized so that it expands safely in any expression context.
 */
#define PIXMAN_REPEAT_COVER (-1)
32
 
33
/* Flags describing input parameters to the fast path macro templates.
 * Turning on a flag may indicate either that
 * "some property X is available so the template can use it" or that
 * "some property X should be handled by the template".
 *
 * FLAG_HAVE_SOLID_MASK
 *  Input mask is solid, so the template should handle this.
 *
 * FLAG_HAVE_NON_SOLID_MASK
 *  Input mask is a bits mask, so the template should handle this.
 *
 * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually
 * exclusive. (It is not allowed to turn both flags on.)
 */
#define FLAG_NONE				(0)
#define FLAG_HAVE_SOLID_MASK			(1 <<   1)
#define FLAG_HAVE_NON_SOLID_MASK		(1 <<   2)
50
 
51
/* To avoid repeated scanline function calls that are too short, source
 * scanlines whose width is less than the constant below get extended.
 */
#define REPEAT_NORMAL_MIN_WIDTH			64
55
 
56
static force_inline pixman_bool_t
57
repeat (pixman_repeat_t repeat, int *c, int size)
58
{
59
    if (repeat == PIXMAN_REPEAT_NONE)
60
    {
61
	if (*c < 0 || *c >= size)
62
	    return FALSE;
63
    }
64
    else if (repeat == PIXMAN_REPEAT_NORMAL)
65
    {
66
	while (*c >= size)
67
	    *c -= size;
68
	while (*c < 0)
69
	    *c += size;
70
    }
71
    else if (repeat == PIXMAN_REPEAT_PAD)
72
    {
73
	*c = CLIP (*c, 0, size - 1);
74
    }
75
    else /* REFLECT */
76
    {
77
	*c = MOD (*c, size * 2);
78
	if (*c >= size)
79
	    *c = size * 2 - *c - 1;
80
    }
81
    return TRUE;
82
}
83
 
84
static force_inline int
85
pixman_fixed_to_bilinear_weight (pixman_fixed_t x)
86
{
87
    return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) &
88
	   ((1 << BILINEAR_INTERPOLATION_BITS) - 1);
89
}
90
 
91
#if BILINEAR_INTERPOLATION_BITS <= 4
92
/* Inspired by Filter_32_opaque from Skia */
93
static force_inline uint32_t
94
bilinear_interpolation (uint32_t tl, uint32_t tr,
95
			uint32_t bl, uint32_t br,
96
			int distx, int disty)
97
{
98
    int distxy, distxiy, distixy, distixiy;
99
    uint32_t lo, hi;
100
 
101
    distx <<= (4 - BILINEAR_INTERPOLATION_BITS);
102
    disty <<= (4 - BILINEAR_INTERPOLATION_BITS);
103
 
104
    distxy = distx * disty;
105
    distxiy = (distx << 4) - distxy;	/* distx * (16 - disty) */
106
    distixy = (disty << 4) - distxy;	/* disty * (16 - distx) */
107
    distixiy =
108
	16 * 16 - (disty << 4) -
109
	(distx << 4) + distxy; /* (16 - distx) * (16 - disty) */
110
 
111
    lo = (tl & 0xff00ff) * distixiy;
112
    hi = ((tl >> 8) & 0xff00ff) * distixiy;
113
 
114
    lo += (tr & 0xff00ff) * distxiy;
115
    hi += ((tr >> 8) & 0xff00ff) * distxiy;
116
 
117
    lo += (bl & 0xff00ff) * distixy;
118
    hi += ((bl >> 8) & 0xff00ff) * distixy;
119
 
120
    lo += (br & 0xff00ff) * distxy;
121
    hi += ((br >> 8) & 0xff00ff) * distxy;
122
 
123
    return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff);
124
}
125
 
126
#else
127
#if SIZEOF_LONG > 4
128
 
129
static force_inline uint32_t
130
bilinear_interpolation (uint32_t tl, uint32_t tr,
131
			uint32_t bl, uint32_t br,
132
			int distx, int disty)
133
{
134
    uint64_t distxy, distxiy, distixy, distixiy;
135
    uint64_t tl64, tr64, bl64, br64;
136
    uint64_t f, r;
137
 
138
    distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
139
    disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
140
 
141
    distxy = distx * disty;
142
    distxiy = distx * (256 - disty);
143
    distixy = (256 - distx) * disty;
144
    distixiy = (256 - distx) * (256 - disty);
145
 
146
    /* Alpha and Blue */
147
    tl64 = tl & 0xff0000ff;
148
    tr64 = tr & 0xff0000ff;
149
    bl64 = bl & 0xff0000ff;
150
    br64 = br & 0xff0000ff;
151
 
152
    f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
153
    r = f & 0x0000ff0000ff0000ull;
154
 
155
    /* Red and Green */
156
    tl64 = tl;
157
    tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
158
 
159
    tr64 = tr;
160
    tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
161
 
162
    bl64 = bl;
163
    bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
164
 
165
    br64 = br;
166
    br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
167
 
168
    f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
169
    r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
170
 
171
    return (uint32_t)(r >> 16);
172
}
173
 
174
#else
175
 
176
static force_inline uint32_t
177
bilinear_interpolation (uint32_t tl, uint32_t tr,
178
			uint32_t bl, uint32_t br,
179
			int distx, int disty)
180
{
181
    int distxy, distxiy, distixy, distixiy;
182
    uint32_t f, r;
183
 
184
    distx <<= (8 - BILINEAR_INTERPOLATION_BITS);
185
    disty <<= (8 - BILINEAR_INTERPOLATION_BITS);
186
 
187
    distxy = distx * disty;
188
    distxiy = (distx << 8) - distxy;	/* distx * (256 - disty) */
189
    distixy = (disty << 8) - distxy;	/* disty * (256 - distx) */
190
    distixiy =
191
	256 * 256 - (disty << 8) -
192
	(distx << 8) + distxy;		/* (256 - distx) * (256 - disty) */
193
 
194
    /* Blue */
195
    r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
196
      + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
197
 
198
    /* Green */
199
    f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
200
      + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
201
    r |= f & 0xff000000;
202
 
203
    tl >>= 16;
204
    tr >>= 16;
205
    bl >>= 16;
206
    br >>= 16;
207
    r >>= 16;
208
 
209
    /* Red */
210
    f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
211
      + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
212
    r |= f & 0x00ff0000;
213
 
214
    /* Alpha */
215
    f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
216
      + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
217
    r |= f & 0xff000000;
218
 
219
    return r;
220
}
221
 
222
#endif
223
#endif // BILINEAR_INTERPOLATION_BITS <= 4
224
 
225
/*
226
 * For each scanline fetched from source image with PAD repeat:
227
 * - calculate how many pixels need to be padded on the left side
228
 * - calculate how many pixels need to be padded on the right side
229
 * - update width to only count pixels which are fetched from the image
230
 * All this information is returned via 'width', 'left_pad', 'right_pad'
231
 * arguments. The code is assuming that 'unit_x' is positive.
232
 *
233
 * Note: 64-bit math is used in order to avoid potential overflows, which
234
 *       is probably excessive in many cases. This particular function
235
 *       may need its own correctness test and performance tuning.
236
 */
237
static force_inline void
238
pad_repeat_get_scanline_bounds (int32_t         source_image_width,
239
				pixman_fixed_t  vx,
240
				pixman_fixed_t  unit_x,
241
				int32_t *       width,
242
				int32_t *       left_pad,
243
				int32_t *       right_pad)
244
{
245
    int64_t max_vx = (int64_t) source_image_width << 16;
246
    int64_t tmp;
247
    if (vx < 0)
248
    {
249
	tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
250
	if (tmp > *width)
251
	{
252
	    *left_pad = *width;
253
	    *width = 0;
254
	}
255
	else
256
	{
257
	    *left_pad = (int32_t) tmp;
258
	    *width -= (int32_t) tmp;
259
	}
260
    }
261
    else
262
    {
263
	*left_pad = 0;
264
    }
265
    tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
266
    if (tmp < 0)
267
    {
268
	*right_pad = *width;
269
	*width = 0;
270
    }
271
    else if (tmp >= *width)
272
    {
273
	*right_pad = 0;
274
    }
275
    else
276
    {
277
	*right_pad = *width - (int32_t) tmp;
278
	*width = (int32_t) tmp;
279
    }
280
}
281
 
282
/* A macroified version of specialized nearest scalers for some
 * common 8888 and 565 formats. It supports SRC and OVER ops.
 *
 * There are two repeat versions, one that handles repeat normal,
 * and one without repeat handling that only works if the src region
 * used is completely covered by the pre-repeated source samples.
 *
 * The loops are unrolled to process two pixels per iteration for better
 * performance on most CPU architectures (superscalar processors
 * can issue several operations simultaneously, other processors can hide
 * instruction latencies by pipelining operations). Unrolling more
 * does not make much sense because the compiler will start running out
 * of spare registers soon.
 */
296
 
297
/* Alpha extraction helpers, selected by format name through
 * GET_ ## SRC_FORMAT ## _ALPHA in FAST_NEAREST_SCANLINE.
 */
#define GET_8888_ALPHA(s) ((s) >> 24)
 /* This is not actually used since we don't have an OVER with
    565 source, but it is needed to build. */
#define GET_0565_ALPHA(s) 0xff
#define GET_x888_ALPHA(s) 0xff
302
 
303
/*
 * Template that defines a nearest-neighbour scanline function.
 *
 *  scanline_func_name     - name of the generated function
 *  SRC_FORMAT, DST_FORMAT - format name fragments pasted into the
 *                           convert_<a>_to_<b> and GET_<fmt>_ALPHA helpers
 *  src_type_t, dst_type_t - pixel storage types of source and destination
 *  OP                     - SRC or OVER; the generated function calls
 *                           abort () for any other operator
 *  repeat_mode            - when NORMAL, 'vx' is pulled back by
 *                           'src_width_fixed' each time it becomes
 *                           non-negative
 *
 * The generated function writes 'w' destination pixels.  Each one is read
 * from src[pixman_fixed_to_int (vx)], with 'vx' advanced by 'unit_x' per
 * pixel.  Pixels are handled in pairs, followed by one trailing pixel when
 * 'w' is odd.  For OVER, a source pixel whose alpha is 0xff is stored
 * directly, a source pixel equal to zero leaves the destination untouched,
 * and anything else is combined through UN8x4_MUL_UN8_ADD_UN8x4 with the
 * inverted alpha.  For OVER with 'fully_transparent_src' set nothing is
 * written at all.
 */
#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT,			\
			      src_type_t, dst_type_t, OP, repeat_mode)				\
static force_inline void									\
scanline_func_name (dst_type_t       *dst,							\
		    const src_type_t *src,							\
		    int32_t           w,							\
		    pixman_fixed_t    vx,							\
		    pixman_fixed_t    unit_x,							\
		    pixman_fixed_t    src_width_fixed,						\
		    pixman_bool_t     fully_transparent_src)					\
{												\
	uint32_t   d;										\
	src_type_t s1, s2;									\
	uint8_t    a1, a2;									\
	int        x1, x2;									\
												\
	if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src)			\
	    return;										\
												\
	if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER)		\
	    abort();										\
												\
	while ((w -= 2) >= 0)									\
	{											\
	    x1 = pixman_fixed_to_int (vx);							\
	    vx += unit_x;									\
	    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
	    {											\
		/* This works because we know that unit_x is positive */			\
		while (vx >= 0)									\
		    vx -= src_width_fixed;							\
	    }											\
	    s1 = *(src + x1);									\
												\
	    x2 = pixman_fixed_to_int (vx);							\
	    vx += unit_x;									\
	    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
	    {											\
		/* This works because we know that unit_x is positive */			\
		while (vx >= 0)									\
		    vx -= src_width_fixed;							\
	    }											\
	    s2 = *(src + x2);									\
												\
	    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)						\
	    {											\
		a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);						\
		a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2);						\
												\
		if (a1 == 0xff)									\
		{										\
		    *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
		}										\
		else if (s1)									\
		{										\
		    d = convert_ ## DST_FORMAT ## _to_8888 (*dst);				\
		    s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1);				\
		    a1 ^= 0xff;									\
		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);					\
		    *dst = convert_8888_to_ ## DST_FORMAT (d);					\
		}										\
		dst++;										\
												\
		if (a2 == 0xff)									\
		{										\
		    *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2);			\
		}										\
		else if (s2)									\
		{										\
		    d = convert_## DST_FORMAT ## _to_8888 (*dst);				\
		    s2 = convert_## SRC_FORMAT ## _to_8888 (s2);				\
		    a2 ^= 0xff;									\
		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2);					\
		    *dst = convert_8888_to_ ## DST_FORMAT (d);					\
		}										\
		dst++;										\
	    }											\
	    else /* PIXMAN_OP_SRC */								\
	    {											\
		*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
		*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2);			\
	    }											\
	}											\
												\
	/* w is now -1 (odd count, one pixel left) or -2 (even count) */			\
	if (w & 1)										\
	{											\
	    x1 = pixman_fixed_to_int (vx);							\
	    s1 = *(src + x1);									\
												\
	    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER)						\
	    {											\
		a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1);						\
												\
		if (a1 == 0xff)									\
		{										\
		    *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
		}										\
		else if (s1)									\
		{										\
		    d = convert_## DST_FORMAT ## _to_8888 (*dst);				\
		    s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1);				\
		    a1 ^= 0xff;									\
		    UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1);					\
		    *dst = convert_8888_to_ ## DST_FORMAT (d);					\
		}										\
		dst++;										\
	    }											\
	    else /* PIXMAN_OP_SRC */								\
	    {											\
		*dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1);			\
	    }											\
	}											\
}
416
 
417
/*
 * Template that defines the composite entry point
 * fast_composite_scaled_nearest<scale_func_name> (imp, info).
 *
 *  scale_func_name - suffix pasted onto fast_composite_scaled_nearest
 *  scanline_func   - called as
 *                    scanline_func (mask, dst, src, w, vx, unit_x,
 *                                   src_width_fixed, fully_transparent_src)
 *  src_type_t, mask_type_t, dst_type_t - pixel storage types
 *  repeat_mode     - NORMAL, PAD, NONE or COVER (pasted onto PIXMAN_REPEAT_)
 *  have_mask       - non-zero when a mask image is used
 *  mask_is_solid   - non-zero when that mask is fetched once with
 *                    _pixman_image_get_solid instead of line by line
 *
 * The generated function transforms the centre of the first destination
 * pixel into source space, then walks the destination rows, stepping the
 * source position by matrix[0][0] horizontally and matrix[1][1] vertically.
 * For PAD and NONE each row is split into left padding, in-image pixels and
 * right padding, and 'scanline_func' is called once per non-empty part.
 *
 * Source rows are passed as 'src + width' together with
 * 'vx - src_width_fixed', i.e. the scanline function indexes them with
 * negative offsets from the end of the row.
 */
#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
				  dst_type_t, repeat_mode, have_mask, mask_is_solid)		\
static void											\
fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,		\
						   pixman_composite_info_t *info)               \
{												\
    PIXMAN_COMPOSITE_ARGS (info);					                        \
    dst_type_t *dst_line;						                        \
    mask_type_t *mask_line;									\
    src_type_t *src_first_line;									\
    int       y;										\
    pixman_fixed_t src_width_fixed = pixman_int_to_fixed (src_image->bits.width);		\
    pixman_fixed_t max_vy;									\
    pixman_vector_t v;										\
    pixman_fixed_t vx, vy;									\
    pixman_fixed_t unit_x, unit_y;								\
    int32_t left_pad, right_pad;								\
												\
    src_type_t *src;										\
    dst_type_t *dst;										\
    mask_type_t solid_mask;									\
    const mask_type_t *mask = &solid_mask;							\
    int src_stride, mask_stride, dst_stride;							\
												\
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);	\
    if (have_mask)										\
    {												\
	if (mask_is_solid)									\
	    solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);	\
	else											\
	    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,			\
				   mask_stride, mask_line, 1);					\
    }												\
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be			\
     * transformed from destination space to source space */					\
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);		\
												\
    /* reference point is the center of the pixel */						\
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;				\
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;				\
    v.vector[2] = pixman_fixed_1;								\
												\
    if (!pixman_transform_point_3d (src_image->common.transform, &v))				\
	return;											\
												\
    unit_x = src_image->common.transform->matrix[0][0];						\
    unit_y = src_image->common.transform->matrix[1][1];						\
												\
    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */			\
    v.vector[0] -= pixman_fixed_e;								\
    v.vector[1] -= pixman_fixed_e;								\
												\
    vx = v.vector[0];										\
    vy = v.vector[1];										\
												\
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)					\
    {												\
	max_vy = pixman_int_to_fixed (src_image->bits.height);					\
												\
	/* Clamp repeating positions inside the actual samples */				\
	repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);					\
	repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);						\
    }												\
												\
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||					\
	PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)					\
    {												\
	pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x,			\
					&width, &left_pad, &right_pad);				\
	/* skip the left padding: vx now refers to the first in-image pixel */			\
	vx += left_pad * unit_x;								\
    }												\
												\
    while (--height >= 0)									\
    {												\
	dst = dst_line;										\
	dst_line += dst_stride;									\
	if (have_mask && !mask_is_solid)							\
	{											\
	    mask = mask_line;									\
	    mask_line += mask_stride;								\
	}											\
												\
	y = pixman_fixed_to_int (vy);								\
	vy += unit_y;										\
	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
	    repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy);						\
	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
	{											\
	    repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height);				\
	    src = src_first_line + src_stride * y;						\
	    if (left_pad > 0)									\
	    {											\
		/* pointer is src + 1 with a zero step: every padded pixel reads		\
		 * the same source pixel, presumably src[0] -- TODO confirm */			\
		scanline_func (mask, dst,							\
			       src + src_image->bits.width - src_image->bits.width + 1,		\
			       left_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE);		\
	    }											\
	    if (width > 0)									\
	    {											\
		scanline_func (mask + (mask_is_solid ? 0 : left_pad),				\
			       dst + left_pad, src + src_image->bits.width, width,		\
			       vx - src_width_fixed, unit_x, src_width_fixed, FALSE);		\
	    }											\
	    if (right_pad > 0)									\
	    {											\
		scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),			\
			       dst + left_pad + width, src + src_image->bits.width,		\
			       right_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE);		\
	    }											\
	}											\
	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)				\
	{											\
	    /* single zero pixel standing in for everything outside the image */		\
	    static const src_type_t zero[1] = { 0 };						\
	    if (y < 0 || y >= src_image->bits.height)						\
	    {											\
		scanline_func (mask, dst, zero + 1, left_pad + width + right_pad,		\
			       -pixman_fixed_e, 0, src_width_fixed, TRUE);			\
		continue;									\
	    }											\
	    src = src_first_line + src_stride * y;						\
	    if (left_pad > 0)									\
	    {											\
		scanline_func (mask, dst, zero + 1, left_pad,					\
			       -pixman_fixed_e, 0, src_width_fixed, TRUE);			\
	    }											\
	    if (width > 0)									\
	    {											\
		scanline_func (mask + (mask_is_solid ? 0 : left_pad),				\
			       dst + left_pad, src + src_image->bits.width, width,		\
			       vx - src_width_fixed, unit_x, src_width_fixed, FALSE);		\
	    }											\
	    if (right_pad > 0)									\
	    {											\
		scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),			\
			       dst + left_pad + width, zero + 1, right_pad,			\
			       -pixman_fixed_e, 0, src_width_fixed, TRUE);			\
	    }											\
	}											\
	else											\
	{											\
	    src = src_first_line + src_stride * y;						\
	    scanline_func (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed,	\
			   unit_x, src_width_fixed, FALSE);					\
	}											\
    }												\
}
562
 
563
/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764
 *
 * Forwards to FAST_NEAREST_MAINLOOP_INT with an underscore prepended to
 * 'scale_func_name'; all other arguments are passed through unchanged.
 */
#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
				  dst_type_t, repeat_mode, have_mask, mask_is_solid)		\
	FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,	\
				  dst_type_t, repeat_mode, have_mask, mask_is_solid)
568
 
569
/*
 * Main loop for scalers without a mask.  Defines an adapter named
 * <scanline_func><scale_func_name>_wrapper that accepts (and ignores) a
 * 'mask' argument and forwards the remaining arguments to 'scanline_func',
 * then instantiates FAST_NEAREST_MAINLOOP_INT with that adapter, a uint8_t
 * mask type and both have_mask and mask_is_solid set to FALSE.
 */
#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t,	\
			      repeat_mode)							\
    static force_inline void									\
    scanline_func##scale_func_name##_wrapper (							\
		    const uint8_t    *mask,							\
		    dst_type_t       *dst,							\
		    const src_type_t *src,							\
		    int32_t          w,								\
		    pixman_fixed_t   vx,							\
		    pixman_fixed_t   unit_x,							\
		    pixman_fixed_t   max_vx,							\
		    pixman_bool_t    fully_transparent_src)					\
    {												\
	scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src);			\
    }												\
    FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper,	\
			       src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
586
 
587
/* Forwards to FAST_NEAREST_MAINLOOP_NOMASK with an underscore prepended to
 * 'scale_func_name'; all other arguments are passed through unchanged.
 */
#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t,		\
			      repeat_mode)							\
	FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t,		\
			      dst_type_t, repeat_mode)
591
 
592
/*
 * Convenience template: defines the scanline function
 * scaled_nearest_scanline_<scale_func_name>_<OP> with FAST_NEAREST_SCANLINE
 * and the matching mask-less main loop with FAST_NEAREST_MAINLOOP_NOMASK
 * (whose name suffix is _<scale_func_name>_<OP>).
 */
#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT,				\
		     src_type_t, dst_type_t, OP, repeat_mode)				\
    FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,	\
			  SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t,		\
			  OP, repeat_mode)						\
    FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP,			\
			  scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,	\
			  src_type_t, dst_type_t, repeat_mode)
600
 
601
 
602
/* Source image flags shared by every nearest scaling fast path entry below */
#define SCALED_NEAREST_FLAGS						\
    (FAST_PATH_SCALE_TRANSFORM	|					\
     FAST_PATH_NO_ALPHA_MAP	|					\
     FAST_PATH_NEAREST_FILTER	|					\
     FAST_PATH_NO_ACCESSORS	|					\
     FAST_PATH_NARROW_FORMAT)
608
 
609
/*
 * Fast path table entries for nearest scaling without a mask.  Each macro
 * expands to one brace-enclosed initializer:
 *
 *   { operator, source format, source flags, PIXMAN_null, 0,
 *     destination format, FAST_PATH_STD_DEST_FLAGS, composite function }
 *
 *  op   - operator name, pasted onto PIXMAN_OP_
 *  s, d - source and destination format names, pasted onto PIXMAN_
 *  func - infix of the composite function, which is named
 *         fast_composite_scaled_nearest_<func>_<variant>_<op>
 *
 * The NORMAL, PAD and NONE variants additionally require the matching
 * repeat flag plus FAST_PATH_X_UNIT_POSITIVE; the COVER variant requires
 * FAST_PATH_SAMPLES_COVER_CLIP_NEAREST instead.
 */
#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func)			\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS		|				\
	 FAST_PATH_NORMAL_REPEAT	|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_null, 0,							\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,	\
    }

#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func)			\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS		|				\
	 FAST_PATH_PAD_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_null, 0,							\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,	\
    }

#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func)			\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS		|				\
	 FAST_PATH_NONE_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_null, 0,							\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,	\
    }

#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func)			\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,    \
	PIXMAN_null, 0,							\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
    }
650
 
651
/*
 * Fast path table entries for nearest scaling with an a8 mask.  Each macro
 * expands to one brace-enclosed initializer:
 *
 *   { operator, source format, source flags,
 *     PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),
 *     destination format, FAST_PATH_STD_DEST_FLAGS, composite function }
 *
 *  op   - operator name, pasted onto PIXMAN_OP_
 *  s, d - source and destination format names, pasted onto PIXMAN_
 *  func - infix of the composite function, which is named
 *         fast_composite_scaled_nearest_<func>_<variant>_<op>
 *
 * The NORMAL, PAD and NONE variants additionally require the matching
 * repeat flag plus FAST_PATH_X_UNIT_POSITIVE; the COVER variant requires
 * FAST_PATH_SAMPLES_COVER_CLIP_NEAREST instead.
 */
#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS		|				\
	 FAST_PATH_NORMAL_REPEAT	|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,	\
    }

#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS		|				\
	 FAST_PATH_PAD_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,	\
    }

#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS		|				\
	 FAST_PATH_NONE_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,	\
    }

#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,	\
	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
    }
692
 
693
/*
 * Fast path table entries for nearest scaling with a solid mask.  Each
 * macro expands to one brace-enclosed initializer:
 *
 *   { operator, source format, source flags,
 *     PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),
 *     destination format, FAST_PATH_STD_DEST_FLAGS, composite function }
 *
 *  op   - operator name, pasted onto PIXMAN_OP_
 *  s, d - source and destination format names, pasted onto PIXMAN_
 *  func - infix of the composite function, which is named
 *         fast_composite_scaled_nearest_<func>_<variant>_<op>
 *
 * The NORMAL, PAD and NONE variants additionally require the matching
 * repeat flag plus FAST_PATH_X_UNIT_POSITIVE; the COVER variant requires
 * FAST_PATH_SAMPLES_COVER_CLIP_NEAREST instead.
 */
#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS		|				\
	 FAST_PATH_NORMAL_REPEAT	|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,	\
    }

#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS		|				\
	 FAST_PATH_PAD_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,	\
    }

#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_NEAREST_FLAGS		|				\
	 FAST_PATH_NONE_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,	\
    }

#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST,	\
	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
    }
734
 
735
/* Prefer the use of 'cover' variant, because it is faster
 *
 * Each macro below expands to a comma separated list of fast path entries,
 * with the COVER entry listed first.  The a8 mask and solid mask lists
 * contain the COVER, NONE and PAD entries only (no NORMAL entry).
 */
#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func)				\
    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func),			\
    SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func),			\
    SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func),				\
    SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)

#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func)			\
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func),		\
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func),		\
    SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)

#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func)		\
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),		\
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),		\
    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
751
 
752
/*****************************************************************************/
753
 
754
/*
755
 * Identify 5 zones in each scanline for bilinear scaling. Depending on
756
 * whether 2 pixels to be interpolated are fetched from the image itself,
757
 * from the padding area around it or from both image and padding area.
758
 */
759
static force_inline void
760
bilinear_pad_repeat_get_scanline_bounds (int32_t         source_image_width,
761
					 pixman_fixed_t  vx,
762
					 pixman_fixed_t  unit_x,
763
					 int32_t *       left_pad,
764
					 int32_t *       left_tz,
765
					 int32_t *       width,
766
					 int32_t *       right_tz,
767
					 int32_t *       right_pad)
768
{
769
	int width1 = *width, left_pad1, right_pad1;
770
	int width2 = *width, left_pad2, right_pad2;
771
 
772
	pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
773
					&width1, &left_pad1, &right_pad1);
774
	pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
775
					unit_x, &width2, &left_pad2, &right_pad2);
776
 
777
	*left_pad = left_pad2;
778
	*left_tz = left_pad1 - left_pad2;
779
	*right_tz = right_pad2 - right_pad1;
780
	*right_pad = right_pad1;
781
	*width -= *left_pad + *left_tz + *right_tz + *right_pad;
782
}
783
 
784
/*
 * Main loop template for single pass bilinear scaling. It needs to be
 * provided with 'scanline_func' which should do the compositing operation.
 * The needed function has the following prototype:
 *
 *	scanline_func (dst_type_t *       dst,
 *		       const mask_type_t * mask,
 *		       const src_type_t * src_top,
 *		       const src_type_t * src_bottom,
 *		       int32_t            width,
 *		       int                weight_top,
 *		       int                weight_bottom,
 *		       pixman_fixed_t     vx,
 *		       pixman_fixed_t     unit_x,
 *		       pixman_fixed_t     max_vx,
 *		       pixman_bool_t      zero_src)
 *
 * Where:
 *  dst                 - destination scanline buffer for storing results
 *  mask                - mask buffer (or single value for solid mask)
 *  src_top, src_bottom - two source scanlines
 *  width               - number of pixels to process
 *  weight_top          - weight of the top row for interpolation
 *  weight_bottom       - weight of the bottom row for interpolation
 *  vx                  - initial position for fetching the first pair of
 *                        pixels from the source buffer
 *  unit_x              - position increment needed to move to the next pair
 *                        of pixels
 *  max_vx              - image size as a fixed point value, can be used for
 *                        implementing NORMAL repeat (when it is supported)
 *  zero_src            - boolean hint variable, which is set to TRUE when
 *                        all source pixels are fetched from zero padding
 *                        zone for NONE repeat
 *
 * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to
 *       BILINEAR_INTERPOLATION_RANGE, but sometimes it may be less than that
 *       for NONE repeat when handling fuzzy antialiased top or bottom image
 *       edges. Also both top and bottom weight variables are guaranteed to
 *       have value, which is less than BILINEAR_INTERPOLATION_RANGE.
 *       For example, the weights can fit into unsigned byte or be used
 *       with 8-bit SIMD multiplication instructions for 8-bit interpolation
 *       precision.
 */
827
/*
 * Generate the static function
 * 'fast_composite_scaled_bilinear ## scale_func_name', which walks over the
 * destination rows and calls 'scanline_func' on each horizontal piece of a
 * row.
 *
 *  scale_func_name - suffix pasted onto the generated function name
 *  scanline_func   - scanline worker, called as
 *                    scanline_func (dst, mask, src_top, src_bottom, width,
 *                                   weight_top, weight_bottom, vx, unit_x,
 *                                   max_vx, zero_src)
 *  src_type_t,
 *  mask_type_t,
 *  dst_type_t      - pixel types of the source, mask and destination
 *  repeat_mode     - pasted onto PIXMAN_REPEAT_; PAD, NONE and NORMAL get
 *                    dedicated handling, anything else (e.g. COVER) takes
 *                    the plain one-call-per-row path
 *  flags           - FLAG_HAVE_SOLID_MASK and/or FLAG_HAVE_NON_SOLID_MASK
 *
 * Names such as 'width', 'height', 'src_x' or 'dest_image' are not declared
 * here; presumably they come from PIXMAN_COMPOSITE_ARGS (see
 * pixman-private.h).
 */
#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
				  dst_type_t, repeat_mode, flags)				\
static void											\
fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,		\
						   pixman_composite_info_t *info)		\
{												\
    PIXMAN_COMPOSITE_ARGS (info);								\
    dst_type_t *dst_line;									\
    mask_type_t *mask_line;									\
    src_type_t *src_first_line;									\
    int       y1, y2;										\
    pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */		\
    pixman_vector_t v;										\
    pixman_fixed_t vx, vy;									\
    pixman_fixed_t unit_x, unit_y;								\
    int32_t left_pad, left_tz, right_tz, right_pad;						\
												\
    dst_type_t *dst;										\
    mask_type_t solid_mask;									\
    /* points at 'solid_mask' unless a per-pixel mask row is selected below */			\
    const mask_type_t *mask = &solid_mask;							\
    int src_stride, mask_stride, dst_stride;							\
												\
    int src_width;										\
    pixman_fixed_t src_width_fixed;								\
    int max_x;											\
    pixman_bool_t need_src_extension;								\
												\
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1);	\
    if (flags & FLAG_HAVE_SOLID_MASK)								\
    {												\
	solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format);	\
	mask_stride = 0;									\
    }												\
    else if (flags & FLAG_HAVE_NON_SOLID_MASK)							\
    {												\
	PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,				\
			       mask_stride, mask_line, 1);					\
    }												\
												\
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be			\
     * transformed from destination space to source space */					\
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);		\
												\
    /* reference point is the center of the pixel */						\
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;				\
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;				\
    v.vector[2] = pixman_fixed_1;								\
												\
    if (!pixman_transform_point_3d (src_image->common.transform, &v))				\
	return;											\
												\
    /* per-pixel source steps are taken from the diagonal of the transform */			\
    unit_x = src_image->common.transform->matrix[0][0];						\
    unit_y = src_image->common.transform->matrix[1][1];						\
												\
    /* undo the half pixel offset that was added before the transform */			\
    v.vector[0] -= pixman_fixed_1 / 2;								\
    v.vector[1] -= pixman_fixed_1 / 2;								\
												\
    vy = v.vector[1];										\
												\
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||					\
	PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)					\
    {												\
	/* split the row into padding, transition and in-image parts; 'width' is */		\
	/* updated to the length of the in-image part */					\
	bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,	\
					&left_pad, &left_tz, &width, &right_tz, &right_pad);	\
	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
	{											\
	    /* PAD repeat does not need special handling for 'transition zones' and */		\
	    /* they can be combined with 'padding zones' safely */				\
	    left_pad += left_tz;								\
	    right_pad += right_tz;								\
	    left_tz = right_tz = 0;								\
	}											\
	v.vector[0] += left_pad * unit_x;							\
    }												\
												\
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)					\
    {												\
	vx = v.vector[0];									\
	repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width));		\
	max_x = pixman_fixed_to_int (vx + (width - 1) * (int64_t)unit_x) + 1;			\
												\
	if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH)					\
	{											\
	    /* narrow source: work on a replicated copy that is a whole number of */		\
	    /* source widths long; the copy itself is built for every row below */		\
	    src_width = 0;									\
												\
	    while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x)			\
		src_width += src_image->bits.width;						\
												\
	    need_src_extension = TRUE;								\
	}											\
	else											\
	{											\
	    src_width = src_image->bits.width;							\
	    need_src_extension = FALSE;								\
	}											\
												\
	src_width_fixed = pixman_int_to_fixed (src_width);					\
    }												\
												\
    while (--height >= 0)									\
    {												\
	int weight1, weight2;									\
	dst = dst_line;										\
	dst_line += dst_stride;									\
	vx = v.vector[0];									\
	if (flags & FLAG_HAVE_NON_SOLID_MASK)							\
	{											\
	    mask = mask_line;									\
	    mask_line += mask_stride;								\
	}											\
												\
	y1 = pixman_fixed_to_int (vy);								\
	weight2 = pixman_fixed_to_bilinear_weight (vy);						\
	if (weight2)										\
	{											\
	    /* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */	\
	    y2 = y1 + 1;									\
	    weight1 = BILINEAR_INTERPOLATION_RANGE - weight2;					\
	}											\
	else											\
	{											\
	    /* set both top and bottom row to the same scanline and tweak weights */		\
	    y2 = y1;										\
	    weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2;				\
	}											\
	vy += unit_y;										\
	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
	{											\
	    src_type_t *src1, *src2;								\
	    src_type_t buf1[2];									\
	    src_type_t buf2[2];									\
	    repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);				\
	    repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);				\
	    src1 = src_first_line + src_stride * y1;						\
	    src2 = src_first_line + src_stride * y2;						\
												\
	    if (left_pad > 0)									\
	    {											\
		/* left padding: replicate the leftmost pixel of both rows */			\
		buf1[0] = buf1[1] = src1[0];							\
		buf2[0] = buf2[1] = src2[0];							\
		scanline_func (dst, mask,							\
			       buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE);		\
		dst += left_pad;								\
		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
		    mask += left_pad;								\
	    }											\
	    if (width > 0)									\
	    {											\
		scanline_func (dst, mask,							\
			       src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);	\
		dst += width;									\
		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
		    mask += width;								\
	    }											\
	    if (right_pad > 0)									\
	    {											\
		/* right padding: replicate the rightmost pixel of both rows */			\
		buf1[0] = buf1[1] = src1[src_image->bits.width - 1];				\
		buf2[0] = buf2[1] = src2[src_image->bits.width - 1];				\
		scanline_func (dst, mask,							\
			       buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE);	\
	    }											\
	}											\
	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)				\
	{											\
	    src_type_t *src1, *src2;								\
	    src_type_t buf1[2];									\
	    src_type_t buf2[2];									\
	    /* handle top/bottom zero padding by just setting weights to 0 if needed */		\
	    if (y1 < 0)										\
	    {											\
		weight1 = 0;									\
		y1 = 0;										\
	    }											\
	    if (y1 >= src_image->bits.height)							\
	    {											\
		weight1 = 0;									\
		y1 = src_image->bits.height - 1;						\
	    }											\
	    if (y2 < 0)										\
	    {											\
		weight2 = 0;									\
		y2 = 0;										\
	    }											\
	    if (y2 >= src_image->bits.height)							\
	    {											\
		weight2 = 0;									\
		y2 = src_image->bits.height - 1;						\
	    }											\
	    src1 = src_first_line + src_stride * y1;						\
	    src2 = src_first_line + src_stride * y2;						\
												\
	    if (left_pad > 0)									\
	    {											\
		/* left padding: all-zero source, flagged to the worker via zero_src */	\
		buf1[0] = buf1[1] = 0;								\
		buf2[0] = buf2[1] = 0;								\
		scanline_func (dst, mask,							\
			       buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE);		\
		dst += left_pad;								\
		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
		    mask += left_pad;								\
	    }											\
	    if (left_tz > 0)									\
	    {											\
		/* left transition zone: blend between zero and the first source pixel */	\
		buf1[0] = 0;									\
		buf1[1] = src1[0];								\
		buf2[0] = 0;									\
		buf2[1] = src2[0];								\
		scanline_func (dst, mask,							\
			       buf1, buf2, left_tz, weight1, weight2,				\
			       pixman_fixed_frac (vx), unit_x, 0, FALSE);			\
		dst += left_tz;									\
		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
		    mask += left_tz;								\
		vx += left_tz * unit_x;								\
	    }											\
	    if (width > 0)									\
	    {											\
		scanline_func (dst, mask,							\
			       src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);	\
		dst += width;									\
		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
		    mask += width;								\
		vx += width * unit_x;								\
	    }											\
	    if (right_tz > 0)									\
	    {											\
		/* right transition zone: blend between the last source pixel and zero */	\
		buf1[0] = src1[src_image->bits.width - 1];					\
		buf1[1] = 0;									\
		buf2[0] = src2[src_image->bits.width - 1];					\
		buf2[1] = 0;									\
		scanline_func (dst, mask,							\
			       buf1, buf2, right_tz, weight1, weight2,				\
			       pixman_fixed_frac (vx), unit_x, 0, FALSE);			\
		dst += right_tz;								\
		if (flags & FLAG_HAVE_NON_SOLID_MASK)						\
		    mask += right_tz;								\
	    }											\
	    if (right_pad > 0)									\
	    {											\
		buf1[0] = buf1[1] = 0;								\
		buf2[0] = buf2[1] = 0;								\
		scanline_func (dst, mask,							\
			       buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE);		\
	    }											\
	}											\
	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL)				\
	{											\
	    int32_t	    num_pixels;								\
	    int32_t	    width_remain;							\
	    src_type_t *    src_line_top;							\
	    src_type_t *    src_line_bottom;							\
	    src_type_t	    buf1[2];								\
	    src_type_t	    buf2[2];								\
	    src_type_t	    extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2];			\
	    src_type_t	    extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2];			\
	    int		    i, j;								\
												\
	    repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height);				\
	    repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height);				\
	    src_line_top = src_first_line + src_stride * y1;					\
	    src_line_bottom = src_first_line + src_stride * y2;					\
												\
	    if (need_src_extension)								\
	    {											\
		/* Tile both source rows into the local buffers until 'src_width' */		\
		/* pixels are filled. 'i' is advanced by the inner loop only; */		\
		/* src_width is a multiple of the source width and stays below */		\
		/* REPEAT_NORMAL_MIN_WIDTH * 2, so the buffers cannot overflow. */		\
		for (i=0; i<src_width;)								\
		{										\
		    for (j=0; j<src_image->bits.width; j++, i++)				\
		    {										\
			extended_src_line0[i] = src_line_top[j];				\
			extended_src_line1[i] = src_line_bottom[j];				\
		    }										\
		}										\
												\
		src_line_top = &extended_src_line0[0];						\
		src_line_bottom = &extended_src_line1[0];					\
	    }											\
												\
	    /* Top & Bottom wrap around buffer */						\
	    buf1[0] = src_line_top[src_width - 1];						\
	    buf1[1] = src_line_top[0];								\
	    buf2[0] = src_line_bottom[src_width - 1];						\
	    buf2[1] = src_line_bottom[0];							\
												\
	    width_remain = width;								\
												\
	    while (width_remain > 0)								\
	    {											\
		/* We use src_width_fixed because it can make vx in original source range */	\
		repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);				\
												\
		/* Wrap around part */								\
		if (pixman_fixed_to_int (vx) == src_width - 1)					\
		{										\
		    /* for positive unit_x							\
		     * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed		\
		     *										\
		     * vx is in range [0, src_width_fixed - pixman_fixed_e]			\
		     * So we are safe from overflow.						\
		     */										\
		    num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1;	\
												\
		    if (num_pixels > width_remain)						\
			num_pixels = width_remain;						\
												\
		    scanline_func (dst, mask, buf1, buf2, num_pixels,				\
				   weight1, weight2, pixman_fixed_frac(vx),			\
				   unit_x, src_width_fixed, FALSE);				\
												\
		    width_remain -= num_pixels;							\
		    vx += num_pixels * unit_x;							\
		    dst += num_pixels;								\
												\
		    if (flags & FLAG_HAVE_NON_SOLID_MASK)					\
			mask += num_pixels;							\
												\
		    repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed);			\
		}										\
												\
		/* Normal scanline composite */							\
		if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0)		\
		{										\
		    /* for positive unit_x							\
		     * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1)	\
		     *										\
		     * vx is in range [0, src_width_fixed - pixman_fixed_e]			\
		     * So we are safe from overflow here.					\
		     */										\
		    num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e)	\
				  / unit_x) + 1;						\
												\
		    if (num_pixels > width_remain)						\
			num_pixels = width_remain;						\
												\
		    scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels,	\
				   weight1, weight2, vx, unit_x, src_width_fixed, FALSE);	\
												\
		    width_remain -= num_pixels;							\
		    vx += num_pixels * unit_x;							\
		    dst += num_pixels;								\
												\
		    if (flags & FLAG_HAVE_NON_SOLID_MASK)					\
		        mask += num_pixels;							\
		}										\
	    }											\
	}											\
	else											\
	{											\
	    /* any other repeat mode: one call per row, no clamping of y1/y2 or vx here */	\
	    scanline_func (dst, mask, src_first_line + src_stride * y1,				\
			   src_first_line + src_stride * y2, width,				\
			   weight1, weight2, vx, unit_x, max_vx, FALSE);			\
	}											\
    }												\
}
1180
 
1181
/*
 * A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764
 *
 * Public entry point for instantiating a bilinear main loop: prepends '_'
 * to 'scale_func_name' and forwards every other argument unchanged to
 * FAST_BILINEAR_MAINLOOP_INT.
 */
#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
				  dst_type_t, repeat_mode, flags)				\
	FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
				  dst_type_t, repeat_mode, flags)
1186
 
1187
/* Source image flags shared by every scaled bilinear fast path entry below. */
#define SCALED_BILINEAR_FLAGS						\
    (FAST_PATH_SCALE_TRANSFORM	|					\
     FAST_PATH_NO_ALPHA_MAP	|					\
     FAST_PATH_BILINEAR_FILTER	|					\
     FAST_PATH_NO_ACCESSORS	|					\
     FAST_PATH_NARROW_FORMAT)
1193
 
1194
/*
 * Brace-enclosed initializer (presumably one fast path table entry) for
 * operator 'op', source format 's' and destination format 'd' with no mask
 * and PAD repeat. It refers to the function
 * fast_composite_scaled_bilinear_<func>_pad_<op>.
 */
#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func)			\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS		|				\
	 FAST_PATH_PAD_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_null, 0,							\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
    }
1204
 
1205
/*
 * Brace-enclosed initializer (presumably one fast path table entry) for
 * operator 'op', source format 's' and destination format 'd' with no mask
 * and NONE repeat. It refers to the function
 * fast_composite_scaled_bilinear_<func>_none_<op>.
 */
#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func)			\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS		|				\
	 FAST_PATH_NONE_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_null, 0,							\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
    }
1215
 
1216
/*
 * Brace-enclosed initializer (presumably one fast path table entry) for
 * operator 'op', source format 's' and destination format 'd' with no mask,
 * selected by FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR instead of a repeat
 * flag. It refers to the function
 * fast_composite_scaled_bilinear_<func>_cover_<op>.
 */
#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func)			\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),\
	PIXMAN_null, 0,							\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
    }
1224
 
1225
/*
 * Brace-enclosed initializer (presumably one fast path table entry) for
 * operator 'op', source format 's' and destination format 'd' with no mask
 * and NORMAL repeat. It refers to the function
 * fast_composite_scaled_bilinear_<func>_normal_<op>.
 */
#define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func)			\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS		|				\
	 FAST_PATH_NORMAL_REPEAT	|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_null, 0,							\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,	\
    }
1235
 
1236
/*
 * Brace-enclosed initializer (presumably one fast path table entry) for
 * operator 'op', source format 's' and destination format 'd' with an a8
 * mask (unified alpha) and PAD repeat. It refers to the function
 * fast_composite_scaled_bilinear_<func>_pad_<op>.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS		|				\
	 FAST_PATH_PAD_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
    }
1246
 
1247
/*
 * Brace-enclosed initializer (presumably one fast path table entry) for
 * operator 'op', source format 's' and destination format 'd' with an a8
 * mask (unified alpha) and NONE repeat. It refers to the function
 * fast_composite_scaled_bilinear_<func>_none_<op>.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS		|				\
	 FAST_PATH_NONE_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
    }
1257
 
1258
/*
 * Brace-enclosed initializer (presumably one fast path table entry) for
 * operator 'op', source format 's' and destination format 'd' with an a8
 * mask (unified alpha), selected by FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR
 * instead of a repeat flag. It refers to the function
 * fast_composite_scaled_bilinear_<func>_cover_<op>.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),\
	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
    }
1266
 
1267
/*
 * Brace-enclosed initializer (presumably one fast path table entry) for
 * operator 'op', source format 's' and destination format 'd' with an a8
 * mask (unified alpha) and NORMAL repeat. It refers to the function
 * fast_composite_scaled_bilinear_<func>_normal_<op>.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS		|				\
	 FAST_PATH_NORMAL_REPEAT	|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,	\
    }
1277
 
1278
/*
 * Brace-enclosed initializer (presumably one fast path table entry) for
 * operator 'op', source format 's' and destination format 'd' with a solid
 * mask (unified alpha) and PAD repeat. It refers to the function
 * fast_composite_scaled_bilinear_<func>_pad_<op>.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS		|				\
	 FAST_PATH_PAD_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
    }
1288
 
1289
/*
 * Brace-enclosed initializer (presumably one fast path table entry) for
 * operator 'op', source format 's' and destination format 'd' with a solid
 * mask (unified alpha) and NONE repeat. It refers to the function
 * fast_composite_scaled_bilinear_<func>_none_<op>.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS		|				\
	 FAST_PATH_NONE_REPEAT		|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
    }
1299
 
1300
/*
 * Brace-enclosed initializer (presumably one fast path table entry) for
 * operator 'op', source format 's' and destination format 'd' with a solid
 * mask (unified alpha), selected by FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR
 * instead of a repeat flag. It refers to the function
 * fast_composite_scaled_bilinear_<func>_cover_<op>.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)		\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),\
	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
    }
1308
 
1309
/*
 * Brace-enclosed initializer (presumably one fast path table entry) for
 * operator 'op', source format 's' and destination format 'd' with a solid
 * mask (unified alpha) and NORMAL repeat. It refers to the function
 * fast_composite_scaled_bilinear_<func>_normal_<op>.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func)	\
    {   PIXMAN_OP_ ## op,						\
	PIXMAN_ ## s,							\
	(SCALED_BILINEAR_FLAGS		|				\
	 FAST_PATH_NORMAL_REPEAT	|				\
	 FAST_PATH_X_UNIT_POSITIVE),					\
	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
	fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op,	\
    }
1319
 
1320
/*
 * Expands to the four unmasked entries for one (op, s, d, func) combination,
 * separated by commas, in the order COVER, NONE, PAD, NORMAL.
 *
 * Prefer the use of 'cover' variant, because it is faster
 */
#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func)				\
    SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func),			\
    SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func),			\
    SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func),			\
    SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func)
1326
 
1327
/*
 * Expands to the four a8-mask entries for one (op, s, d, func) combination,
 * separated by commas, in the order COVER, NONE, PAD, NORMAL.
 */
#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func)			\
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func),		\
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func),		\
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func),		\
    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func)
1332
 
1333
/*
 * Expands to the four solid-mask entries for one (op, s, d, func)
 * combination, separated by commas, in the order COVER, NONE, PAD, NORMAL.
 */
#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func)		\
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),		\
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),		\
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func),		\
    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func)
1338
 
1339
#endif