Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
1891 serge 1
/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
2
/*
3
 * Copyright © 2000 SuSE, Inc.
4
 * Copyright © 2007 Red Hat, Inc.
5
 *
6
 * Permission to use, copy, modify, distribute, and sell this software and its
7
 * documentation for any purpose is hereby granted without fee, provided that
8
 * the above copyright notice appear in all copies and that both that
9
 * copyright notice and this permission notice appear in supporting
10
 * documentation, and that the name of SuSE not be used in advertising or
11
 * publicity pertaining to distribution of the software without specific,
12
 * written prior permission.  SuSE makes no representations about the
13
 * suitability of this software for any purpose.  It is provided "as is"
14
 * without express or implied warranty.
15
 *
16
 * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
17
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
18
 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
20
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
21
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
22
 *
23
 * Author:  Keith Packard, SuSE, Inc.
24
 */
25
 
26
#ifndef PIXMAN_FAST_PATH_H__
27
#define PIXMAN_FAST_PATH_H__
28
 
29
#include "pixman-private.h"
30
 
31
/* Pseudo repeat mode selected by passing COVER as the 'repeat_mode' argument
 * of the FAST_NEAREST macros. It is meant to compare unequal to the NORMAL,
 * PAD and NONE modes tested there, so the plain path with no repeat handling
 * is taken. Parenthesized so the negative literal stays a single operand
 * wherever the macro is expanded. */
#define PIXMAN_REPEAT_COVER (-1)
32
 
33
static force_inline pixman_bool_t
34
repeat (pixman_repeat_t repeat, int *c, int size)
35
{
36
    if (repeat == PIXMAN_REPEAT_NONE)
37
    {
38
	if (*c < 0 || *c >= size)
39
	    return FALSE;
40
    }
41
    else if (repeat == PIXMAN_REPEAT_NORMAL)
42
    {
43
	while (*c >= size)
44
	    *c -= size;
45
	while (*c < 0)
46
	    *c += size;
47
    }
48
    else if (repeat == PIXMAN_REPEAT_PAD)
49
    {
50
	*c = CLIP (*c, 0, size - 1);
51
    }
52
    else /* REFLECT */
53
    {
54
	*c = MOD (*c, size * 2);
55
	if (*c >= size)
56
	    *c = size * 2 - *c - 1;
57
    }
58
    return TRUE;
59
}
60
 
61
/*
62
 * For each scanline fetched from source image with PAD repeat:
63
 * - calculate how many pixels need to be padded on the left side
64
 * - calculate how many pixels need to be padded on the right side
65
 * - update width to only count pixels which are fetched from the image
66
 * All this information is returned via 'width', 'left_pad', 'right_pad'
67
 * arguments. The code is assuming that 'unit_x' is positive.
68
 *
69
 * Note: 64-bit math is used in order to avoid potential overflows, which
70
 *       is probably excessive in many cases. This particular function
71
 *       may need its own correctness test and performance tuning.
72
 */
73
static force_inline void
74
pad_repeat_get_scanline_bounds (int32_t         source_image_width,
75
				pixman_fixed_t  vx,
76
				pixman_fixed_t  unit_x,
77
				int32_t *       width,
78
				int32_t *       left_pad,
79
				int32_t *       right_pad)
80
{
81
    int64_t max_vx = (int64_t) source_image_width << 16;
82
    int64_t tmp;
83
    if (vx < 0)
84
    {
85
	tmp = ((int64_t) unit_x - 1 - vx) / unit_x;
86
	if (tmp > *width)
87
	{
88
	    *left_pad = *width;
89
	    *width = 0;
90
	}
91
	else
92
	{
93
	    *left_pad = (int32_t) tmp;
94
	    *width -= (int32_t) tmp;
95
	}
96
    }
97
    else
98
    {
99
	*left_pad = 0;
100
    }
101
    tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad;
102
    if (tmp < 0)
103
    {
104
	*right_pad = *width;
105
	*width = 0;
106
    }
107
    else if (tmp >= *width)
108
    {
109
	*right_pad = 0;
110
    }
111
    else
112
    {
113
	*right_pad = *width - (int32_t) tmp;
114
	*width = (int32_t) tmp;
115
    }
116
}
117
 
118
/* A macroified version of specialized nearest scalers for some
 * common 8888 and 565 formats. It supports SRC and OVER ops.
 *
 * There are several repeat versions: ones that handle NORMAL, PAD and
 * NONE repeat, and a 'cover' one without repeat handling that only works
 * if the src region used is completely covered by the pre-repeated
 * source samples.
 *
 * The loops are unrolled to process two pixels per iteration for better
 * performance on most CPU architectures (superscalar processors
 * can issue several operations simultaneously, other processors can hide
 * instruction latencies by pipelining operations). Unrolling more
 * does not make much sense because the compiler will start running out
 * of spare registers soon.
 */
132
 
133
/* Alpha of an 8888 pixel: the bits above bit 23 of 's'. Pasted in by
 * FAST_NEAREST_SCANLINE as GET_<SRC_FORMAT>_ALPHA for the OVER operator. */
#define GET_8888_ALPHA(s) ((s) >> 24)
134
 /* This is not actually used since we don't have an OVER with
135
    565 source, but it is needed to build. */
136
/* Yields the constant 0xff whatever 's' is; 0xff is the alpha value the
 * OVER path in FAST_NEAREST_SCANLINE copies without blending. */
#define GET_0565_ALPHA(s) 0xff
137
 
138
/*
 * FAST_NEAREST_SCANLINE defines a force_inline function called
 * 'scanline_func_name' that writes 'w' nearest-neighbour sampled pixels
 * to 'dst'.
 *
 *   SRC_FORMAT, DST_FORMAT - format tags pasted into the CONVERT_x_TO_y
 *                            and GET_x_ALPHA macro names
 *   src_type_t, dst_type_t - pixel storage types of 'src' and 'dst'
 *   OP                     - SRC or OVER; any other operator makes the
 *                            generated function call abort()
 *   repeat_mode            - when NORMAL, the position is wrapped back
 *                            below 'max_vx' after every step
 *
 * In the generated function 'vx' is the fixed point source position of the
 * first pixel and 'unit_x' the step per destination pixel; 'src' is indexed
 * with vx >> 16. For OVER, a source pixel whose alpha is 0xff is copied, a
 * source pixel whose value is zero leaves the destination untouched, and
 * everything else is blended through 8888.
 */
#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
                              src_type_t, dst_type_t, OP, repeat_mode) \
static force_inline void \
scanline_func_name (dst_type_t     *dst, \
                    src_type_t     *src, \
                    int32_t         w, \
                    pixman_fixed_t  vx, \
                    pixman_fixed_t  unit_x, \
                    pixman_fixed_t  max_vx) \
{ \
    uint32_t   blend; \
    src_type_t pix_a, pix_b; \
    uint8_t    alpha_a, alpha_b; \
    \
    if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
        abort(); \
    \
    /* Unrolled body: two destination pixels per iteration */ \
    while ((w -= 2) >= 0) \
    { \
        pix_a = src[vx >> 16]; \
        vx += unit_x; \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
        { \
            /* This works because we know that unit_x is positive */ \
            while (vx >= max_vx) \
                vx -= max_vx; \
        } \
        \
        pix_b = src[vx >> 16]; \
        vx += unit_x; \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
        { \
            /* This works because we know that unit_x is positive */ \
            while (vx >= max_vx) \
                vx -= max_vx; \
        } \
        \
        if (PIXMAN_OP_ ## OP == PIXMAN_OP_SRC) \
        { \
            dst[0] = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (pix_a); \
            dst[1] = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (pix_b); \
        } \
        else /* PIXMAN_OP_OVER */ \
        { \
            alpha_a = GET_ ## SRC_FORMAT ## _ALPHA(pix_a); \
            alpha_b = GET_ ## SRC_FORMAT ## _ALPHA(pix_b); \
            \
            if (alpha_a == 0xff) \
            { \
                dst[0] = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (pix_a); \
            } \
            else if (pix_a) \
            { \
                blend = CONVERT_ ## DST_FORMAT ## _TO_8888 (dst[0]); \
                pix_a = CONVERT_ ## SRC_FORMAT ## _TO_8888 (pix_a); \
                alpha_a ^= 0xff; \
                UN8x4_MUL_UN8_ADD_UN8x4 (blend, alpha_a, pix_a); \
                dst[0] = CONVERT_8888_TO_ ## DST_FORMAT (blend); \
            } \
            \
            if (alpha_b == 0xff) \
            { \
                dst[1] = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (pix_b); \
            } \
            else if (pix_b) \
            { \
                blend = CONVERT_ ## DST_FORMAT ## _TO_8888 (dst[1]); \
                pix_b = CONVERT_ ## SRC_FORMAT ## _TO_8888 (pix_b); \
                alpha_b ^= 0xff; \
                UN8x4_MUL_UN8_ADD_UN8x4 (blend, alpha_b, pix_b); \
                dst[1] = CONVERT_8888_TO_ ## DST_FORMAT (blend); \
            } \
        } \
        dst += 2; \
    } \
    \
    /* Odd pixel count: one pixel is still pending */ \
    if (w & 1) \
    { \
        pix_a = src[vx >> 16]; \
        \
        if (PIXMAN_OP_ ## OP == PIXMAN_OP_SRC) \
        { \
            *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (pix_a); \
        } \
        else /* PIXMAN_OP_OVER */ \
        { \
            alpha_a = GET_ ## SRC_FORMAT ## _ALPHA(pix_a); \
            \
            if (alpha_a == 0xff) \
            { \
                *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (pix_a); \
            } \
            else if (pix_a) \
            { \
                blend = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
                pix_a = CONVERT_ ## SRC_FORMAT ## _TO_8888 (pix_a); \
                alpha_a ^= 0xff; \
                UN8x4_MUL_UN8_ADD_UN8x4 (blend, alpha_a, pix_a); \
                *dst = CONVERT_8888_TO_ ## DST_FORMAT (blend); \
            } \
            dst++; \
        } \
    } \
}
247
 
248
/*
 * FAST_NEAREST_MAINLOOP_INT defines the static composite function
 * fast_composite_scaled_nearest<scale_func_name>, which walks the
 * destination rows and calls 'scanline_func' for each of them.
 *
 *   scale_func_name - suffix pasted onto "fast_composite_scaled_nearest"
 *   scanline_func   - called as (dst, src, w, vx, unit_x, max_vx)
 *   src_type_t      - pixel storage type of the source image
 *   dst_type_t      - pixel storage type of the destination image
 *   repeat_mode     - suffix pasted onto "PIXMAN_REPEAT_":
 *                     NORMAL wraps vx/vy inside the source size,
 *                     PAD    splits each row into left pad / image / right
 *                            pad runs and clamps the row index,
 *                     NONE   does the same split but feeds a zero pixel for
 *                            the padding and for rows outside the image,
 *                     any other value (e.g. COVER) samples the source row
 *                     directly without any bounds handling.
 *
 * The start position is the transformed centre of pixel (src_x, src_y)
 * minus pixman_fixed_e; the per-pixel steps unit_x and unit_y are read from
 * the diagonal of the source image transform. The generated function
 * returns without drawing if the point transform fails.
 */
#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, dst_type_t, \
                                  repeat_mode) \
static void \
fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp, \
                                                   pixman_op_t              op, \
                                                   pixman_image_t *         src_image, \
                                                   pixman_image_t *         mask_image, \
                                                   pixman_image_t *         dst_image, \
                                                   int32_t                  src_x, \
                                                   int32_t                  src_y, \
                                                   int32_t                  mask_x, \
                                                   int32_t                  mask_y, \
                                                   int32_t                  dst_x, \
                                                   int32_t                  dst_y, \
                                                   int32_t                  width, \
                                                   int32_t                  height) \
{ \
    dst_type_t *dst_line; \
    src_type_t *src_first_line; \
    int       y; \
    /* Only given a meaningful value for NORMAL repeat; start from 0 so that \
     * the value handed to scanline_func is never indeterminate */ \
    pixman_fixed_t max_vx = 0; \
    pixman_fixed_t max_vy; \
    pixman_vector_t v; \
    pixman_fixed_t vx, vy; \
    pixman_fixed_t unit_x, unit_y; \
    int32_t left_pad, right_pad; \
    \
    src_type_t *src; \
    dst_type_t *dst; \
    int       src_stride, dst_stride; \
    \
    PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
     * transformed from destination space to source space */ \
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
    \
    /* reference point is the center of the pixel */ \
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
    v.vector[2] = pixman_fixed_1; \
    \
    if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
        return; \
    \
    unit_x = src_image->common.transform->matrix[0][0]; \
    unit_y = src_image->common.transform->matrix[1][1]; \
    \
    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
    v.vector[0] -= pixman_fixed_e; \
    v.vector[1] -= pixman_fixed_e; \
    \
    vx = v.vector[0]; \
    vy = v.vector[1]; \
    \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
    { \
        /* Clamp repeating positions inside the actual samples */ \
        max_vx = pixman_int_to_fixed (src_image->bits.width); \
        max_vy = pixman_int_to_fixed (src_image->bits.height); \
        \
        repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
        repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
    } \
    \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
        PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
    { \
        /* Split every row into left pad / image / right pad runs and move \
         * vx to the first pixel that is sampled from inside the image */ \
        pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \
                                        &width, &left_pad, &right_pad); \
        vx += left_pad * unit_x; \
    } \
    \
    while (--height >= 0) \
    { \
        dst = dst_line; \
        dst_line += dst_stride; \
        \
        y = vy >> 16; \
        vy += unit_y; \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
            repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
        { \
            repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \
            src = src_first_line + src_stride * y; \
            if (left_pad > 0) \
            { \
                /* zero step: replicate the first pixel of the row */ \
                scanline_func (dst, src, left_pad, 0, 0, 0); \
            } \
            if (width > 0) \
            { \
                scanline_func (dst + left_pad, src, width, vx, unit_x, 0); \
            } \
            if (right_pad > 0) \
            { \
                /* zero step: replicate the last pixel of the row */ \
                scanline_func (dst + left_pad + width, src + src_image->bits.width - 1, \
                               right_pad, 0, 0, 0); \
            } \
        } \
        else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
        { \
            /* source pixel used for everything outside the image */ \
            static src_type_t zero = 0; \
            if (y < 0 || y >= src_image->bits.height) \
            { \
                scanline_func (dst, &zero, left_pad + width + right_pad, 0, 0, 0); \
                continue; \
            } \
            src = src_first_line + src_stride * y; \
            if (left_pad > 0) \
            { \
                scanline_func (dst, &zero, left_pad, 0, 0, 0); \
            } \
            if (width > 0) \
            { \
                scanline_func (dst + left_pad, src, width, vx, unit_x, 0); \
            } \
            if (right_pad > 0) \
            { \
                scanline_func (dst + left_pad + width, &zero, right_pad, 0, 0, 0); \
            } \
        } \
        else \
        { \
            src = src_first_line + src_stride * y; \
            scanline_func (dst, src, width, vx, unit_x, max_vx); \
        } \
    } \
}
376
 
377
/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764
 *
 * Forwards to FAST_NEAREST_MAINLOOP_INT with '_' pasted in front of
 * 'scale_func_name'; all other arguments are passed through unchanged.
 */
#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
                              repeat_mode) \
    FAST_NEAREST_MAINLOOP_INT (_ ## scale_func_name, scanline_func, \
                               src_type_t, dst_type_t, repeat_mode)
382
 
383
/*
 * FAST_NEAREST instantiates both halves of a scaled nearest fast path:
 * the scanline function scaled_nearest_scanline_<scale_func_name>_<OP>
 * and the composite function
 * fast_composite_scaled_nearest_<scale_func_name>_<OP> that drives it.
 *
 * The expansion ends in an unterminated 'extern' declaration, so the
 * macro invocation has to be followed by a semicolon.
 */
#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
                     src_type_t, dst_type_t, OP, repeat_mode) \
    FAST_NEAREST_SCANLINE (scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
                           SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
                           OP, repeat_mode) \
    FAST_NEAREST_MAINLOOP_INT (_ ## scale_func_name ## _ ## OP, \
                               scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
                               src_type_t, dst_type_t, repeat_mode) \
    extern int no_such_variable
393
 
394
 
395
/* Flags common to every SIMPLE_NEAREST_FAST_PATH_* table entry; each entry
 * ORs in its own repeat / coverage flags. */
#define SCALED_NEAREST_FLAGS \
    (FAST_PATH_SCALE_TRANSFORM | \
     FAST_PATH_NO_ALPHA_MAP | \
     FAST_PATH_NEAREST_FILTER | \
     FAST_PATH_NO_ACCESSORS | \
     FAST_PATH_NARROW_FORMAT)
401
 
402
/* Table entry selecting fast_composite_scaled_nearest_<func>_normal_<op>
 * for operator 'op', source format 's' and destination format 'd'. */
#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_null, \
        0, \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _normal_ ## op, \
    }
412
 
413
/* Table entry selecting fast_composite_scaled_nearest_<func>_pad_<op>
 * for operator 'op', source format 's' and destination format 'd'. */
#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_null, \
        0, \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _pad_ ## op, \
    }
423
 
424
/* Table entry selecting fast_composite_scaled_nearest_<func>_none_<op>
 * for operator 'op', source format 's' and destination format 'd'. */
#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_null, \
        0, \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _none_ ## op, \
    }
434
 
435
/* Table entry selecting fast_composite_scaled_nearest_<func>_cover_<op>
 * for operator 'op', source format 's' and destination format 'd'.
 * Unlike the repeat variants it requires FAST_PATH_SAMPLES_COVER_CLIP and
 * does not require FAST_PATH_X_UNIT_POSITIVE. */
#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP), \
        PIXMAN_null, \
        0, \
        PIXMAN_ ## d, \
        FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _cover_ ## op, \
    }
443
 
444
/* Expands to the four table entries (cover, none, pad, normal) for one
 * op / source format / destination format combination.
 * Prefer the use of 'cover' variant, because it is faster */
#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
    SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
450
 
451
#endif