Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
1891 | serge | 1 | /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ |
2 | /* |
||
3 | * Copyright © 2000 SuSE, Inc. |
||
4 | * Copyright © 2007 Red Hat, Inc. |
||
5 | * |
||
6 | * Permission to use, copy, modify, distribute, and sell this software and its |
||
7 | * documentation for any purpose is hereby granted without fee, provided that |
||
8 | * the above copyright notice appear in all copies and that both that |
||
9 | * copyright notice and this permission notice appear in supporting |
||
10 | * documentation, and that the name of SuSE not be used in advertising or |
||
11 | * publicity pertaining to distribution of the software without specific, |
||
12 | * written prior permission. SuSE makes no representations about the |
||
13 | * suitability of this software for any purpose. It is provided "as is" |
||
14 | * without express or implied warranty. |
||
15 | * |
||
16 | * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL |
||
17 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE |
||
18 | * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
||
19 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
||
20 | * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
||
21 | * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
||
22 | * |
||
23 | * Author: Keith Packard, SuSE, Inc. |
||
24 | */ |
||
25 | |||
26 | #ifndef PIXMAN_FAST_PATH_H__ |
||
27 | #define PIXMAN_FAST_PATH_H__ |
||
28 | |||
29 | #include "pixman-private.h" |
||
30 | |||
/* Extra repeat-mode value used only by the fast-path macros in this header
 * (it is compared against PIXMAN_REPEAT_NORMAL/PAD/NONE through
 * PIXMAN_REPEAT_ ## repeat_mode).  The negative literal is parenthesized so
 * that it always stays a single operand wherever the macro is expanded. */
#define PIXMAN_REPEAT_COVER (-1)
||
32 | |||
33 | static force_inline pixman_bool_t |
||
34 | repeat (pixman_repeat_t repeat, int *c, int size) |
||
35 | { |
||
36 | if (repeat == PIXMAN_REPEAT_NONE) |
||
37 | { |
||
38 | if (*c < 0 || *c >= size) |
||
39 | return FALSE; |
||
40 | } |
||
41 | else if (repeat == PIXMAN_REPEAT_NORMAL) |
||
42 | { |
||
43 | while (*c >= size) |
||
44 | *c -= size; |
||
45 | while (*c < 0) |
||
46 | *c += size; |
||
47 | } |
||
48 | else if (repeat == PIXMAN_REPEAT_PAD) |
||
49 | { |
||
50 | *c = CLIP (*c, 0, size - 1); |
||
51 | } |
||
52 | else /* REFLECT */ |
||
53 | { |
||
54 | *c = MOD (*c, size * 2); |
||
55 | if (*c >= size) |
||
56 | *c = size * 2 - *c - 1; |
||
57 | } |
||
58 | return TRUE; |
||
59 | } |
||
60 | |||
61 | /* |
||
62 | * For each scanline fetched from source image with PAD repeat: |
||
63 | * - calculate how many pixels need to be padded on the left side |
||
64 | * - calculate how many pixels need to be padded on the right side |
||
65 | * - update width to only count pixels which are fetched from the image |
||
66 | * All this information is returned via 'width', 'left_pad', 'right_pad' |
||
67 | * arguments. The code is assuming that 'unit_x' is positive. |
||
68 | * |
||
69 | * Note: 64-bit math is used in order to avoid potential overflows, which |
||
70 | * is probably excessive in many cases. This particular function |
||
71 | * may need its own correctness test and performance tuning. |
||
72 | */ |
||
73 | static force_inline void |
||
74 | pad_repeat_get_scanline_bounds (int32_t source_image_width, |
||
75 | pixman_fixed_t vx, |
||
76 | pixman_fixed_t unit_x, |
||
77 | int32_t * width, |
||
78 | int32_t * left_pad, |
||
79 | int32_t * right_pad) |
||
80 | { |
||
81 | int64_t max_vx = (int64_t) source_image_width << 16; |
||
82 | int64_t tmp; |
||
83 | if (vx < 0) |
||
84 | { |
||
85 | tmp = ((int64_t) unit_x - 1 - vx) / unit_x; |
||
86 | if (tmp > *width) |
||
87 | { |
||
88 | *left_pad = *width; |
||
89 | *width = 0; |
||
90 | } |
||
91 | else |
||
92 | { |
||
93 | *left_pad = (int32_t) tmp; |
||
94 | *width -= (int32_t) tmp; |
||
95 | } |
||
96 | } |
||
97 | else |
||
98 | { |
||
99 | *left_pad = 0; |
||
100 | } |
||
101 | tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad; |
||
102 | if (tmp < 0) |
||
103 | { |
||
104 | *right_pad = *width; |
||
105 | *width = 0; |
||
106 | } |
||
107 | else if (tmp >= *width) |
||
108 | { |
||
109 | *right_pad = 0; |
||
110 | } |
||
111 | else |
||
112 | { |
||
113 | *right_pad = *width - (int32_t) tmp; |
||
114 | *width = (int32_t) tmp; |
||
115 | } |
||
116 | } |
||
117 | |||
118 | /* A macroified version of specialized nearest scalers for some |
||
119 | * common 8888 and 565 formats. It supports SRC and OVER ops. |
||
120 | * |
||
121 | * There are two repeat versions, one that handles repeat normal, |
||
122 | * and one without repeat handling that only works if the src region |
||
123 | * used is completely covered by the pre-repeated source samples. |
||
124 | * |
||
125 | * The loops are unrolled to process two pixels per iteration for better |
||
126 | * performance on most CPU architectures (superscalar processors |
||
127 | * can issue several operations simultaneously, other processors can hide |
||
128 | * instruction latencies by pipelining operations). Unrolling more |
||
129 | * does not make much sense because the compiler will start running out |
||
130 | * of spare registers soon. |
||
131 | */ |
||
132 | |||
/* Alpha of an 8888 pixel: the value shifted down by 24 bits. */
#define GET_8888_ALPHA(pixel) ((pixel) >> 24)
/* There is no OVER fast path with a 565 source, so this value is never
 * actually used; the macro only has to exist so that the scanline
 * template builds for 565 sources. */
#define GET_0565_ALPHA(pixel) 0xff
||
137 | |||
/*
 * Define the inline function 'scanline_func_name', which writes 'w'
 * nearest-neighbour samples of the row 'src' to 'dst'.
 *
 * The sample position starts at 'vx' and advances by 'unit_x' per pixel;
 * the source column is vx >> 16.  With repeat_mode NORMAL the position is
 * wrapped by subtracting 'max_vx' whenever it reaches it (this relies on
 * unit_x being positive); for every other repeat_mode 'max_vx' is ignored.
 *
 * OP must be SRC or OVER, otherwise the generated function aborts.
 * SRC stores the converted source pixel.  OVER stores it directly when its
 * alpha is 0xff, leaves the destination untouched when the source pixel is
 * zero, and otherwise combines source and destination through
 * UN8x4_MUL_UN8_ADD_UN8x4 using the inverted alpha.
 */
#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
                              src_type_t, dst_type_t, OP, repeat_mode) \
static force_inline void \
scanline_func_name (dst_type_t     *dst, \
                    src_type_t     *src, \
                    int32_t         w, \
                    pixman_fixed_t  vx, \
                    pixman_fixed_t  unit_x, \
                    pixman_fixed_t  max_vx) \
{ \
    uint32_t blended; \
    src_type_t pix_a, pix_b; \
    uint8_t alpha_a, alpha_b; \
    int col_a, col_b; \
    \
    /* Only SRC and OVER are implemented by this template. */ \
    if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
        abort(); \
    \
    /* Main loop: two destination pixels per iteration. */ \
    for (; w >= 2; w -= 2) \
    { \
        col_a = vx >> 16; \
        vx += unit_x; \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
        { \
            /* unit_x is positive, so only the upper bound can be crossed */ \
            while (vx >= max_vx) \
                vx -= max_vx; \
        } \
        pix_a = src[col_a]; \
        \
        col_b = vx >> 16; \
        vx += unit_x; \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
        { \
            /* unit_x is positive, so only the upper bound can be crossed */ \
            while (vx >= max_vx) \
                vx -= max_vx; \
        } \
        pix_b = src[col_b]; \
        \
        if (PIXMAN_OP_ ## OP == PIXMAN_OP_SRC) \
        { \
            *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (pix_a); \
            *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (pix_b); \
        } \
        else /* PIXMAN_OP_OVER */ \
        { \
            alpha_a = GET_ ## SRC_FORMAT ## _ALPHA(pix_a); \
            alpha_b = GET_ ## SRC_FORMAT ## _ALPHA(pix_b); \
            \
            if (alpha_a == 0xff) \
            { \
                *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (pix_a); \
            } \
            else if (pix_a) \
            { \
                blended = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
                pix_a = CONVERT_ ## SRC_FORMAT ## _TO_8888 (pix_a); \
                alpha_a ^= 0xff; \
                UN8x4_MUL_UN8_ADD_UN8x4 (blended, alpha_a, pix_a); \
                *dst = CONVERT_8888_TO_ ## DST_FORMAT (blended); \
            } \
            dst++; \
            \
            if (alpha_b == 0xff) \
            { \
                *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (pix_b); \
            } \
            else if (pix_b) \
            { \
                blended = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
                pix_b = CONVERT_ ## SRC_FORMAT ## _TO_8888 (pix_b); \
                alpha_b ^= 0xff; \
                UN8x4_MUL_UN8_ADD_UN8x4 (blended, alpha_b, pix_b); \
                *dst = CONVERT_8888_TO_ ## DST_FORMAT (blended); \
            } \
            dst++; \
        } \
    } \
    \
    /* Tail: one remaining pixel when the requested count was odd. */ \
    if (w & 1) \
    { \
        col_a = vx >> 16; \
        pix_a = src[col_a]; \
        \
        if (PIXMAN_OP_ ## OP == PIXMAN_OP_SRC) \
        { \
            *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (pix_a); \
        } \
        else /* PIXMAN_OP_OVER */ \
        { \
            alpha_a = GET_ ## SRC_FORMAT ## _ALPHA(pix_a); \
            \
            if (alpha_a == 0xff) \
            { \
                *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (pix_a); \
            } \
            else if (pix_a) \
            { \
                blended = CONVERT_ ## DST_FORMAT ## _TO_8888 (*dst); \
                pix_a = CONVERT_ ## SRC_FORMAT ## _TO_8888 (pix_a); \
                alpha_a ^= 0xff; \
                UN8x4_MUL_UN8_ADD_UN8x4 (blended, alpha_a, pix_a); \
                *dst = CONVERT_8888_TO_ ## DST_FORMAT (blended); \
            } \
            dst++; \
        } \
    } \
}
||
247 | |||
/*
 * Define the composite entry point
 * 'fast_composite_scaled_nearest<scale_func_name>', which drives
 * 'scanline_func' over every destination row.
 *
 * The starting sample position is the centre of pixel (src_x, src_y) pushed
 * through the source image's transform; the function returns without
 * drawing if that transformation fails.  The per-pixel and per-row steps
 * are the transform's matrix[0][0] and matrix[1][1] entries.
 *
 * Behaviour per repeat_mode:
 *   NORMAL - positions are wrapped into the image in both directions.
 *   PAD    - the row index is clamped to the image; each scanline is split
 *            into left pad / image / right pad runs, the pad runs being
 *            drawn from the first / last source pixel of the row.
 *   NONE   - same split, but the pad runs and rows outside the image are
 *            drawn from a zero pixel.
 *   other  - (COVER) samples are read without any bounds handling.
 *
 * 'imp', 'op', 'mask_image', 'mask_x' and 'mask_y' are not used.
 */
#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, dst_type_t, \
                                  repeat_mode) \
static void \
fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \
                                                  pixman_op_t              op, \
                                                  pixman_image_t *         src_image, \
                                                  pixman_image_t *         mask_image, \
                                                  pixman_image_t *         dst_image, \
                                                  int32_t                  src_x, \
                                                  int32_t                  src_y, \
                                                  int32_t                  mask_x, \
                                                  int32_t                  mask_y, \
                                                  int32_t                  dst_x, \
                                                  int32_t                  dst_y, \
                                                  int32_t                  width, \
                                                  int32_t                  height) \
{ \
    dst_type_t *dst_line; \
    src_type_t *src_first_line; \
    int y; \
    /* max_vx/max_vy are only meaningful for NORMAL repeat and the pad counts \
     * only for PAD/NONE; all are given defined values so that nothing \
     * indeterminate is ever read or handed to scanline_func. */ \
    pixman_fixed_t max_vx = 0; \
    pixman_fixed_t max_vy = 0; \
    pixman_vector_t v; \
    pixman_fixed_t vx, vy; \
    pixman_fixed_t unit_x, unit_y; \
    int32_t left_pad = 0, right_pad = 0; \
    \
    src_type_t *src; \
    dst_type_t *dst; \
    int src_stride, dst_stride; \
    \
    PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
     * transformed from destination space to source space */ \
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
    \
    /* reference point is the center of the pixel */ \
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
    v.vector[2] = pixman_fixed_1; \
    \
    if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
        return; \
    \
    unit_x = src_image->common.transform->matrix[0][0]; \
    unit_y = src_image->common.transform->matrix[1][1]; \
    \
    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
    v.vector[0] -= pixman_fixed_e; \
    v.vector[1] -= pixman_fixed_e; \
    \
    vx = v.vector[0]; \
    vy = v.vector[1]; \
    \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
    { \
        /* Clamp repeating positions inside the actual samples */ \
        max_vx = src_image->bits.width << 16; \
        max_vy = src_image->bits.height << 16; \
        \
        repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
        repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
    } \
    \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
        PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
    { \
        /* The split is the same for every row, so compute it once and \
         * move vx to the first pixel that samples inside the image. */ \
        pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \
                                        &width, &left_pad, &right_pad); \
        vx += left_pad * unit_x; \
    } \
    \
    while (--height >= 0) \
    { \
        dst = dst_line; \
        dst_line += dst_stride; \
        \
        y = vy >> 16; \
        vy += unit_y; \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
            repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
        { \
            repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \
            src = src_first_line + src_stride * y; \
            if (left_pad > 0) \
            { \
                /* zero step: replicate the first pixel of the row */ \
                scanline_func (dst, src, left_pad, 0, 0, 0); \
            } \
            if (width > 0) \
            { \
                scanline_func (dst + left_pad, src, width, vx, unit_x, 0); \
            } \
            if (right_pad > 0) \
            { \
                /* zero step: replicate the last pixel of the row */ \
                scanline_func (dst + left_pad + width, src + src_image->bits.width - 1, \
                               right_pad, 0, 0, 0); \
            } \
        } \
        else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
        { \
            static src_type_t zero = 0; \
            if (y < 0 || y >= src_image->bits.height) \
            { \
                /* row outside the image: the whole scanline reads zero */ \
                scanline_func (dst, &zero, left_pad + width + right_pad, 0, 0, 0); \
                continue; \
            } \
            src = src_first_line + src_stride * y; \
            if (left_pad > 0) \
            { \
                scanline_func (dst, &zero, left_pad, 0, 0, 0); \
            } \
            if (width > 0) \
            { \
                scanline_func (dst + left_pad, src, width, vx, unit_x, 0); \
            } \
            if (right_pad > 0) \
            { \
                scanline_func (dst + left_pad + width, &zero, right_pad, 0, 0, 0); \
            } \
        } \
        else \
        { \
            src = src_first_line + src_stride * y; \
            scanline_func (dst, src, width, vx, unit_x, max_vx); \
        } \
    } \
}
||
376 | |||
/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764
 *
 * Forwards to FAST_NEAREST_MAINLOOP_INT with an underscore pasted in front
 * of 'scale_func_name'.  The last line deliberately carries no trailing
 * line continuation, so the definition ends here regardless of what follows.
 */
#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
                              repeat_mode) \
    FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, dst_type_t, \
                              repeat_mode)
||
382 | |||
/*
 * Instantiate a complete nearest scaler for one combination of formats,
 * operator and repeat mode:
 *
 *   - the scanline worker 'scaled_nearest_scanline_<scale_func_name>_<OP>'
 *   - the entry point 'fast_composite_scaled_nearest_<scale_func_name>_<OP>'
 *
 * The expansion ends in an unterminated 'extern int no_such_variable'
 * declaration, so an invocation has to be followed by a semicolon.
 */
#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
                     src_type_t, dst_type_t, OP, repeat_mode) \
    FAST_NEAREST_SCANLINE (scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
                           SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
                           OP, repeat_mode) \
    FAST_NEAREST_MAINLOOP_INT (_ ## scale_func_name ## _ ## OP, \
                               scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
                               src_type_t, dst_type_t, repeat_mode) \
    extern int no_such_variable
||
393 | |||
394 | |||
/* Source-image flags shared by every nearest-scaling fast-path table entry
 * defined in this header. */
#define SCALED_NEAREST_FLAGS \
    (FAST_PATH_SCALE_TRANSFORM | FAST_PATH_NO_ALPHA_MAP | \
     FAST_PATH_NEAREST_FILTER  | FAST_PATH_NO_ACCESSORS | \
     FAST_PATH_NARROW_FORMAT)
||
401 | |||
/* Fast-path table entry for operator 'op', source format 's' and
 * destination format 'd', selecting the NORMAL-repeat scaler
 * fast_composite_scaled_nearest_<func>_normal_<op>.  No mask is used. */
#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_null, 0, \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _normal_ ## op, \
    }
||
412 | |||
/* Fast-path table entry for operator 'op', source format 's' and
 * destination format 'd', selecting the PAD-repeat scaler
 * fast_composite_scaled_nearest_<func>_pad_<op>.  No mask is used. */
#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_null, 0, \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _pad_ ## op, \
    }
||
423 | |||
/* Fast-path table entry for operator 'op', source format 's' and
 * destination format 'd', selecting the NONE-repeat scaler
 * fast_composite_scaled_nearest_<func>_none_<op>.  No mask is used. */
#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_NONE_REPEAT | FAST_PATH_X_UNIT_POSITIVE), \
        PIXMAN_null, 0, \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _none_ ## op, \
    }
||
434 | |||
/* Fast-path table entry for operator 'op', source format 's' and
 * destination format 'd', selecting the scaler without repeat handling,
 * fast_composite_scaled_nearest_<func>_cover_<op>.  It requires
 * FAST_PATH_SAMPLES_COVER_CLIP on the source and uses no mask.  The flag
 * expression is parenthesized like in the NORMAL/PAD/NONE entries. */
#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
    { \
        PIXMAN_OP_ ## op, \
        PIXMAN_ ## s, \
        (SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP), \
        PIXMAN_null, 0, \
        PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
        fast_composite_scaled_nearest_ ## func ## _cover_ ## op, \
    }
||
443 | |||
/* Expands to four consecutive table entries for the same operator and
 * formats.  The 'cover' entry is listed first so that it is preferred,
 * because it is faster; NONE, PAD and NORMAL follow in that order. */
#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
    SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
    SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
    SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
    SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
||
450 | |||
451 | #endif |