/*
 * Repository web-view header: Details | Last modification | View Log | RSS feed
 * Columns of the annotated listing: Rev | Author | Line No. | Line
 */
/*
 * Layout constants for 32-bit pixels made of four 8-bit components.
 * According to the masks below, alpha occupies bits 24-31, red bits 16-23,
 * green bits 8-15 and blue bits 0-7.
 *
 * (Blame annotation carried by the listing this was taken from:
 * revision 4349, author Serge.)
 */
#define COMPONENT_SIZE 8
#define MASK 0xff
#define ONE_HALF 0x80

/*
 * The multi-token shift values are parenthesized so that they expand
 * correctly inside any surrounding expression (e.g. "n / R_SHIFT").
 */
#define A_SHIFT (8 * 3)
#define R_SHIFT (8 * 2)
#define G_SHIFT 8
#define A_MASK 0xff000000
#define R_MASK 0xff0000
#define G_MASK 0xff00

/* Two-components-at-once variants: red+blue and alpha+green lanes. */
#define RB_MASK 0xff00ff
#define AG_MASK 0xff00ff00
#define RB_ONE_HALF 0x800080
/*
 * Note: despite its name this value is not RB_MASK + 1.  It has bits 8 and
 * 28 set; UN8_rb_ADD_UN8_rb subtracts the per-lane carry bits from it to
 * build a saturation mask, and everything outside RB_MASK is discarded
 * afterwards.
 */
#define RB_MASK_PLUS_ONE 0x10000100

/*
 * Extract a single 8-bit component from a packed pixel.  ALPHA_8 does not
 * mask its result, so it yields an 8-bit value only when x is an unsigned
 * 32-bit quantity.
 */
#define ALPHA_8(x) ((x) >> A_SHIFT)
#define RED_8(x) (((x) >> R_SHIFT) & MASK)
#define GREEN_8(x) (((x) >> G_SHIFT) & MASK)
#define BLUE_8(x) ((x) & MASK)

/*
 * ARMv6 has UQADD8 instruction, which implements unsigned saturated
 * addition for 8-bit values packed in 32-bit registers. It is very useful
 * for UN8x4_ADD_UN8x4, UN8_rb_ADD_UN8_rb and ADD_UN8 macros (which would
 * otherwise need a lot of arithmetic operations to simulate this operation).
 * Since most of the major ARM linux distros are built for ARMv7, we are
 * much less dependent on runtime CPU detection and can get practical
 * benefits from conditional compilation here for a lot of users.
 */

/*
 * NOTE(review): the architecture list below also names __ARM_ARCH_6M__ and
 * __ARM_ARCH_7M__; confirm that UQADD8 is actually available on every
 * target that can pass both guards.
 */
#if defined(USE_GCC_INLINE_ASM) && defined(__arm__) && \
    !defined(__aarch64__) && (!defined(__thumb__) || defined(__thumb2__))
#if defined(__ARM_ARCH_6__)   || defined(__ARM_ARCH_6J__)  || \
    defined(__ARM_ARCH_6K__)  || defined(__ARM_ARCH_6Z__)  || \
    defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) || \
    defined(__ARM_ARCH_6M__)  || defined(__ARM_ARCH_7__)   || \
    defined(__ARM_ARCH_7A__)  || defined(__ARM_ARCH_7R__)  || \
    defined(__ARM_ARCH_7M__)  || defined(__ARM_ARCH_7EM__)

/*
 * Add the four bytes of x to the four bytes of y with per-byte unsigned
 * saturation, using a single UQADD8 instruction.
 *
 * __asm__ is used instead of asm so the code also builds when the compiler
 * runs in a strict ISO mode (-std=c99 / -std=c11), where plain "asm" is not
 * a keyword.  The '%' constraint modifier marks x and y as commutative.
 */
static force_inline uint32_t
un8x4_add_un8x4 (uint32_t x, uint32_t y)
{
    uint32_t t;
    __asm__ ("uqadd8 %0, %1, %2" : "=r" (t) : "%r" (x), "r" (y));
    return t;
}

/* x_c = min (x_c + y_c, 255) for all four components; stores into x. */
#define UN8x4_ADD_UN8x4(x, y) \
    ((x) = un8x4_add_un8x4 ((x), (y)))

/* x_rb = min (x_rb + y_rb, 255); t receives the same result as x. */
#define UN8_rb_ADD_UN8_rb(x, y, t) \
    ((t) = un8x4_add_un8x4 ((x), (y)), (x) = (t))

/*
 * Expression form: copies x into t and evaluates to the saturated per-byte
 * sum of t and y.  x itself is not modified.
 */
#define ADD_UN8(x, y, t) \
    ((t) = (x), un8x4_add_un8x4 ((t), (y)))

#endif
#endif

/*****************************************************************************/

/*
 * Helper macros.
 */

/*
 * MUL_UN8(a, b, t): expression evaluating to a * b / 255, rounded to the
 * nearest integer, for 8-bit a and b.
 *
 * t is a caller-supplied scratch lvalue; it is overwritten with the
 * intermediate product a * b + ONE_HALF.  The division by 255 is done with
 * the shift-and-add form ((t >> 8) + t) >> 8.
 */
#define MUL_UN8(a, b, t) \
    ((t) = (a) * (uint16_t)(b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT ))

/*
 * DIV_UN8(a, b): expression evaluating to a * 255 / b, rounded to the
 * nearest integer.
 *
 * b must be non-zero (the macro divides by it).  The result is not clamped,
 * so it exceeds 255 whenever a > b.  Both arguments are evaluated more than
 * once or mixed into one expression, so pass side-effect-free operands.
 */
#define DIV_UN8(a, b) \
    (((uint16_t) (a) * MASK + ((b) / 2)) / (b))

/*
 * ADD_UN8(x, y, t): expression evaluating to min (x + y, 255) for 8-bit
 * x and y.  Only defined here when no other definition of ADD_UN8 is
 * already in effect.
 *
 * t is a caller-supplied scratch lvalue that receives the raw sum.  When
 * the sum overflows 8 bits, (t >> 8) is 1, so 0 - (t >> 8) has all low
 * bits set and OR-ing it in forces the low byte to 0xff.
 */
#ifndef ADD_UN8
#define ADD_UN8(x, y, t) \
    ((t) = (x) + (y), \
     (uint32_t) (uint8_t) ((t) | (0 - ((t) >> G_SHIFT))))
#endif

/*
 * DIV_ONE_UN8(x): expression evaluating to x / 255, rounded to the nearest
 * integer, using shifts and adds instead of a division.
 *
 * x is evaluated twice, so it must not have side effects.
 */
#define DIV_ONE_UN8(x) \
    (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT)

/*
 * The methods below use some tricks to be able to do two color
 * components at the same time.
 */

/*
 * x_rb = (x_rb * a) / 255
 *
 * x: lvalue; only its RB_MASK lanes (bits 0-7 and 16-23) are read, and on
 *    exit it holds the two scaled components in those same lanes with all
 *    other bits cleared.
 * a: 8-bit factor applied to both lanes.
 * t: caller-supplied scratch lvalue (overwritten).
 *
 * Each lane product is rounded with RB_ONE_HALF and divided by 255 using
 * the shift-and-add form, both lanes in one 32-bit operation.
 */
#define UN8_rb_MUL_UN8(x, a, t) \
    do \
    { \
	(t) = ((x) & RB_MASK) * (a); \
	(t) += RB_ONE_HALF; \
	(x) = ((t) + (((t) >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
	(x) &= RB_MASK; \
    } while (0)

/*
 * x_rb = min (x_rb + y_rb, 255)
 *
 * Only defined here when no other definition of UN8_rb_ADD_UN8_rb is
 * already in effect.
 *
 * x: lvalue holding two components in the RB_MASK lanes; receives the
 *    saturated sums in the same lanes.
 * y: second operand in the same layout.
 * t: caller-supplied scratch lvalue (overwritten).
 *
 * The raw sum leaves each lane's carry in bit 8 / bit 24.  Subtracting the
 * carries (moved down to bits 0 / 16) from RB_MASK_PLUS_ONE produces 0xff
 * in every lane that overflowed; OR-ing that in saturates the lane and the
 * final mask drops the helper bits.
 * NOTE(review): this presumably expects x and y to have no bits set
 * outside RB_MASK -- the callers visible in this file mask them first.
 */
#ifndef UN8_rb_ADD_UN8_rb
#define UN8_rb_ADD_UN8_rb(x, y, t) \
    do \
    { \
	(t) = ((x) + (y)); \
	(t) |= RB_MASK_PLUS_ONE - (((t) >> G_SHIFT) & RB_MASK); \
	(x) = ((t) & RB_MASK); \
    } while (0)
#endif

/*
 * x_rb = (x_rb * a_rb) / 255
 *
 * x: lvalue; its RB_MASK lanes are multiplied lane-by-lane with the
 *    matching lanes of a, and it receives the two results in those lanes
 *    with all other bits cleared.
 * a: per-lane 8-bit factors in bits 0-7 and 16-23; other bits are ignored.
 * t: caller-supplied scratch lvalue (overwritten).
 *
 * Every macro argument is parenthesized in the expansion so that an
 * expression argument (for example "lo | hi") is masked and shifted as a
 * whole.
 */
#define UN8_rb_MUL_UN8_rb(x, a, t) \
    do \
    { \
	(t) = ((x) & MASK) * ((a) & MASK); \
	(t) |= ((x) & R_MASK) * (((a) >> R_SHIFT) & MASK); \
	(t) += RB_ONE_HALF; \
	(t) = ((t) + (((t) >> G_SHIFT) & RB_MASK)) >> G_SHIFT; \
	(x) = (t) & RB_MASK; \
    } while (0)

/*
 * x_c = (x_c * a) / 255
 *
 * Scales all four 8-bit components of the lvalue x by the 8-bit factor a.
 * The pixel is processed as two lane pairs: first the components in bits
 * 0-7 / 16-23, then (after shifting x right by 8) those in bits 8-15 /
 * 24-31; the two halves are recombined at the end.
 *
 * x and a are each expanded more than once, so they must be free of side
 * effects.
 */
#define UN8x4_MUL_UN8(x, a) \
    do \
    { \
	uint32_t r1__, r2__, t__; \
	\
	r1__ = (x); \
	UN8_rb_MUL_UN8 (r1__, (a), t__); \
	\
	r2__ = (x) >> G_SHIFT; \
	UN8_rb_MUL_UN8 (r2__, (a), t__); \
	\
	(x) = r1__ | (r2__ << G_SHIFT); \
    } while (0)

/*
 * x_c = (x_c * a) / 255 + y_c
 *
 * Scales every component of the lvalue x by the 8-bit factor a and then
 * adds the matching component of y.  The addition goes through
 * UN8_rb_ADD_UN8_rb, so each component saturates at 255.
 *
 * The work is done on two lane pairs (bits 0-7 / 16-23, then bits 8-15 /
 * 24-31 shifted down by 8) which are recombined at the end.  x, a and y
 * are expanded more than once and must be free of side effects.
 */
#define UN8x4_MUL_UN8_ADD_UN8x4(x, a, y) \
    do \
    { \
	uint32_t r1__, r2__, r3__, t__; \
	\
	r1__ = (x); \
	r2__ = (y) & RB_MASK; \
	UN8_rb_MUL_UN8 (r1__, (a), t__); \
	UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \
	\
	r2__ = (x) >> G_SHIFT; \
	r3__ = ((y) >> G_SHIFT) & RB_MASK; \
	UN8_rb_MUL_UN8 (r2__, (a), t__); \
	UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \
	\
	(x) = r1__ | (r2__ << G_SHIFT); \
    } while (0)

/*
 * x_c = (x_c * a + y_c * b) / 255
 *
 * As implemented, the two products are each divided by 255 and rounded
 * separately (UN8_rb_MUL_UN8) and then added with UN8_rb_ADD_UN8_rb, so
 * each component of the result saturates at 255.
 *
 * x is an lvalue that receives the result; a and b are 8-bit factors.
 * The work is done on two lane pairs (bits 0-7 / 16-23, then bits 8-15 /
 * 24-31 shifted down by 8).  All arguments are expanded more than once
 * and must be free of side effects.
 */
#define UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8(x, a, y, b) \
    do \
    { \
	uint32_t r1__, r2__, r3__, t__; \
	\
	r1__ = (x); \
	r2__ = (y); \
	UN8_rb_MUL_UN8 (r1__, (a), t__); \
	UN8_rb_MUL_UN8 (r2__, (b), t__); \
	UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \
	\
	r2__ = ((x) >> G_SHIFT); \
	r3__ = ((y) >> G_SHIFT); \
	UN8_rb_MUL_UN8 (r2__, (a), t__); \
	UN8_rb_MUL_UN8 (r3__, (b), t__); \
	UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \
	\
	(x) = r1__ | (r2__ << G_SHIFT); \
    } while (0)

/*
 * x_c = (x_c * a_c) / 255
 *
 * Multiplies every 8-bit component of the lvalue x by the component of a
 * in the same position, rounding each quotient by 255.
 *
 * The work is done on two lane pairs: bits 0-7 / 16-23 first, then bits
 * 8-15 / 24-31 after shifting both operands right by 8.  x and a are
 * expanded more than once and must be free of side effects.
 */
#define UN8x4_MUL_UN8x4(x, a) \
    do \
    { \
	uint32_t r1__, r2__, r3__, t__; \
	\
	r1__ = (x); \
	r2__ = (a); \
	UN8_rb_MUL_UN8_rb (r1__, r2__, t__); \
	\
	r2__ = (x) >> G_SHIFT; \
	r3__ = (a) >> G_SHIFT; \
	UN8_rb_MUL_UN8_rb (r2__, r3__, t__); \
	\
	(x) = r1__ | (r2__ << G_SHIFT); \
    } while (0)

/*
 * x_c = (x_c * a_c) / 255 + y_c
 *
 * Multiplies every component of the lvalue x by the matching component of
 * a, then adds the matching component of y.  The addition goes through
 * UN8_rb_ADD_UN8_rb, so each component saturates at 255.
 *
 * The work is done on two lane pairs (bits 0-7 / 16-23, then bits 8-15 /
 * 24-31 shifted down by 8).  All arguments are expanded more than once
 * and must be free of side effects.
 */
#define UN8x4_MUL_UN8x4_ADD_UN8x4(x, a, y) \
    do \
    { \
	uint32_t r1__, r2__, r3__, t__; \
	\
	r1__ = (x); \
	r2__ = (a); \
	UN8_rb_MUL_UN8_rb (r1__, r2__, t__); \
	r2__ = (y) & RB_MASK; \
	UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \
	\
	r2__ = ((x) >> G_SHIFT); \
	r3__ = ((a) >> G_SHIFT); \
	UN8_rb_MUL_UN8_rb (r2__, r3__, t__); \
	r3__ = ((y) >> G_SHIFT) & RB_MASK; \
	UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \
	\
	(x) = r1__ | (r2__ << G_SHIFT); \
    } while (0)

/*
 * x_c = (x_c * a_c + y_c * b) / 255
 *
 * As implemented, x is multiplied component-wise by a (UN8_rb_MUL_UN8_rb),
 * y is scaled by the single 8-bit factor b (UN8_rb_MUL_UN8), each product
 * is divided by 255 and rounded separately, and the two are then added
 * with UN8_rb_ADD_UN8_rb, so each component saturates at 255.
 *
 * x is an lvalue that receives the result.  The work is done on two lane
 * pairs (bits 0-7 / 16-23, then bits 8-15 / 24-31 shifted down by 8).
 * All arguments are expanded more than once and must be free of side
 * effects.
 */
#define UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8(x, a, y, b) \
    do \
    { \
	uint32_t r1__, r2__, r3__, t__; \
	\
	r1__ = (x); \
	r2__ = (a); \
	UN8_rb_MUL_UN8_rb (r1__, r2__, t__); \
	r2__ = (y); \
	UN8_rb_MUL_UN8 (r2__, (b), t__); \
	UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \
	\
	r2__ = (x) >> G_SHIFT; \
	r3__ = (a) >> G_SHIFT; \
	UN8_rb_MUL_UN8_rb (r2__, r3__, t__); \
	r3__ = (y) >> G_SHIFT; \
	UN8_rb_MUL_UN8 (r3__, (b), t__); \
	UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \
	\
	(x) = r1__ | (r2__ << G_SHIFT); \
    } while (0)

/*
 * x_c = min(x_c + y_c, 255)
 *
 * Only defined here when no other definition of UN8x4_ADD_UN8x4 is
 * already in effect.
 *
 * Adds the four 8-bit components of y to those of the lvalue x with
 * per-component saturation.  The pixel is split into two lane pairs
 * (bits 0-7 / 16-23, and bits 8-15 / 24-31 shifted down by 8), each pair
 * is added with UN8_rb_ADD_UN8_rb, and the halves are recombined.
 * x and y are expanded more than once and must be free of side effects.
 */
#ifndef UN8x4_ADD_UN8x4
#define UN8x4_ADD_UN8x4(x, y) \
    do \
    { \
	uint32_t r1__, r2__, r3__, t__; \
	\
	r1__ = (x) & RB_MASK; \
	r2__ = (y) & RB_MASK; \
	UN8_rb_ADD_UN8_rb (r1__, r2__, t__); \
	\
	r2__ = ((x) >> G_SHIFT) & RB_MASK; \
	r3__ = ((y) >> G_SHIFT) & RB_MASK; \
	UN8_rb_ADD_UN8_rb (r2__, r3__, t__); \
	\
	(x) = r1__ | (r2__ << G_SHIFT); \
    } while (0)
#endif