Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
1901 | serge | 1 | ; |
2 | /* |
||
3 | * Written by José Fonseca |
||
4 | */ |
||
5 | |||
6 | |||
7 | #ifdef USE_MMX_ASM |
||
8 | #include "assyntax.h" |
||
9 | #include "matypes.h" |
||
10 | |||
11 | /* integer multiplication - alpha plus one |
||
12 | * |
||
13 | * makes the following approximation to the division (Sree) |
||
14 | * |
||
15 | * rgb*a/255 ~= (rgb*(a+1)) >> 8 |
||
16 | * |
||
17 | * which is the fastest method that satisfies the following OpenGL criteria |
||
18 | * |
||
19 | * 0*0 = 0 and 255*255 = 255 |
||
20 | * |
||
21 | * note that MX1 is a register holding the 0xffffffffffffffff constant (-1 in every word, so subtracting it adds one), which can be easily obtained with |
||
22 | * |
||
23 | * PCMPEQW ( MX1, MX1 ) |
||
24 | */ |
||
25 | #define GMB_MULT_AP1( MP1, MA1, MP2, MA2, MX1 ) \ |
||
26 | PSUBW ( MX1, MA1 ) /* a1 - (-1) per word: a1 + 1 | a1 + 1 | a1 + 1 | a1 + 1 */ ;\ |
||
27 | PMULLW ( MP1, MA1 ) /* t1 = p1*(a1 + 1), low 16 bits of each product */ ;\ |
||
28 | ;\ |
||
29 | TWO(PSUBW ( MX1, MA2 )) /* a2 - (-1) per word: a2 + 1 | a2 + 1 | a2 + 1 | a2 + 1 */ ;\ |
||
30 | TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*(a2 + 1), low 16 bits of each product */ ;\ |
||
31 | ;\ |
||
32 | PSRLW ( CONST(8), MA1 ) /* t1 >> 8 ~= p1*a1/255; result is left in MA1 */ ;\ |
||
33 | TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 ~= p2*a2/255; result is left in MA2 */ |
||
34 | |||
35 | |||
36 | /* integer multiplication - geometric series |
||
37 | * |
||
38 | * takes the geometric series approximation to the division |
||
39 | * |
||
40 | * t/255 = (t >> 8) + (t >> 16) + (t >> 24) .. |
||
41 | * |
||
42 | * in this case just the first two terms to fit in 16bit arithmetic |
||
43 | * |
||
44 | * t/255 ~= (t + (t >> 8)) >> 8 |
||
45 | * |
||
46 | * note that by itself it doesn't satisfy the OpenGL criteria, as 255*255 = 254, |
||
47 | * so the special case a = 255 must be accounted for, or roundoff must be used |
||
48 | */ |
||
49 | #define GMB_MULT_GS( MP1, MA1, MP2, MA2 ) \ |
||
50 | PMULLW ( MP1, MA1 ) /* t1 = p1*a1 */ ;\ |
||
51 | TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*a2 */ ;\ |
||
52 | ;\ |
||
53 | MOVQ ( MA1, MP1 ) ;\ |
||
54 | PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\ |
||
55 | ;\ |
||
56 | TWO(MOVQ ( MA2, MP2 )) ;\ |
||
57 | TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\ |
||
58 | ;\ |
||
59 | PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\ |
||
60 | PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\ |
||
61 | ;\ |
||
62 | TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ |
||
63 | TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */ |
||
64 | |||
65 | |||
66 | /* integer multiplication - geometric series plus rounding |
||
67 | * |
||
68 | * when using a geometric series division instead of truncating the result |
||
69 | * use roundoff in the approximation (Jim Blinn) |
||
70 | * |
||
71 | * t = rgb*a + 0x80 |
||
72 | * |
||
73 | * achieving the exact results |
||
74 | * |
||
75 | * note that M80 is register with the 0x0080008000800080 constant |
||
76 | */ |
||
77 | #define GMB_MULT_GSR( MP1, MA1, MP2, MA2, M80 ) \ |
||
78 | PMULLW ( MP1, MA1 ) /* t1 = p1*a1 */ ;\ |
||
79 | PADDW ( M80, MA1 ) /* t1 += 0x80 */ ;\ |
||
80 | ;\ |
||
81 | TWO(PMULLW ( MP2, MA2 )) /* t2 = p2*a2 */ ;\ |
||
82 | TWO(PADDW ( M80, MA2 )) /* t2 += 0x80 */ ;\ |
||
83 | ;\ |
||
84 | MOVQ ( MA1, MP1 ) ;\ |
||
85 | PSRLW ( CONST(8), MA1 ) /* t1 >> 8 */ ;\ |
||
86 | ;\ |
||
87 | TWO(MOVQ ( MA2, MP2 )) ;\ |
||
88 | TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 */ ;\ |
||
89 | ;\ |
||
90 | PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\ |
||
91 | PSRLW ( CONST(8), MA1 ) /* sa1 | sb1 | sg1 | sr1 */ ;\ |
||
92 | ;\ |
||
93 | TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ |
||
94 | TWO(PSRLW ( CONST(8), MA2 )) /* sa2 | sb2 | sg2 | sr2 */ |
||
95 | |||
96 | |||
97 | /* linear interpolation - geometric series |
||
98 | */ |
||
99 | #define GMB_LERP_GS( MP1, MQ1, MA1, MP2, MQ2, MA2) \ |
||
100 | PSUBW ( MQ1, MP1 ) /* MP1 = p1 - q1: pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\ |
||
101 | PSLLW ( CONST(8), MQ1 ) /* q1 << 8, pre-scaled so it can be added before the final shift */ ;\ |
||
102 | PMULLW ( MP1, MA1 ) /* t1 = (p1 - q1)*pa1 */ ;\ |
||
103 | ;\ |
||
104 | TWO(PSUBW ( MQ2, MP2 )) /* MP2 = p2 - q2: pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\ |
||
105 | TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8, pre-scaled so it can be added before the final shift */ ;\ |
||
106 | TWO(PMULLW ( MP2, MA2 )) /* t2 = (p2 - q2)*pa2 */ ;\ |
||
107 | ;\ |
||
108 | MOVQ ( MA1, MP1 ) ;\ |
||
109 | PSRLW ( CONST(8), MA1 ) /* t1 >> 8 (MP1 still holds the unshifted t1) */ ;\ |
||
110 | ;\ |
||
111 | TWO(MOVQ ( MA2, MP2 )) ;\ |
||
112 | TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 (MP2 still holds the unshifted t2) */ ;\ |
||
113 | ;\ |
||
114 | PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\ |
||
115 | TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ |
||
116 | ;\ |
||
117 | PADDW ( MQ1, MA1 ) /* (t1/255 + q1) << 8 */ ;\ |
||
118 | TWO(PADDW ( MQ2, MA2 )) /* (t2/255 + q2) << 8 */ ;\ |
||
119 | ;\ |
||
120 | PSRLW ( CONST(8), MA1 ) /* result in MA1: sa1 | sb1 | sg1 | sr1 */ ;\ |
||
121 | TWO(PSRLW ( CONST(8), MA2 )) /* result in MA2: sa2 | sb2 | sg2 | sr2 */ |
||
122 | |||
123 | |||
124 | /* linear interpolation - geometric series with roundoff |
||
125 | * |
||
126 | * this is a generalization of Blinn's formula to signed arithmetic |
||
127 | * |
||
128 | * note that M80 is a register with the 0x0080008000800080 constant |
||
129 | */ |
||
130 | #define GMB_LERP_GSR( MP1, MQ1, MA1, MP2, MQ2, MA2, M80) \ |
||
131 | PSUBW ( MQ1, MP1 ) /* MP1 = p1 - q1: pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\ |
||
132 | PSLLW ( CONST(8), MQ1 ) /* q1 << 8, pre-scaled so it can be added before the final shift */ ;\ |
||
133 | PMULLW ( MP1, MA1 ) /* t1 = (p1 - q1)*pa1 */ ;\ |
||
134 | ;\ |
||
135 | TWO(PSUBW ( MQ2, MP2 )) /* MP2 = p2 - q2: pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\ |
||
136 | TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8, pre-scaled so it can be added before the final shift */ ;\ |
||
137 | TWO(PMULLW ( MP2, MA2 )) /* t2 = (p2 - q2)*pa2 */ ;\ |
||
138 | ;\ |
||
139 | PSRLW ( CONST(15), MP1 ) /* sign bit of (p1 - q1): q1 > p1 ? 1 : 0 */ ;\ |
||
140 | TWO(PSRLW ( CONST(15), MP2 )) /* sign bit of (p2 - q2): q2 > p2 ? 1 : 0 */ ;\ |
||
141 | ;\ |
||
142 | PSLLW ( CONST(8), MP1 ) /* q1 > p1 ? 0x100 : 0 */ ;\ |
||
143 | TWO(PSLLW ( CONST(8), MP2 )) /* q2 > p2 ? 0x100 : 0 */ ;\ |
||
144 | ;\ |
||
145 | PSUBW ( MP1, MA1 ) /* if q1 > p1 (negative difference): t1 -= 0x100 */ ;\ |
||
146 | TWO(PSUBW ( MP2, MA2 )) /* if q2 > p2 (negative difference): t2 -= 0x100 */ ;\ |
||
147 | ;\ |
||
148 | PADDW ( M80, MA1 ) /* t1 += 0x80 (roundoff term) */ ;\ |
||
149 | TWO(PADDW ( M80, MA2 )) /* t2 += 0x80 (roundoff term) */ ;\ |
||
150 | ;\ |
||
151 | MOVQ ( MA1, MP1 ) ;\ |
||
152 | PSRLW ( CONST(8), MA1 ) /* t1 >> 8 (MP1 still holds the unshifted t1) */ ;\ |
||
153 | ;\ |
||
154 | TWO(MOVQ ( MA2, MP2 )) ;\ |
||
155 | TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 (MP2 still holds the unshifted t2) */ ;\ |
||
156 | ;\ |
||
157 | PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\ |
||
158 | TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ |
||
159 | ;\ |
||
160 | PADDW ( MQ1, MA1 ) /* (t1/255 + q1) << 8 */ ;\ |
||
161 | TWO(PADDW ( MQ2, MA2 )) /* (t2/255 + q2) << 8 */ ;\ |
||
162 | ;\ |
||
163 | PSRLW ( CONST(8), MA1 ) /* result in MA1: sa1 | sb1 | sg1 | sr1 */ ;\ |
||
164 | TWO(PSRLW ( CONST(8), MA2 )) /* result in MA2: sa2 | sb2 | sg2 | sr2 */ |
||
165 | |||
166 | |||
167 | /* linear interpolation - geometric series with correction |
||
168 | * |
||
169 | * instead of the roundoff this adds a small correction to satisfy the OpenGL criteria |
||
170 | * |
||
171 | * t/255 ~= (t + (t >> 8) + (t >> 15)) >> 8 |
||
172 | * |
||
173 | * note that although this is faster than rounding off, it doesn't always give the exact results |
||
174 | */ |
||
175 | #define GMB_LERP_GSC( MP1, MQ1, MA1, MP2, MQ2, MA2) \ |
||
176 | PSUBW ( MQ1, MP1 ) /* MP1 = p1 - q1: pa1 - qa1 | pb1 - qb1 | pg1 - qg1 | pr1 - qr1 */ ;\ |
||
177 | PSLLW ( CONST(8), MQ1 ) /* q1 << 8, pre-scaled so it can be added before the final shift */ ;\ |
||
178 | PMULLW ( MP1, MA1 ) /* t1 = (p1 - q1)*pa1 */ ;\ |
||
179 | ;\ |
||
180 | TWO(PSUBW ( MQ2, MP2 )) /* MP2 = p2 - q2: pa2 - qa2 | pb2 - qb2 | pg2 - qg2 | pr2 - qr2 */ ;\ |
||
181 | TWO(PSLLW ( CONST(8), MQ2 )) /* q2 << 8, pre-scaled so it can be added before the final shift */ ;\ |
||
182 | TWO(PMULLW ( MP2, MA2 )) /* t2 = (p2 - q2)*pa2 */ ;\ |
||
183 | ;\ |
||
184 | MOVQ ( MA1, MP1 ) ;\ |
||
185 | PSRLW ( CONST(8), MA1 ) /* t1 >> 8 (MP1 still holds the unshifted t1) */ ;\ |
||
186 | ;\ |
||
187 | TWO(MOVQ ( MA2, MP2 )) ;\ |
||
188 | TWO(PSRLW ( CONST(8), MA2 )) /* t2 >> 8 (MP2 still holds the unshifted t2) */ ;\ |
||
189 | ;\ |
||
190 | PADDW ( MA1, MP1 ) /* MP1 = t1 + (t1 >> 8) ~= (t1/255) << 8 */ ;\ |
||
191 | PSRLW ( CONST(7), MA1 ) /* (t1 >> 8) >> 7 = t1 >> 15 */ ;\ |
||
192 | ;\ |
||
193 | TWO(PADDW ( MA2, MP2 )) /* MP2 = t2 + (t2 >> 8) ~= (t2/255) << 8 */ ;\ |
||
194 | TWO(PSRLW ( CONST(7), MA2 )) /* (t2 >> 8) >> 7 = t2 >> 15 */ ;\ |
||
195 | ;\ |
||
196 | PADDW ( MP1, MA1 ) /* t1 + (t1 >> 8) + (t1 >> 15) ~= (t1/255) << 8 */ ;\ |
||
197 | TWO(PADDW ( MP2, MA2 )) /* t2 + (t2 >> 8) + (t2 >> 15) ~= (t2/255) << 8 */ ;\ |
||
198 | ;\ |
||
199 | PADDW ( MQ1, MA1 ) /* (t1/255 + q1) << 8 */ ;\ |
||
200 | TWO(PADDW ( MQ2, MA2 )) /* (t2/255 + q2) << 8 */ ;\ |
||
201 | ;\ |
||
202 | PSRLW ( CONST(8), MA1 ) /* result in MA1: sa1 | sb1 | sg1 | sr1 */ ;\ |
||
203 | TWO(PSRLW ( CONST(8), MA2 )) /* result in MA2: sa2 | sb2 | sg2 | sr2 */ |
||
204 | |||
205 | |||
206 | /* common blending setup code |
||
207 | * |
||
208 | * note that M00 is a register with 0x0000000000000000 constant which can be easily obtained making |
||
209 | * |
||
210 | * PXOR ( M00, M00 ) |
||
211 | */ |
||
212 | #define GMB_LOAD(rgba, dest, MPP, MQQ) \ |
||
213 | ONE(MOVD ( REGIND(rgba), MPP )) /* one pixel from rgba: | | | | pa1 | pb1 | pg1 | pr1 */ ;\ |
||
214 | ONE(MOVD ( REGIND(dest), MQQ )) /* one pixel from dest: | | | | qa1 | qb1 | qg1 | qr1 */ ;\ |
||
215 | ;\ |
||
216 | TWO(MOVQ ( REGIND(rgba), MPP )) /* two pixels from rgba: pa2 | pb2 | pg2 | pr2 | pa1 | pb1 | pg1 | pr1 */ ;\ |
||
217 | TWO(MOVQ ( REGIND(dest), MQQ )) /* two pixels from dest: qa2 | qb2 | qg2 | qr2 | qa1 | qb1 | qg1 | qr1 */ |
||
218 | |||
219 | #define GMB_UNPACK(MP1, MQ1, MP2, MQ2, M00) \ |
||
220 | TWO(MOVQ ( MP1, MP2 )) ;\ |
||
221 | TWO(MOVQ ( MQ1, MQ2 )) ;\ |
||
222 | ;\ |
||
223 | PUNPCKLBW ( M00, MQ1 ) /* qa1 | qb1 | qg1 | qr1 */ ;\ |
||
224 | TWO(PUNPCKHBW ( M00, MQ2 )) /* qa2 | qb2 | qg2 | qr2 */ ;\ |
||
225 | PUNPCKLBW ( M00, MP1 ) /* pa1 | pb1 | pg1 | pr1 */ ;\ |
||
226 | TWO(PUNPCKHBW ( M00, MP2 )) /* pa2 | pb2 | pg2 | pr2 */ |
||
227 | |||
228 | #define GMB_ALPHA(MP1, MA1, MP2, MA2) \ |
||
229 | MOVQ ( MP1, MA1 ) ;\ |
||
230 | TWO(MOVQ ( MP2, MA2 )) ;\ |
||
231 | ;\ |
||
232 | PUNPCKHWD ( MA1, MA1 ) /* pa1 | pa1 | | */ ;\ |
||
233 | TWO(PUNPCKHWD ( MA2, MA2 )) /* pa2 | pa2 | | */ ;\ |
||
234 | PUNPCKHDQ ( MA1, MA1 ) /* pa1 | pa1 | pa1 | pa1 */ ;\ |
||
235 | TWO(PUNPCKHDQ ( MA2, MA2 )) /* pa2 | pa2 | pa2 | pa2 */ |
||
236 | |||
237 | #define GMB_PACK( MS1, MS2 ) /* pack the words of MS1 (low half) and MS2 (high half) into bytes of MS1, with unsigned saturation */ \ |
||
238 | PACKUSWB ( MS2, MS1 ) /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1; no trailing separator, callers add their own */ |
||
239 | |||
240 | #define GMB_STORE(rgba, MSS ) \ |
||
241 | ONE(MOVD ( MSS, REGIND(rgba) )) /* | | | | sa1 | sb1 | sg1 | sr1 */ ;\ |
||
242 | TWO(MOVQ ( MSS, REGIND(rgba) )) /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */ |
||
243 | |||
244 | /* Kevin F. Quinn |
||
245 | * Replace data segment constants with text-segment |
||
246 | * constants (via pushl/movq) |
||
247 | SEG_DATA |
||
248 | |||
249 | ALIGNDATA8 |
||
250 | const_0080: |
||
251 | D_LONG 0x00800080, 0x00800080 |
||
252 | |||
253 | const_80: |
||
254 | D_LONG 0x80808080, 0x80808080 |
||
255 | */ |
||
256 | #define const_0080_l 0x00800080 |
||
257 | #define const_0080_h 0x00800080 |
||
258 | #define const_80_l 0x80808080 |
||
259 | #define const_80_h 0x80808080 |
||
260 | |||
261 | SEG_TEXT |
||
262 | |||
263 | |||
264 | /* Blend transparency function |
||
265 | */ |
||
266 | |||
267 | #define TAG(x) CONCAT(x,_transparency) |
||
268 | #define LLTAG(x) LLBL2(x,_transparency) |
||
269 | |||
270 | #define INIT \ |
||
271 | PXOR ( MM0, MM0 ) /* 0x0000 | 0x0000 | 0x0000 | 0x0000 */ |
||
272 | |||
273 | #define MAIN( rgba, dest ) \ |
||
274 | GMB_LOAD( rgba, dest, MM1, MM2 ) ;\ |
||
275 | GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 ) ;\ |
||
276 | GMB_ALPHA( MM1, MM3, MM4, MM6 ) ;\ |
||
277 | GMB_LERP_GSC( MM1, MM2, MM3, MM4, MM5, MM6 ) ;\ |
||
278 | GMB_PACK( MM3, MM6 ) ;\ |
||
279 | GMB_STORE( rgba, MM3 ) |
||
280 | |||
281 | #include "mmx_blendtmp.h" |
||
282 | |||
283 | |||
284 | /* Blend add function |
||
285 | * |
||
286 | * FIXME: Add some loop unrolling here... |
||
287 | */ |
||
288 | |||
289 | #define TAG(x) CONCAT(x,_add) |
||
290 | #define LLTAG(x) LLBL2(x,_add) |
||
291 | |||
292 | #define INIT |
||
293 | |||
294 | #define MAIN( rgba, dest ) \ |
||
295 | ONE(MOVD ( REGIND(rgba), MM1 )) /* | | | | qa1 | qb1 | qg1 | qr1 */ ;\ |
||
296 | ONE(MOVD ( REGIND(dest), MM2 )) /* | | | | pa1 | pb1 | pg1 | pr1 */ ;\ |
||
297 | ONE(PADDUSB ( MM2, MM1 )) ;\ |
||
298 | ONE(MOVD ( MM1, REGIND(rgba) )) /* | | | | sa1 | sb1 | sg1 | sr1 */ ;\ |
||
299 | ;\ |
||
300 | TWO(MOVQ ( REGIND(rgba), MM1 )) /* qa2 | qb2 | qg2 | qr2 | qa1 | qb1 | qg1 | qr1 */ ;\ |
||
301 | TWO(PADDUSB ( REGIND(dest), MM1 )) /* sa2 | sb2 | sg2 | sr2 | sa1 | sb1 | sg1 | sr1 */ ;\ |
||
302 | TWO(MOVQ ( MM1, REGIND(rgba) )) |
||
303 | |||
304 | #include "mmx_blendtmp.h" |
||
305 | |||
306 | |||
307 | /* Blend min function |
||
308 | */ |
||
309 | |||
310 | #define TAG(x) CONCAT(x,_min) |
||
311 | #define LLTAG(x) LLBL2(x,_min) |
||
312 | |||
313 | /* Kevin F. Quinn 2nd July 2006 |
||
314 | * Replace data segment constants with text-segment instructions |
||
315 | #define INIT \ |
||
316 | MOVQ ( CONTENT(const_80), MM7 ) |
||
317 | */ |
||
318 | #define INIT \ |
||
319 | PUSH_L ( CONST(const_80_h) ) /* 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/ ;\ |
||
320 | PUSH_L ( CONST(const_80_l) ) ;\ |
||
321 | MOVQ ( REGIND(ESP), MM7 ) ;\ |
||
322 | ADD_L ( CONST(8), ESP) |
||
323 | |||
324 | #define MAIN( rgba, dest ) \ |
||
325 | GMB_LOAD( rgba, dest, MM1, MM2 ) ;\ |
||
326 | MOVQ ( MM1, MM3 ) ;\ |
||
327 | MOVQ ( MM2, MM4 ) ;\ |
||
328 | PXOR ( MM7, MM3 ) /* unsigned -> signed */ ;\ |
||
329 | PXOR ( MM7, MM4 ) /* unsigned -> signed */ ;\ |
||
330 | PCMPGTB ( MM3, MM4 ) /* q > p ? 0xff : 0x00 */ ;\ |
||
331 | PAND ( MM4, MM1 ) /* q > p ? p : 0 */ ;\ |
||
332 | PANDN ( MM2, MM4 ) /* q > p ? 0 : q */ ;\ |
||
333 | POR ( MM1, MM4 ) /* q > p ? p : q */ ;\ |
||
334 | GMB_STORE( rgba, MM4 ) |
||
335 | |||
336 | #include "mmx_blendtmp.h" |
||
337 | |||
338 | |||
339 | /* Blend max function |
||
340 | */ |
||
341 | |||
342 | #define TAG(x) CONCAT(x,_max) |
||
343 | #define LLTAG(x) LLBL2(x,_max) |
||
344 | |||
345 | /* Kevin F. Quinn 2nd July 2006 |
||
346 | * Replace data segment constants with text-segment instructions |
||
347 | #define INIT \ |
||
348 | MOVQ ( CONTENT(const_80), MM7 ) |
||
349 | */ |
||
350 | #define INIT /* load the 0x80-per-byte mask used for the unsigned -> signed flip into MM7 */ \ |
||
351 | PUSH_L ( CONST(const_80_h) ) /* high dword first, same order as the min variant: 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80| 0x80*/ ;\ |
||
352 | PUSH_L ( CONST(const_80_l) ) /* low dword is pushed last, so it sits at (ESP) */ ;\ |
||
353 | MOVQ ( REGIND(ESP), MM7 ) /* MM7 = the 8 bytes just pushed */ ;\ |
||
354 | ADD_L ( CONST(8), ESP) /* drop the two pushed dwords again */ |
||
355 | |||
356 | #define MAIN( rgba, dest ) \ |
||
357 | GMB_LOAD( rgba, dest, MM1, MM2 ) ;\ |
||
358 | MOVQ ( MM1, MM3 ) ;\ |
||
359 | MOVQ ( MM2, MM4 ) ;\ |
||
360 | PXOR ( MM7, MM3 ) /* p (from rgba): unsigned -> signed, PCMPGTB compares signed bytes */ ;\ |
||
361 | PXOR ( MM7, MM4 ) /* q (from dest): unsigned -> signed, PCMPGTB compares signed bytes */ ;\ |
||
362 | PCMPGTB ( MM3, MM4 ) /* mask: q > p ? 0xff : 0x00 */ ;\ |
||
363 | PAND ( MM4, MM2 ) /* q > p ? q : 0 */ ;\ |
||
364 | PANDN ( MM1, MM4 ) /* q > p ? 0 : p */ ;\ |
||
365 | POR ( MM2, MM4 ) /* q > p ? q : p, i.e. max(p, q) per byte */ ;\ |
||
366 | GMB_STORE( rgba, MM4 ) |
||
367 | |||
368 | #include "mmx_blendtmp.h" |
||
369 | |||
370 | |||
371 | /* Blend modulate function |
||
372 | */ |
||
373 | |||
374 | #define TAG(x) CONCAT(x,_modulate) |
||
375 | #define LLTAG(x) LLBL2(x,_modulate) |
||
376 | |||
377 | /* Kevin F. Quinn 2nd July 2006 |
||
378 | * Replace data segment constants with text-segment instructions |
||
379 | #define INIT \ |
||
380 | MOVQ ( CONTENT(const_0080), MM7 ) |
||
381 | */ |
||
382 | #define INIT /* MM0 = zero (for byte -> word unpacking), MM7 = 0x0080 per word (roundoff term) */ \ |
||
383 | PXOR ( MM0, MM0 ) /* 0x0000 | 0x0000 | 0x0000 | 0x0000 */ ;\ |
||
384 | PUSH_L ( CONST(const_0080_h) ) /* high dword first, same order as the min variant: 0x0080 | 0x0080 | 0x0080 | 0x0080 */ ;\ |
||
385 | PUSH_L ( CONST(const_0080_l) ) /* low dword is pushed last, so it sits at (ESP) */ ;\ |
||
386 | MOVQ ( REGIND(ESP), MM7 ) /* MM7 = the 8 bytes just pushed */ ;\ |
||
387 | ADD_L ( CONST(8), ESP) /* drop the two pushed dwords again */ |
||
388 | |||
389 | #define MAIN( rgba, dest ) \ |
||
390 | GMB_LOAD( rgba, dest, MM1, MM2 ) ;\ |
||
391 | GMB_UNPACK( MM1, MM2, MM4, MM5, MM0 ) ;\ |
||
392 | GMB_MULT_GSR( MM1, MM2, MM4, MM5, MM7 ) ;\ |
||
393 | GMB_PACK( MM2, MM5 ) ;\ |
||
394 | GMB_STORE( rgba, MM2 ) |
||
395 | |||
396 | #include "mmx_blendtmp.h" |
||
397 | |||
398 | #endif |
||
399 | |||
400 | #if defined (__ELF__) && defined (__linux__) |
||
401 | .section .note.GNU-stack,"",%progbits /* empty flags: this object does not request an executable stack */ |
||
402 | #endif /* __ELF__ && __linux__ */ |