Rev 8210 | Go to most recent revision | Show entire file | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 8210 | Rev 9172 | ||
---|---|---|---|
Line 18... | Line 18... | ||
18 | ; are, they're terrible on p5 MMXs, but less so on pIIs. Someone needs to |
18 | ; are, they're terrible on p5 MMXs, but less so on pIIs. Someone needs to |
19 | ; optimise them for p5 MMXs.. |
19 | ; optimise them for p5 MMXs.. |
Line 20... | Line 20... | ||
20 | 20 | ||
Line -... | Line 21... | ||
- | 21 | BITS 32 |
|
Line 21... | Line 22... | ||
21 | BITS 32 |
22 | |
22 | 23 | %include "common.inc" |
|
23 | 24 | ||
24 | GLOBAL _ConvertMMXpII32_24RGB888 |
25 | SDL_FUNC _ConvertMMXpII32_24RGB888 |
25 | GLOBAL _ConvertMMXpII32_16RGB565 |
26 | SDL_FUNC _ConvertMMXpII32_16RGB565 |
26 | GLOBAL _ConvertMMXpII32_16BGR565 |
27 | SDL_FUNC _ConvertMMXpII32_16BGR565 |
27 | GLOBAL _ConvertMMXpII32_16RGB555 |
28 | SDL_FUNC _ConvertMMXpII32_16RGB555 |
28 | GLOBAL _ConvertMMXpII32_16BGR555 |
29 | SDL_FUNC _ConvertMMXpII32_16BGR555 |
- | 30 | ||
- | 31 | ;; Macros for conversion routines |
|
29 | 32 | ||
- | 33 | %macro _push_immq_mask 1 |
|
30 | EXTERN _mmxreturn |
34 | push dword %1 |
- | 35 | push dword %1 |
|
- | 36 | %endmacro |
|
- | 37 | ||
31 | 38 | %macro load_immq 2 |
|
32 | SECTION .data |
39 | _push_immq_mask %2 |
33 | 40 | movq %1, [esp] |
|
- | 41 | %endmacro |
|
- | 42 | ||
- | 43 | %macro pand_immq 2 |
|
34 | ALIGN 8 |
44 | _push_immq_mask %2 |
35 | 45 | pand %1, [esp] |
|
- | 46 | %endmacro |
|
36 | ;; Constants for conversion routines |
47 | |
- | 48 | %define CLEANUP_IMMQ_LOADS(num) \ |
|
37 | 49 | add esp, byte 8 * num |
|
38 | mmx32_rgb888_mask dd 00ffffffh,00ffffffh |
50 | |
39 | 51 | %define mmx32_rgb888_mask 00ffffffh |
|
40 | mmx32_rgb565_b dd 000000f8h, 000000f8h |
52 | %define mmx32_rgb565_b 000000f8h |
41 | mmx32_rgb565_g dd 0000fc00h, 0000fc00h |
53 | %define mmx32_rgb565_g 0000fc00h |
42 | mmx32_rgb565_r dd 00f80000h, 00f80000h |
54 | %define mmx32_rgb565_r 00f80000h |
43 | 55 | ||
44 | mmx32_rgb555_rb dd 00f800f8h,00f800f8h |
56 | %define mmx32_rgb555_rb 00f800f8h |
45 | mmx32_rgb555_g dd 0000f800h,0000f800h |
- | |
46 | mmx32_rgb555_mul dd 20000008h,20000008h |
- | |
Line 47... | Line 57... | ||
47 | mmx32_bgr555_mul dd 00082000h,00082000h |
57 | %define mmx32_rgb555_g 0000f800h |
Line 48... | Line 58... | ||
48 | 58 | %define mmx32_rgb555_mul 20000008h |
|
Line 49... | Line 59... | ||
49 | 59 | %define mmx32_bgr555_mul 00082000h |
|
50 | 60 | ||
- | 61 | SECTION .text |
|
51 | SECTION .text |
62 | |
Line 52... | Line 63... | ||
52 | 63 | _ConvertMMXpII32_24RGB888: |
|
53 | _ConvertMMXpII32_24RGB888: |
64 | |
54 | 65 | ; set up mm6 as the mask, mm7 as zero |
|
Line 106... | Line 117... | ||
106 | add esi, BYTE 4 |
117 | add esi, BYTE 4 |
107 | add edi, BYTE 3 |
118 | add edi, BYTE 3 |
108 | dec ecx |
119 | dec ecx |
109 | jnz .L3 |
120 | jnz .L3 |
110 | .L4: |
121 | .L4: |
111 | jmp _mmxreturn |
122 | retn |
Line 112... | Line 123... | ||
112 | 123 | ||
Line 113... | Line 124... | ||
113 | 124 | ||
114 | 125 | ||
115 | _ConvertMMXpII32_16RGB565: |
126 | _ConvertMMXpII32_16RGB565: |
116 | 127 | ||
- | 128 | ; set up masks |
|
Line 117... | Line 129... | ||
117 | ; set up masks |
129 | load_immq mm5, mmx32_rgb565_b |
118 | movq mm5, [mmx32_rgb565_b] |
130 | load_immq mm6, mmx32_rgb565_g |
119 | movq mm6, [mmx32_rgb565_g] |
131 | load_immq mm7, mmx32_rgb565_r |
120 | movq mm7, [mmx32_rgb565_r] |
132 | CLEANUP_IMMQ_LOADS(3) |
Line 174... | Line 186... | ||
174 | add edi, BYTE 2 |
186 | add edi, BYTE 2 |
175 | dec ecx |
187 | dec ecx |
176 | jnz .L3 |
188 | jnz .L3 |
Line 177... | Line 189... | ||
177 | 189 | ||
178 | .L4: |
190 | .L4: |
Line 179... | Line 191... | ||
179 | jmp _mmxreturn |
191 | retn |
Line 180... | Line 192... | ||
180 | 192 | ||
181 | 193 | ||
182 | _ConvertMMXpII32_16BGR565: |
194 | _ConvertMMXpII32_16BGR565: |
- | 195 | ||
Line 183... | Line 196... | ||
183 | 196 | load_immq mm5, mmx32_rgb565_r |
|
184 | movq mm5, [mmx32_rgb565_r] |
197 | load_immq mm6, mmx32_rgb565_g |
185 | movq mm6, [mmx32_rgb565_g] |
198 | load_immq mm7, mmx32_rgb565_b |
186 | movq mm7, [mmx32_rgb565_b] |
199 | CLEANUP_IMMQ_LOADS(3) |
Line 243... | Line 256... | ||
243 | add edi, BYTE 2 |
256 | add edi, BYTE 2 |
244 | dec edx |
257 | dec edx |
245 | jnz .L3 |
258 | jnz .L3 |
Line 246... | Line 259... | ||
246 | 259 | ||
247 | .L4: |
260 | .L4: |
Line 248... | Line 261... | ||
248 | jmp _mmxreturn |
261 | retn |
Line 249... | Line 262... | ||
249 | 262 | ||
250 | _ConvertMMXpII32_16BGR555: |
263 | _ConvertMMXpII32_16BGR555: |
251 | 264 | ||
Line 252... | Line 265... | ||
252 | ; the 16BGR555 converter is identical to the RGB555 one, |
265 | ; the 16BGR555 converter is identical to the RGB555 one, |
253 | ; except it uses a different multiplier for the pmaddwd |
266 | ; except it uses a different multiplier for the pmaddwd |
Line 254... | Line 267... | ||
254 | ; instruction. cool huh. |
267 | ; instruction. cool huh. |
255 | 268 | ||
256 | movq mm7, qword [mmx32_bgr555_mul] |
269 | load_immq mm7, mmx32_bgr555_mul |
257 | jmp _convert_bgr555_cheat |
270 | jmp _convert_bgr555_cheat |
258 | 271 | ||
259 | ; This is the same as the Intel version.. they obviously went to |
272 | ; This is the same as the Intel version.. they obviously went to |
Line 260... | Line 273... | ||
260 | ; much more trouble to expand/coil the loop than I did, so theirs |
273 | ; much more trouble to expand/coil the loop than I did, so theirs |
261 | ; would almost certainly be faster, even if only a little. |
274 | ; would almost certainly be faster, even if only a little. |
262 | ; I did rename 'mmx32_rgb555_add' to 'mmx32_rgb555_mul', which is |
275 | ; I did rename 'mmx32_rgb555_add' to 'mmx32_rgb555_mul', which is |
- | 276 | ; (I think) a more accurate name.. |
|
Line 263... | Line 277... | ||
263 | ; (I think) a more accurate name.. |
277 | _ConvertMMXpII32_16RGB555: |
Line 264... | Line 278... | ||
264 | _ConvertMMXpII32_16RGB555: |
278 | |
265 | 279 | load_immq mm7, mmx32_rgb555_mul |
|
266 | movq mm7,qword [mmx32_rgb555_mul] |
280 | _convert_bgr555_cheat: |
Line 267... | Line 281... | ||
267 | _convert_bgr555_cheat: |
281 | load_immq mm6, mmx32_rgb555_g |
Line 268... | Line 282... | ||
268 | movq mm6,qword [mmx32_rgb555_g] |
282 | CLEANUP_IMMQ_LOADS(2) |
Line 269... | Line 283... | ||
269 | 283 | ||
270 | mov edx,ecx ; Save ecx |
284 | mov edx,ecx ; Save ecx |
Line 271... | Line 285... | ||
271 | 285 | ||
272 | and ecx,BYTE 0fffffff8h ; clear lower three bits |
286 | and ecx,DWORD 0fffffff8h ; clear lower three bits |
Line 273... | Line 287... | ||
273 | jnz .L_OK |
287 | jnz .L_OK |
274 | jmp .L2 |
288 | jmp near .L2 |
Line -... | Line 289... | ||
- | 289 | ||
- | 290 | .L_OK: |
|
275 | 291 | ||
276 | .L_OK: |
292 | movq mm2,[esi+8] |
Line 277... | Line 293... | ||
277 | 293 | ||
278 | movq mm2,[esi+8] |
294 | movq mm0,[esi] |
Line 300... | Line 316... | ||
300 | por mm1,mm0 |
316 | por mm1,mm0 |
Line 301... | Line 317... | ||
301 | 317 | ||
302 | movq mm0,mm4 |
318 | movq mm0,mm4 |
Line 303... | Line 319... | ||
303 | psrld mm1,6 |
319 | psrld mm1,6 |
304 | 320 | ||
Line 305... | Line 321... | ||
305 | pand mm0,qword [mmx32_rgb555_rb] |
321 | pand_immq mm0, mmx32_rgb555_rb |
306 | packssdw mm1,mm3 |
322 | packssdw mm1,mm3 |
Line 307... | Line 323... | ||
307 | 323 | ||
308 | movq mm3,mm5 |
324 | movq mm3,mm5 |
Line 309... | Line 325... | ||
309 | pmaddwd mm0,mm7 |
325 | pmaddwd mm0,mm7 |
310 | 326 | ||
Line 327... | Line 343... | ||
327 | psrld mm5,6 |
343 | psrld mm5,6 |
Line 328... | Line 344... | ||
328 | 344 | ||
329 | movq mm3,mm2 |
345 | movq mm3,mm2 |
Line 330... | Line 346... | ||
330 | movq mm1,mm0 |
346 | movq mm1,mm0 |
331 | 347 | ||
Line 332... | Line 348... | ||
332 | pand mm3,qword [mmx32_rgb555_rb] |
348 | pand_immq mm3, mmx32_rgb555_rb |
333 | packssdw mm5,mm4 |
349 | packssdw mm5,mm4 |
Line -... | Line 350... | ||
- | 350 | ||
- | 351 | pand_immq mm1, mmx32_rgb555_rb |
|
334 | 352 | pand mm2,mm6 |
|
335 | pand mm1,qword [mmx32_rgb555_rb] |
353 | |
Line 336... | Line 354... | ||
336 | pand mm2,mm6 |
354 | CLEANUP_IMMQ_LOADS(4) |
337 | 355 | ||
Line 378... | Line 396... | ||
378 | 396 | ||
379 | dec ecx |
397 | dec ecx |
Line 380... | Line 398... | ||
380 | jnz .L3 |
398 | jnz .L3 |
381 | 399 | ||
382 | .L4: |
- | |
383 | jmp _mmxreturn |
- | |
Line -... | Line 400... | ||
- | 400 | .L4: |
|
- | 401 | retn |
|
- | 402 |