; (Recovered from a repository blame view, rev 8210; viewer markup removed.)
;
; x86 format converters for HERMES
; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
; This source code is licensed under the GNU LGPL
;
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions
;
; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
;

BITS 32

%include "common.inc"

; SDL_FUNC is not defined in this file; presumably it comes from common.inc
; and declares/exports each converter entry point -- verify against common.inc.
SDL_FUNC _ConvertX86p32_32BGR888
SDL_FUNC _ConvertX86p32_32RGBA888
SDL_FUNC _ConvertX86p32_32BGRA888
SDL_FUNC _ConvertX86p32_24RGB888
SDL_FUNC _ConvertX86p32_24BGR888
SDL_FUNC _ConvertX86p32_16RGB565
SDL_FUNC _ConvertX86p32_16BGR565
SDL_FUNC _ConvertX86p32_16RGB555
SDL_FUNC _ConvertX86p32_16BGR555
SDL_FUNC _ConvertX86p32_8RGB332

SECTION .text

;; _Convert_*
;; Parameters:
;;   ESI = source
;;   EDI = dest
;;   ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though))
;; Destroys:
;;   EAX, EBX, EDX


;-----------------------------------------------------------------------
; _ConvertX86p32_32BGR888
; Copies ECX dwords from [ESI] to [EDI], applying BSWAP followed by
; ROR 8 to each one: bytes 0 and 2 of every pixel trade places while
; bytes 1 and 3 stay where they are (0xAARRGGBB -> 0xAABBGGRR).
; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Out:   ESI / EDI advanced past the pixels processed
; Clobb: EAX, EBX, ECX, EDX, flags (EBP is saved and restored)
;-----------------------------------------------------------------------
_ConvertX86p32_32BGR888:
        cmp     ecx, BYTE 32            ; up to 32 pixels: simple loop only
        ja      .unrolled_setup

.short_loop:
        mov     edx, [esi]
        bswap   edx
        ror     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .short_loop
        retn

.unrolled_setup:
        push    ebp
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups
        push    ecx                     ; full count, needed again for the tail

.unrolled:
        mov     eax, [esi]
        mov     ebx, [esi+4]
        bswap   eax
        bswap   ebx
        ror     eax, 8
        mov     ecx, [esi+8]
        ror     ebx, 8
        mov     edx, [esi+12]
        bswap   ecx
        bswap   edx
        ror     ecx, 8
        mov     [edi+0], eax
        ror     edx, 8
        mov     [edi+4], ebx
        mov     [edi+8], ecx
        mov     [edi+12], edx
        add     esi, BYTE 16
        add     edi, BYTE 16
        dec     ebp
        jnz     .unrolled

        pop     ecx
        and     ecx, BYTE 11b           ; 0..3 pixels left over
        jz      .done

.tail:
        mov     edx, [esi]
        bswap   edx
        ror     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .tail

.done:
        pop     ebp
        retn



;-----------------------------------------------------------------------
; _ConvertX86p32_32RGBA888
; Copies ECX dwords from [ESI] to [EDI], rotating each one left by
; 8 bits (0xAARRGGBB -> 0xRRGGBBAA).
; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Out:   ESI / EDI advanced past the pixels processed
; Clobb: EAX, EBX, ECX, EDX, flags (EBP is saved and restored)
;-----------------------------------------------------------------------
_ConvertX86p32_32RGBA888:
        cmp     ecx, BYTE 32            ; up to 32 pixels: simple loop only
        ja      .unrolled_setup

.short_loop:
        mov     edx, [esi]
        rol     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .short_loop
        retn

.unrolled_setup:
        push    ebp
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups
        push    ecx                     ; full count, needed again for the tail

.unrolled:
        mov     eax, [esi]
        mov     ebx, [esi+4]
        rol     eax, 8
        mov     ecx, [esi+8]
        rol     ebx, 8
        mov     edx, [esi+12]
        rol     ecx, 8
        mov     [edi+0], eax
        rol     edx, 8
        mov     [edi+4], ebx
        mov     [edi+8], ecx
        mov     [edi+12], edx
        add     esi, BYTE 16
        add     edi, BYTE 16
        dec     ebp
        jnz     .unrolled

        pop     ecx
        and     ecx, BYTE 11b           ; 0..3 pixels left over
        jz      .done

.tail:
        mov     edx, [esi]
        rol     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .tail

.done:
        pop     ebp
        retn



;-----------------------------------------------------------------------
; _ConvertX86p32_32BGRA888
; Copies ECX dwords from [ESI] to [EDI], byte-reversing each one with
; BSWAP (0xAARRGGBB -> 0xBBGGRRAA).
; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Out:   ESI / EDI advanced past the pixels processed
; Clobb: EAX, EBX, ECX, EDX, flags (EBP is saved and restored)
;-----------------------------------------------------------------------
_ConvertX86p32_32BGRA888:
        cmp     ecx, BYTE 32            ; up to 32 pixels: simple loop only
        ja      .unrolled_setup

.short_loop:
        mov     edx, [esi]
        bswap   edx
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .short_loop
        retn

.unrolled_setup:
        push    ebp
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups
        push    ecx                     ; full count, needed again for the tail

.unrolled:
        mov     eax, [esi]
        mov     ebx, [esi+4]
        mov     ecx, [esi+8]
        mov     edx, [esi+12]
        bswap   eax
        bswap   ebx
        bswap   ecx
        bswap   edx
        mov     [edi+0], eax
        mov     [edi+4], ebx
        mov     [edi+8], ecx
        mov     [edi+12], edx
        add     esi, BYTE 16
        add     edi, BYTE 16
        dec     ebp
        jnz     .unrolled

        pop     ecx
        and     ecx, BYTE 11b           ; 0..3 pixels left over
        jz      .done

.tail:
        mov     edx, [esi]
        bswap   edx
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .tail

.done:
        pop     ebp
        retn



;; 32 bit RGB 888 to 24 BIT RGB 888

;-----------------------------------------------------------------------
; _ConvertX86p32_24RGB888
; For each 4-byte source pixel, copies bytes 0..2 to the destination in
; the same order and drops byte 3, so EDI advances 3 bytes per pixel.
; Counts above 32 first copy single pixels until EDI is dword aligned,
; then emit 4 pixels as three dword stores per iteration.
; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Out:   ESI / EDI advanced past the pixels processed
; Clobb: EAX, EBX, ECX, EDX, flags (EBP is saved and restored)
;-----------------------------------------------------------------------
_ConvertX86p32_24RGB888:
        cmp     ecx, BYTE 32            ; up to 32 pixels: simple loop only
        ja      .head

.short_loop:
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .short_loop
        retn

.head:                                  ; one pixel at a time until EDI & 3 == 0
        mov     edx, edi
        and     edx, BYTE 11b
        jz      .unrolled_setup
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jmp     SHORT .head

.unrolled_setup:
        push    ebp
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups
        push    ecx                     ; remaining count, needed for the tail

.unrolled:
        mov     eax, [esi]              ; pixel 0: eax = [A][R][G][B]
        mov     ebx, [esi+4]            ; pixel 1: ebx = [a][r][g][b]
        shl     eax, 8                  ; eax = [R][G][B][.]
        mov     ecx, [esi+12]           ; pixel 3 (fourth dword): ecx = [a][r][g][b]
        shl     ebx, 8                  ; ebx = [r][g][b][.]
        mov     al, [esi+4]             ; eax = [R][G][B][b]   (b from pixel 1)
        ror     eax, 8                  ; eax = [b][R][G][B]   (done)
        mov     bh, [esi+8+1]           ; ebx = [r][g][G][.]   (G from pixel 2)
        mov     [edi], eax
        add     edi, BYTE 3*4
        shl     ecx, 8                  ; ecx = [r][g][b][.]
        mov     bl, [esi+8+0]           ; ebx = [r][g][G][B]   (B from pixel 2)
        rol     ebx, 16                 ; ebx = [G][B][r][g]   (done)
        mov     cl, [esi+8+2]           ; ecx = [r][g][b][R]   (R from pixel 2, done)
        mov     [edi+4-3*4], ebx        ; EDI was already advanced by 12
        add     esi, BYTE 4*4
        mov     [edi+8-3*4], ecx
        dec     ebp
        jnz     .unrolled

        pop     ecx
        and     ecx, BYTE 11b           ; 0..3 pixels left over
        jz      .done

.tail:
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .tail

.done:
        pop     ebp
        retn



;; 32 bit RGB 888 to 24 bit BGR 888

;-----------------------------------------------------------------------
; _ConvertX86p32_24BGR888
; For each 4-byte source pixel, writes source bytes 2, 1, 0 (in that
; order) to the destination and drops byte 3, so EDI advances 3 bytes
; per pixel.  Counts above 32 first copy single pixels until EDI is
; dword aligned, then emit 4 pixels as three dword stores per iteration.
; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Out:   ESI / EDI advanced past the pixels processed
; Clobb: EAX, EBX, ECX, EDX, flags (EBP is saved and restored)
;-----------------------------------------------------------------------
_ConvertX86p32_24BGR888:
        cmp     ecx, BYTE 32            ; up to 32 pixels: simple loop only
        ja      .head

.short_loop:
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .short_loop
        retn

.head:                                  ; one pixel at a time until EDI & 3 == 0
        mov     edx, edi
        and     edx, BYTE 11b
        jz      .unrolled_setup
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jmp     SHORT .head

.unrolled_setup:
        push    ebp
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups
        push    ecx                     ; remaining count, needed for the tail

.unrolled:
        mov     eax, [esi]              ; pixel 0: eax = [A][R][G][B]
        mov     ebx, [esi+4]            ; pixel 1: ebx = [a][r][g][b]
        bswap   eax                     ; eax = [B][G][R][A]
        bswap   ebx                     ; ebx = [b][g][r][a]
        mov     al, [esi+4+2]           ; eax = [B][G][R][r]   (r from pixel 1)
        mov     bh, [esi+4+4+1]         ; ebx = [b][g][G][a]   (G from pixel 2)
        ror     eax, 8                  ; eax = [r][B][G][R]   (done)
        mov     bl, [esi+4+4+2]         ; ebx = [b][g][G][R]   (R from pixel 2)
        ror     ebx, 16                 ; ebx = [G][R][b][g]   (done)
        mov     [edi], eax
        mov     [edi+4], ebx
        mov     ecx, [esi+12]           ; pixel 3 (fourth dword): ecx = [a][r][g][b]
        bswap   ecx                     ; ecx = [b][g][r][a]
        mov     cl, [esi+8]             ; ecx = [b][g][r][B]   (B from pixel 2, done)
        add     esi, BYTE 4*4
        mov     [edi+8], ecx
        add     edi, BYTE 3*4
        dec     ebp
        jnz     .unrolled

        pop     ecx
        and     ecx, BYTE 11b           ; 0..3 pixels left over
        jz      .done

.tail:
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .tail

.done:
        pop     ebp
        retn



;; 32 bit RGB 888 to 16 BIT RGB 565

;-----------------------------------------------------------------------
; _ConvertX86p32_16RGB565
; Packs each 4-byte source pixel (byte 0 = blue, byte 1 = green,
; byte 2 = red) into one 16-bit word:
;   bits 15..11 = red >> 3, bits 10..5 = green >> 2, bits 4..0 = blue >> 3
; Counts above 16 convert one leading pixel when EDI & 3 is non-zero,
; then two pixels per iteration with a single dword store, then one
; trailing pixel if the remaining count was odd.
; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Out:   ESI / EDI advanced past the pixels processed
; Clobb: EAX, EBX, ECX, EDX, flags
;-----------------------------------------------------------------------
_ConvertX86p32_16RGB565:
        cmp     ecx, BYTE 16            ; up to 16 pixels: simple loop only
        ja      .head

.short_loop:
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3                  ; ax = rrrrrggg ggg00000
        shr     bl, 3
        add     al, bl                  ; low 5 bits were zero, so add == or
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .short_loop
        retn

.head:
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .pairs_setup

        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.pairs_setup:
        push    ecx                     ; remaining count, needed for the tail
        shr     ecx, 1                  ; ecx = number of pixel pairs
        lea     esi, [esi+ecx*8]        ; point both arrays at the end of the
        lea     edi, [edi+ecx*4]        ; paired region ...
        neg     ecx                     ; ... and index upward from -pairs to 0
        jmp     SHORT .pair

.store:
        mov     [edi+ecx*4-4], eax      ; store the pair built last iteration
.pair:
        mov     eax, [esi+ecx*8]        ; first pixel of the pair
        shr     ah, 2                   ; green >> 2
        mov     ebx, [esi+ecx*8+4]      ; second pixel of the pair
        shr     eax, 3
        mov     edx, [esi+ecx*8+4]      ; second pixel again (for its red)
        shr     bh, 2
        mov     dl, [esi+ecx*8+2]       ; red of the first pixel
        shl     ebx, 13
        and     eax, 000007FFh          ; first pixel: green + blue fields
        shl     edx, 8
        and     ebx, 07FF0000h          ; second pixel: green + blue fields
        and     edx, 0F800F800h         ; both red fields
        add     eax, ebx
        add     eax, edx
        inc     ecx
        jnz     .store

        mov     [edi+ecx*4-4], eax      ; ecx == 0: store the final pair

        pop     ecx
        test    cl, 1                   ; odd count -> one pixel left
        jz      .done

        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.done:
        retn



;; 32 bit RGB 888 to 16 BIT BGR 565

;-----------------------------------------------------------------------
; _ConvertX86p32_16BGR565
; Packs each 4-byte source pixel (byte 0 = blue, byte 1 = green,
; byte 2 = red) into one 16-bit word:
;   bits 15..11 = blue >> 3, bits 10..5 = green >> 2, bits 4..0 = red >> 3
; Counts above 16 convert one leading pixel when EDI & 3 is non-zero,
; then two pixels per iteration with a single dword store, then one
; trailing pixel if the remaining count was odd.
; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Out:   ESI / EDI advanced past the pixels processed
; Clobb: EAX, EBX, ECX, EDX, flags
;-----------------------------------------------------------------------
_ConvertX86p32_16BGR565:
        cmp     ecx, BYTE 16            ; up to 16 pixels: simple loop only
        ja      .head

.short_loop:
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3                  ; ax = bbbbbggg ggg00000
        shr     bl, 3
        add     al, bl                  ; low 5 bits were zero, so add == or
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .short_loop
        retn

.head:
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .pairs_setup
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.pairs_setup:
        push    ecx                     ; remaining count, needed for the tail
        shr     ecx, 1                  ; ecx = number of pixel pairs
        lea     esi, [esi+ecx*8]        ; point both arrays at the end of the
        lea     edi, [edi+ecx*4]        ; paired region ...
        neg     ecx                     ; ... and index upward from -pairs to 0
        jmp     SHORT .pair

.store:
        mov     [edi+ecx*4-4], eax      ; store the pair built last iteration
.pair:
        mov     edx, [esi+ecx*8+4]      ; second pixel (for its red)
        mov     bh, [esi+ecx*8+4]       ; blue of the second pixel
        mov     ah, [esi+ecx*8]         ; blue of the first pixel
        shr     bh, 3
        mov     al, [esi+ecx*8+1]       ; green of the first pixel
        shr     ah, 3
        mov     bl, [esi+ecx*8+5]       ; green of the second pixel
        shl     eax, 3
        mov     dl, [esi+ecx*8+2]       ; red of the first pixel
        shl     ebx, 19
        and     eax, 0000FFE0h          ; first pixel: blue + green fields
        shr     edx, 3
        and     ebx, 0FFE00000h         ; second pixel: blue + green fields
        and     edx, 001F001Fh          ; both red fields
        add     eax, ebx
        add     eax, edx
        inc     ecx
        jnz     .store

        mov     [edi+ecx*4-4], eax      ; ecx == 0: store the final pair

        pop     ecx
        and     ecx, BYTE 1             ; odd count -> one pixel left
        jz      .done
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.done:
        retn



;; 32 BIT RGB TO 16 BIT RGB 555

;-----------------------------------------------------------------------
; _ConvertX86p32_16RGB555
; Packs each 4-byte source pixel (byte 0 = blue, byte 1 = green,
; byte 2 = red) into one 16-bit word:
;   bits 14..10 = red >> 3, bits 9..5 = green >> 3, bits 4..0 = blue >> 3
; Counts above 16 convert one leading pixel when EDI & 3 is non-zero,
; then two pixels per iteration with a single dword store, then one
; trailing pixel if the remaining count was odd.
; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Out:   ESI / EDI advanced past the pixels processed
; Clobb: EAX, EBX, ECX, EDX, flags
;-----------------------------------------------------------------------
_ConvertX86p32_16RGB555:
        cmp     ecx, BYTE 16            ; up to 16 pixels: simple loop only
        ja      .head

.short_loop:
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2                  ; ax = 0rrrrrgg ggg00000
        shr     bl, 3
        add     al, bl                  ; low 5 bits were zero, so add == or
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .short_loop
        retn

.head:
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .pairs_setup
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.pairs_setup:
        push    ecx                     ; remaining count, needed for the tail
        shr     ecx, 1                  ; ecx = number of pixel pairs
        lea     esi, [esi+ecx*8]        ; point both arrays at the end of the
        lea     edi, [edi+ecx*4]        ; paired region ...
        neg     ecx                     ; ... and index upward from -pairs to 0
        jmp     SHORT .pair

.store:
        mov     [edi+ecx*4-4], eax      ; store the pair built last iteration
.pair:
        mov     eax, [esi+ecx*8]        ; first pixel of the pair
        shr     ah, 3                   ; green >> 3
        mov     ebx, [esi+ecx*8+4]      ; second pixel of the pair
        shr     eax, 3
        mov     edx, [esi+ecx*8+4]      ; second pixel again (for its red)
        shr     bh, 3
        mov     dl, [esi+ecx*8+2]       ; red of the first pixel
        shl     ebx, 13
        and     eax, 000007FFh          ; first pixel: green + blue fields
        shl     edx, 7
        and     ebx, 07FF0000h          ; second pixel: green + blue fields
        and     edx, 07C007C00h         ; both red fields
        add     eax, ebx
        add     eax, edx
        inc     ecx
        jnz     .store

        mov     [edi+ecx*4-4], eax      ; ecx == 0: store the final pair

        pop     ecx
        and     ecx, BYTE 1             ; odd count -> one pixel left
        jz      .done
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.done:
        retn



;; 32 BIT RGB TO 16 BIT BGR 555

;-----------------------------------------------------------------------
; _ConvertX86p32_16BGR555
; Packs each 4-byte source pixel (byte 0 = blue, byte 1 = green,
; byte 2 = red) into one 16-bit word:
;   bits 14..10 = blue >> 3, bits 9..5 = green >> 3, bits 4..0 = red >> 3
; Counts above 16 convert one leading pixel when EDI & 3 is non-zero,
; then two pixels per iteration with a single dword store, then one
; trailing pixel if the remaining count was odd.
; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Out:   ESI / EDI advanced past the pixels processed
; Clobb: EAX, EBX, ECX, EDX, flags
;-----------------------------------------------------------------------
_ConvertX86p32_16BGR555:
        cmp     ecx, BYTE 16            ; up to 16 pixels: simple loop only
        ja      .head

.short_loop:
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2                  ; ax = 0bbbbbgg ggg00000
        shr     bl, 3
        add     al, bl                  ; low 5 bits were zero, so add == or
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .short_loop
        retn

.head:
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .pairs_setup
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.pairs_setup:
        push    ecx                     ; remaining count, needed for the tail
        shr     ecx, 1                  ; ecx = number of pixel pairs
        lea     esi, [esi+ecx*8]        ; point both arrays at the end of the
        lea     edi, [edi+ecx*4]        ; paired region ...
        neg     ecx                     ; ... and index upward from -pairs to 0
        jmp     SHORT .pair

.store:
        mov     [edi+ecx*4-4], eax      ; store the pair built last iteration
.pair:
        mov     edx, [esi+ecx*8+4]      ; second pixel (for its red)
        mov     bh, [esi+ecx*8+4]       ; blue of the second pixel
        mov     ah, [esi+ecx*8]         ; blue of the first pixel
        shr     bh, 3
        mov     al, [esi+ecx*8+1]       ; green of the first pixel
        shr     ah, 3
        mov     bl, [esi+ecx*8+5]       ; green of the second pixel
        shl     eax, 2
        mov     dl, [esi+ecx*8+2]       ; red of the first pixel
        shl     ebx, 18
        and     eax, 00007FE0h          ; first pixel: blue + green fields
        shr     edx, 3
        and     ebx, 07FE00000h         ; second pixel: blue + green fields
        and     edx, 001F001Fh          ; both red fields
        add     eax, ebx
        add     eax, edx
        inc     ecx
        jnz     .store

        mov     [edi+ecx*4-4], eax      ; ecx == 0: store the final pair

        pop     ecx
        and     ecx, BYTE 1             ; odd count -> one pixel left
        jz      .done
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.done:
        retn




;; FROM 32 BIT RGB to 8 BIT RGB (rrrgggbb)
;; This routine writes FOUR pixels at once (dword) and then, if they exist,
;; the trailing one to three pixels

;-----------------------------------------------------------------------
; _ConvertX86p32_8RGB332
; Packs each 4-byte source pixel into one byte:
;   bits 7..5 = top 3 bits of source byte 2
;   bits 4..2 = top 3 bits of source byte 1
;   bits 1..0 = top 2 bits of source byte 0
; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Out:   ESI / EDI advanced past the pixels processed
; Clobb: EAX, EBX, ECX, EDX, flags
;-----------------------------------------------------------------------
_ConvertX86p32_8RGB332:
        push    ecx                     ; full count, needed again for the tail

        shr     ecx, 2                  ; ecx = number of 4-pixel groups
        jz      NEAR .tail_setup        ; fewer than 4 pixels: tail only

.quad:
        mov     eax, [esi]              ; first pair of pixels
        mov     edx, [esi+4]

        shr     dl, 6
        mov     ebx, eax

        shr     al, 6
        and     ah, 0e0h

        shr     ebx, 16
        and     dh, 0e0h

        shr     ah, 3
        and     bl, 0e0h

        shr     dh, 3

        or      al, bl

        mov     ebx, edx
        or      al, ah                  ; al = packed pixel 0

        shr     ebx, 16
        or      dl, dh

        and     bl, 0e0h

        or      dl, bl                  ; dl = packed pixel 1

        mov     ah, dl                  ; ax = pixel 1 : pixel 0

        mov     ebx, [esi+8]            ; second pair of pixels

        mov     edx, ebx
        and     bh, 0e0h

        shr     bl, 6
        and     edx, 0e00000h

        shr     edx, 16

        shr     bh, 3

        ror     eax, 16                 ; park pixels 0/1 in the upper half
        or      bl, dl

        mov     edx, [esi+12]
        or      bl, bh                  ; bl = packed pixel 2

        mov     al, bl

        mov     ebx, edx
        and     dh, 0e0h

        shr     dl, 6
        and     ebx, 0e00000h

        shr     dh, 3
        mov     ah, dl

        shr     ebx, 16
        or      ah, dh

        or      ah, bl                  ; ah = packed pixel 3

        rol     eax, 16                 ; eax bytes (low to high) = pixels 0,1,2,3
        add     esi, BYTE 16

        mov     [edi], eax
        add     edi, BYTE 4

        dec     ecx
        jnz     NEAR .quad              ; explicit NEAR: body exceeds short range

.tail_setup:
        pop     ecx
        and     ecx, BYTE 3             ; mask out number of pixels left to draw

        jz      .done                   ; nothing to do anymore

.tail:
        mov     eax, [esi]              ; single pixel conversion for trailing pixels

        mov     ebx, eax

        shr     al, 6
        and     ah, 0e0h

        shr     ebx, 16

        shr     ah, 3
        and     bl, 0e0h

        or      al, ah
        or      al, bl

        mov     [edi], al

        inc     edi
        add     esi, BYTE 4

        dec     ecx
        jnz     .tail

.done:
        retn

; When assembling for ELF32, emit an empty .note.GNU-stack section flagged
; noexec; by GNU toolchain convention this marks the object as not needing
; an executable stack.
%ifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif