Rev 9172 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
8210 | maxcodehac | 1 | ; |
2 | ; x86 format converters for HERMES |
||
3 | ; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at) |
||
4 | ; This source code is licensed under the GNU LGPL |
||
5 | ; |
||
6 | ; Please refer to the file COPYING.LIB contained in the distribution for |
||
7 | ; licensing conditions |
||
8 | ; |
||
9 | ; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission |
||
10 | ; |
||
11 | |||
12 | BITS 32 |
||
13 | |||
9172 | turbocat | 14 | %include "common.inc" |
8210 | maxcodehac | 15 | |
9172 | turbocat | 16 | SDL_FUNC _ConvertX86p32_32BGR888 |
17 | SDL_FUNC _ConvertX86p32_32RGBA888 |
||
18 | SDL_FUNC _ConvertX86p32_32BGRA888 |
||
19 | SDL_FUNC _ConvertX86p32_24RGB888 |
||
20 | SDL_FUNC _ConvertX86p32_24BGR888 |
||
21 | SDL_FUNC _ConvertX86p32_16RGB565 |
||
22 | SDL_FUNC _ConvertX86p32_16BGR565 |
||
23 | SDL_FUNC _ConvertX86p32_16RGB555 |
||
24 | SDL_FUNC _ConvertX86p32_16BGR555 |
||
25 | SDL_FUNC _ConvertX86p32_8RGB332 |
||
26 | |||
9202 | turbocat | 27 | SDL_FUNC ConvertX86p32_32BGR888 |
28 | SDL_FUNC ConvertX86p32_32RGBA888 |
||
29 | SDL_FUNC ConvertX86p32_32BGRA888 |
||
30 | SDL_FUNC ConvertX86p32_24RGB888 |
||
31 | SDL_FUNC ConvertX86p32_24BGR888 |
||
32 | SDL_FUNC ConvertX86p32_16RGB565 |
||
33 | SDL_FUNC ConvertX86p32_16BGR565 |
||
34 | SDL_FUNC ConvertX86p32_16RGB555 |
||
35 | SDL_FUNC ConvertX86p32_16BGR555 |
||
36 | SDL_FUNC ConvertX86p32_8RGB332 |
||
37 | |||
38 | |||
8210 | maxcodehac | 39 | SECTION .text |
40 | |||
41 | ;; _Convert_* |
||
42 | ;; Parameters: |
||
43 | ;; ESI = source |
||
44 | ;; EDI = dest |
||
45 | ;; ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though)) |
||
46 | ;; Destroys: |
||
47 | ;; EAX, EBX, EDX |
||
48 | |||
49 | |||
;-----------------------------------------------------------------------
; ConvertX86p32_32BGR888 / _ConvertX86p32_32BGR888
; For every 32-bit pixel, exchanges byte 0 with byte 2 and leaves bytes
; 1 and 3 where they are (bswap followed by ror 8).
; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Out:   ESI/EDI advanced past the processed pixels, ECX = 0
; Clobb: may clobber EAX, EBX, EDX and flags; EBP is saved and restored
;-----------------------------------------------------------------------
ConvertX86p32_32BGR888:
_ConvertX86p32_32BGR888:
        cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
        ja      .bulk

.few:                                   ; one pixel per iteration
        mov     edx, [esi]
        bswap   edx
        ror     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .few
        retn

.bulk:
        push    ebp                     ; EBP becomes the group counter
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups
        push    ecx                     ; keep the full count for the tail

.quad:                                  ; four pixels per iteration
        mov     eax, [esi]
        mov     ebx, [esi+4]
        mov     ecx, [esi+8]
        mov     edx, [esi+12]

        bswap   eax
        bswap   ebx
        bswap   ecx
        bswap   edx

        ror     eax, 8
        ror     ebx, 8
        ror     ecx, 8
        ror     edx, 8

        mov     [edi], eax
        mov     [edi+4], ebx
        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, BYTE 16
        add     edi, BYTE 16
        dec     ebp
        jnz     .quad

        pop     ecx
        and     ecx, BYTE 3             ; 0..3 pixels left over
        jz      .done

.rest:                                  ; leftover pixels, one at a time
        mov     edx, [esi]
        bswap   edx
        ror     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .rest

.done:
        pop     ebp
        retn
||
8210 | maxcodehac | 128 | |
129 | |||
130 | |||
;-----------------------------------------------------------------------
; ConvertX86p32_32RGBA888 / _ConvertX86p32_32RGBA888
; Rotates every 32-bit pixel left by 8 bits: the three low bytes move up
; one position and the top byte wraps around into the low byte.
; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Out:   ESI/EDI advanced past the processed pixels, ECX = 0
; Clobb: may clobber EAX, EBX, EDX and flags; EBP is saved and restored
;-----------------------------------------------------------------------
ConvertX86p32_32RGBA888:
_ConvertX86p32_32RGBA888:
        cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
        ja      .bulk

.few:                                   ; one pixel per iteration
        mov     edx, [esi]
        rol     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .few
        retn

.bulk:
        push    ebp                     ; EBP becomes the group counter
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups
        push    ecx                     ; keep the full count for the tail

.quad:                                  ; four pixels per iteration
        mov     eax, [esi]
        mov     ebx, [esi+4]
        mov     ecx, [esi+8]
        mov     edx, [esi+12]

        rol     eax, 8
        rol     ebx, 8
        rol     ecx, 8
        rol     edx, 8

        mov     [edi], eax
        mov     [edi+4], ebx
        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, BYTE 16
        add     edi, BYTE 16
        dec     ebp
        jnz     .quad

        pop     ecx
        and     ecx, BYTE 3             ; 0..3 pixels left over
        jz      .done

.rest:                                  ; leftover pixels, one at a time
        mov     edx, [esi]
        rol     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .rest

.done:
        pop     ebp
        retn
||
8210 | maxcodehac | 199 | |
200 | |||
201 | |||
;-----------------------------------------------------------------------
; ConvertX86p32_32BGRA888 / _ConvertX86p32_32BGRA888
; Reverses the byte order of every 32-bit pixel (bswap).
; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Out:   ESI/EDI advanced past the processed pixels, ECX = 0
; Clobb: may clobber EAX, EBX, EDX and flags; EBP is saved and restored
;-----------------------------------------------------------------------
ConvertX86p32_32BGRA888:
_ConvertX86p32_32BGRA888:
        cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
        ja      .bulk

.few:                                   ; one pixel per iteration
        mov     edx, [esi]
        bswap   edx
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .few
        retn

.bulk:
        push    ebp                     ; EBP becomes the group counter
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups
        push    ecx                     ; keep the full count for the tail

.quad:                                  ; four pixels per iteration
        mov     eax, [esi]
        mov     ebx, [esi+4]
        mov     ecx, [esi+8]
        mov     edx, [esi+12]

        bswap   eax
        bswap   ebx
        bswap   ecx
        bswap   edx

        mov     [edi], eax
        mov     [edi+4], ebx
        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, BYTE 16
        add     edi, BYTE 16
        dec     ebp
        jnz     .quad

        pop     ecx
        and     ecx, BYTE 3             ; 0..3 pixels left over
        jz      .done

.rest:                                  ; leftover pixels, one at a time
        mov     edx, [esi]
        bswap   edx
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .rest

.done:
        pop     ebp
        retn
||
8210 | maxcodehac | 272 | |
273 | |||
274 | |||
275 | |||
276 | ;; 32 bit RGB 888 to 24 BIT RGB 888 |
||
277 | |||
;-----------------------------------------------------------------------
; ConvertX86p32_24RGB888 / _ConvertX86p32_24RGB888
; Packs 32-bit pixels into 24-bit pixels by copying source bytes 0,1,2
; in the same order and dropping source byte 3.
; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Out:   ESI advanced 4 bytes/pixel, EDI advanced 3 bytes/pixel, ECX = 0
; Clobb: may clobber EAX, EBX, EDX and flags; EBP is saved and restored
; Lane comments below show registers as [byte3][byte2][byte1][byte0];
; pixel n is written [An][Rn][Gn][Bn].
;-----------------------------------------------------------------------
ConvertX86p32_24RGB888:
_ConvertX86p32_24RGB888:
        cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
        ja      .align_dest

.few:                                   ; one pixel per iteration
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .few
        retn

.align_dest:                            ; single pixels until EDI % 4 == 0
        mov     edx, edi
        and     edx, BYTE 3
        jz      .bulk
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jmp     SHORT .align_dest

.bulk:
        push    ebp                     ; EBP becomes the group counter
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups
        push    ecx                     ; keep the count for the tail

.quad:                                  ; 4 source pixels -> 3 dest dwords
        mov     eax, [esi]              ; eax = [A0][R0][G0][B0]
        mov     ebx, [esi+4]            ; ebx = [A1][R1][G1][B1]
        mov     ecx, [esi+12]           ; ecx = [A3][R3][G3][B3] (4th pixel)

        shl     eax, 8                  ; eax = [R0][G0][B0][..]
        shl     ebx, 8                  ; ebx = [R1][G1][B1][..]
        mov     al, [esi+4]             ; eax = [R0][G0][B0][B1]
        ror     eax, 8                  ; eax = [B1][R0][G0][B0]  (dword 0)
        mov     bh, [esi+9]             ; ebx = [R1][G1][G2][..]
        mov     [edi], eax

        shl     ecx, 8                  ; ecx = [R3][G3][B3][..]
        mov     bl, [esi+8]             ; ebx = [R1][G1][G2][B2]
        rol     ebx, 16                 ; ebx = [G2][B2][R1][G1]  (dword 1)
        mov     cl, [esi+10]            ; ecx = [R3][G3][B3][R2]  (dword 2)
        mov     [edi+4], ebx
        mov     [edi+8], ecx

        add     esi, BYTE 16
        add     edi, BYTE 12
        dec     ebp
        jnz     .quad

        pop     ecx
        and     ecx, BYTE 3             ; 0..3 pixels left over
        jz      .done

.rest:                                  ; leftover pixels, one at a time
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .rest

.done:
        pop     ebp
        retn
||
8210 | maxcodehac | 370 | |
371 | |||
372 | |||
373 | |||
374 | ;; 32 bit RGB 888 to 24 bit BGR 888 |
||
375 | |||
;-----------------------------------------------------------------------
; ConvertX86p32_24BGR888 / _ConvertX86p32_24BGR888
; Packs 32-bit pixels into 24-bit pixels with the three colour bytes in
; reversed order: dest bytes 0,1,2 = source bytes 2,1,0. Source byte 3 is
; dropped.
; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Out:   ESI advanced 4 bytes/pixel, EDI advanced 3 bytes/pixel, ECX = 0
; Clobb: may clobber EAX, EBX, EDX and flags; EBP is saved and restored
; Lane comments below show registers as [byte3][byte2][byte1][byte0];
; pixel n is written [An][Rn][Gn][Bn].
;-----------------------------------------------------------------------
ConvertX86p32_24BGR888:
_ConvertX86p32_24BGR888:
        cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
        ja      .align_dest

.few:                                   ; one pixel per iteration
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .few
        retn

.align_dest:                            ; single pixels until EDI % 4 == 0
        mov     edx, edi
        and     edx, BYTE 3
        jz      .bulk
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jmp     SHORT .align_dest

.bulk:
        push    ebp                     ; EBP becomes the group counter
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel groups
        push    ecx                     ; keep the count for the tail

.quad:                                  ; 4 source pixels -> 3 dest dwords
        mov     eax, [esi]              ; eax = [A0][R0][G0][B0]
        mov     ebx, [esi+4]            ; ebx = [A1][R1][G1][B1]
        bswap   eax                     ; eax = [B0][G0][R0][A0]
        bswap   ebx                     ; ebx = [B1][G1][R1][A1]

        mov     al, [esi+6]             ; eax = [B0][G0][R0][R1]
        mov     bh, [esi+9]             ; ebx = [B1][G1][G2][A1]
        ror     eax, 8                  ; eax = [R1][B0][G0][R0]  (dword 0)
        mov     bl, [esi+10]            ; ebx = [B1][G1][G2][R2]
        ror     ebx, 16                 ; ebx = [G2][R2][B1][G1]  (dword 1)
        mov     [edi], eax
        mov     [edi+4], ebx

        mov     ecx, [esi+12]           ; ecx = [A3][R3][G3][B3] (4th pixel)
        bswap   ecx                     ; ecx = [B3][G3][R3][A3]
        mov     cl, [esi+8]             ; ecx = [B3][G3][R3][B2]  (dword 2)
        mov     [edi+8], ecx

        add     esi, BYTE 16
        add     edi, BYTE 12
        dec     ebp
        jnz     .quad

        pop     ecx
        and     ecx, BYTE 3             ; 0..3 pixels left over
        jz      .done

.rest:                                  ; leftover pixels, one at a time
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .rest

.done:
        pop     ebp
        retn
8210 | maxcodehac | 471 | |
472 | |||
473 | |||
474 | |||
475 | ;; 32 bit RGB 888 to 16 BIT RGB 565 |
||
;-----------------------------------------------------------------------
; ConvertX86p32_16RGB565 / _ConvertX86p32_16RGB565
; Packs 32-bit pixels into 16-bit pixels:
;   bits 15..11 = top 5 bits of source byte 2 (red)
;   bits 10..5  = top 6 bits of source byte 1 (green)
;   bits  4..0  = top 5 bits of source byte 0 (blue)
; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Out:   ESI advanced 4 bytes/pixel, EDI advanced 2 bytes/pixel
; Clobb: EAX, EBX, flags; EDX on the paired path; ECX is not preserved
;-----------------------------------------------------------------------
ConvertX86p32_16RGB565:
_ConvertX86p32_16RGB565:
        cmp     ecx, BYTE 16            ; more than 16 pixels -> paired path
        ja      .align_dest

.few:                                   ; one pixel per iteration
        mov     bl, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3                   ; ah = 000RRRRR
        and     al, 11111100b           ; al = GGGGGG00
        shl     eax, 3                  ; ax = RRRRRGGG GGG00000
        shr     bl, 3                   ; bl = 000BBBBB
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .few
        retn

.align_dest:                            ; EDI not 4-byte aligned: do one
        mov     ebx, edi                ; pixel first (aligns EDI when it
        and     ebx, BYTE 3             ; was 2-byte aligned)
        jz      .bulk

        mov     bl, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.bulk:
        push    ecx                     ; keep the count for the odd tail
        shr     ecx, 1                  ; ecx = number of pixel pairs
        lea     esi, [esi+ecx*8]        ; point both arrays at their end and
        lea     edi, [edi+ecx*4]        ; index them with a negative counter
        neg     ecx
        jmp     SHORT .pair

.store:                                 ; store the pair finished last pass
        mov     [edi+ecx*4-4], eax
.pair:
        mov     eax, [esi+ecx*8]        ; first pixel of the pair
        shr     ah, 2                   ; green -> 6 bits
        shr     eax, 3
        and     eax, 000007FFh          ; bits 10..0  = G0:B0

        mov     ebx, [esi+ecx*8+4]      ; second pixel of the pair
        shr     bh, 2                   ; green -> 6 bits
        shl     ebx, 13
        and     ebx, 07FF0000h          ; bits 26..16 = G1:B1

        mov     edx, [esi+ecx*8+4]      ; red of second pixel in byte 2
        mov     dl, [esi+ecx*8+2]       ; red of first pixel in byte 0
        shl     edx, 8
        and     edx, 0F800F800h         ; bits 31..27 = R1, bits 15..11 = R0

        add     eax, ebx
        add     eax, edx
        inc     ecx
        jnz     .store

        mov     [edi+ecx*4-4], eax      ; last pair (ecx = 0 here)

        pop     ecx
        test    cl, 1                   ; odd pixel left over?
        jz      .done

        mov     bl, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.done:
        retn
8210 | maxcodehac | 586 | |
587 | |||
588 | |||
589 | |||
590 | ;; 32 bit RGB 888 to 16 BIT BGR 565 |
||
591 | |||
;-----------------------------------------------------------------------
; ConvertX86p32_16BGR565 / _ConvertX86p32_16BGR565
; Packs 32-bit pixels into 16-bit pixels:
;   bits 15..11 = top 5 bits of source byte 0 (blue)
;   bits 10..5  = top 6 bits of source byte 1 (green)
;   bits  4..0  = top 5 bits of source byte 2 (red)
; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Out:   ESI advanced 4 bytes/pixel, EDI advanced 2 bytes/pixel
; Clobb: EAX, EBX, flags; EDX on the paired path; ECX is not preserved
;-----------------------------------------------------------------------
ConvertX86p32_16BGR565:
_ConvertX86p32_16BGR565:
        cmp     ecx, BYTE 16            ; more than 16 pixels -> paired path
        ja      .align_dest

.few:                                   ; one pixel per iteration
        mov     ah, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3                   ; ah = 000BBBBB
        and     al, 11111100b           ; al = GGGGGG00
        shl     eax, 3                  ; ax = BBBBBGGG GGG00000
        shr     bl, 3                   ; bl = 000RRRRR
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .few
        retn

.align_dest:                            ; EDI not 4-byte aligned: do one
        mov     ebx, edi                ; pixel first (aligns EDI when it
        and     ebx, BYTE 3             ; was 2-byte aligned)
        jz      .bulk

        mov     ah, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.bulk:
        push    ecx                     ; keep the count for the odd tail
        shr     ecx, 1                  ; ecx = number of pixel pairs
        lea     esi, [esi+ecx*8]        ; point both arrays at their end and
        lea     edi, [edi+ecx*4]        ; index them with a negative counter
        neg     ecx
        jmp     SHORT .pair

.store:                                 ; store the pair finished last pass
        mov     [edi+ecx*4-4], eax
.pair:
        mov     ah, [esi+ecx*8]         ; blue of first pixel
        shr     ah, 3
        mov     al, [esi+ecx*8+1]       ; green of first pixel
        shl     eax, 3
        and     eax, 0000FFE0h          ; bits 15..5  = B0:G0

        mov     bh, [esi+ecx*8+4]       ; blue of second pixel
        shr     bh, 3
        mov     bl, [esi+ecx*8+5]       ; green of second pixel
        shl     ebx, 19
        and     ebx, 0FFE00000h         ; bits 31..21 = B1:G1

        mov     edx, [esi+ecx*8+4]      ; red of second pixel in byte 2
        mov     dl, [esi+ecx*8+2]       ; red of first pixel in byte 0
        shr     edx, 3
        and     edx, 001F001Fh          ; bits 20..16 = R1, bits 4..0 = R0

        add     eax, ebx
        add     eax, edx
        inc     ecx
        jnz     .store

        mov     [edi+ecx*4-4], eax      ; last pair (ecx = 0 here)

        pop     ecx
        and     ecx, BYTE 1             ; odd pixel left over?
        jz      .done

        mov     ah, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.done:
        retn
||
8210 | maxcodehac | 701 | |
702 | |||
703 | |||
704 | |||
705 | ;; 32 BIT RGB TO 16 BIT RGB 555 |
||
;-----------------------------------------------------------------------
; ConvertX86p32_16RGB555 / _ConvertX86p32_16RGB555
; Packs 32-bit pixels into 16-bit pixels (bit 15 is written as 0):
;   bits 14..10 = top 5 bits of source byte 2 (red)
;   bits  9..5  = top 5 bits of source byte 1 (green)
;   bits  4..0  = top 5 bits of source byte 0 (blue)
; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Out:   ESI advanced 4 bytes/pixel, EDI advanced 2 bytes/pixel
; Clobb: EAX, EBX, flags; EDX on the paired path; ECX is not preserved
;-----------------------------------------------------------------------
ConvertX86p32_16RGB555:
_ConvertX86p32_16RGB555:
        cmp     ecx, BYTE 16            ; more than 16 pixels -> paired path
        ja      .align_dest

.few:                                   ; one pixel per iteration
        mov     bl, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3                   ; ah = 000RRRRR
        and     al, 11111000b           ; al = GGGGG000
        shl     eax, 2                  ; ax = 0RRRRRGG GGG00000
        shr     bl, 3                   ; bl = 000BBBBB
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .few
        retn

.align_dest:                            ; EDI not 4-byte aligned: do one
        mov     ebx, edi                ; pixel first (aligns EDI when it
        and     ebx, BYTE 3             ; was 2-byte aligned)
        jz      .bulk

        mov     bl, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.bulk:
        push    ecx                     ; keep the count for the odd tail
        shr     ecx, 1                  ; ecx = number of pixel pairs
        lea     esi, [esi+ecx*8]        ; point both arrays at their end and
        lea     edi, [edi+ecx*4]        ; index them with a negative counter
        neg     ecx
        jmp     SHORT .pair

.store:                                 ; store the pair finished last pass
        mov     [edi+ecx*4-4], eax
.pair:
        mov     eax, [esi+ecx*8]        ; first pixel of the pair
        shr     ah, 3                   ; green -> 5 bits
        shr     eax, 3
        and     eax, 000007FFh          ; bits 9..0   = G0:B0 (bit 10 is 0)

        mov     ebx, [esi+ecx*8+4]      ; second pixel of the pair
        shr     bh, 3                   ; green -> 5 bits
        shl     ebx, 13
        and     ebx, 07FF0000h          ; bits 25..16 = G1:B1 (bit 26 is 0)

        mov     edx, [esi+ecx*8+4]      ; red of second pixel in byte 2
        mov     dl, [esi+ecx*8+2]       ; red of first pixel in byte 0
        shl     edx, 7
        and     edx, 07C007C00h         ; bits 30..26 = R1, bits 14..10 = R0

        add     eax, ebx
        add     eax, edx
        inc     ecx
        jnz     .store

        mov     [edi+ecx*4-4], eax      ; last pair (ecx = 0 here)

        pop     ecx
        and     ecx, BYTE 1             ; odd pixel left over?
        jz      .done

        mov     bl, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.done:
        retn
||
8210 | maxcodehac | 812 | |
813 | |||
814 | |||
815 | |||
816 | ;; 32 BIT RGB TO 16 BIT BGR 555 |
||
;-----------------------------------------------------------------------
; ConvertX86p32_16BGR555 / _ConvertX86p32_16BGR555
; Packs 32-bit pixels into 16-bit pixels (bit 15 is written as 0):
;   bits 14..10 = top 5 bits of source byte 0 (blue)
;   bits  9..5  = top 5 bits of source byte 1 (green)
;   bits  4..0  = top 5 bits of source byte 2 (red)
; In:    ESI = source, EDI = dest, ECX = pixel count (must not be 0)
; Out:   ESI advanced 4 bytes/pixel, EDI advanced 2 bytes/pixel
; Clobb: EAX, EBX, flags; EDX on the paired path; ECX is not preserved
;-----------------------------------------------------------------------
ConvertX86p32_16BGR555:
_ConvertX86p32_16BGR555:
        cmp     ecx, BYTE 16            ; more than 16 pixels -> paired path
        ja      .align_dest

.few:                                   ; one pixel per iteration
        mov     ah, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3                   ; ah = 000BBBBB
        and     al, 11111000b           ; al = GGGGG000
        shl     eax, 2                  ; ax = 0BBBBBGG GGG00000
        shr     bl, 3                   ; bl = 000RRRRR
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .few
        retn

.align_dest:                            ; EDI not 4-byte aligned: do one
        mov     ebx, edi                ; pixel first (aligns EDI when it
        and     ebx, BYTE 3             ; was 2-byte aligned)
        jz      .bulk

        mov     ah, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.bulk:
        push    ecx                     ; keep the count for the odd tail
        shr     ecx, 1                  ; ecx = number of pixel pairs
        lea     esi, [esi+ecx*8]        ; point both arrays at their end and
        lea     edi, [edi+ecx*4]        ; index them with a negative counter
        neg     ecx
        jmp     SHORT .pair

.store:                                 ; store the pair finished last pass
        mov     [edi+ecx*4-4], eax
.pair:
        mov     ah, [esi+ecx*8]         ; blue of first pixel
        shr     ah, 3
        mov     al, [esi+ecx*8+1]       ; green of first pixel
        shl     eax, 2
        and     eax, 00007FE0h          ; bits 14..5  = B0:G0

        mov     bh, [esi+ecx*8+4]       ; blue of second pixel
        shr     bh, 3
        mov     bl, [esi+ecx*8+5]       ; green of second pixel
        shl     ebx, 18
        and     ebx, 07FE00000h         ; bits 30..21 = B1:G1

        mov     edx, [esi+ecx*8+4]      ; red of second pixel in byte 2
        mov     dl, [esi+ecx*8+2]       ; red of first pixel in byte 0
        shr     edx, 3
        and     edx, 001F001Fh          ; bits 20..16 = R1, bits 4..0 = R0

        add     eax, ebx
        add     eax, edx
        inc     ecx
        jnz     .store

        mov     [edi+ecx*4-4], eax      ; last pair (ecx = 0 here)

        pop     ecx
        and     ecx, BYTE 1             ; odd pixel left over?
        jz      .done

        mov     ah, [esi]               ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.done:
        retn
||
8210 | maxcodehac | 927 | |
928 | |||
929 | |||
930 | |||
931 | |||
932 | ;; FROM 32 BIT RGB to 8 BIT RGB (rrrgggbb) |
||
933 | ;; This routine writes FOUR pixels at once (as one dword) and then, if any |
||
934 | ;; remain, up to three trailing pixels one byte at a time |
||
9202 | turbocat | 935 | |
;-----------------------------------------------------------------------
; ConvertX86p32_8RGB332 / _ConvertX86p32_8RGB332
; Packs 32-bit pixels into 8-bit pixels:
;   bits 7..5 = top 3 bits of source byte 2 (red)
;   bits 4..2 = top 3 bits of source byte 1 (green)
;   bits 1..0 = top 2 bits of source byte 0 (blue)
; Four pixels are converted per pass and stored as one dword; the 0..3
; leftover pixels are then stored one byte at a time.
; In:    ESI = source, EDI = dest, ECX = pixel count
; Out:   ESI advanced 4 bytes/pixel, EDI advanced 1 byte/pixel, ECX = 0
; Clobb: may clobber EAX, EBX, EDX and flags
;-----------------------------------------------------------------------
ConvertX86p32_8RGB332:
_ConvertX86p32_8RGB332:
        push    ecx                     ; keep the full count for the tail
        shr     ecx, 2                  ; ecx = number of 4-pixel groups
        jz      NEAR .tail              ; fewer than four pixels in total

.quad:
        ; pixel 0 -> al
        mov     eax, [esi]
        mov     ebx, eax
        shr     al, 6                   ; blue  -> bits 1..0
        and     ah, 0e0h
        shr     ah, 3                   ; green -> bits 4..2
        shr     ebx, 16
        and     bl, 0e0h                ; red   -> bits 7..5
        or      al, bl
        or      al, ah

        ; pixel 1 -> ah
        mov     edx, [esi+4]
        shr     dl, 6
        and     dh, 0e0h
        shr     dh, 3
        mov     ebx, edx
        shr     ebx, 16
        and     bl, 0e0h
        or      dl, dh
        or      dl, bl
        mov     ah, dl

        ror     eax, 16                 ; park pixels 0/1 in the high word

        ; pixel 2 -> al
        mov     ebx, [esi+8]
        mov     edx, ebx
        and     bh, 0e0h
        shr     bh, 3
        shr     bl, 6
        and     edx, 0e00000h
        shr     edx, 16
        or      bl, dl
        or      bl, bh
        mov     al, bl

        ; pixel 3 -> ah
        mov     edx, [esi+12]
        mov     ebx, edx
        and     dh, 0e0h
        shr     dh, 3
        shr     dl, 6
        and     ebx, 0e00000h
        shr     ebx, 16
        mov     ah, dl
        or      ah, dh
        or      ah, bl

        rol     eax, 16                 ; eax = [p3][p2][p1][p0]
        mov     [edi], eax

        add     esi, BYTE 16
        add     edi, BYTE 4
        dec     ecx
        jnz     NEAR .quad

.tail:
        pop     ecx
        and     ecx, BYTE 3             ; 0..3 pixels left over
        jz      .done

.rest:                                  ; leftover pixels, one at a time
        mov     eax, [esi]
        mov     ebx, eax
        shr     al, 6                   ; blue  -> bits 1..0
        and     ah, 0e0h
        shr     ah, 3                   ; green -> bits 4..2
        shr     ebx, 16
        and     bl, 0e0h                ; red   -> bits 7..5
        or      al, ah
        or      al, bl
        mov     [edi], al

        inc     edi
        add     esi, BYTE 4
        dec     ecx
        jnz     .rest

.done:
        retn
1058 | |||
1059 | %ifidn __OUTPUT_FORMAT__,elf32 |
||
1060 | section .note.GNU-stack noalloc noexec nowrite progbits |
||
1061 | %endif |