; 0,0 -> 1,1043   (diff hunk marker left behind by the changeset viewer; not source)
;
; x86 format converters for HERMES
; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
; This source code is licensed under the GNU LGPL
;
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions
;
; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
;
|
|
; Assemble as 32-bit code.
BITS 32

; Converter entry points exported by this file.
GLOBAL _ConvertX86p32_32BGR888
GLOBAL _ConvertX86p32_32RGBA888
GLOBAL _ConvertX86p32_32BGRA888
GLOBAL _ConvertX86p32_24RGB888
GLOBAL _ConvertX86p32_24BGR888
GLOBAL _ConvertX86p32_16RGB565
GLOBAL _ConvertX86p32_16BGR565
GLOBAL _ConvertX86p32_16RGB555
GLOBAL _ConvertX86p32_16BGR555
GLOBAL _ConvertX86p32_8RGB332

; Common exit point, defined in another file: every converter below
; finishes with "jmp _x86return" rather than "ret".
EXTERN _x86return

SECTION .text
|
|
;; _Convert_*
;; Parameters:
;;   ESI = source
;;   EDI = dest
;;   ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though))
;; Destroys:
;;   EAX, EBX, EDX (ESI, EDI and ECX are modified as well)
|
|
;-----------------------------------------------------------------------
; _ConvertX86p32_32BGR888
;
; Copies ECX 32-bit pixels from [ESI] to [EDI], exchanging the lowest
; and the third byte of every dword (bswap followed by ror 8).  With a
; source dword written as [A][R][G][B] (most significant byte first):
;       [A][R][G][B]  ->  [A][B][G][R]
;
; In:     ESI = source, EDI = dest, ECX = pixel count (must not be 0:
;         the loops are dec/jnz and would wrap)
; Out:    ESI/EDI point just past the converted pixels.
;         Leaves via "jmp _x86return", not "ret".
; Clobb:  EAX, EBX, ECX, EDX, flags.  EBP is saved and restored.
;-----------------------------------------------------------------------
_ConvertX86p32_32BGR888:
        cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
        ja      .unrolled

.short_loop:                            ; one pixel per iteration
        mov     edx, [esi]
        bswap   edx                     ; [A][R][G][B] -> [B][G][R][A]
        ror     edx, 8                  ;              -> [A][B][G][R]
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

.unrolled:
        push    ebp                     ; EBP serves as the block counter

        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel blocks

        push    ecx                     ; keep the full count for the tail

        ; Four pixels per iteration.  The original interleaving of the
        ; four independent bswap/ror chains is kept as-is.
.block_loop:
        mov     eax, [esi]
        mov     ebx, [esi+4]

        bswap   eax

        bswap   ebx

        ror     eax, 8
        mov     ecx, [esi+8]

        ror     ebx, 8
        mov     edx, [esi+12]

        bswap   ecx

        bswap   edx

        ror     ecx, 8
        mov     [edi+0], eax

        ror     edx, 8
        mov     [edi+4], ebx

        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, BYTE 16
        add     edi, BYTE 16

        dec     ebp
        jnz     .block_loop

        pop     ecx
        and     ecx, BYTE 11b           ; ecx = count mod 4 (leftover pixels)
        jz      .done

.tail_loop:                             ; same per-pixel step as .short_loop
        mov     edx, [esi]
        bswap   edx
        ror     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        jmp     _x86return
|
|
|
|
;-----------------------------------------------------------------------
; _ConvertX86p32_32RGBA888
;
; Copies ECX 32-bit pixels from [ESI] to [EDI], rotating every dword
; left by 8 bits.  With a source dword written as [A][R][G][B] (most
; significant byte first):
;       [A][R][G][B]  ->  [R][G][B][A]
;
; In:     ESI = source, EDI = dest, ECX = pixel count (must not be 0:
;         the loops are dec/jnz and would wrap)
; Out:    ESI/EDI point just past the converted pixels.
;         Leaves via "jmp _x86return", not "ret".
; Clobb:  EAX, EBX, ECX, EDX, flags.  EBP is saved and restored.
;-----------------------------------------------------------------------
_ConvertX86p32_32RGBA888:
        cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
        ja      .unrolled

.short_loop:                            ; one pixel per iteration
        mov     edx, [esi]
        rol     edx, 8                  ; top byte wraps round to the bottom
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

.unrolled:
        push    ebp                     ; EBP serves as the block counter

        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel blocks

        push    ecx                     ; keep the full count for the tail

        ; Four pixels per iteration; loads, rotates and stores of the
        ; four independent pixels stay interleaved as in the original.
.block_loop:
        mov     eax, [esi]
        mov     ebx, [esi+4]

        rol     eax, 8
        mov     ecx, [esi+8]

        rol     ebx, 8
        mov     edx, [esi+12]

        rol     ecx, 8
        mov     [edi+0], eax

        rol     edx, 8
        mov     [edi+4], ebx

        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, BYTE 16
        add     edi, BYTE 16

        dec     ebp
        jnz     .block_loop

        pop     ecx
        and     ecx, BYTE 11b           ; ecx = count mod 4 (leftover pixels)
        jz      .done

.tail_loop:                             ; same per-pixel step as .short_loop
        mov     edx, [esi]
        rol     edx, 8
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        jmp     _x86return
|
|
|
|
;-----------------------------------------------------------------------
; _ConvertX86p32_32BGRA888
;
; Copies ECX 32-bit pixels from [ESI] to [EDI], reversing the byte
; order of every dword (bswap).  With a source dword written as
; [A][R][G][B] (most significant byte first):
;       [A][R][G][B]  ->  [B][G][R][A]
;
; In:     ESI = source, EDI = dest, ECX = pixel count (must not be 0:
;         the loops are dec/jnz and would wrap)
; Out:    ESI/EDI point just past the converted pixels.
;         Leaves via "jmp _x86return", not "ret".
; Clobb:  EAX, EBX, ECX, EDX, flags.  EBP is saved and restored.
;-----------------------------------------------------------------------
_ConvertX86p32_32BGRA888:
        cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
        ja      .unrolled

.short_loop:                            ; one pixel per iteration
        mov     edx, [esi]
        bswap   edx
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

.unrolled:
        push    ebp                     ; EBP serves as the block counter

        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel blocks

        push    ecx                     ; keep the full count for the tail

.block_loop:                            ; four pixels per iteration
        mov     eax, [esi]
        mov     ebx, [esi+4]

        mov     ecx, [esi+8]
        mov     edx, [esi+12]

        bswap   eax

        bswap   ebx

        bswap   ecx

        bswap   edx

        mov     [edi+0], eax
        mov     [edi+4], ebx

        mov     [edi+8], ecx
        mov     [edi+12], edx

        add     esi, BYTE 16
        add     edi, BYTE 16

        dec     ebp
        jnz     .block_loop

        pop     ecx
        and     ecx, BYTE 11b           ; ecx = count mod 4 (leftover pixels)
        jz      .done

.tail_loop:                             ; same per-pixel step as .short_loop
        mov     edx, [esi]
        bswap   edx
        mov     [edi], edx
        add     esi, BYTE 4
        add     edi, BYTE 4
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        jmp     _x86return
|
|
|
|
;-----------------------------------------------------------------------
; _ConvertX86p32_24RGB888 -- 32 bit RGB 888 to 24 bit RGB 888
;
; Copies the three low bytes of every 4-byte source pixel to a packed
; 3-byte destination pixel, in the same order (source byte 3 is dropped).
;
; Notation in the comments: a source dword is [A][R][G][B], most
; significant byte first; digits 0..3 number the pixels of one block.
;
; In:     ESI = source, EDI = dest, ECX = pixel count (must not be 0:
;         the loops are dec/jnz and would wrap)
; Out:    ESI advanced 4 bytes and EDI 3 bytes per pixel.
;         Leaves via "jmp _x86return", not "ret".
; Clobb:  EAX, EBX, ECX, EDX, flags.  EBP is saved and restored.
;-----------------------------------------------------------------------
_ConvertX86p32_24RGB888:
        cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
        ja      .align_head

.short_loop:                            ; one pixel per iteration
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

        ; Copy single pixels until EDI is a multiple of 4, so that the
        ; dword stores of the unrolled loop are aligned.
.align_head:
        mov     edx, edi
        and     edx, BYTE 11b
        jz      .aligned
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jmp     SHORT .align_head

.aligned:
        push    ebp
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel blocks

        push    ecx                     ; keep the count for the tail

        ; Four source pixels (16 bytes) -> three destination dwords
        ; (12 bytes) per iteration.
.block_loop:
        mov     eax, [esi]              ; pixel 0      eax = [A0][R0][G0][B0]
        mov     ebx, [esi+4]            ; pixel 1      ebx = [A1][R1][G1][B1]

        shl     eax, 8                  ;              eax = [R0][G0][B0][ 0]
        mov     ecx, [esi+12]           ; pixel 3      ecx = [A3][R3][G3][B3]

        shl     ebx, 8                  ;              ebx = [R1][G1][B1][ 0]
        mov     al, [esi+4]             ;              eax = [R0][G0][B0][B1]

        ror     eax, 8                  ;              eax = [B1][R0][G0][B0] (dword 0 done)
        mov     bh, [esi+8+1]           ;              ebx = [R1][G1][G2][ 0]

        mov     [edi], eax
        add     edi, BYTE 3*4           ; EDI bumped early; stores below compensate

        shl     ecx, 8                  ;              ecx = [R3][G3][B3][ 0]
        mov     bl, [esi+8+0]           ;              ebx = [R1][G1][G2][B2]

        rol     ebx, 16                 ;              ebx = [G2][B2][R1][G1] (dword 1 done)
        mov     cl, [esi+8+2]           ;              ecx = [R3][G3][B3][R2] (dword 2 done)

        mov     [edi+4-3*4], ebx
        add     esi, BYTE 4*4

        mov     [edi+8-3*4], ecx
        dec     ebp

        jnz     .block_loop

        pop     ecx
        and     ecx, BYTE 11b           ; ecx = count mod 4 (leftover pixels)
        jz      .done

.tail_loop:                             ; same per-pixel step as .short_loop
        mov     al, [esi]
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        jmp     _x86return
|
|
|
|
;-----------------------------------------------------------------------
; _ConvertX86p32_24BGR888 -- 32 bit RGB 888 to 24 bit BGR 888
;
; Copies the three low bytes of every 4-byte source pixel to a packed
; 3-byte destination pixel in reversed order (source byte 2 first,
; source byte 0 last; source byte 3 is dropped).
;
; Notation in the comments: a source dword is [A][R][G][B], most
; significant byte first; digits 0..3 number the pixels of one block.
;
; In:     ESI = source, EDI = dest, ECX = pixel count (must not be 0:
;         the loops are dec/jnz and would wrap)
; Out:    ESI advanced 4 bytes and EDI 3 bytes per pixel.
;         Leaves via "jmp _x86return", not "ret".
; Clobb:  EAX, EBX, ECX, EDX, flags.  EBP is saved and restored.
;-----------------------------------------------------------------------
_ConvertX86p32_24BGR888:
        cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
        ja      .align_head

.short_loop:                            ; one pixel per iteration
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

        ; Copy single pixels until EDI is a multiple of 4, so that the
        ; dword stores of the unrolled loop are aligned.
.align_head:
        mov     edx, edi
        and     edx, BYTE 11b
        jz      .aligned
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jmp     SHORT .align_head

.aligned:
        push    ebp
        mov     ebp, ecx
        shr     ebp, 2                  ; ebp = number of 4-pixel blocks

        push    ecx                     ; keep the count for the tail

        ; Four source pixels (16 bytes) -> three destination dwords
        ; (12 bytes) per iteration.
.block_loop:
        mov     eax, [esi]              ; pixel 0      eax = [A0][R0][G0][B0]
        mov     ebx, [esi+4]            ; pixel 1      ebx = [A1][R1][G1][B1]

        bswap   eax                     ;              eax = [B0][G0][R0][A0]

        bswap   ebx                     ;              ebx = [B1][G1][R1][A1]

        mov     al, [esi+4+2]           ;              eax = [B0][G0][R0][R1]
        mov     bh, [esi+4+4+1]         ;              ebx = [B1][G1][G2][A1]

        ror     eax, 8                  ;              eax = [R1][B0][G0][R0] (dword 0 done)
        mov     bl, [esi+4+4+2]         ;              ebx = [B1][G1][G2][R2]

        ror     ebx, 16                 ;              ebx = [G2][R2][B1][G1] (dword 1 done)
        mov     [edi], eax

        mov     [edi+4], ebx
        mov     ecx, [esi+12]           ; pixel 3      ecx = [A3][R3][G3][B3]

        bswap   ecx                     ;              ecx = [B3][G3][R3][A3]

        mov     cl, [esi+8]             ;              ecx = [B3][G3][R3][B2] (dword 2 done)
        add     esi, BYTE 4*4

        mov     [edi+8], ecx
        add     edi, BYTE 3*4

        dec     ebp
        jnz     .block_loop

        pop     ecx
        and     ecx, BYTE 11b           ; ecx = count mod 4 (leftover pixels)
        jz      .done

.tail_loop:                             ; same per-pixel step as .short_loop
        mov     dl, [esi]
        mov     bl, [esi+1]
        mov     al, [esi+2]
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     esi, BYTE 4
        add     edi, BYTE 3
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        jmp     _x86return
|
|
|
|
;-----------------------------------------------------------------------
; _ConvertX86p32_16RGB565 -- 32 bit RGB 888 to 16 bit RGB 565
;
; Packs every 4-byte source pixel (byte 0 = blue, 1 = green, 2 = red)
; into one 16-bit word:
;       bits 15..11 = red   >> 3
;       bits 10..5  = green >> 2
;       bits  4..0  = blue  >> 3
;
; In:     ESI = source, EDI = dest, ECX = pixel count (must not be 0:
;         the short loop is dec/jnz and would wrap)
; Out:    ESI advanced 4 bytes and EDI 2 bytes per pixel.
;         Leaves via "jmp _x86return", not "ret".
; Clobb:  EAX, EBX, ECX, EDX, flags.
;-----------------------------------------------------------------------
_ConvertX86p32_16RGB565:
        cmp     ecx, BYTE 16            ; more than 16 pixels -> paired path
        ja      .long_run

.short_loop:                            ; one pixel per iteration
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3                   ; ah = red, 5 bits
        and     al, 11111100b           ; al = green, top 6 bits kept
        shl     eax, 3                  ; ax = rrrrrggg ggg00000
        shr     bl, 3                   ; bl = blue, 5 bits
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

        ; If EDI is not a multiple of 4, convert one pixel first so
        ; that the dword stores below start 2 bytes further on.
.long_run:
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .aligned

        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.aligned:
        push    ecx                     ; keep the count for the odd-pixel tail

        shr     ecx, 1                  ; ecx = number of pixel pairs

        ; Point ESI/EDI at the end of the paired region and run ECX
        ; from -pairs up to 0, so one inc/jnz drives the loop.
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*4]

        neg     ecx
        jmp     SHORT .pair_body

        ; The store of a finished pair is issued at the top of the
        ; following iteration (ECX has already been incremented, hence
        ; the -4), and once more after the loop for the last pair.
.pair_store:
        mov     [edi+ecx*4-4], eax
.pair_body:
        mov     eax, [esi+ecx*8]        ; eax = pixel 0 = [A0][R0][G0][B0]

        shr     ah, 2                   ; green0 reduced to 6 bits
        mov     ebx, [esi+ecx*8+4]      ; ebx = pixel 1 = [A1][R1][G1][B1]

        shr     eax, 3                  ; green0:blue0 now in bits 10..0
        mov     edx, [esi+ecx*8+4]      ; edx = pixel 1 again (for red1)

        shr     bh, 2                   ; green1 reduced to 6 bits
        mov     dl, [esi+ecx*8+2]       ; edx = [A1][R1][G1][R0]

        shl     ebx, 13                 ; green1:blue1 now in bits 26..16
        and     eax, 000007FFh          ; keep green0:blue0

        shl     edx, 8                  ; red1 -> bits 31..24, red0 -> bits 15..8
        and     ebx, 07FF0000h          ; keep green1:blue1

        and     edx, 0F800F800h         ; keep the top 5 bits of both reds
        add     eax, ebx

        add     eax, edx                ; eax = two packed pixels
        inc     ecx

        jnz     .pair_store

        mov     [edi+ecx*4-4], eax      ; store the last pair (ecx = 0 here)

        pop     ecx
        test    cl, 1                   ; odd count -> one pixel left over
        jz      .done

        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.done:
        jmp     _x86return
|
|
|
|
;-----------------------------------------------------------------------
; _ConvertX86p32_16BGR565 -- 32 bit RGB 888 to 16 bit BGR 565
;
; Packs every 4-byte source pixel (byte 0 = blue, 1 = green, 2 = red)
; into one 16-bit word:
;       bits 15..11 = blue  >> 3
;       bits 10..5  = green >> 2
;       bits  4..0  = red   >> 3
;
; In:     ESI = source, EDI = dest, ECX = pixel count (must not be 0:
;         the short loop is dec/jnz and would wrap)
; Out:    ESI advanced 4 bytes and EDI 2 bytes per pixel.
;         Leaves via "jmp _x86return", not "ret".
; Clobb:  EAX, EBX, ECX, EDX, flags.
;-----------------------------------------------------------------------
_ConvertX86p32_16BGR565:
        cmp     ecx, BYTE 16            ; more than 16 pixels -> paired path
        ja      .long_run

.short_loop:                            ; one pixel per iteration
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3                   ; ah = blue, 5 bits
        and     al, 11111100b           ; al = green, top 6 bits kept
        shl     eax, 3                  ; ax = bbbbbggg ggg00000
        shr     bl, 3                   ; bl = red, 5 bits
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

        ; If EDI is not a multiple of 4, convert one pixel first so
        ; that the dword stores below start 2 bytes further on.
.long_run:
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .aligned
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.aligned:
        push    ecx                     ; keep the count for the odd-pixel tail

        shr     ecx, 1                  ; ecx = number of pixel pairs

        ; Point ESI/EDI at the end of the paired region and run ECX
        ; from -pairs up to 0, so one inc/jnz drives the loop.
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*4]

        neg     ecx
        jmp     SHORT .pair_body

        ; The store of a finished pair is issued at the top of the
        ; following iteration (ECX has already been incremented, hence
        ; the -4), and once more after the loop for the last pair.
.pair_store:
        mov     [edi+ecx*4-4], eax
.pair_body:
        mov     edx, [esi+ecx*8+4]      ; edx = pixel 1 (supplies red1)

        mov     bh, [esi+ecx*8+4]       ; bh = blue1
        mov     ah, [esi+ecx*8]         ; ah = blue0

        shr     bh, 3                   ; blue1 reduced to 5 bits
        mov     al, [esi+ecx*8+1]       ; al = green0

        shr     ah, 3                   ; blue0 reduced to 5 bits
        mov     bl, [esi+ecx*8+5]       ; bl = green1

        shl     eax, 3                  ; blue0:green0 now in bits 15..3
        mov     dl, [esi+ecx*8+2]       ; dl = red0 (edx bits 23..16 hold red1)

        shl     ebx, 19                 ; blue1:green1 now in bits 31..19
        and     eax, 0000FFE0h          ; keep blue0 and 6 bits of green0

        shr     edx, 3                  ; red0 -> bits 4..0, red1 -> bits 20..16
        and     ebx, 0FFE00000h         ; keep blue1 and 6 bits of green1

        and     edx, 001F001Fh          ; keep 5 bits of both reds
        add     eax, ebx

        add     eax, edx                ; eax = two packed pixels
        inc     ecx

        jnz     .pair_store

        mov     [edi+ecx*4-4], eax      ; store the last pair (ecx = 0 here)

        pop     ecx
        and     ecx, BYTE 1             ; odd count -> one pixel left over
        jz      .done
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111100b
        shl     eax, 3
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.done:
        jmp     _x86return
|
|
|
|
;-----------------------------------------------------------------------
; _ConvertX86p32_16RGB555 -- 32 bit RGB to 16 bit RGB 555
;
; Packs every 4-byte source pixel (byte 0 = blue, 1 = green, 2 = red)
; into one 16-bit word:
;       bit  15     = 0
;       bits 14..10 = red   >> 3
;       bits  9..5  = green >> 3
;       bits  4..0  = blue  >> 3
;
; In:     ESI = source, EDI = dest, ECX = pixel count (must not be 0:
;         the short loop is dec/jnz and would wrap)
; Out:    ESI advanced 4 bytes and EDI 2 bytes per pixel.
;         Leaves via "jmp _x86return", not "ret".
; Clobb:  EAX, EBX, ECX, EDX, flags.
;-----------------------------------------------------------------------
_ConvertX86p32_16RGB555:
        cmp     ecx, BYTE 16            ; more than 16 pixels -> paired path
        ja      .long_run

.short_loop:                            ; one pixel per iteration
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3                   ; ah = red, 5 bits
        and     al, 11111000b           ; al = green, top 5 bits kept
        shl     eax, 2                  ; ax = 0rrrrrgg ggg00000
        shr     bl, 3                   ; bl = blue, 5 bits
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

        ; If EDI is not a multiple of 4, convert one pixel first so
        ; that the dword stores below start 2 bytes further on.
.long_run:
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .aligned
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.aligned:
        push    ecx                     ; keep the count for the odd-pixel tail

        shr     ecx, 1                  ; ecx = number of pixel pairs

        ; Point ESI/EDI at the end of the paired region and run ECX
        ; from -pairs up to 0, so one inc/jnz drives the loop.
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*4]

        neg     ecx
        jmp     SHORT .pair_body

        ; The store of a finished pair is issued at the top of the
        ; following iteration (ECX has already been incremented, hence
        ; the -4), and once more after the loop for the last pair.
.pair_store:
        mov     [edi+ecx*4-4], eax
.pair_body:
        mov     eax, [esi+ecx*8]        ; eax = pixel 0 = [A0][R0][G0][B0]

        shr     ah, 3                   ; green0 reduced to 5 bits
        mov     ebx, [esi+ecx*8+4]      ; ebx = pixel 1 = [A1][R1][G1][B1]

        shr     eax, 3                  ; green0:blue0 now in bits 9..0
        mov     edx, [esi+ecx*8+4]      ; edx = pixel 1 again (for red1)

        shr     bh, 3                   ; green1 reduced to 5 bits
        mov     dl, [esi+ecx*8+2]       ; edx = [A1][R1][G1][R0]

        shl     ebx, 13                 ; green1:blue1 now in bits 25..16
        and     eax, 000007FFh          ; keep green0:blue0 (bit 10 is already 0)

        shl     edx, 7                  ; red1 -> bits 30..23, red0 -> bits 14..7
        and     ebx, 07FF0000h          ; keep green1:blue1 (bit 26 is already 0)

        and     edx, 07C007C00h         ; keep the top 5 bits of both reds
        add     eax, ebx

        add     eax, edx                ; eax = two packed pixels
        inc     ecx

        jnz     .pair_store

        mov     [edi+ecx*4-4], eax      ; store the last pair (ecx = 0 here)

        pop     ecx
        and     ecx, BYTE 1             ; odd count -> one pixel left over
        jz      .done
        mov     bl, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     ah, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.done:
        jmp     _x86return
|
|
|
|
;-----------------------------------------------------------------------
; _ConvertX86p32_16BGR555 -- 32 bit RGB to 16 bit BGR 555
;
; Packs every 4-byte source pixel (byte 0 = blue, 1 = green, 2 = red)
; into one 16-bit word:
;       bit  15     = 0
;       bits 14..10 = blue  >> 3
;       bits  9..5  = green >> 3
;       bits  4..0  = red   >> 3
;
; In:     ESI = source, EDI = dest, ECX = pixel count (must not be 0:
;         the short loop is dec/jnz and would wrap)
; Out:    ESI advanced 4 bytes and EDI 2 bytes per pixel.
;         Leaves via "jmp _x86return", not "ret".
; Clobb:  EAX, EBX, ECX, EDX, flags.
;-----------------------------------------------------------------------
_ConvertX86p32_16BGR555:
        cmp     ecx, BYTE 16            ; more than 16 pixels -> paired path
        ja      .long_run

.short_loop:                            ; one pixel per iteration
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3                   ; ah = blue, 5 bits
        and     al, 11111000b           ; al = green, top 5 bits kept
        shl     eax, 2                  ; ax = 0bbbbbgg ggg00000
        shr     bl, 3                   ; bl = red, 5 bits
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx
        jnz     .short_loop
        jmp     _x86return

        ; If EDI is not a multiple of 4, convert one pixel first so
        ; that the dword stores below start 2 bytes further on.
.long_run:
        mov     ebx, edi
        and     ebx, BYTE 11b
        jz      .aligned
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2
        dec     ecx

.aligned:
        push    ecx                     ; keep the count for the odd-pixel tail

        shr     ecx, 1                  ; ecx = number of pixel pairs

        ; Point ESI/EDI at the end of the paired region and run ECX
        ; from -pairs up to 0, so one inc/jnz drives the loop.
        lea     esi, [esi+ecx*8]
        lea     edi, [edi+ecx*4]

        neg     ecx
        jmp     SHORT .pair_body

        ; The store of a finished pair is issued at the top of the
        ; following iteration (ECX has already been incremented, hence
        ; the -4), and once more after the loop for the last pair.
.pair_store:
        mov     [edi+ecx*4-4], eax
.pair_body:
        mov     edx, [esi+ecx*8+4]      ; edx = pixel 1 (supplies red1)

        mov     bh, [esi+ecx*8+4]       ; bh = blue1
        mov     ah, [esi+ecx*8]         ; ah = blue0

        shr     bh, 3                   ; blue1 reduced to 5 bits
        mov     al, [esi+ecx*8+1]       ; al = green0

        shr     ah, 3                   ; blue0 reduced to 5 bits
        mov     bl, [esi+ecx*8+5]       ; bl = green1

        shl     eax, 2                  ; blue0:green0 now in bits 14..2
        mov     dl, [esi+ecx*8+2]       ; dl = red0 (edx bits 23..16 hold red1)

        shl     ebx, 18                 ; blue1:green1 now in bits 30..18
        and     eax, 00007FE0h          ; keep blue0 and 5 bits of green0

        shr     edx, 3                  ; red0 -> bits 4..0, red1 -> bits 20..16
        and     ebx, 07FE00000h         ; keep blue1 and 5 bits of green1

        and     edx, 001F001Fh          ; keep 5 bits of both reds
        add     eax, ebx

        add     eax, edx                ; eax = two packed pixels
        inc     ecx

        jnz     .pair_store

        mov     [edi+ecx*4-4], eax      ; store the last pair (ecx = 0 here)

        pop     ecx
        and     ecx, BYTE 1             ; odd count -> one pixel left over
        jz      .done
        mov     ah, [esi+0]             ; blue
        mov     al, [esi+1]             ; green
        mov     bl, [esi+2]             ; red
        shr     ah, 3
        and     al, 11111000b
        shl     eax, 2
        shr     bl, 3
        add     al, bl
        mov     [edi+0], al
        mov     [edi+1], ah
        add     esi, BYTE 4
        add     edi, BYTE 2

.done:
        jmp     _x86return
|
|
|
|
|
;-----------------------------------------------------------------------
; _ConvertX86p32_8RGB332 -- from 32 bit RGB to 8 bit RGB (rrrgggbb)
;
; Packs every 4-byte source pixel (byte 0 = blue, 1 = green, 2 = red)
; into one byte:
;       bits 7..5 = red   & E0h
;       bits 4..2 = (green & E0h) >> 3
;       bits 1..0 = blue  >> 6
;
; Four pixels are converted and written with a single dword store per
; iteration; up to three trailing pixels are then written one byte at
; a time.
;
; In:     ESI = source, EDI = dest, ECX = pixel count
; Out:    ESI advanced 4 bytes and EDI 1 byte per pixel.
;         Leaves via "jmp _x86return", not "ret".
; Clobb:  EAX, EBX, ECX, EDX, flags.
;-----------------------------------------------------------------------
_ConvertX86p32_8RGB332:
        push    ecx                     ; keep the full count for the tail

        shr     ecx, 2                  ; ecx = number of 4-pixel groups
        jz      NEAR .tail_check        ; fewer than 4 pixels: tail only
                                        ; (NEAR: the original notes the target is
                                        ;  out of short-jump range)

.quad_loop:
        mov     eax, [esi]              ; pixel 0
        mov     edx, [esi+4]            ; pixel 1

        shr     dl, 6                   ; dl = blue1 >> 6
        mov     ebx, eax

        shr     al, 6                   ; al = blue0 >> 6
        and     ah, 0e0h                ; ah = green0 & E0h

        shr     ebx, 16                 ; bl = red0
        and     dh, 0e0h                ; dh = green1 & E0h

        shr     ah, 3                   ; green0 bits -> bits 4..2
        and     bl, 0e0h                ; bl = red0 & E0h

        shr     dh, 3                   ; green1 bits -> bits 4..2

        or      al, bl

        mov     ebx, edx
        or      al, ah                  ; al = packed pixel 0

        shr     ebx, 16                 ; bl = red1
        or      dl, dh

        and     bl, 0e0h

        or      dl, bl                  ; dl = packed pixel 1

        mov     ah, dl                  ; ax = [pix1][pix0]



        mov     ebx, [esi+8]            ; pixel 2

        mov     edx, ebx
        and     bh, 0e0h                ; bh = green2 & E0h

        shr     bl, 6                   ; bl = blue2 >> 6
        and     edx, 0e00000h           ; isolate the top 3 bits of red2

        shr     edx, 16                 ; dl = red2 & E0h

        shr     bh, 3                   ; green2 bits -> bits 4..2

        ror     eax, 16                 ; park pix1:pix0 in the upper half of eax
        or      bl, dl

        mov     edx, [esi+12]           ; pixel 3
        or      bl, bh                  ; bl = packed pixel 2

        mov     al, bl

        mov     ebx, edx
        and     dh, 0e0h                ; dh = green3 & E0h

        shr     dl, 6                   ; dl = blue3 >> 6
        and     ebx, 0e00000h           ; isolate the top 3 bits of red3

        shr     dh, 3                   ; green3 bits -> bits 4..2
        mov     ah, dl

        shr     ebx, 16                 ; bl = red3 & E0h
        or      ah, dh

        or      ah, bl                  ; ah = packed pixel 3

        rol     eax, 16                 ; eax = [pix3][pix2][pix1][pix0]
        add     esi, BYTE 16

        mov     [edi], eax
        add     edi, BYTE 4

        dec     ecx
        jnz     NEAR .quad_loop         ; NEAR: loop body exceeds short-jump range

.tail_check:
        pop     ecx
        and     ecx, BYTE 3             ; ecx = count mod 4 (leftover pixels)

        jz      .done                   ; nothing left to do

.tail_loop:
        mov     eax, [esi]              ; single pixel conversion for trailing pixels

        mov     ebx, eax

        shr     al, 6                   ; al = blue >> 6
        and     ah, 0e0h                ; ah = green & E0h

        shr     ebx, 16                 ; bl = red

        shr     ah, 3                   ; green bits -> bits 4..2
        and     bl, 0e0h                ; bl = red & E0h

        or      al, ah
        or      al, bl                  ; al = packed pixel

        mov     [edi], al

        inc     edi
        add     esi, BYTE 4

        dec     ecx
        jnz     .tail_loop

.done:
        jmp     _x86return
; Property changes:
; Added: svn:executable
; +*
; \ No newline at end of property