/contrib/sdk/sources/SDL-1.2.2_newlib/src/Tupfile.lua |
---|
27,7 → 27,7 |
for i,v in ipairs(FOLDERS) do |
compile_gcc(v .. "*.c", v .. "%B.o") |
tup.append_table(OBJS, |
tup.foreach_rule(v .. "*.asm", "nasm -i hermes -f coff -o %o %f", v .. "%B.o") |
tup.foreach_rule(v .. "*.asm", "nasm -f coff -o %o %f", v .. "%B.o") |
) |
end |
tup.rule(OBJS, "kos32-ar rcs %o %f", {"../../../lib/libSDLn.a", "../../../lib/<libSDLn>"}) |
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/common.inc |
---|
File deleted |
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/HeadMMX.h |
---|
6,6 → 6,7 |
Please refer to the file COPYING.LIB contained in the distribution for |
licensing conditions |
*/ |
#ifndef __HERMES_HEAD_MMX__ |
#define __HERMES_HEAD_MMX__ |
44,25 → 45,27 |
/* Fix the underscore business with ELF compilers */ |
#if (defined(__ELF__) && defined(__GNUC__)) || defined(__SUNPRO_C) |
#if defined(__ELF__) && defined(__GNUC__) |
#ifdef __cplusplus |
extern "C" { |
#endif |
extern void _ConvertMMX(HermesConverterInterface *); |
extern void _ConvertMMXpII32_24RGB888(); |
extern void _ConvertMMXpII32_16RGB565(); |
extern void _ConvertMMXpII32_16BGR565(); |
extern void _ConvertMMXpII32_16RGB555(); |
extern void _ConvertMMXpII32_16BGR555(); |
void ConvertMMX(HermesConverterInterface *) __attribute__ ((alias ("_ConvertMMX"))); |
#if 0 |
void ClearMMX_32(HermesClearInterface *) __attribute__ ((alias ("_ClearMMX_32"))); |
void ClearMMX_24(HermesClearInterface *) __attribute__ ((alias ("_ClearMMX_24"))); |
void ClearMMX_16(HermesClearInterface *) __attribute__ ((alias ("_ClearMMX_16"))); |
void ClearMMX_8(HermesClearInterface *) __attribute__ ((alias ("_ClearMMX_8"))); |
#define ConvertMMX _ConvertMMX |
#define ConvertMMXpII32_24RGB888 _ConvertMMXpII32_24RGB888 |
#define ConvertMMXpII32_16RGB565 _ConvertMMXpII32_16RGB565 |
#define ConvertMMXpII32_16BGR565 _ConvertMMXpII32_16BGR565 |
#define ConvertMMXpII32_16RGB555 _ConvertMMXpII32_16RGB555 |
#define ConvertMMXpII32_16BGR555 _ConvertMMXpII32_16BGR555 |
void ConvertMMXp32_16RGB555() __attribute__ ((alias ("_ConvertMMXp32_16RGB555"))); |
#endif |
void ConvertMMXpII32_24RGB888() __attribute__ ((alias ("_ConvertMMXpII32_24RGB888"))); |
void ConvertMMXpII32_16RGB565() __attribute__ ((alias ("_ConvertMMXpII32_16RGB565"))); |
void ConvertMMXpII32_16BGR565() __attribute__ ((alias ("_ConvertMMXpII32_16BGR565"))); |
void ConvertMMXpII32_16RGB555() __attribute__ ((alias ("_ConvertMMXpII32_16RGB555"))); |
void ConvertMMXpII32_16BGR555() __attribute__ ((alias ("_ConvertMMXpII32_16BGR555"))); |
#ifdef __cplusplus |
} |
#endif |
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/HeadX86.h |
---|
60,10 → 60,10 |
void ConvertX86pI8_24(); |
void ConvertX86pI8_16(); |
extern int ConvertX86p16_32RGB888_LUT_X86[512]; |
extern int ConvertX86p16_32BGR888_LUT_X86[512]; |
extern int ConvertX86p16_32RGBA888_LUT_X86[512]; |
extern int ConvertX86p16_32BGRA888_LUT_X86[512]; |
extern int32 ConvertX86p16_32RGB888_LUT_X86[512]; |
extern int32 ConvertX86p16_32BGR888_LUT_X86[512]; |
extern int32 ConvertX86p16_32RGBA888_LUT_X86[512]; |
extern int32 ConvertX86p16_32BGRA888_LUT_X86[512]; |
#ifdef __cplusplus |
} |
74,53 → 74,62 |
/* Now fix up the ELF underscore problem */ |
#if (defined(__ELF__) && defined(__GNUC__)) || defined(__SUNPRO_C) |
#if defined(__ELF__) && defined(__GNUC__) |
#ifdef __cplusplus |
extern "C" { |
#endif |
extern int _Hermes_X86_CPU(); |
int Hermes_X86_CPU() __attribute__ ((alias ("_Hermes_X86_CPU"))); |
extern void _ConvertX86(HermesConverterInterface *); |
void ConvertX86(HermesConverterInterface *) __attribute__ ((alias ("_ConvertX86"))); |
extern void _ConvertX86p32_32BGR888(); |
extern void _ConvertX86p32_32RGBA888(); |
extern void _ConvertX86p32_32BGRA888(); |
extern void _ConvertX86p32_24RGB888(); |
extern void _ConvertX86p32_24BGR888(); |
extern void _ConvertX86p32_16RGB565(); |
extern void _ConvertX86p32_16BGR565(); |
extern void _ConvertX86p32_16RGB555(); |
extern void _ConvertX86p32_16BGR555(); |
extern void _ConvertX86p32_8RGB332(); |
#if 0 |
void ClearX86_32(HermesClearInterface *) __attribute__ ((alias ("_ClearX86_32"))); |
void ClearX86_24(HermesClearInterface *) __attribute__ ((alias ("_ClearX86_24"))); |
void ClearX86_16(HermesClearInterface *) __attribute__ ((alias ("_ClearX86_16"))); |
void ClearX86_8(HermesClearInterface *) __attribute__ ((alias ("_ClearX86_8"))); |
#endif |
extern void _ConvertX86p16_16BGR565(); |
extern void _ConvertX86p16_16RGB555(); |
extern void _ConvertX86p16_16BGR555(); |
extern void _ConvertX86p16_8RGB332(); |
void ConvertX86p32_32BGR888() __attribute__ ((alias ("_ConvertX86p32_32BGR888"))); |
void ConvertX86p32_32RGBA888() __attribute__ ((alias ("_ConvertX86p32_32RGBA888"))); |
void ConvertX86p32_32BGRA888() __attribute__ ((alias ("_ConvertX86p32_32BGRA888"))); |
void ConvertX86p32_24RGB888() __attribute__ ((alias ("_ConvertX86p32_24RGB888"))); |
void ConvertX86p32_24BGR888() __attribute__ ((alias ("_ConvertX86p32_24BGR888"))); |
void ConvertX86p32_16RGB565() __attribute__ ((alias ("_ConvertX86p32_16RGB565"))); |
void ConvertX86p32_16BGR565() __attribute__ ((alias ("_ConvertX86p32_16BGR565"))); |
void ConvertX86p32_16RGB555() __attribute__ ((alias ("_ConvertX86p32_16RGB555"))); |
void ConvertX86p32_16BGR555() __attribute__ ((alias ("_ConvertX86p32_16BGR555"))); |
void ConvertX86p32_8RGB332() __attribute__ ((alias ("_ConvertX86p32_8RGB332"))); |
#if 0 |
void ConvertX86p16_32RGB888() __attribute__ ((alias ("_ConvertX86p16_32RGB888"))); |
void ConvertX86p16_32BGR888() __attribute__ ((alias ("_ConvertX86p16_32BGR888"))); |
void ConvertX86p16_32RGBA888() __attribute__ ((alias ("_ConvertX86p16_32RGBA888"))); |
void ConvertX86p16_32BGRA888() __attribute__ ((alias ("_ConvertX86p16_32BGRA888"))); |
void ConvertX86p16_24RGB888() __attribute__ ((alias ("_ConvertX86p16_24RGB888"))); |
void ConvertX86p16_24BGR888() __attribute__ ((alias ("_ConvertX86p16_24BGR888"))); |
#endif |
void ConvertX86p16_16BGR565() __attribute__ ((alias ("_ConvertX86p16_16BGR565"))); |
void ConvertX86p16_16RGB555() __attribute__ ((alias ("_ConvertX86p16_16RGB555"))); |
void ConvertX86p16_16BGR555() __attribute__ ((alias ("_ConvertX86p16_16BGR555"))); |
void ConvertX86p16_8RGB332() __attribute__ ((alias ("_ConvertX86p16_8RGB332"))); |
#define Hermes_X86_CPU _Hermes_X86_CPU |
#if 0 |
void CopyX86p_4byte() __attribute__ ((alias ("_CopyX86p_4byte"))); |
void CopyX86p_3byte() __attribute__ ((alias ("_CopyX86p_3byte"))); |
void CopyX86p_2byte() __attribute__ ((alias ("_CopyX86p_2byte"))); |
void CopyX86p_1byte() __attribute__ ((alias ("_CopyX86p_1byte"))); |
#define ConvertX86 _ConvertX86 |
void ConvertX86pI8_32() __attribute__ ((alias ("_ConvertX86pI8_32"))); |
void ConvertX86pI8_24() __attribute__ ((alias ("_ConvertX86pI8_24"))); |
void ConvertX86pI8_16() __attribute__ ((alias ("_ConvertX86pI8_16"))); |
#define ConvertX86p32_32BGR888 _ConvertX86p32_32BGR888 |
#define ConvertX86p32_32RGBA888 _ConvertX86p32_32RGBA888 |
#define ConvertX86p32_32BGRA888 _ConvertX86p32_32BGRA888 |
#define ConvertX86p32_24RGB888 _ConvertX86p32_24RGB888 |
#define ConvertX86p32_24BGR888 _ConvertX86p32_24BGR888 |
#define ConvertX86p32_16RGB565 _ConvertX86p32_16RGB565 |
#define ConvertX86p32_16BGR565 _ConvertX86p32_16BGR565 |
#define ConvertX86p32_16RGB555 _ConvertX86p32_16RGB555 |
#define ConvertX86p32_16BGR555 _ConvertX86p32_16BGR555 |
#define ConvertX86p32_8RGB332 _ConvertX86p32_8RGB332 |
extern int32 ConvertX86p16_32RGB888_LUT_X86[512] __attribute__ ((alias ("_ConvertX86p16_32RGB888_LUT_X86"))); |
extern int32 ConvertX86p16_32BGR888_LUT_X86[512] __attribute__ ((alias ("_ConvertX86p16_32BGR888_LUT_X86"))); |
extern int32 ConvertX86p16_32RGBA888_LUT_X86[512] __attribute__ ((alias ("_ConvertX86p16_32RGBA888_LUT_X86"))); |
extern int32 ConvertX86p16_32BGRA888_LUT_X86[512] __attribute__ ((alias ("_ConvertX86p16_32BGRA888_LUT_X86"))); |
#endif |
#define ConvertX86p16_16BGR565 _ConvertX86p16_16BGR565 |
#define ConvertX86p16_16RGB555 _ConvertX86p16_16RGB555 |
#define ConvertX86p16_16BGR555 _ConvertX86p16_16BGR555 |
#define ConvertX86p16_8RGB332 _ConvertX86p16_8RGB332 |
#ifdef __cplusplus |
} |
#endif |
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/mmx_main.asm |
---|
9,9 → 9,9 |
BITS 32 |
%include "common.inc" |
GLOBAL _ConvertMMX |
GLOBAL _mmxreturn |
SDL_FUNC _ConvertMMX |
SECTION .text |
50,8 → 50,9 |
y_loop: |
mov ecx,[ebp+4] |
call [ebp+32] |
jmp [ebp+32] |
_mmxreturn: |
add esi,[ebp+12] |
add edi,[ebp+28] |
69,6 → 70,5 |
ret |
%ifidn __OUTPUT_FORMAT__,elf32 |
section .note.GNU-stack noalloc noexec nowrite progbits |
%endif |
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/mmxp2_32.asm |
---|
20,51 → 20,40 |
BITS 32 |
%include "common.inc" |
SDL_FUNC _ConvertMMXpII32_24RGB888 |
SDL_FUNC _ConvertMMXpII32_16RGB565 |
SDL_FUNC _ConvertMMXpII32_16BGR565 |
SDL_FUNC _ConvertMMXpII32_16RGB555 |
SDL_FUNC _ConvertMMXpII32_16BGR555 |
GLOBAL _ConvertMMXpII32_24RGB888 |
GLOBAL _ConvertMMXpII32_16RGB565 |
GLOBAL _ConvertMMXpII32_16BGR565 |
GLOBAL _ConvertMMXpII32_16RGB555 |
GLOBAL _ConvertMMXpII32_16BGR555 |
;; Macros for conversion routines |
EXTERN _mmxreturn |
%macro _push_immq_mask 1 |
push dword %1 |
push dword %1 |
%endmacro |
SECTION .data |
%macro load_immq 2 |
_push_immq_mask %2 |
movq %1, [esp] |
%endmacro |
ALIGN 8 |
%macro pand_immq 2 |
_push_immq_mask %2 |
pand %1, [esp] |
%endmacro |
;; Constants for conversion routines |
%define CLEANUP_IMMQ_LOADS(num) \ |
add esp, byte 8 * num |
mmx32_rgb888_mask dd 00ffffffh,00ffffffh |
%define mmx32_rgb888_mask 00ffffffh |
%define mmx32_rgb565_b 000000f8h |
%define mmx32_rgb565_g 0000fc00h |
%define mmx32_rgb565_r 00f80000h |
mmx32_rgb565_b dd 000000f8h, 000000f8h |
mmx32_rgb565_g dd 0000fc00h, 0000fc00h |
mmx32_rgb565_r dd 00f80000h, 00f80000h |
%define mmx32_rgb555_rb 00f800f8h |
%define mmx32_rgb555_g 0000f800h |
%define mmx32_rgb555_mul 20000008h |
%define mmx32_bgr555_mul 00082000h |
mmx32_rgb555_rb dd 00f800f8h,00f800f8h |
mmx32_rgb555_g dd 0000f800h,0000f800h |
mmx32_rgb555_mul dd 20000008h,20000008h |
mmx32_bgr555_mul dd 00082000h,00082000h |
SECTION .text |
_ConvertMMXpII32_24RGB888: |
; set up mm6 as the mask, mm7 as zero |
load_immq mm6, mmx32_rgb888_mask |
CLEANUP_IMMQ_LOADS(1) |
movq mm6, qword [mmx32_rgb888_mask] |
pxor mm7, mm7 |
mov edx, ecx ; save ecx |
119,7 → 108,7 |
dec ecx |
jnz .L3 |
.L4: |
retn |
jmp _mmxreturn |
126,10 → 115,9 |
_ConvertMMXpII32_16RGB565: |
; set up masks |
load_immq mm5, mmx32_rgb565_b |
load_immq mm6, mmx32_rgb565_g |
load_immq mm7, mmx32_rgb565_r |
CLEANUP_IMMQ_LOADS(3) |
movq mm5, [mmx32_rgb565_b] |
movq mm6, [mmx32_rgb565_g] |
movq mm7, [mmx32_rgb565_r] |
mov edx, ecx |
shr ecx, 2 |
188,15 → 176,14 |
jnz .L3 |
.L4: |
retn |
jmp _mmxreturn |
_ConvertMMXpII32_16BGR565: |
load_immq mm5, mmx32_rgb565_r |
load_immq mm6, mmx32_rgb565_g |
load_immq mm7, mmx32_rgb565_b |
CLEANUP_IMMQ_LOADS(3) |
movq mm5, [mmx32_rgb565_r] |
movq mm6, [mmx32_rgb565_g] |
movq mm7, [mmx32_rgb565_b] |
mov edx, ecx |
shr ecx, 2 |
258,7 → 245,7 |
jnz .L3 |
.L4: |
retn |
jmp _mmxreturn |
_ConvertMMXpII32_16BGR555: |
266,7 → 253,7 |
; except it uses a different multiplier for the pmaddwd |
; instruction. cool huh. |
load_immq mm7, mmx32_bgr555_mul |
movq mm7, qword [mmx32_bgr555_mul] |
jmp _convert_bgr555_cheat |
; This is the same as the Intel version.. they obviously went to |
276,16 → 263,15 |
; (I think) a more accurate name.. |
_ConvertMMXpII32_16RGB555: |
load_immq mm7, mmx32_rgb555_mul |
movq mm7,qword [mmx32_rgb555_mul] |
_convert_bgr555_cheat: |
load_immq mm6, mmx32_rgb555_g |
CLEANUP_IMMQ_LOADS(2) |
movq mm6,qword [mmx32_rgb555_g] |
mov edx,ecx ; Save ecx |
and ecx,DWORD 0fffffff8h ; clear lower three bits |
and ecx,BYTE 0fffffff8h ; clear lower three bits |
jnz .L_OK |
jmp near .L2 |
jmp .L2 |
.L_OK: |
294,14 → 280,12 |
movq mm0,[esi] |
movq mm3,mm2 |
pand_immq mm3, mmx32_rgb555_rb |
pand mm3,qword [mmx32_rgb555_rb] |
movq mm1,mm0 |
pand_immq mm1, mmx32_rgb555_rb |
pand mm1,qword [mmx32_rgb555_rb] |
pmaddwd mm3,mm7 |
CLEANUP_IMMQ_LOADS(2) |
pmaddwd mm1,mm7 |
pand mm2,mm6 |
318,13 → 302,13 |
movq mm0,mm4 |
psrld mm1,6 |
pand_immq mm0, mmx32_rgb555_rb |
pand mm0,qword [mmx32_rgb555_rb] |
packssdw mm1,mm3 |
movq mm3,mm5 |
pmaddwd mm0,mm7 |
pand_immq mm3, mmx32_rgb555_rb |
pand mm3,qword [mmx32_rgb555_rb] |
pand mm4,mm6 |
movq [edi],mm1 |
345,14 → 329,12 |
movq mm3,mm2 |
movq mm1,mm0 |
pand_immq mm3, mmx32_rgb555_rb |
pand mm3,qword [mmx32_rgb555_rb] |
packssdw mm5,mm4 |
pand_immq mm1, mmx32_rgb555_rb |
pand mm1,qword [mmx32_rgb555_rb] |
pand mm2,mm6 |
CLEANUP_IMMQ_LOADS(4) |
movq [edi+8],mm5 |
pmaddwd mm3,mm7 |
398,8 → 380,7 |
jnz .L3 |
.L4: |
retn |
jmp _mmxreturn |
%ifidn __OUTPUT_FORMAT__,elf32 |
section .note.GNU-stack noalloc noexec nowrite progbits |
%endif |
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/x86_main.asm |
---|
11,14 → 11,17 |
BITS 32 |
%include "common.inc" |
GLOBAL _ConvertX86 |
GLOBAL _x86return |
SDL_FUNC _ConvertX86 |
SDL_FUNC _Hermes_X86_CPU |
GLOBAL _Hermes_X86_CPU |
SECTION .data |
cpu_flags dd 0 |
SECTION .text |
;; _ConvertX86: |
56,8 → 59,9 |
y_loop: |
mov ecx,[ebp+4] |
call [ebp+32] |
jmp [ebp+32] |
_x86return: |
add esi,[ebp+12] |
add edi,[ebp+28] |
73,6 → 77,9 |
ret |
;; Hermes_X86_CPU returns the CPUID flags in eax |
_Hermes_X86_CPU: |
pushfd |
pop eax |
115,9 → 122,5 |
mov eax,[cpu_flags] |
.L1: |
xor eax,eax |
ret |
%ifidn __OUTPUT_FORMAT__,elf32 |
section .note.GNU-stack noalloc noexec nowrite progbits |
%endif |
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/x86p_16.asm |
---|
10,19 → 10,28 |
; Used with permission. |
; |
BITS 32 |
%include "common.inc" |
GLOBAL _ConvertX86p16_32RGB888 |
GLOBAL _ConvertX86p16_32BGR888 |
GLOBAL _ConvertX86p16_32RGBA888 |
GLOBAL _ConvertX86p16_32BGRA888 |
GLOBAL _ConvertX86p16_24RGB888 |
GLOBAL _ConvertX86p16_24BGR888 |
GLOBAL _ConvertX86p16_16BGR565 |
GLOBAL _ConvertX86p16_16RGB555 |
GLOBAL _ConvertX86p16_16BGR555 |
GLOBAL _ConvertX86p16_8RGB332 |
SDL_FUNC _ConvertX86p16_16BGR565 |
SDL_FUNC _ConvertX86p16_16RGB555 |
SDL_FUNC _ConvertX86p16_16BGR555 |
SDL_FUNC _ConvertX86p16_8RGB332 |
EXTERN _ConvertX86 |
EXTERN _x86return |
SECTION .text |
_ConvertX86p16_16BGR565: |
; check short |
30,7 → 39,7 |
ja .L3 |
.L1: ; short loop |
.L1 ; short loop |
mov al,[esi] |
mov ah,[esi+1] |
mov ebx,eax |
47,10 → 56,10 |
add edi,BYTE 2 |
dec ecx |
jnz .L1 |
.L2: |
retn |
.L2 |
jmp _x86return |
.L3: ; head |
.L3 ; head |
mov eax,edi |
and eax,BYTE 11b |
jz .L4 |
70,7 → 79,7 |
add edi,BYTE 2 |
dec ecx |
.L4: ; save count |
.L4 ; save count |
push ecx |
; unroll twice |
84,8 → 93,8 |
neg ecx |
jmp SHORT .L6 |
.L5: mov [edi+ecx*4-4],eax |
.L6: mov eax,[esi+ecx*4] |
.L5 mov [edi+ecx*4-4],eax |
.L6 mov eax,[esi+ecx*4] |
mov ebx,[esi+ecx*4] |
and eax,07E007E0h |
125,8 → 134,8 |
add esi,BYTE 2 |
add edi,BYTE 2 |
.L7: |
retn |
.L7 |
jmp _x86return |
140,7 → 149,7 |
ja .L3 |
.L1: ; short loop |
.L1 ; short loop |
mov al,[esi] |
mov ah,[esi+1] |
mov ebx,eax |
154,10 → 163,10 |
add edi,BYTE 2 |
dec ecx |
jnz .L1 |
.L2: |
retn |
.L2 |
jmp _x86return |
.L3: ; head |
.L3 ; head |
mov eax,edi |
and eax,BYTE 11b |
jz .L4 |
174,7 → 183,7 |
add edi,BYTE 2 |
dec ecx |
.L4: ; save ebp |
.L4 ; save ebp |
push ebp |
; save count |
191,7 → 200,7 |
xor ebp,ebp |
sub ebp,ecx |
.L5: mov eax,[esi+ebp*8] ; agi? |
.L5 mov eax,[esi+ebp*8] ; agi? |
mov ecx,[esi+ebp*8+4] |
mov ebx,eax |
217,7 → 226,7 |
; tail |
pop ecx |
.L6: and ecx,BYTE 11b |
.L6 and ecx,BYTE 11b |
jz .L7 |
mov al,[esi] |
mov ah,[esi+1] |
233,8 → 242,8 |
dec ecx |
jmp SHORT .L6 |
.L7: pop ebp |
retn |
.L7 pop ebp |
jmp _x86return |
248,7 → 257,7 |
ja .L3 |
.L1: ; short loop |
.L1 ; short loop |
mov al,[esi] |
mov ah,[esi+1] |
mov ebx,eax |
267,10 → 276,10 |
add edi,BYTE 2 |
dec ecx |
jnz .L1 |
.L2: |
retn |
.L2 |
jmp _x86return |
.L3: ; head |
.L3 ; head |
mov eax,edi |
and eax,BYTE 11b |
jz .L4 |
292,7 → 301,7 |
add edi,BYTE 2 |
dec ecx |
.L4: ; save count |
.L4 ; save count |
push ecx |
; unroll twice |
306,8 → 315,8 |
neg ecx |
jmp SHORT .L6 |
.L5: mov [edi+ecx*4-4],eax |
.L6: mov eax,[esi+ecx*4] |
.L5 mov [edi+ecx*4-4],eax |
.L6 mov eax,[esi+ecx*4] |
shr eax,1 |
mov ebx,[esi+ecx*4] |
351,8 → 360,8 |
add esi,BYTE 2 |
add edi,BYTE 2 |
.L7: |
retn |
.L7 |
jmp _x86return |
366,7 → 375,7 |
ja .L3 |
.L1: ; short loop |
.L1 ; short loop |
mov al,[esi+0] |
mov ah,[esi+1] |
mov ebx,eax |
384,10 → 393,10 |
inc edi |
dec ecx |
jnz .L1 |
.L2: |
retn |
.L2 |
jmp _x86return |
.L3: mov eax,edi |
.L3 mov eax,edi |
and eax,BYTE 11b |
jz .L4 |
mov al,[esi+0] |
408,7 → 417,7 |
dec ecx |
jmp SHORT .L3 |
.L4: ; save ebp |
.L4 ; save ebp |
push ebp |
; save count |
422,7 → 431,7 |
mov bl,[esi+1] |
mov dh,[esi+2] |
.L5: shl edx,16 |
.L5 shl edx,16 |
mov bh,[esi+3] |
shl ebx,16 |
463,7 → 472,7 |
and ecx,BYTE 11b |
jz .L7 |
.L6: ; tail |
.L6 ; tail |
mov al,[esi+0] |
mov ah,[esi+1] |
mov ebx,eax |
482,9 → 491,6 |
dec ecx |
jnz .L6 |
.L7: pop ebp |
retn |
.L7 pop ebp |
jmp _x86return |
%ifidn __OUTPUT_FORMAT__,elf32 |
section .note.GNU-stack noalloc noexec nowrite progbits |
%endif |
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/x86p_32.asm |
---|
9,23 → 9,25 |
; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission |
; |
BITS 32 |
%include "common.inc" |
GLOBAL _ConvertX86p32_32BGR888 |
GLOBAL _ConvertX86p32_32RGBA888 |
GLOBAL _ConvertX86p32_32BGRA888 |
GLOBAL _ConvertX86p32_24RGB888 |
GLOBAL _ConvertX86p32_24BGR888 |
GLOBAL _ConvertX86p32_16RGB565 |
GLOBAL _ConvertX86p32_16BGR565 |
GLOBAL _ConvertX86p32_16RGB555 |
GLOBAL _ConvertX86p32_16BGR555 |
GLOBAL _ConvertX86p32_8RGB332 |
SDL_FUNC _ConvertX86p32_32BGR888 |
SDL_FUNC _ConvertX86p32_32RGBA888 |
SDL_FUNC _ConvertX86p32_32BGRA888 |
SDL_FUNC _ConvertX86p32_24RGB888 |
SDL_FUNC _ConvertX86p32_24BGR888 |
SDL_FUNC _ConvertX86p32_16RGB565 |
SDL_FUNC _ConvertX86p32_16BGR565 |
SDL_FUNC _ConvertX86p32_16RGB555 |
SDL_FUNC _ConvertX86p32_16BGR555 |
SDL_FUNC _ConvertX86p32_8RGB332 |
EXTERN _x86return |
SECTION .text |
;; _Convert_* |
;; Parameters: |
;; ESI = source |
41,7 → 43,7 |
cmp ecx,BYTE 32 |
ja .L3 |
.L1: ; short loop |
.L1 ; short loop |
mov edx,[esi] |
bswap edx |
ror edx,8 |
50,10 → 52,10 |
add edi,BYTE 4 |
dec ecx |
jnz .L1 |
.L2: |
retn |
.L2 |
jmp _x86return |
.L3: ; save ebp |
.L3 ; save ebp |
push ebp |
; unroll four times |
63,7 → 65,7 |
; save count |
push ecx |
.L4: mov eax,[esi] |
.L4 mov eax,[esi] |
mov ebx,[esi+4] |
bswap eax |
100,7 → 102,7 |
and ecx,BYTE 11b |
jz .L6 |
.L5: ; tail loop |
.L5 ; tail loop |
mov edx,[esi] |
bswap edx |
ror edx,8 |
110,8 → 112,8 |
dec ecx |
jnz .L5 |
.L6: pop ebp |
retn |
.L6 pop ebp |
jmp _x86return |
122,7 → 124,7 |
cmp ecx,BYTE 32 |
ja .L3 |
.L1: ; short loop |
.L1 ; short loop |
mov edx,[esi] |
rol edx,8 |
mov [edi],edx |
130,10 → 132,10 |
add edi,BYTE 4 |
dec ecx |
jnz .L1 |
.L2: |
retn |
.L2 |
jmp _x86return |
.L3: ; save ebp |
.L3 ; save ebp |
push ebp |
; unroll four times |
143,7 → 145,7 |
; save count |
push ecx |
.L4: mov eax,[esi] |
.L4 mov eax,[esi] |
mov ebx,[esi+4] |
rol eax,8 |
172,7 → 174,7 |
and ecx,BYTE 11b |
jz .L6 |
.L5: ; tail loop |
.L5 ; tail loop |
mov edx,[esi] |
rol edx,8 |
mov [edi],edx |
181,8 → 183,8 |
dec ecx |
jnz .L5 |
.L6: pop ebp |
retn |
.L6 pop ebp |
jmp _x86return |
193,7 → 195,7 |
cmp ecx,BYTE 32 |
ja .L3 |
.L1: ; short loop |
.L1 ; short loop |
mov edx,[esi] |
bswap edx |
mov [edi],edx |
201,10 → 203,10 |
add edi,BYTE 4 |
dec ecx |
jnz .L1 |
.L2: |
retn |
.L2 |
jmp _x86return |
.L3: ; save ebp |
.L3 ; save ebp |
push ebp |
; unroll four times |
214,7 → 216,7 |
; save count |
push ecx |
.L4: mov eax,[esi] |
.L4 mov eax,[esi] |
mov ebx,[esi+4] |
mov ecx,[esi+8] |
245,7 → 247,7 |
and ecx,BYTE 11b |
jz .L6 |
.L5: ; tail loop |
.L5 ; tail loop |
mov edx,[esi] |
bswap edx |
mov [edi],edx |
254,8 → 256,8 |
dec ecx |
jnz .L5 |
.L6: pop ebp |
retn |
.L6 pop ebp |
jmp _x86return |
268,7 → 270,7 |
cmp ecx,BYTE 32 |
ja .L3 |
.L1: ; short loop |
.L1 ; short loop |
mov al,[esi] |
mov bl,[esi+1] |
mov dl,[esi+2] |
279,10 → 281,10 |
add edi,BYTE 3 |
dec ecx |
jnz .L1 |
.L2: |
retn |
.L2 |
jmp _x86return |
.L3: ; head |
.L3 ; head |
mov edx,edi |
and edx,BYTE 11b |
jz .L4 |
297,7 → 299,7 |
dec ecx |
jmp SHORT .L3 |
.L4: ; unroll 4 times |
.L4 ; unroll 4 times |
push ebp |
mov ebp,ecx |
shr ebp,2 |
305,7 → 307,7 |
; save count |
push ecx |
.L5: mov eax,[esi] ; first dword eax = [A][R][G][B] |
.L5 mov eax,[esi] ; first dword eax = [A][R][G][B] |
mov ebx,[esi+4] ; second dword ebx = [a][r][g][b] |
shl eax,8 ; eax = [R][G][B][.] |
339,7 → 341,7 |
and ecx,BYTE 11b |
jz .L7 |
.L6: ; tail loop |
.L6 ; tail loop |
mov al,[esi] |
mov bl,[esi+1] |
mov dl,[esi+2] |
351,8 → 353,8 |
dec ecx |
jnz .L6 |
.L7: pop ebp |
retn |
.L7 pop ebp |
jmp _x86return |
365,7 → 367,8 |
cmp ecx,BYTE 32 |
ja .L3 |
.L1: ; short loop |
.L1 ; short loop |
mov dl,[esi] |
mov bl,[esi+1] |
mov al,[esi+2] |
376,10 → 379,10 |
add edi,BYTE 3 |
dec ecx |
jnz .L1 |
.L2: |
retn |
.L2 |
jmp _x86return |
.L3: ; head |
.L3 ; head |
mov edx,edi |
and edx,BYTE 11b |
jz .L4 |
394,7 → 397,7 |
dec ecx |
jmp SHORT .L3 |
.L4: ; unroll 4 times |
.L4 ; unroll 4 times |
push ebp |
mov ebp,ecx |
shr ebp,2 |
402,7 → 405,7 |
; save count |
push ecx |
.L5: |
.L5 |
mov eax,[esi] ; first dword eax = [A][R][G][B] |
mov ebx,[esi+4] ; second dword ebx = [a][r][g][b] |
438,7 → 441,7 |
and ecx,BYTE 11b |
jz .L7 |
.L6: ; tail loop |
.L6 ; tail loop |
mov dl,[esi] |
mov bl,[esi+1] |
mov al,[esi+2] |
450,9 → 453,9 |
dec ecx |
jnz .L6 |
.L7: |
.L7 |
pop ebp |
retn |
jmp _x86return |
464,7 → 467,7 |
cmp ecx,BYTE 16 |
ja .L3 |
.L1: ; short loop |
.L1 ; short loop |
mov bl,[esi+0] ; blue |
mov al,[esi+1] ; green |
mov ah,[esi+2] ; red |
481,10 → 484,10 |
jnz .L1 |
.L2: ; End of short loop |
retn |
jmp _x86return |
.L3: ; head |
.L3 ; head |
mov ebx,edi |
and ebx,BYTE 11b |
jz .L4 |
567,7 → 570,7 |
add edi,BYTE 2 |
.L7: |
retn |
jmp _x86return |
580,7 → 583,7 |
cmp ecx,BYTE 16 |
ja .L3 |
.L1: ; short loop |
.L1 ; short loop |
mov ah,[esi+0] ; blue |
mov al,[esi+1] ; green |
mov bl,[esi+2] ; red |
595,10 → 598,10 |
add edi,BYTE 2 |
dec ecx |
jnz .L1 |
.L2: |
retn |
.L2 |
jmp _x86return |
.L3: ; head |
.L3 ; head |
mov ebx,edi |
and ebx,BYTE 11b |
jz .L4 |
616,7 → 619,7 |
add edi,BYTE 2 |
dec ecx |
.L4: ; save count |
.L4 ; save count |
push ecx |
; unroll twice |
630,9 → 633,9 |
neg ecx |
jmp SHORT .L6 |
.L5: |
.L5 |
mov [edi+ecx*4-4],eax |
.L6: |
.L6 |
mov edx,[esi+ecx*8+4] |
mov bh,[esi+ecx*8+4] |
680,8 → 683,8 |
add esi,BYTE 4 |
add edi,BYTE 2 |
.L7: |
retn |
.L7 |
jmp _x86return |
694,7 → 697,7 |
cmp ecx,BYTE 16 |
ja .L3 |
.L1: ; short loop |
.L1 ; short loop |
mov bl,[esi+0] ; blue |
mov al,[esi+1] ; green |
mov ah,[esi+2] ; red |
709,10 → 712,10 |
add edi,BYTE 2 |
dec ecx |
jnz .L1 |
.L2: |
retn |
.L2 |
jmp _x86return |
.L3: ; head |
.L3 ; head |
mov ebx,edi |
and ebx,BYTE 11b |
jz .L4 |
730,7 → 733,7 |
add edi,BYTE 2 |
dec ecx |
.L4: ; save count |
.L4 ; save count |
push ecx |
; unroll twice |
744,9 → 747,9 |
neg ecx |
jmp SHORT .L6 |
.L5: |
.L5 |
mov [edi+ecx*4-4],eax |
.L6: |
.L6 |
mov eax,[esi+ecx*8] |
shr ah,3 |
791,8 → 794,8 |
add esi,BYTE 4 |
add edi,BYTE 2 |
.L7: |
retn |
.L7 |
jmp _x86return |
806,7 → 809,7 |
ja .L3 |
.L1: ; short loop |
.L1 ; short loop |
mov ah,[esi+0] ; blue |
mov al,[esi+1] ; green |
mov bl,[esi+2] ; red |
821,10 → 824,10 |
add edi,BYTE 2 |
dec ecx |
jnz .L1 |
.L2: |
retn |
.L2 |
jmp _x86return |
.L3: ; head |
.L3 ; head |
mov ebx,edi |
and ebx,BYTE 11b |
jz .L4 |
842,7 → 845,7 |
add edi,BYTE 2 |
dec ecx |
.L4: ; save count |
.L4 ; save count |
push ecx |
; unroll twice |
856,9 → 859,9 |
neg ecx |
jmp SHORT .L6 |
.L5: |
.L5 |
mov [edi+ecx*4-4],eax |
.L6: |
.L6 |
mov edx,[esi+ecx*8+4] |
mov bh,[esi+ecx*8+4] |
906,8 → 909,8 |
add esi,BYTE 4 |
add edi,BYTE 2 |
.L7: |
retn |
.L7 |
jmp _x86return |
919,7 → 922,7 |
_ConvertX86p32_8RGB332: |
.L_ALIGNED: |
.L_ALIGNED |
push ecx |
shr ecx,2 ; We will draw 4 pixels at once |
1037,8 → 1040,4 |
jnz .L3 |
.L4: |
retn |
%ifidn __OUTPUT_FORMAT__,elf32 |
section .note.GNU-stack noalloc noexec nowrite progbits |
%endif |
jmp _x86return |