/contrib/sdk/sources/SDL-1.2.2_newlib/src/Tupfile.lua |
---|
27,7 → 27,7 |
for i,v in ipairs(FOLDERS) do |
compile_gcc(v .. "*.c", v .. "%B.o") |
tup.append_table(OBJS, |
tup.foreach_rule(v .. "*.asm", "nasm -f coff -o %o %f", v .. "%B.o") |
tup.foreach_rule(v .. "*.asm", "nasm -i hermes -f coff -o %o %f", v .. "%B.o") |
) |
end |
tup.rule(OBJS, "kos32-ar rcs %o %f", {"../../../lib/libSDLn.a", "../../../lib/<libSDLn>"}) |
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/HeadMMX.h |
---|
6,7 → 6,6 |
Please refer to the file COPYING.LIB contained in the distribution for |
licensing conditions |
*/ |
#ifndef __HERMES_HEAD_MMX__ |
#define __HERMES_HEAD_MMX__ |
45,27 → 44,25 |
/* Fix the underscore business with ELF compilers */ |
#if defined(__ELF__) && defined(__GNUC__) |
#if (defined(__ELF__) && defined(__GNUC__)) || defined(__SUNPRO_C) |
#ifdef __cplusplus |
extern "C" { |
#endif |
void ConvertMMX(HermesConverterInterface *) __attribute__ ((alias ("_ConvertMMX"))); |
#if 0 |
void ClearMMX_32(HermesClearInterface *) __attribute__ ((alias ("_ClearMMX_32"))); |
void ClearMMX_24(HermesClearInterface *) __attribute__ ((alias ("_ClearMMX_24"))); |
void ClearMMX_16(HermesClearInterface *) __attribute__ ((alias ("_ClearMMX_16"))); |
void ClearMMX_8(HermesClearInterface *) __attribute__ ((alias ("_ClearMMX_8"))); |
extern void _ConvertMMX(HermesConverterInterface *); |
extern void _ConvertMMXpII32_24RGB888(); |
extern void _ConvertMMXpII32_16RGB565(); |
extern void _ConvertMMXpII32_16BGR565(); |
extern void _ConvertMMXpII32_16RGB555(); |
extern void _ConvertMMXpII32_16BGR555(); |
void ConvertMMXp32_16RGB555() __attribute__ ((alias ("_ConvertMMXp32_16RGB555"))); |
#endif |
#define ConvertMMX _ConvertMMX |
#define ConvertMMXpII32_24RGB888 _ConvertMMXpII32_24RGB888 |
#define ConvertMMXpII32_16RGB565 _ConvertMMXpII32_16RGB565 |
#define ConvertMMXpII32_16BGR565 _ConvertMMXpII32_16BGR565 |
#define ConvertMMXpII32_16RGB555 _ConvertMMXpII32_16RGB555 |
#define ConvertMMXpII32_16BGR555 _ConvertMMXpII32_16BGR555 |
void ConvertMMXpII32_24RGB888() __attribute__ ((alias ("_ConvertMMXpII32_24RGB888"))); |
void ConvertMMXpII32_16RGB565() __attribute__ ((alias ("_ConvertMMXpII32_16RGB565"))); |
void ConvertMMXpII32_16BGR565() __attribute__ ((alias ("_ConvertMMXpII32_16BGR565"))); |
void ConvertMMXpII32_16RGB555() __attribute__ ((alias ("_ConvertMMXpII32_16RGB555"))); |
void ConvertMMXpII32_16BGR555() __attribute__ ((alias ("_ConvertMMXpII32_16BGR555"))); |
#ifdef __cplusplus |
} |
#endif |
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/HeadX86.h |
---|
60,10 → 60,10 |
void ConvertX86pI8_24(); |
void ConvertX86pI8_16(); |
extern int32 ConvertX86p16_32RGB888_LUT_X86[512]; |
extern int32 ConvertX86p16_32BGR888_LUT_X86[512]; |
extern int32 ConvertX86p16_32RGBA888_LUT_X86[512]; |
extern int32 ConvertX86p16_32BGRA888_LUT_X86[512]; |
extern int ConvertX86p16_32RGB888_LUT_X86[512]; |
extern int ConvertX86p16_32BGR888_LUT_X86[512]; |
extern int ConvertX86p16_32RGBA888_LUT_X86[512]; |
extern int ConvertX86p16_32BGRA888_LUT_X86[512]; |
#ifdef __cplusplus |
} |
74,62 → 74,53 |
/* Now fix up the ELF underscore problem */ |
#if defined(__ELF__) && defined(__GNUC__) |
#if (defined(__ELF__) && defined(__GNUC__)) || defined(__SUNPRO_C) |
#ifdef __cplusplus |
extern "C" { |
#endif |
int Hermes_X86_CPU() __attribute__ ((alias ("_Hermes_X86_CPU"))); |
extern int _Hermes_X86_CPU(); |
void ConvertX86(HermesConverterInterface *) __attribute__ ((alias ("_ConvertX86"))); |
extern void _ConvertX86(HermesConverterInterface *); |
#if 0 |
void ClearX86_32(HermesClearInterface *) __attribute__ ((alias ("_ClearX86_32"))); |
void ClearX86_24(HermesClearInterface *) __attribute__ ((alias ("_ClearX86_24"))); |
void ClearX86_16(HermesClearInterface *) __attribute__ ((alias ("_ClearX86_16"))); |
void ClearX86_8(HermesClearInterface *) __attribute__ ((alias ("_ClearX86_8"))); |
#endif |
extern void _ConvertX86p32_32BGR888(); |
extern void _ConvertX86p32_32RGBA888(); |
extern void _ConvertX86p32_32BGRA888(); |
extern void _ConvertX86p32_24RGB888(); |
extern void _ConvertX86p32_24BGR888(); |
extern void _ConvertX86p32_16RGB565(); |
extern void _ConvertX86p32_16BGR565(); |
extern void _ConvertX86p32_16RGB555(); |
extern void _ConvertX86p32_16BGR555(); |
extern void _ConvertX86p32_8RGB332(); |
void ConvertX86p32_32BGR888() __attribute__ ((alias ("_ConvertX86p32_32BGR888"))); |
void ConvertX86p32_32RGBA888() __attribute__ ((alias ("_ConvertX86p32_32RGBA888"))); |
void ConvertX86p32_32BGRA888() __attribute__ ((alias ("_ConvertX86p32_32BGRA888"))); |
void ConvertX86p32_24RGB888() __attribute__ ((alias ("_ConvertX86p32_24RGB888"))); |
void ConvertX86p32_24BGR888() __attribute__ ((alias ("_ConvertX86p32_24BGR888"))); |
void ConvertX86p32_16RGB565() __attribute__ ((alias ("_ConvertX86p32_16RGB565"))); |
void ConvertX86p32_16BGR565() __attribute__ ((alias ("_ConvertX86p32_16BGR565"))); |
void ConvertX86p32_16RGB555() __attribute__ ((alias ("_ConvertX86p32_16RGB555"))); |
void ConvertX86p32_16BGR555() __attribute__ ((alias ("_ConvertX86p32_16BGR555"))); |
void ConvertX86p32_8RGB332() __attribute__ ((alias ("_ConvertX86p32_8RGB332"))); |
extern void _ConvertX86p16_16BGR565(); |
extern void _ConvertX86p16_16RGB555(); |
extern void _ConvertX86p16_16BGR555(); |
extern void _ConvertX86p16_8RGB332(); |
#if 0 |
void ConvertX86p16_32RGB888() __attribute__ ((alias ("_ConvertX86p16_32RGB888"))); |
void ConvertX86p16_32BGR888() __attribute__ ((alias ("_ConvertX86p16_32BGR888"))); |
void ConvertX86p16_32RGBA888() __attribute__ ((alias ("_ConvertX86p16_32RGBA888"))); |
void ConvertX86p16_32BGRA888() __attribute__ ((alias ("_ConvertX86p16_32BGRA888"))); |
void ConvertX86p16_24RGB888() __attribute__ ((alias ("_ConvertX86p16_24RGB888"))); |
void ConvertX86p16_24BGR888() __attribute__ ((alias ("_ConvertX86p16_24BGR888"))); |
#endif |
void ConvertX86p16_16BGR565() __attribute__ ((alias ("_ConvertX86p16_16BGR565"))); |
void ConvertX86p16_16RGB555() __attribute__ ((alias ("_ConvertX86p16_16RGB555"))); |
void ConvertX86p16_16BGR555() __attribute__ ((alias ("_ConvertX86p16_16BGR555"))); |
void ConvertX86p16_8RGB332() __attribute__ ((alias ("_ConvertX86p16_8RGB332"))); |
#if 0 |
void CopyX86p_4byte() __attribute__ ((alias ("_CopyX86p_4byte"))); |
void CopyX86p_3byte() __attribute__ ((alias ("_CopyX86p_3byte"))); |
void CopyX86p_2byte() __attribute__ ((alias ("_CopyX86p_2byte"))); |
void CopyX86p_1byte() __attribute__ ((alias ("_CopyX86p_1byte"))); |
#define Hermes_X86_CPU _Hermes_X86_CPU |
void ConvertX86pI8_32() __attribute__ ((alias ("_ConvertX86pI8_32"))); |
void ConvertX86pI8_24() __attribute__ ((alias ("_ConvertX86pI8_24"))); |
void ConvertX86pI8_16() __attribute__ ((alias ("_ConvertX86pI8_16"))); |
#define ConvertX86 _ConvertX86 |
extern int32 ConvertX86p16_32RGB888_LUT_X86[512] __attribute__ ((alias ("_ConvertX86p16_32RGB888_LUT_X86"))); |
extern int32 ConvertX86p16_32BGR888_LUT_X86[512] __attribute__ ((alias ("_ConvertX86p16_32BGR888_LUT_X86"))); |
extern int32 ConvertX86p16_32RGBA888_LUT_X86[512] __attribute__ ((alias ("_ConvertX86p16_32RGBA888_LUT_X86"))); |
extern int32 ConvertX86p16_32BGRA888_LUT_X86[512] __attribute__ ((alias ("_ConvertX86p16_32BGRA888_LUT_X86"))); |
#endif |
#define ConvertX86p32_32BGR888 _ConvertX86p32_32BGR888 |
#define ConvertX86p32_32RGBA888 _ConvertX86p32_32RGBA888 |
#define ConvertX86p32_32BGRA888 _ConvertX86p32_32BGRA888 |
#define ConvertX86p32_24RGB888 _ConvertX86p32_24RGB888 |
#define ConvertX86p32_24BGR888 _ConvertX86p32_24BGR888 |
#define ConvertX86p32_16RGB565 _ConvertX86p32_16RGB565 |
#define ConvertX86p32_16BGR565 _ConvertX86p32_16BGR565 |
#define ConvertX86p32_16RGB555 _ConvertX86p32_16RGB555 |
#define ConvertX86p32_16BGR555 _ConvertX86p32_16BGR555 |
#define ConvertX86p32_8RGB332 _ConvertX86p32_8RGB332 |
#define ConvertX86p16_16BGR565 _ConvertX86p16_16BGR565 |
#define ConvertX86p16_16RGB555 _ConvertX86p16_16RGB555 |
#define ConvertX86p16_16BGR555 _ConvertX86p16_16BGR555 |
#define ConvertX86p16_8RGB332 _ConvertX86p16_8RGB332 |
#ifdef __cplusplus |
} |
#endif |
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/common.inc |
---|
0,0 → 1,15 |
; Some common macros for hermes nasm code |
; SDL_FUNC <symbol>
;   Exports <symbol> from the current object file via a GLOBAL declaration.
;   Takes exactly one argument: the symbol name to export.
;   - If HIDDEN_VISIBILITY is defined at assembly time, the declaration
;     emitted is "GLOBAL <symbol>:function hidden" (NASM's extended GLOBAL
;     syntax carrying a symbol type and visibility).
;   - Otherwise a plain "GLOBAL <symbol>" is emitted.
;   The macro only declares the symbol; the label itself must still be
;   defined by the including file.
%macro SDL_FUNC 1 |
%ifdef HIDDEN_VISIBILITY |
GLOBAL %1:function hidden |
%else |
GLOBAL %1 |
%endif |
%endmacro |
; The block below is assembled only when __OS2__ is defined. It declares the
; .data and .text segments up front with explicit attributes (class,
; 16-byte alignment, USE32, FLAT); on any other target nothing is emitted.
%ifdef __OS2__ |
; declare segments with proper attributes for OS/2 386 builds: |
SEGMENT .data CLASS=DATA ALIGN=16 USE32 FLAT |
SEGMENT .text CLASS=CODE ALIGN=16 USE32 FLAT |
%endif |
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/mmx_main.asm |
---|
9,9 → 9,9 |
BITS 32 |
GLOBAL _ConvertMMX |
GLOBAL _mmxreturn |
%include "common.inc" |
SDL_FUNC _ConvertMMX |
SECTION .text |
50,9 → 50,8 |
y_loop: |
mov ecx,[ebp+4] |
jmp [ebp+32] |
call [ebp+32] |
_mmxreturn: |
add esi,[ebp+12] |
add edi,[ebp+28] |
70,5 → 69,6 |
ret |
%ifidn __OUTPUT_FORMAT__,elf32 |
section .note.GNU-stack noalloc noexec nowrite progbits |
%endif |
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/mmxp2_32.asm |
---|
20,40 → 20,51 |
BITS 32 |
%include "common.inc" |
GLOBAL _ConvertMMXpII32_24RGB888 |
GLOBAL _ConvertMMXpII32_16RGB565 |
GLOBAL _ConvertMMXpII32_16BGR565 |
GLOBAL _ConvertMMXpII32_16RGB555 |
GLOBAL _ConvertMMXpII32_16BGR555 |
SDL_FUNC _ConvertMMXpII32_24RGB888 |
SDL_FUNC _ConvertMMXpII32_16RGB565 |
SDL_FUNC _ConvertMMXpII32_16BGR565 |
SDL_FUNC _ConvertMMXpII32_16RGB555 |
SDL_FUNC _ConvertMMXpII32_16BGR555 |
EXTERN _mmxreturn |
;; Macros for conversion routines |
SECTION .data |
%macro _push_immq_mask 1 |
push dword %1 |
push dword %1 |
%endmacro |
ALIGN 8 |
%macro load_immq 2 |
_push_immq_mask %2 |
movq %1, [esp] |
%endmacro |
;; Constants for conversion routines |
%macro pand_immq 2 |
_push_immq_mask %2 |
pand %1, [esp] |
%endmacro |
mmx32_rgb888_mask dd 00ffffffh,00ffffffh |
%define CLEANUP_IMMQ_LOADS(num) \ |
add esp, byte 8 * num |
mmx32_rgb565_b dd 000000f8h, 000000f8h |
mmx32_rgb565_g dd 0000fc00h, 0000fc00h |
mmx32_rgb565_r dd 00f80000h, 00f80000h |
%define mmx32_rgb888_mask 00ffffffh |
%define mmx32_rgb565_b 000000f8h |
%define mmx32_rgb565_g 0000fc00h |
%define mmx32_rgb565_r 00f80000h |
mmx32_rgb555_rb dd 00f800f8h,00f800f8h |
mmx32_rgb555_g dd 0000f800h,0000f800h |
mmx32_rgb555_mul dd 20000008h,20000008h |
mmx32_bgr555_mul dd 00082000h,00082000h |
%define mmx32_rgb555_rb 00f800f8h |
%define mmx32_rgb555_g 0000f800h |
%define mmx32_rgb555_mul 20000008h |
%define mmx32_bgr555_mul 00082000h |
SECTION .text |
_ConvertMMXpII32_24RGB888: |
; set up mm6 as the mask, mm7 as zero |
movq mm6, qword [mmx32_rgb888_mask] |
load_immq mm6, mmx32_rgb888_mask |
CLEANUP_IMMQ_LOADS(1) |
pxor mm7, mm7 |
mov edx, ecx ; save ecx |
108,7 → 119,7 |
dec ecx |
jnz .L3 |
.L4: |
jmp _mmxreturn |
retn |
115,9 → 126,10 |
_ConvertMMXpII32_16RGB565: |
; set up masks |
movq mm5, [mmx32_rgb565_b] |
movq mm6, [mmx32_rgb565_g] |
movq mm7, [mmx32_rgb565_r] |
load_immq mm5, mmx32_rgb565_b |
load_immq mm6, mmx32_rgb565_g |
load_immq mm7, mmx32_rgb565_r |
CLEANUP_IMMQ_LOADS(3) |
mov edx, ecx |
shr ecx, 2 |
176,14 → 188,15 |
jnz .L3 |
.L4: |
jmp _mmxreturn |
retn |
_ConvertMMXpII32_16BGR565: |
movq mm5, [mmx32_rgb565_r] |
movq mm6, [mmx32_rgb565_g] |
movq mm7, [mmx32_rgb565_b] |
load_immq mm5, mmx32_rgb565_r |
load_immq mm6, mmx32_rgb565_g |
load_immq mm7, mmx32_rgb565_b |
CLEANUP_IMMQ_LOADS(3) |
mov edx, ecx |
shr ecx, 2 |
245,7 → 258,7 |
jnz .L3 |
.L4: |
jmp _mmxreturn |
retn |
_ConvertMMXpII32_16BGR555: |
253,7 → 266,7 |
; except it uses a different multiplier for the pmaddwd |
; instruction. cool huh. |
movq mm7, qword [mmx32_bgr555_mul] |
load_immq mm7, mmx32_bgr555_mul |
jmp _convert_bgr555_cheat |
; This is the same as the Intel version.. they obviously went to |
263,15 → 276,16 |
; (I think) a more accurate name.. |
_ConvertMMXpII32_16RGB555: |
movq mm7,qword [mmx32_rgb555_mul] |
load_immq mm7, mmx32_rgb555_mul |
_convert_bgr555_cheat: |
movq mm6,qword [mmx32_rgb555_g] |
load_immq mm6, mmx32_rgb555_g |
CLEANUP_IMMQ_LOADS(2) |
mov edx,ecx ; Save ecx |
and ecx,BYTE 0fffffff8h ; clear lower three bits |
and ecx,DWORD 0fffffff8h ; clear lower three bits |
jnz .L_OK |
jmp .L2 |
jmp near .L2 |
.L_OK: |
280,12 → 294,14 |
movq mm0,[esi] |
movq mm3,mm2 |
pand mm3,qword [mmx32_rgb555_rb] |
pand_immq mm3, mmx32_rgb555_rb |
movq mm1,mm0 |
pand mm1,qword [mmx32_rgb555_rb] |
pand_immq mm1, mmx32_rgb555_rb |
pmaddwd mm3,mm7 |
CLEANUP_IMMQ_LOADS(2) |
pmaddwd mm1,mm7 |
pand mm2,mm6 |
302,13 → 318,13 |
movq mm0,mm4 |
psrld mm1,6 |
pand mm0,qword [mmx32_rgb555_rb] |
pand_immq mm0, mmx32_rgb555_rb |
packssdw mm1,mm3 |
movq mm3,mm5 |
pmaddwd mm0,mm7 |
pand mm3,qword [mmx32_rgb555_rb] |
pand_immq mm3, mmx32_rgb555_rb |
pand mm4,mm6 |
movq [edi],mm1 |
329,12 → 345,14 |
movq mm3,mm2 |
movq mm1,mm0 |
pand mm3,qword [mmx32_rgb555_rb] |
pand_immq mm3, mmx32_rgb555_rb |
packssdw mm5,mm4 |
pand mm1,qword [mmx32_rgb555_rb] |
pand_immq mm1, mmx32_rgb555_rb |
pand mm2,mm6 |
CLEANUP_IMMQ_LOADS(4) |
movq [edi+8],mm5 |
pmaddwd mm3,mm7 |
380,7 → 398,8 |
jnz .L3 |
.L4: |
jmp _mmxreturn |
retn |
%ifidn __OUTPUT_FORMAT__,elf32 |
section .note.GNU-stack noalloc noexec nowrite progbits |
%endif |
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/x86_main.asm |
---|
11,17 → 11,14 |
BITS 32 |
GLOBAL _ConvertX86 |
GLOBAL _x86return |
%include "common.inc" |
GLOBAL _Hermes_X86_CPU |
SDL_FUNC _ConvertX86 |
SDL_FUNC _Hermes_X86_CPU |
SECTION .data |
cpu_flags dd 0 |
SECTION .text |
;; _ConvertX86: |
59,9 → 56,8 |
y_loop: |
mov ecx,[ebp+4] |
jmp [ebp+32] |
call [ebp+32] |
_x86return: |
add esi,[ebp+12] |
add edi,[ebp+28] |
77,9 → 73,6 |
ret |
;; Hermes_X86_CPU returns the CPUID flags in eax |
_Hermes_X86_CPU: |
pushfd |
pop eax |
122,5 → 115,9 |
mov eax,[cpu_flags] |
.L1: |
xor eax,eax |
ret |
%ifidn __OUTPUT_FORMAT__,elf32 |
section .note.GNU-stack noalloc noexec nowrite progbits |
%endif |
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/x86p_16.asm |
---|
10,28 → 10,19 |
; Used with permission. |
; |
BITS 32 |
GLOBAL _ConvertX86p16_32RGB888 |
GLOBAL _ConvertX86p16_32BGR888 |
GLOBAL _ConvertX86p16_32RGBA888 |
GLOBAL _ConvertX86p16_32BGRA888 |
GLOBAL _ConvertX86p16_24RGB888 |
GLOBAL _ConvertX86p16_24BGR888 |
GLOBAL _ConvertX86p16_16BGR565 |
GLOBAL _ConvertX86p16_16RGB555 |
GLOBAL _ConvertX86p16_16BGR555 |
GLOBAL _ConvertX86p16_8RGB332 |
%include "common.inc" |
SDL_FUNC _ConvertX86p16_16BGR565 |
SDL_FUNC _ConvertX86p16_16RGB555 |
SDL_FUNC _ConvertX86p16_16BGR555 |
SDL_FUNC _ConvertX86p16_8RGB332 |
EXTERN _ConvertX86 |
EXTERN _x86return |
SECTION .text |
_ConvertX86p16_16BGR565: |
; check short |
39,7 → 30,7 |
ja .L3 |
.L1 ; short loop |
.L1: ; short loop |
mov al,[esi] |
mov ah,[esi+1] |
mov ebx,eax |
56,10 → 47,10 |
add edi,BYTE 2 |
dec ecx |
jnz .L1 |
.L2 |
jmp _x86return |
.L2: |
retn |
.L3 ; head |
.L3: ; head |
mov eax,edi |
and eax,BYTE 11b |
jz .L4 |
79,7 → 70,7 |
add edi,BYTE 2 |
dec ecx |
.L4 ; save count |
.L4: ; save count |
push ecx |
; unroll twice |
93,8 → 84,8 |
neg ecx |
jmp SHORT .L6 |
.L5 mov [edi+ecx*4-4],eax |
.L6 mov eax,[esi+ecx*4] |
.L5: mov [edi+ecx*4-4],eax |
.L6: mov eax,[esi+ecx*4] |
mov ebx,[esi+ecx*4] |
and eax,07E007E0h |
134,8 → 125,8 |
add esi,BYTE 2 |
add edi,BYTE 2 |
.L7 |
jmp _x86return |
.L7: |
retn |
149,7 → 140,7 |
ja .L3 |
.L1 ; short loop |
.L1: ; short loop |
mov al,[esi] |
mov ah,[esi+1] |
mov ebx,eax |
163,10 → 154,10 |
add edi,BYTE 2 |
dec ecx |
jnz .L1 |
.L2 |
jmp _x86return |
.L2: |
retn |
.L3 ; head |
.L3: ; head |
mov eax,edi |
and eax,BYTE 11b |
jz .L4 |
183,7 → 174,7 |
add edi,BYTE 2 |
dec ecx |
.L4 ; save ebp |
.L4: ; save ebp |
push ebp |
; save count |
200,7 → 191,7 |
xor ebp,ebp |
sub ebp,ecx |
.L5 mov eax,[esi+ebp*8] ; agi? |
.L5: mov eax,[esi+ebp*8] ; agi? |
mov ecx,[esi+ebp*8+4] |
mov ebx,eax |
226,7 → 217,7 |
; tail |
pop ecx |
.L6 and ecx,BYTE 11b |
.L6: and ecx,BYTE 11b |
jz .L7 |
mov al,[esi] |
mov ah,[esi+1] |
242,8 → 233,8 |
dec ecx |
jmp SHORT .L6 |
.L7 pop ebp |
jmp _x86return |
.L7: pop ebp |
retn |
257,7 → 248,7 |
ja .L3 |
.L1 ; short loop |
.L1: ; short loop |
mov al,[esi] |
mov ah,[esi+1] |
mov ebx,eax |
276,10 → 267,10 |
add edi,BYTE 2 |
dec ecx |
jnz .L1 |
.L2 |
jmp _x86return |
.L2: |
retn |
.L3 ; head |
.L3: ; head |
mov eax,edi |
and eax,BYTE 11b |
jz .L4 |
301,7 → 292,7 |
add edi,BYTE 2 |
dec ecx |
.L4 ; save count |
.L4: ; save count |
push ecx |
; unroll twice |
315,8 → 306,8 |
neg ecx |
jmp SHORT .L6 |
.L5 mov [edi+ecx*4-4],eax |
.L6 mov eax,[esi+ecx*4] |
.L5: mov [edi+ecx*4-4],eax |
.L6: mov eax,[esi+ecx*4] |
shr eax,1 |
mov ebx,[esi+ecx*4] |
360,8 → 351,8 |
add esi,BYTE 2 |
add edi,BYTE 2 |
.L7 |
jmp _x86return |
.L7: |
retn |
375,7 → 366,7 |
ja .L3 |
.L1 ; short loop |
.L1: ; short loop |
mov al,[esi+0] |
mov ah,[esi+1] |
mov ebx,eax |
393,10 → 384,10 |
inc edi |
dec ecx |
jnz .L1 |
.L2 |
jmp _x86return |
.L2: |
retn |
.L3 mov eax,edi |
.L3: mov eax,edi |
and eax,BYTE 11b |
jz .L4 |
mov al,[esi+0] |
417,7 → 408,7 |
dec ecx |
jmp SHORT .L3 |
.L4 ; save ebp |
.L4: ; save ebp |
push ebp |
; save count |
431,7 → 422,7 |
mov bl,[esi+1] |
mov dh,[esi+2] |
.L5 shl edx,16 |
.L5: shl edx,16 |
mov bh,[esi+3] |
shl ebx,16 |
472,7 → 463,7 |
and ecx,BYTE 11b |
jz .L7 |
.L6 ; tail |
.L6: ; tail |
mov al,[esi+0] |
mov ah,[esi+1] |
mov ebx,eax |
491,6 → 482,9 |
dec ecx |
jnz .L6 |
.L7 pop ebp |
jmp _x86return |
.L7: pop ebp |
retn |
%ifidn __OUTPUT_FORMAT__,elf32 |
section .note.GNU-stack noalloc noexec nowrite progbits |
%endif |
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/x86p_32.asm |
---|
9,25 → 9,23 |
; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission |
; |
BITS 32 |
GLOBAL _ConvertX86p32_32BGR888 |
GLOBAL _ConvertX86p32_32RGBA888 |
GLOBAL _ConvertX86p32_32BGRA888 |
GLOBAL _ConvertX86p32_24RGB888 |
GLOBAL _ConvertX86p32_24BGR888 |
GLOBAL _ConvertX86p32_16RGB565 |
GLOBAL _ConvertX86p32_16BGR565 |
GLOBAL _ConvertX86p32_16RGB555 |
GLOBAL _ConvertX86p32_16BGR555 |
GLOBAL _ConvertX86p32_8RGB332 |
%include "common.inc" |
EXTERN _x86return |
SDL_FUNC _ConvertX86p32_32BGR888 |
SDL_FUNC _ConvertX86p32_32RGBA888 |
SDL_FUNC _ConvertX86p32_32BGRA888 |
SDL_FUNC _ConvertX86p32_24RGB888 |
SDL_FUNC _ConvertX86p32_24BGR888 |
SDL_FUNC _ConvertX86p32_16RGB565 |
SDL_FUNC _ConvertX86p32_16BGR565 |
SDL_FUNC _ConvertX86p32_16RGB555 |
SDL_FUNC _ConvertX86p32_16BGR555 |
SDL_FUNC _ConvertX86p32_8RGB332 |
SECTION .text |
;; _Convert_* |
;; Parameters: |
;; ESI = source |
43,7 → 41,7 |
cmp ecx,BYTE 32 |
ja .L3 |
.L1 ; short loop |
.L1: ; short loop |
mov edx,[esi] |
bswap edx |
ror edx,8 |
52,10 → 50,10 |
add edi,BYTE 4 |
dec ecx |
jnz .L1 |
.L2 |
jmp _x86return |
.L2: |
retn |
.L3 ; save ebp |
.L3: ; save ebp |
push ebp |
; unroll four times |
65,7 → 63,7 |
; save count |
push ecx |
.L4 mov eax,[esi] |
.L4: mov eax,[esi] |
mov ebx,[esi+4] |
bswap eax |
102,7 → 100,7 |
and ecx,BYTE 11b |
jz .L6 |
.L5 ; tail loop |
.L5: ; tail loop |
mov edx,[esi] |
bswap edx |
ror edx,8 |
112,8 → 110,8 |
dec ecx |
jnz .L5 |
.L6 pop ebp |
jmp _x86return |
.L6: pop ebp |
retn |
124,7 → 122,7 |
cmp ecx,BYTE 32 |
ja .L3 |
.L1 ; short loop |
.L1: ; short loop |
mov edx,[esi] |
rol edx,8 |
mov [edi],edx |
132,10 → 130,10 |
add edi,BYTE 4 |
dec ecx |
jnz .L1 |
.L2 |
jmp _x86return |
.L2: |
retn |
.L3 ; save ebp |
.L3: ; save ebp |
push ebp |
; unroll four times |
145,7 → 143,7 |
; save count |
push ecx |
.L4 mov eax,[esi] |
.L4: mov eax,[esi] |
mov ebx,[esi+4] |
rol eax,8 |
174,7 → 172,7 |
and ecx,BYTE 11b |
jz .L6 |
.L5 ; tail loop |
.L5: ; tail loop |
mov edx,[esi] |
rol edx,8 |
mov [edi],edx |
183,8 → 181,8 |
dec ecx |
jnz .L5 |
.L6 pop ebp |
jmp _x86return |
.L6: pop ebp |
retn |
195,7 → 193,7 |
cmp ecx,BYTE 32 |
ja .L3 |
.L1 ; short loop |
.L1: ; short loop |
mov edx,[esi] |
bswap edx |
mov [edi],edx |
203,10 → 201,10 |
add edi,BYTE 4 |
dec ecx |
jnz .L1 |
.L2 |
jmp _x86return |
.L2: |
retn |
.L3 ; save ebp |
.L3: ; save ebp |
push ebp |
; unroll four times |
216,7 → 214,7 |
; save count |
push ecx |
.L4 mov eax,[esi] |
.L4: mov eax,[esi] |
mov ebx,[esi+4] |
mov ecx,[esi+8] |
247,7 → 245,7 |
and ecx,BYTE 11b |
jz .L6 |
.L5 ; tail loop |
.L5: ; tail loop |
mov edx,[esi] |
bswap edx |
mov [edi],edx |
256,8 → 254,8 |
dec ecx |
jnz .L5 |
.L6 pop ebp |
jmp _x86return |
.L6: pop ebp |
retn |
270,7 → 268,7 |
cmp ecx,BYTE 32 |
ja .L3 |
.L1 ; short loop |
.L1: ; short loop |
mov al,[esi] |
mov bl,[esi+1] |
mov dl,[esi+2] |
281,10 → 279,10 |
add edi,BYTE 3 |
dec ecx |
jnz .L1 |
.L2 |
jmp _x86return |
.L2: |
retn |
.L3 ; head |
.L3: ; head |
mov edx,edi |
and edx,BYTE 11b |
jz .L4 |
299,7 → 297,7 |
dec ecx |
jmp SHORT .L3 |
.L4 ; unroll 4 times |
.L4: ; unroll 4 times |
push ebp |
mov ebp,ecx |
shr ebp,2 |
307,7 → 305,7 |
; save count |
push ecx |
.L5 mov eax,[esi] ; first dword eax = [A][R][G][B] |
.L5: mov eax,[esi] ; first dword eax = [A][R][G][B] |
mov ebx,[esi+4] ; second dword ebx = [a][r][g][b] |
shl eax,8 ; eax = [R][G][B][.] |
341,7 → 339,7 |
and ecx,BYTE 11b |
jz .L7 |
.L6 ; tail loop |
.L6: ; tail loop |
mov al,[esi] |
mov bl,[esi+1] |
mov dl,[esi+2] |
353,8 → 351,8 |
dec ecx |
jnz .L6 |
.L7 pop ebp |
jmp _x86return |
.L7: pop ebp |
retn |
367,8 → 365,7 |
cmp ecx,BYTE 32 |
ja .L3 |
.L1 ; short loop |
.L1: ; short loop |
mov dl,[esi] |
mov bl,[esi+1] |
mov al,[esi+2] |
379,10 → 376,10 |
add edi,BYTE 3 |
dec ecx |
jnz .L1 |
.L2 |
jmp _x86return |
.L2: |
retn |
.L3 ; head |
.L3: ; head |
mov edx,edi |
and edx,BYTE 11b |
jz .L4 |
397,7 → 394,7 |
dec ecx |
jmp SHORT .L3 |
.L4 ; unroll 4 times |
.L4: ; unroll 4 times |
push ebp |
mov ebp,ecx |
shr ebp,2 |
405,7 → 402,7 |
; save count |
push ecx |
.L5 |
.L5: |
mov eax,[esi] ; first dword eax = [A][R][G][B] |
mov ebx,[esi+4] ; second dword ebx = [a][r][g][b] |
441,7 → 438,7 |
and ecx,BYTE 11b |
jz .L7 |
.L6 ; tail loop |
.L6: ; tail loop |
mov dl,[esi] |
mov bl,[esi+1] |
mov al,[esi+2] |
453,9 → 450,9 |
dec ecx |
jnz .L6 |
.L7 |
.L7: |
pop ebp |
jmp _x86return |
retn |
467,7 → 464,7 |
cmp ecx,BYTE 16 |
ja .L3 |
.L1 ; short loop |
.L1: ; short loop |
mov bl,[esi+0] ; blue |
mov al,[esi+1] ; green |
mov ah,[esi+2] ; red |
484,10 → 481,10 |
jnz .L1 |
.L2: ; End of short loop |
jmp _x86return |
retn |
.L3 ; head |
.L3: ; head |
mov ebx,edi |
and ebx,BYTE 11b |
jz .L4 |
570,7 → 567,7 |
add edi,BYTE 2 |
.L7: |
jmp _x86return |
retn |
583,7 → 580,7 |
cmp ecx,BYTE 16 |
ja .L3 |
.L1 ; short loop |
.L1: ; short loop |
mov ah,[esi+0] ; blue |
mov al,[esi+1] ; green |
mov bl,[esi+2] ; red |
598,10 → 595,10 |
add edi,BYTE 2 |
dec ecx |
jnz .L1 |
.L2 |
jmp _x86return |
.L2: |
retn |
.L3 ; head |
.L3: ; head |
mov ebx,edi |
and ebx,BYTE 11b |
jz .L4 |
619,7 → 616,7 |
add edi,BYTE 2 |
dec ecx |
.L4 ; save count |
.L4: ; save count |
push ecx |
; unroll twice |
633,9 → 630,9 |
neg ecx |
jmp SHORT .L6 |
.L5 |
.L5: |
mov [edi+ecx*4-4],eax |
.L6 |
.L6: |
mov edx,[esi+ecx*8+4] |
mov bh,[esi+ecx*8+4] |
683,8 → 680,8 |
add esi,BYTE 4 |
add edi,BYTE 2 |
.L7 |
jmp _x86return |
.L7: |
retn |
697,7 → 694,7 |
cmp ecx,BYTE 16 |
ja .L3 |
.L1 ; short loop |
.L1: ; short loop |
mov bl,[esi+0] ; blue |
mov al,[esi+1] ; green |
mov ah,[esi+2] ; red |
712,10 → 709,10 |
add edi,BYTE 2 |
dec ecx |
jnz .L1 |
.L2 |
jmp _x86return |
.L2: |
retn |
.L3 ; head |
.L3: ; head |
mov ebx,edi |
and ebx,BYTE 11b |
jz .L4 |
733,7 → 730,7 |
add edi,BYTE 2 |
dec ecx |
.L4 ; save count |
.L4: ; save count |
push ecx |
; unroll twice |
747,9 → 744,9 |
neg ecx |
jmp SHORT .L6 |
.L5 |
.L5: |
mov [edi+ecx*4-4],eax |
.L6 |
.L6: |
mov eax,[esi+ecx*8] |
shr ah,3 |
794,8 → 791,8 |
add esi,BYTE 4 |
add edi,BYTE 2 |
.L7 |
jmp _x86return |
.L7: |
retn |
809,7 → 806,7 |
ja .L3 |
.L1 ; short loop |
.L1: ; short loop |
mov ah,[esi+0] ; blue |
mov al,[esi+1] ; green |
mov bl,[esi+2] ; red |
824,10 → 821,10 |
add edi,BYTE 2 |
dec ecx |
jnz .L1 |
.L2 |
jmp _x86return |
.L2: |
retn |
.L3 ; head |
.L3: ; head |
mov ebx,edi |
and ebx,BYTE 11b |
jz .L4 |
845,7 → 842,7 |
add edi,BYTE 2 |
dec ecx |
.L4 ; save count |
.L4: ; save count |
push ecx |
; unroll twice |
859,9 → 856,9 |
neg ecx |
jmp SHORT .L6 |
.L5 |
.L5: |
mov [edi+ecx*4-4],eax |
.L6 |
.L6: |
mov edx,[esi+ecx*8+4] |
mov bh,[esi+ecx*8+4] |
909,8 → 906,8 |
add esi,BYTE 4 |
add edi,BYTE 2 |
.L7 |
jmp _x86return |
.L7: |
retn |
922,7 → 919,7 |
_ConvertX86p32_8RGB332: |
.L_ALIGNED |
.L_ALIGNED: |
push ecx |
shr ecx,2 ; We will draw 4 pixels at once |
1040,4 → 1037,8 |
jnz .L3 |
.L4: |
jmp _x86return |
retn |
%ifidn __OUTPUT_FORMAT__,elf32 |
section .note.GNU-stack noalloc noexec nowrite progbits |
%endif |
/contrib/sdk/sources/SDL-1.2.2_newlib/test/Makefile |
---|
14,8 → 14,8 |
fire: $(OBJECTS) Makefile |
$(CC) $(CFLAGS) $(INCLUDES) -o sdltest.o sdltest.c |
$(CC) $(CFLAGS) $(INCLUDES) -o testbitmap.o testbitmap.c |
$(LD) $(LDFLAGS) $(LIBPATH) --subsystem native -o sdltest sdltest.o -lgcc -lSDL -lc.dll -lc -lsound |
$(LD) $(LDFLAGS) $(LIBPATH) --subsystem native -o testbitmap testbitmap.o -lgcc -lSDL -lc.dll -lc -lsound |
$(LD) $(LDFLAGS) $(LIBPATH) --subsystem native -o sdltest sdltest.o -lgcc -lSDLn -lc.dll -lsound |
$(LD) $(LDFLAGS) $(LIBPATH) --subsystem native -o testbitmap testbitmap.o -lgcc -lSDLn -lc.dll -lsound |
kos32-strip -s sdltest -o sdltest |
kos32-strip -s testbitmap -o testbitmap |
objcopy testbitmap -O binary |
/contrib/sdk/sources/SDL-1.2.2_newlib/test/sdltest.c |
---|
1,28 → 1,79 |
#include "SDL.h" |
#include <stdlib.h> |
SDL_Surface* screen; |
static int done = 0; |
#define WIDTH 640 |
#define HEIGHT 480 |
#define BPP 4 |
#define DEPTH 32 |
int main() |
/*
 * Store one pixel of colour (r, g, b) into a surface.
 *
 * The colour is converted to the surface's pixel format with SDL_MapRGB()
 * and written as a single Uint32 at element offset (y + x) from
 * screen->pixels.
 *
 * NOTE: y is added to the pointer as-is; it is NOT multiplied by a row
 * stride here. The caller must pass the offset of the start of the row,
 * measured in Uint32 elements (DrawScreen passes y * pitch / BPP), not a
 * plain row index.
 *
 * No bounds checking and no surface locking is performed in this function.
 * The Uint32 store presumably requires a 32-bit surface -- confirm against
 * the video mode actually obtained.
 */
void setpixel(SDL_Surface *screen, int x, int y, Uint8 r, Uint8 g, Uint8 b) |
{ |
Uint32 *pixmem32; |
Uint32 colour; |
colour = SDL_MapRGB( screen->format, r, g, b ); |
/* The cast binds tighter than '+', so y and x advance in Uint32 units. */
pixmem32 = (Uint32*) screen->pixels + y + x; |
*pixmem32 = colour; |
} |
/*
 * Repaint every pixel of the surface with a pattern that depends on h,
 * then present it with SDL_Flip().
 *
 *   screen - surface to draw into; its w, h and pitch fields are read.
 *   h      - offset added to the red and green expressions and used
 *            directly as the blue component.
 *
 * The surface is locked first when SDL_MUSTLOCK() says so; if locking fails
 * the function returns without drawing or flipping.
 */
void DrawScreen(SDL_Surface* screen, int h) |
{ |
int x, y, ytimesw; |
if(SDL_MUSTLOCK(screen)) |
{ |
if(SDL_LockSurface(screen) < 0) return; |
} |
for(y = 0; y < screen->h; y++ ) |
{ |
/* Row start as a Uint32 element offset: pitch divided by BPP (4). */
ytimesw = y*screen->pitch/BPP; |
for( x = 0; x < screen->w; x++ ) |
{ |
/* The int colour expressions are narrowed to setpixel's Uint8 parameters. */
setpixel(screen, x, ytimesw, (x*x)/256+3*y+h, (y*y)/256+x+h, h); |
} |
} |
if(SDL_MUSTLOCK(screen)) SDL_UnlockSurface(screen); |
SDL_Flip(screen); |
} |
int main(int argc, char* argv[]) |
{ |
SDL_Surface *screen; |
SDL_Event event; |
if(SDL_Init(SDL_INIT_VIDEO) < 0) exit(0); |
atexit(SDL_Quit); |
screen = SDL_SetVideoMode(320, 200, 8, SDL_SWSURFACE); |
while(!done) |
int keypress = 0; |
int h=0; |
if (SDL_Init(SDL_INIT_VIDEO) < 0 ) return 1; |
if (!(screen = SDL_SetVideoMode(WIDTH, HEIGHT, DEPTH, SDL_FULLSCREEN|SDL_HWSURFACE))) |
{ |
SDL_Quit(); |
return 1; |
} |
while(!keypress) |
{ |
DrawScreen(screen,h++); |
while(SDL_PollEvent(&event)) |
{ |
switch(event.type) |
{ |
case SDL_KEYDOWN: |
case SDL_QUIT: |
done=1; |
keypress = 1; |
break; |
default: |
case SDL_KEYDOWN: |
keypress = 1; |
break; |
} |
} |
} |
SDL_Quit(); |
return 0; |
} |