Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 9172 → Rev 9170

/contrib/sdk/sources/SDL-1.2.2_newlib/src/Tupfile.lua
27,7 → 27,7
for i,v in ipairs(FOLDERS) do
compile_gcc(v .. "*.c", v .. "%B.o")
tup.append_table(OBJS,
tup.foreach_rule(v .. "*.asm", "nasm -i hermes -f coff -o %o %f", v .. "%B.o")
tup.foreach_rule(v .. "*.asm", "nasm -f coff -o %o %f", v .. "%B.o")
)
end
tup.rule(OBJS, "kos32-ar rcs %o %f", {"../../../lib/libSDLn.a", "../../../lib/<libSDLn>"})
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/common.inc
File deleted
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/HeadMMX.h
6,6 → 6,7
Please refer to the file COPYING.LIB contained in the distribution for
licensing conditions
*/
 
#ifndef __HERMES_HEAD_MMX__
#define __HERMES_HEAD_MMX__
 
44,25 → 45,27
 
/* Fix the underscore business with ELF compilers */
 
#if (defined(__ELF__) && defined(__GNUC__)) || defined(__SUNPRO_C)
#if defined(__ELF__) && defined(__GNUC__)
#ifdef __cplusplus
extern "C" {
#endif
 
extern void _ConvertMMX(HermesConverterInterface *);
extern void _ConvertMMXpII32_24RGB888();
extern void _ConvertMMXpII32_16RGB565();
extern void _ConvertMMXpII32_16BGR565();
extern void _ConvertMMXpII32_16RGB555();
extern void _ConvertMMXpII32_16BGR555();
void ConvertMMX(HermesConverterInterface *) __attribute__ ((alias ("_ConvertMMX")));
#if 0
void ClearMMX_32(HermesClearInterface *) __attribute__ ((alias ("_ClearMMX_32")));
void ClearMMX_24(HermesClearInterface *) __attribute__ ((alias ("_ClearMMX_24")));
void ClearMMX_16(HermesClearInterface *) __attribute__ ((alias ("_ClearMMX_16")));
void ClearMMX_8(HermesClearInterface *) __attribute__ ((alias ("_ClearMMX_8")));
 
#define ConvertMMX _ConvertMMX
#define ConvertMMXpII32_24RGB888 _ConvertMMXpII32_24RGB888
#define ConvertMMXpII32_16RGB565 _ConvertMMXpII32_16RGB565
#define ConvertMMXpII32_16BGR565 _ConvertMMXpII32_16BGR565
#define ConvertMMXpII32_16RGB555 _ConvertMMXpII32_16RGB555
#define ConvertMMXpII32_16BGR555 _ConvertMMXpII32_16BGR555
void ConvertMMXp32_16RGB555() __attribute__ ((alias ("_ConvertMMXp32_16RGB555")));
#endif
 
void ConvertMMXpII32_24RGB888() __attribute__ ((alias ("_ConvertMMXpII32_24RGB888")));
void ConvertMMXpII32_16RGB565() __attribute__ ((alias ("_ConvertMMXpII32_16RGB565")));
void ConvertMMXpII32_16BGR565() __attribute__ ((alias ("_ConvertMMXpII32_16BGR565")));
void ConvertMMXpII32_16RGB555() __attribute__ ((alias ("_ConvertMMXpII32_16RGB555")));
void ConvertMMXpII32_16BGR555() __attribute__ ((alias ("_ConvertMMXpII32_16BGR555")));
 
#ifdef __cplusplus
}
#endif
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/HeadX86.h
60,10 → 60,10
void ConvertX86pI8_24();
void ConvertX86pI8_16();
 
extern int ConvertX86p16_32RGB888_LUT_X86[512];
extern int ConvertX86p16_32BGR888_LUT_X86[512];
extern int ConvertX86p16_32RGBA888_LUT_X86[512];
extern int ConvertX86p16_32BGRA888_LUT_X86[512];
extern int32 ConvertX86p16_32RGB888_LUT_X86[512];
extern int32 ConvertX86p16_32BGR888_LUT_X86[512];
extern int32 ConvertX86p16_32RGBA888_LUT_X86[512];
extern int32 ConvertX86p16_32BGRA888_LUT_X86[512];
#ifdef __cplusplus
}
74,53 → 74,62
 
/* Now fix up the ELF underscore problem */
 
#if (defined(__ELF__) && defined(__GNUC__)) || defined(__SUNPRO_C)
#if defined(__ELF__) && defined(__GNUC__)
#ifdef __cplusplus
extern "C" {
#endif
 
extern int _Hermes_X86_CPU();
int Hermes_X86_CPU() __attribute__ ((alias ("_Hermes_X86_CPU")));
 
extern void _ConvertX86(HermesConverterInterface *);
void ConvertX86(HermesConverterInterface *) __attribute__ ((alias ("_ConvertX86")));
 
extern void _ConvertX86p32_32BGR888();
extern void _ConvertX86p32_32RGBA888();
extern void _ConvertX86p32_32BGRA888();
extern void _ConvertX86p32_24RGB888();
extern void _ConvertX86p32_24BGR888();
extern void _ConvertX86p32_16RGB565();
extern void _ConvertX86p32_16BGR565();
extern void _ConvertX86p32_16RGB555();
extern void _ConvertX86p32_16BGR555();
extern void _ConvertX86p32_8RGB332();
#if 0
void ClearX86_32(HermesClearInterface *) __attribute__ ((alias ("_ClearX86_32")));
void ClearX86_24(HermesClearInterface *) __attribute__ ((alias ("_ClearX86_24")));
void ClearX86_16(HermesClearInterface *) __attribute__ ((alias ("_ClearX86_16")));
void ClearX86_8(HermesClearInterface *) __attribute__ ((alias ("_ClearX86_8")));
#endif
 
extern void _ConvertX86p16_16BGR565();
extern void _ConvertX86p16_16RGB555();
extern void _ConvertX86p16_16BGR555();
extern void _ConvertX86p16_8RGB332();
void ConvertX86p32_32BGR888() __attribute__ ((alias ("_ConvertX86p32_32BGR888")));
void ConvertX86p32_32RGBA888() __attribute__ ((alias ("_ConvertX86p32_32RGBA888")));
void ConvertX86p32_32BGRA888() __attribute__ ((alias ("_ConvertX86p32_32BGRA888")));
void ConvertX86p32_24RGB888() __attribute__ ((alias ("_ConvertX86p32_24RGB888")));
void ConvertX86p32_24BGR888() __attribute__ ((alias ("_ConvertX86p32_24BGR888")));
void ConvertX86p32_16RGB565() __attribute__ ((alias ("_ConvertX86p32_16RGB565")));
void ConvertX86p32_16BGR565() __attribute__ ((alias ("_ConvertX86p32_16BGR565")));
void ConvertX86p32_16RGB555() __attribute__ ((alias ("_ConvertX86p32_16RGB555")));
void ConvertX86p32_16BGR555() __attribute__ ((alias ("_ConvertX86p32_16BGR555")));
void ConvertX86p32_8RGB332() __attribute__ ((alias ("_ConvertX86p32_8RGB332")));
 
#if 0
void ConvertX86p16_32RGB888() __attribute__ ((alias ("_ConvertX86p16_32RGB888")));
void ConvertX86p16_32BGR888() __attribute__ ((alias ("_ConvertX86p16_32BGR888")));
void ConvertX86p16_32RGBA888() __attribute__ ((alias ("_ConvertX86p16_32RGBA888")));
void ConvertX86p16_32BGRA888() __attribute__ ((alias ("_ConvertX86p16_32BGRA888")));
void ConvertX86p16_24RGB888() __attribute__ ((alias ("_ConvertX86p16_24RGB888")));
void ConvertX86p16_24BGR888() __attribute__ ((alias ("_ConvertX86p16_24BGR888")));
#endif
void ConvertX86p16_16BGR565() __attribute__ ((alias ("_ConvertX86p16_16BGR565")));
void ConvertX86p16_16RGB555() __attribute__ ((alias ("_ConvertX86p16_16RGB555")));
void ConvertX86p16_16BGR555() __attribute__ ((alias ("_ConvertX86p16_16BGR555")));
void ConvertX86p16_8RGB332() __attribute__ ((alias ("_ConvertX86p16_8RGB332")));
 
#define Hermes_X86_CPU _Hermes_X86_CPU
#if 0
void CopyX86p_4byte() __attribute__ ((alias ("_CopyX86p_4byte")));
void CopyX86p_3byte() __attribute__ ((alias ("_CopyX86p_3byte")));
void CopyX86p_2byte() __attribute__ ((alias ("_CopyX86p_2byte")));
void CopyX86p_1byte() __attribute__ ((alias ("_CopyX86p_1byte")));
 
#define ConvertX86 _ConvertX86
void ConvertX86pI8_32() __attribute__ ((alias ("_ConvertX86pI8_32")));
void ConvertX86pI8_24() __attribute__ ((alias ("_ConvertX86pI8_24")));
void ConvertX86pI8_16() __attribute__ ((alias ("_ConvertX86pI8_16")));
 
#define ConvertX86p32_32BGR888 _ConvertX86p32_32BGR888
#define ConvertX86p32_32RGBA888 _ConvertX86p32_32RGBA888
#define ConvertX86p32_32BGRA888 _ConvertX86p32_32BGRA888
#define ConvertX86p32_24RGB888 _ConvertX86p32_24RGB888
#define ConvertX86p32_24BGR888 _ConvertX86p32_24BGR888
#define ConvertX86p32_16RGB565 _ConvertX86p32_16RGB565
#define ConvertX86p32_16BGR565 _ConvertX86p32_16BGR565
#define ConvertX86p32_16RGB555 _ConvertX86p32_16RGB555
#define ConvertX86p32_16BGR555 _ConvertX86p32_16BGR555
#define ConvertX86p32_8RGB332 _ConvertX86p32_8RGB332
extern int32 ConvertX86p16_32RGB888_LUT_X86[512] __attribute__ ((alias ("_ConvertX86p16_32RGB888_LUT_X86")));
extern int32 ConvertX86p16_32BGR888_LUT_X86[512] __attribute__ ((alias ("_ConvertX86p16_32BGR888_LUT_X86")));
extern int32 ConvertX86p16_32RGBA888_LUT_X86[512] __attribute__ ((alias ("_ConvertX86p16_32RGBA888_LUT_X86")));
extern int32 ConvertX86p16_32BGRA888_LUT_X86[512] __attribute__ ((alias ("_ConvertX86p16_32BGRA888_LUT_X86")));
#endif
 
#define ConvertX86p16_16BGR565 _ConvertX86p16_16BGR565
#define ConvertX86p16_16RGB555 _ConvertX86p16_16RGB555
#define ConvertX86p16_16BGR555 _ConvertX86p16_16BGR555
#define ConvertX86p16_8RGB332 _ConvertX86p16_8RGB332
 
 
#ifdef __cplusplus
}
#endif
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/mmx_main.asm
9,9 → 9,9
 
BITS 32
 
%include "common.inc"
GLOBAL _ConvertMMX
GLOBAL _mmxreturn
 
SDL_FUNC _ConvertMMX
 
SECTION .text
50,8 → 50,9
y_loop:
mov ecx,[ebp+4]
 
call [ebp+32]
jmp [ebp+32]
 
_mmxreturn:
add esi,[ebp+12]
add edi,[ebp+28]
69,6 → 70,5
ret
 
%ifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif
 
 
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/mmxp2_32.asm
20,51 → 20,40
 
BITS 32
 
%include "common.inc"
SDL_FUNC _ConvertMMXpII32_24RGB888
SDL_FUNC _ConvertMMXpII32_16RGB565
SDL_FUNC _ConvertMMXpII32_16BGR565
SDL_FUNC _ConvertMMXpII32_16RGB555
SDL_FUNC _ConvertMMXpII32_16BGR555
GLOBAL _ConvertMMXpII32_24RGB888
GLOBAL _ConvertMMXpII32_16RGB565
GLOBAL _ConvertMMXpII32_16BGR565
GLOBAL _ConvertMMXpII32_16RGB555
GLOBAL _ConvertMMXpII32_16BGR555
 
;; Macros for conversion routines
EXTERN _mmxreturn
 
%macro _push_immq_mask 1
push dword %1
push dword %1
%endmacro
SECTION .data
 
%macro load_immq 2
_push_immq_mask %2
movq %1, [esp]
%endmacro
ALIGN 8
 
%macro pand_immq 2
_push_immq_mask %2
pand %1, [esp]
%endmacro
;; Constants for conversion routines
 
%define CLEANUP_IMMQ_LOADS(num) \
add esp, byte 8 * num
mmx32_rgb888_mask dd 00ffffffh,00ffffffh
 
%define mmx32_rgb888_mask 00ffffffh
%define mmx32_rgb565_b 000000f8h
%define mmx32_rgb565_g 0000fc00h
%define mmx32_rgb565_r 00f80000h
mmx32_rgb565_b dd 000000f8h, 000000f8h
mmx32_rgb565_g dd 0000fc00h, 0000fc00h
mmx32_rgb565_r dd 00f80000h, 00f80000h
 
%define mmx32_rgb555_rb 00f800f8h
%define mmx32_rgb555_g 0000f800h
%define mmx32_rgb555_mul 20000008h
%define mmx32_bgr555_mul 00082000h
mmx32_rgb555_rb dd 00f800f8h,00f800f8h
mmx32_rgb555_g dd 0000f800h,0000f800h
mmx32_rgb555_mul dd 20000008h,20000008h
mmx32_bgr555_mul dd 00082000h,00082000h
 
 
SECTION .text
 
_ConvertMMXpII32_24RGB888:
 
; set up mm6 as the mask, mm7 as zero
load_immq mm6, mmx32_rgb888_mask
CLEANUP_IMMQ_LOADS(1)
movq mm6, qword [mmx32_rgb888_mask]
pxor mm7, mm7
 
mov edx, ecx ; save ecx
119,7 → 108,7
dec ecx
jnz .L3
.L4:
retn
jmp _mmxreturn
 
 
 
126,10 → 115,9
_ConvertMMXpII32_16RGB565:
 
; set up masks
load_immq mm5, mmx32_rgb565_b
load_immq mm6, mmx32_rgb565_g
load_immq mm7, mmx32_rgb565_r
CLEANUP_IMMQ_LOADS(3)
movq mm5, [mmx32_rgb565_b]
movq mm6, [mmx32_rgb565_g]
movq mm7, [mmx32_rgb565_r]
 
mov edx, ecx
shr ecx, 2
188,15 → 176,14
jnz .L3
 
.L4:
retn
jmp _mmxreturn
 
_ConvertMMXpII32_16BGR565:
 
load_immq mm5, mmx32_rgb565_r
load_immq mm6, mmx32_rgb565_g
load_immq mm7, mmx32_rgb565_b
CLEANUP_IMMQ_LOADS(3)
movq mm5, [mmx32_rgb565_r]
movq mm6, [mmx32_rgb565_g]
movq mm7, [mmx32_rgb565_b]
 
mov edx, ecx
shr ecx, 2
258,7 → 245,7
jnz .L3
 
.L4:
retn
jmp _mmxreturn
 
_ConvertMMXpII32_16BGR555:
 
266,7 → 253,7
; except it uses a different multiplier for the pmaddwd
; instruction. cool huh.
 
load_immq mm7, mmx32_bgr555_mul
movq mm7, qword [mmx32_bgr555_mul]
jmp _convert_bgr555_cheat
 
; This is the same as the Intel version.. they obviously went to
276,16 → 263,15
; (I think) a more accurate name..
_ConvertMMXpII32_16RGB555:
 
load_immq mm7, mmx32_rgb555_mul
movq mm7,qword [mmx32_rgb555_mul]
_convert_bgr555_cheat:
load_immq mm6, mmx32_rgb555_g
CLEANUP_IMMQ_LOADS(2)
movq mm6,qword [mmx32_rgb555_g]
mov edx,ecx ; Save ecx
 
and ecx,DWORD 0fffffff8h ; clear lower three bits
and ecx,BYTE 0fffffff8h ; clear lower three bits
jnz .L_OK
jmp near .L2
jmp .L2
 
.L_OK:
294,14 → 280,12
movq mm0,[esi]
movq mm3,mm2
 
pand_immq mm3, mmx32_rgb555_rb
pand mm3,qword [mmx32_rgb555_rb]
movq mm1,mm0
 
pand_immq mm1, mmx32_rgb555_rb
pand mm1,qword [mmx32_rgb555_rb]
pmaddwd mm3,mm7
 
CLEANUP_IMMQ_LOADS(2)
 
pmaddwd mm1,mm7
pand mm2,mm6
 
318,13 → 302,13
movq mm0,mm4
psrld mm1,6
 
pand_immq mm0, mmx32_rgb555_rb
pand mm0,qword [mmx32_rgb555_rb]
packssdw mm1,mm3
 
movq mm3,mm5
pmaddwd mm0,mm7
 
pand_immq mm3, mmx32_rgb555_rb
pand mm3,qword [mmx32_rgb555_rb]
pand mm4,mm6
 
movq [edi],mm1
345,14 → 329,12
movq mm3,mm2
movq mm1,mm0
 
pand_immq mm3, mmx32_rgb555_rb
pand mm3,qword [mmx32_rgb555_rb]
packssdw mm5,mm4
 
pand_immq mm1, mmx32_rgb555_rb
pand mm1,qword [mmx32_rgb555_rb]
pand mm2,mm6
 
CLEANUP_IMMQ_LOADS(4)
 
movq [edi+8],mm5
pmaddwd mm3,mm7
 
398,8 → 380,7
jnz .L3
 
.L4:
retn
jmp _mmxreturn
 
%ifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif
 
 
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/x86_main.asm
11,14 → 11,17
 
BITS 32
 
%include "common.inc"
GLOBAL _ConvertX86
GLOBAL _x86return
 
SDL_FUNC _ConvertX86
SDL_FUNC _Hermes_X86_CPU
GLOBAL _Hermes_X86_CPU
 
 
SECTION .data
cpu_flags dd 0
 
SECTION .text
;; _ConvertX86:
56,8 → 59,9
y_loop:
mov ecx,[ebp+4]
 
call [ebp+32]
jmp [ebp+32]
 
_x86return:
add esi,[ebp+12]
add edi,[ebp+28]
73,6 → 77,9
ret
 
 
 
;; Hermes_X86_CPU returns the CPUID flags in eax
_Hermes_X86_CPU:
pushfd
pop eax
115,9 → 122,5
mov eax,[cpu_flags]
 
.L1:
xor eax,eax
ret
 
 
%ifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/x86p_16.asm
10,19 → 10,28
; Used with permission.
;
 
BITS 32
 
%include "common.inc"
GLOBAL _ConvertX86p16_32RGB888
GLOBAL _ConvertX86p16_32BGR888
GLOBAL _ConvertX86p16_32RGBA888
GLOBAL _ConvertX86p16_32BGRA888
GLOBAL _ConvertX86p16_24RGB888
GLOBAL _ConvertX86p16_24BGR888
GLOBAL _ConvertX86p16_16BGR565
GLOBAL _ConvertX86p16_16RGB555
GLOBAL _ConvertX86p16_16BGR555
GLOBAL _ConvertX86p16_8RGB332
 
SDL_FUNC _ConvertX86p16_16BGR565
SDL_FUNC _ConvertX86p16_16RGB555
SDL_FUNC _ConvertX86p16_16BGR555
SDL_FUNC _ConvertX86p16_8RGB332
 
EXTERN _ConvertX86
EXTERN _x86return
 
 
SECTION .text
 
 
 
_ConvertX86p16_16BGR565:
 
; check short
30,7 → 39,7
ja .L3
 
 
.L1: ; short loop
.L1 ; short loop
mov al,[esi]
mov ah,[esi+1]
mov ebx,eax
47,10 → 56,10
add edi,BYTE 2
dec ecx
jnz .L1
.L2:
retn
.L2
jmp _x86return
 
.L3: ; head
.L3 ; head
mov eax,edi
and eax,BYTE 11b
jz .L4
70,7 → 79,7
add edi,BYTE 2
dec ecx
 
.L4: ; save count
.L4 ; save count
push ecx
 
; unroll twice
84,8 → 93,8
neg ecx
jmp SHORT .L6
.L5: mov [edi+ecx*4-4],eax
.L6: mov eax,[esi+ecx*4]
.L5 mov [edi+ecx*4-4],eax
.L6 mov eax,[esi+ecx*4]
 
mov ebx,[esi+ecx*4]
and eax,07E007E0h
125,8 → 134,8
add esi,BYTE 2
add edi,BYTE 2
 
.L7:
retn
.L7
jmp _x86return
 
 
 
140,7 → 149,7
ja .L3
 
 
.L1: ; short loop
.L1 ; short loop
mov al,[esi]
mov ah,[esi+1]
mov ebx,eax
154,10 → 163,10
add edi,BYTE 2
dec ecx
jnz .L1
.L2:
retn
.L2
jmp _x86return
 
.L3: ; head
.L3 ; head
mov eax,edi
and eax,BYTE 11b
jz .L4
174,7 → 183,7
add edi,BYTE 2
dec ecx
 
.L4: ; save ebp
.L4 ; save ebp
push ebp
 
; save count
191,7 → 200,7
xor ebp,ebp
sub ebp,ecx
 
.L5: mov eax,[esi+ebp*8] ; agi?
.L5 mov eax,[esi+ebp*8] ; agi?
mov ecx,[esi+ebp*8+4]
mov ebx,eax
217,7 → 226,7
 
; tail
pop ecx
.L6: and ecx,BYTE 11b
.L6 and ecx,BYTE 11b
jz .L7
mov al,[esi]
mov ah,[esi+1]
233,8 → 242,8
dec ecx
jmp SHORT .L6
 
.L7: pop ebp
retn
.L7 pop ebp
jmp _x86return
 
 
 
248,7 → 257,7
ja .L3
 
.L1: ; short loop
.L1 ; short loop
mov al,[esi]
mov ah,[esi+1]
mov ebx,eax
267,10 → 276,10
add edi,BYTE 2
dec ecx
jnz .L1
.L2:
retn
.L2
jmp _x86return
 
.L3: ; head
.L3 ; head
mov eax,edi
and eax,BYTE 11b
jz .L4
292,7 → 301,7
add edi,BYTE 2
dec ecx
 
.L4: ; save count
.L4 ; save count
push ecx
 
; unroll twice
306,8 → 315,8
neg ecx
jmp SHORT .L6
.L5: mov [edi+ecx*4-4],eax
.L6: mov eax,[esi+ecx*4]
.L5 mov [edi+ecx*4-4],eax
.L6 mov eax,[esi+ecx*4]
 
shr eax,1
mov ebx,[esi+ecx*4]
351,8 → 360,8
add esi,BYTE 2
add edi,BYTE 2
 
.L7:
retn
.L7
jmp _x86return
 
 
 
366,7 → 375,7
ja .L3
 
 
.L1: ; short loop
.L1 ; short loop
mov al,[esi+0]
mov ah,[esi+1]
mov ebx,eax
384,10 → 393,10
inc edi
dec ecx
jnz .L1
.L2:
retn
.L2
jmp _x86return
 
.L3: mov eax,edi
.L3 mov eax,edi
and eax,BYTE 11b
jz .L4
mov al,[esi+0]
408,7 → 417,7
dec ecx
jmp SHORT .L3
 
.L4: ; save ebp
.L4 ; save ebp
push ebp
 
; save count
422,7 → 431,7
mov bl,[esi+1]
mov dh,[esi+2]
.L5: shl edx,16
.L5 shl edx,16
mov bh,[esi+3]
shl ebx,16
463,7 → 472,7
and ecx,BYTE 11b
jz .L7
 
.L6: ; tail
.L6 ; tail
mov al,[esi+0]
mov ah,[esi+1]
mov ebx,eax
482,9 → 491,6
dec ecx
jnz .L6
 
.L7: pop ebp
retn
.L7 pop ebp
jmp _x86return
 
%ifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif
/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/x86p_32.asm
9,23 → 9,25
; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
;
 
BITS 32
 
%include "common.inc"
GLOBAL _ConvertX86p32_32BGR888
GLOBAL _ConvertX86p32_32RGBA888
GLOBAL _ConvertX86p32_32BGRA888
GLOBAL _ConvertX86p32_24RGB888
GLOBAL _ConvertX86p32_24BGR888
GLOBAL _ConvertX86p32_16RGB565
GLOBAL _ConvertX86p32_16BGR565
GLOBAL _ConvertX86p32_16RGB555
GLOBAL _ConvertX86p32_16BGR555
GLOBAL _ConvertX86p32_8RGB332
 
SDL_FUNC _ConvertX86p32_32BGR888
SDL_FUNC _ConvertX86p32_32RGBA888
SDL_FUNC _ConvertX86p32_32BGRA888
SDL_FUNC _ConvertX86p32_24RGB888
SDL_FUNC _ConvertX86p32_24BGR888
SDL_FUNC _ConvertX86p32_16RGB565
SDL_FUNC _ConvertX86p32_16BGR565
SDL_FUNC _ConvertX86p32_16RGB555
SDL_FUNC _ConvertX86p32_16BGR555
SDL_FUNC _ConvertX86p32_8RGB332
EXTERN _x86return
 
SECTION .text
 
 
;; _Convert_*
;; Parameters:
;; ESI = source
41,7 → 43,7
cmp ecx,BYTE 32
ja .L3
 
.L1: ; short loop
.L1 ; short loop
mov edx,[esi]
bswap edx
ror edx,8
50,10 → 52,10
add edi,BYTE 4
dec ecx
jnz .L1
.L2:
retn
.L2
jmp _x86return
 
.L3: ; save ebp
.L3 ; save ebp
push ebp
 
; unroll four times
63,7 → 65,7
; save count
push ecx
 
.L4: mov eax,[esi]
.L4 mov eax,[esi]
mov ebx,[esi+4]
 
bswap eax
100,7 → 102,7
and ecx,BYTE 11b
jz .L6
 
.L5: ; tail loop
.L5 ; tail loop
mov edx,[esi]
bswap edx
ror edx,8
110,8 → 112,8
dec ecx
jnz .L5
 
.L6: pop ebp
retn
.L6 pop ebp
jmp _x86return
 
122,7 → 124,7
cmp ecx,BYTE 32
ja .L3
 
.L1: ; short loop
.L1 ; short loop
mov edx,[esi]
rol edx,8
mov [edi],edx
130,10 → 132,10
add edi,BYTE 4
dec ecx
jnz .L1
.L2:
retn
.L2
jmp _x86return
 
.L3: ; save ebp
.L3 ; save ebp
push ebp
 
; unroll four times
143,7 → 145,7
; save count
push ecx
 
.L4: mov eax,[esi]
.L4 mov eax,[esi]
mov ebx,[esi+4]
 
rol eax,8
172,7 → 174,7
and ecx,BYTE 11b
jz .L6
 
.L5: ; tail loop
.L5 ; tail loop
mov edx,[esi]
rol edx,8
mov [edi],edx
181,8 → 183,8
dec ecx
jnz .L5
 
.L6: pop ebp
retn
.L6 pop ebp
jmp _x86return
 
 
193,7 → 195,7
cmp ecx,BYTE 32
ja .L3
 
.L1: ; short loop
.L1 ; short loop
mov edx,[esi]
bswap edx
mov [edi],edx
201,10 → 203,10
add edi,BYTE 4
dec ecx
jnz .L1
.L2:
retn
.L2
jmp _x86return
 
.L3: ; save ebp
.L3 ; save ebp
push ebp
 
; unroll four times
214,7 → 216,7
; save count
push ecx
 
.L4: mov eax,[esi]
.L4 mov eax,[esi]
mov ebx,[esi+4]
 
mov ecx,[esi+8]
245,7 → 247,7
and ecx,BYTE 11b
jz .L6
 
.L5: ; tail loop
.L5 ; tail loop
mov edx,[esi]
bswap edx
mov [edi],edx
254,8 → 256,8
dec ecx
jnz .L5
 
.L6: pop ebp
retn
.L6 pop ebp
jmp _x86return
 
 
268,7 → 270,7
cmp ecx,BYTE 32
ja .L3
 
.L1: ; short loop
.L1 ; short loop
mov al,[esi]
mov bl,[esi+1]
mov dl,[esi+2]
279,10 → 281,10
add edi,BYTE 3
dec ecx
jnz .L1
.L2:
retn
.L2
jmp _x86return
 
.L3: ; head
.L3 ; head
mov edx,edi
and edx,BYTE 11b
jz .L4
297,7 → 299,7
dec ecx
jmp SHORT .L3
 
.L4: ; unroll 4 times
.L4 ; unroll 4 times
push ebp
mov ebp,ecx
shr ebp,2
305,7 → 307,7
; save count
push ecx
 
.L5: mov eax,[esi] ; first dword eax = [A][R][G][B]
.L5 mov eax,[esi] ; first dword eax = [A][R][G][B]
mov ebx,[esi+4] ; second dword ebx = [a][r][g][b]
 
shl eax,8 ; eax = [R][G][B][.]
339,7 → 341,7
and ecx,BYTE 11b
jz .L7
 
.L6: ; tail loop
.L6 ; tail loop
mov al,[esi]
mov bl,[esi+1]
mov dl,[esi+2]
351,8 → 353,8
dec ecx
jnz .L6
 
.L7: pop ebp
retn
.L7 pop ebp
jmp _x86return
 
 
 
365,7 → 367,8
cmp ecx,BYTE 32
ja .L3
 
.L1: ; short loop
.L1 ; short loop
mov dl,[esi]
mov bl,[esi+1]
mov al,[esi+2]
376,10 → 379,10
add edi,BYTE 3
dec ecx
jnz .L1
.L2:
retn
.L2
jmp _x86return
 
.L3: ; head
.L3 ; head
mov edx,edi
and edx,BYTE 11b
jz .L4
394,7 → 397,7
dec ecx
jmp SHORT .L3
 
.L4: ; unroll 4 times
.L4 ; unroll 4 times
push ebp
mov ebp,ecx
shr ebp,2
402,7 → 405,7
; save count
push ecx
 
.L5:
.L5
mov eax,[esi] ; first dword eax = [A][R][G][B]
mov ebx,[esi+4] ; second dword ebx = [a][r][g][b]
 
438,7 → 441,7
and ecx,BYTE 11b
jz .L7
 
.L6: ; tail loop
.L6 ; tail loop
mov dl,[esi]
mov bl,[esi+1]
mov al,[esi+2]
450,9 → 453,9
dec ecx
jnz .L6
 
.L7:
.L7
pop ebp
retn
jmp _x86return
 
464,7 → 467,7
cmp ecx,BYTE 16
ja .L3
 
.L1: ; short loop
.L1 ; short loop
mov bl,[esi+0] ; blue
mov al,[esi+1] ; green
mov ah,[esi+2] ; red
481,10 → 484,10
jnz .L1
 
.L2: ; End of short loop
retn
jmp _x86return
 
.L3: ; head
.L3 ; head
mov ebx,edi
and ebx,BYTE 11b
jz .L4
567,7 → 570,7
add edi,BYTE 2
 
.L7:
retn
jmp _x86return
 
 
 
580,7 → 583,7
cmp ecx,BYTE 16
ja .L3
 
.L1: ; short loop
.L1 ; short loop
mov ah,[esi+0] ; blue
mov al,[esi+1] ; green
mov bl,[esi+2] ; red
595,10 → 598,10
add edi,BYTE 2
dec ecx
jnz .L1
.L2:
retn
.L2
jmp _x86return
 
.L3: ; head
.L3 ; head
mov ebx,edi
and ebx,BYTE 11b
jz .L4
616,7 → 619,7
add edi,BYTE 2
dec ecx
 
.L4: ; save count
.L4 ; save count
push ecx
 
; unroll twice
630,9 → 633,9
neg ecx
jmp SHORT .L6
 
.L5:
.L5
mov [edi+ecx*4-4],eax
.L6:
.L6
mov edx,[esi+ecx*8+4]
 
mov bh,[esi+ecx*8+4]
680,8 → 683,8
add esi,BYTE 4
add edi,BYTE 2
 
.L7:
retn
.L7
jmp _x86return
 
 
694,7 → 697,7
cmp ecx,BYTE 16
ja .L3
 
.L1: ; short loop
.L1 ; short loop
mov bl,[esi+0] ; blue
mov al,[esi+1] ; green
mov ah,[esi+2] ; red
709,10 → 712,10
add edi,BYTE 2
dec ecx
jnz .L1
.L2:
retn
.L2
jmp _x86return
 
.L3: ; head
.L3 ; head
mov ebx,edi
and ebx,BYTE 11b
jz .L4
730,7 → 733,7
add edi,BYTE 2
dec ecx
 
.L4: ; save count
.L4 ; save count
push ecx
 
; unroll twice
744,9 → 747,9
neg ecx
jmp SHORT .L6
 
.L5:
.L5
mov [edi+ecx*4-4],eax
.L6:
.L6
mov eax,[esi+ecx*8]
 
shr ah,3
791,8 → 794,8
add esi,BYTE 4
add edi,BYTE 2
 
.L7:
retn
.L7
jmp _x86return
 
 
 
806,7 → 809,7
ja .L3
 
 
.L1: ; short loop
.L1 ; short loop
mov ah,[esi+0] ; blue
mov al,[esi+1] ; green
mov bl,[esi+2] ; red
821,10 → 824,10
add edi,BYTE 2
dec ecx
jnz .L1
.L2:
retn
.L2
jmp _x86return
 
.L3: ; head
.L3 ; head
mov ebx,edi
and ebx,BYTE 11b
jz .L4
842,7 → 845,7
add edi,BYTE 2
dec ecx
 
.L4: ; save count
.L4 ; save count
push ecx
 
; unroll twice
856,9 → 859,9
neg ecx
jmp SHORT .L6
 
.L5:
.L5
mov [edi+ecx*4-4],eax
.L6:
.L6
mov edx,[esi+ecx*8+4]
 
mov bh,[esi+ecx*8+4]
906,8 → 909,8
add esi,BYTE 4
add edi,BYTE 2
 
.L7:
retn
.L7
jmp _x86return
 
 
 
919,7 → 922,7
_ConvertX86p32_8RGB332:
 
.L_ALIGNED:
.L_ALIGNED
push ecx
 
shr ecx,2 ; We will draw 4 pixels at once
1037,8 → 1040,4
jnz .L3
.L4:
retn
 
%ifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif
jmp _x86return