Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 9171 → Rev 9172

/contrib/sdk/sources/SDL-1.2.2_newlib/src/hermes/mmxp2_32.asm
20,40 → 20,51
 
BITS 32
 
%include "common.inc"
GLOBAL _ConvertMMXpII32_24RGB888
GLOBAL _ConvertMMXpII32_16RGB565
GLOBAL _ConvertMMXpII32_16BGR565
GLOBAL _ConvertMMXpII32_16RGB555
GLOBAL _ConvertMMXpII32_16BGR555
SDL_FUNC _ConvertMMXpII32_24RGB888
SDL_FUNC _ConvertMMXpII32_16RGB565
SDL_FUNC _ConvertMMXpII32_16BGR565
SDL_FUNC _ConvertMMXpII32_16RGB555
SDL_FUNC _ConvertMMXpII32_16BGR555
 
EXTERN _mmxreturn
;; Macros for conversion routines
SECTION .data
; _push_immq_mask imm32
;   Pushes the 32-bit immediate %1 onto the stack twice, so the 8 bytes
;   at [esp] hold the same value in both dword halves.
;   Lowers esp by 8; that space is not released here.
%macro _push_immq_mask 1
%rep 2
        push    dword %1
%endrep
%endmacro
ALIGN 8
; load_immq mmreg, imm32
;   Loads the MMX register %1 with the immediate %2 repeated in both
;   dwords: the value is staged on the stack by _push_immq_mask and then
;   read back as a quadword from [esp].
;   The 8 staged bytes are left on the stack after the load.
%macro load_immq 2
        _push_immq_mask %2
        movq    %1, qword [esp]
%endmacro
 
;; Constants for conversion routines
; pand_immq mmreg, imm32
;   ANDs the MMX register %1 with the immediate %2 repeated in both
;   dwords: the value is staged on the stack by _push_immq_mask and used
;   as the quadword memory operand of pand.
;   The 8 staged bytes are left on the stack after the operation.
%macro pand_immq 2
        _push_immq_mask %2
        pand    %1, qword [esp]
%endmacro
 
mmx32_rgb888_mask dd 00ffffffh,00ffffffh
; CLEANUP_IMMQ_LOADS(num)
;   Releases the stack space left behind by `num` load_immq / pand_immq
;   expansions (8 bytes each) by adding 8 * num to esp.
;   The argument is parenthesised so that an expression argument such as
;   1+1 is scaled as a whole instead of expanding to 8 * 1+1.
;   The `byte` override asks for the short immediate form, so 8 * num has
;   to fit in a signed byte.
%define CLEANUP_IMMQ_LOADS(num) \
add esp, byte 8 * (num)
 
mmx32_rgb565_b dd 000000f8h, 000000f8h
mmx32_rgb565_g dd 0000fc00h, 0000fc00h
mmx32_rgb565_r dd 00f80000h, 00f80000h
; 32-bit mask immediates. load_immq / pand_immq duplicate an immediate
; into both dwords of a quadword before it is used.
; 00ffffffh keeps the low 24 bits of each dword.
%define mmx32_rgb888_mask 00ffffffh
; 000000f8h selects bits 3-7 (top five bits of byte 0).
%define mmx32_rgb565_b 000000f8h
; 0000fc00h selects bits 10-15 (top six bits of byte 1).
%define mmx32_rgb565_g 0000fc00h
; 00f80000h selects bits 19-23 (top five bits of byte 2).
%define mmx32_rgb565_r 00f80000h
 
mmx32_rgb555_rb dd 00f800f8h,00f800f8h
mmx32_rgb555_g dd 0000f800h,0000f800h
mmx32_rgb555_mul dd 20000008h,20000008h
mmx32_bgr555_mul dd 00082000h,00082000h
; 00f800f8h selects the top five bits of byte 0 and of byte 2.
%define mmx32_rgb555_rb 00f800f8h
; 0000f800h selects bits 11-15 (top five bits of byte 1).
%define mmx32_rgb555_g 0000f800h
; Multipliers loaded into mm7 and used as the pmaddwd operand.
; 20000008h: high word 2000h, low word 0008h.
%define mmx32_rgb555_mul 20000008h
; 00082000h: high word 0008h, low word 2000h (the two words swapped).
%define mmx32_bgr555_mul 00082000h
 
 
SECTION .text
 
_ConvertMMXpII32_24RGB888:
 
; set up mm6 as the mask, mm7 as zero
movq mm6, qword [mmx32_rgb888_mask]
load_immq mm6, mmx32_rgb888_mask
CLEANUP_IMMQ_LOADS(1)
pxor mm7, mm7
 
mov edx, ecx ; save ecx
108,7 → 119,7
dec ecx
jnz .L3
.L4:
jmp _mmxreturn
retn
 
 
 
115,9 → 126,10
_ConvertMMXpII32_16RGB565:
 
; set up masks
movq mm5, [mmx32_rgb565_b]
movq mm6, [mmx32_rgb565_g]
movq mm7, [mmx32_rgb565_r]
load_immq mm5, mmx32_rgb565_b
load_immq mm6, mmx32_rgb565_g
load_immq mm7, mmx32_rgb565_r
CLEANUP_IMMQ_LOADS(3)
 
mov edx, ecx
shr ecx, 2
176,14 → 188,15
jnz .L3
 
.L4:
jmp _mmxreturn
retn
 
_ConvertMMXpII32_16BGR565:
 
movq mm5, [mmx32_rgb565_r]
movq mm6, [mmx32_rgb565_g]
movq mm7, [mmx32_rgb565_b]
load_immq mm5, mmx32_rgb565_r
load_immq mm6, mmx32_rgb565_g
load_immq mm7, mmx32_rgb565_b
CLEANUP_IMMQ_LOADS(3)
 
mov edx, ecx
shr ecx, 2
245,7 → 258,7
jnz .L3
 
.L4:
jmp _mmxreturn
retn
 
_ConvertMMXpII32_16BGR555:
 
253,7 → 266,7
; except it uses a different multiplier for the pmaddwd
; instruction. cool huh.
 
movq mm7, qword [mmx32_bgr555_mul]
load_immq mm7, mmx32_bgr555_mul
jmp _convert_bgr555_cheat
 
; This is the same as the Intel version.. they obviously went to
263,15 → 276,16
; (I think) a more accurate name..
_ConvertMMXpII32_16RGB555:
 
movq mm7,qword [mmx32_rgb555_mul]
load_immq mm7, mmx32_rgb555_mul
_convert_bgr555_cheat:
movq mm6,qword [mmx32_rgb555_g]
load_immq mm6, mmx32_rgb555_g
CLEANUP_IMMQ_LOADS(2)
mov edx,ecx ; Save ecx
 
and ecx,BYTE 0fffffff8h ; clear lower three bits
and ecx,DWORD 0fffffff8h ; clear lower three bits
jnz .L_OK
jmp .L2
jmp near .L2
 
.L_OK:
280,12 → 294,14
movq mm0,[esi]
movq mm3,mm2
 
pand mm3,qword [mmx32_rgb555_rb]
pand_immq mm3, mmx32_rgb555_rb
movq mm1,mm0
 
pand mm1,qword [mmx32_rgb555_rb]
pand_immq mm1, mmx32_rgb555_rb
pmaddwd mm3,mm7
 
CLEANUP_IMMQ_LOADS(2)
 
pmaddwd mm1,mm7
pand mm2,mm6
 
302,13 → 318,13
movq mm0,mm4
psrld mm1,6
 
pand mm0,qword [mmx32_rgb555_rb]
pand_immq mm0, mmx32_rgb555_rb
packssdw mm1,mm3
 
movq mm3,mm5
pmaddwd mm0,mm7
 
pand mm3,qword [mmx32_rgb555_rb]
pand_immq mm3, mmx32_rgb555_rb
pand mm4,mm6
 
movq [edi],mm1
329,12 → 345,14
movq mm3,mm2
movq mm1,mm0
 
pand mm3,qword [mmx32_rgb555_rb]
pand_immq mm3, mmx32_rgb555_rb
packssdw mm5,mm4
 
pand mm1,qword [mmx32_rgb555_rb]
pand_immq mm1, mmx32_rgb555_rb
pand mm2,mm6
 
CLEANUP_IMMQ_LOADS(4)
 
movq [edi+8],mm5
pmaddwd mm3,mm7
 
380,7 → 398,8
jnz .L3
 
.L4:
jmp _mmxreturn
retn
 
 
 
%ifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif