/kernel/trunk/drivers/infinity.asm |
---|
19,18 → 19,22 |
include 'main.inc' |
include 'imports.inc' |
; Build-time switches. In the code visible here USE_MMX_128 guards the |
; include of the SIMD core files; USE_MMX and USE_SSE appear only in |
; commented-out lines. |
USE_MMX equ 0 |
USE_MMX_128 equ 0 |
USE_SSE equ 0 |
; FORCE_MMX / FORCE_MMX_128 choose the mixer core at assembly time. With both |
; left at 0 the initialisation code picks a core at run time from CPUID. |
; Setting both to 1 is rejected with a `display` message. |
FORCE_MMX equ 0 ;set to 1 to force use mmx or |
FORCE_MMX_128 equ 0 ;integer sse2 extensions |
;and reduce driver size |
;USE_SSE equ 0 |
DEBUG equ 1 |
EVENT_NOTIFY equ 0x00000200 |
; NOTE(review): OS_BASE and new_app_base are each defined twice below with |
; identical values; the second pair looks redundant - confirm and drop one. |
OS_BASE equ 0; 0x80400000 |
new_app_base equ 0x60400000; 0x01000000 |
OS_BASE equ 0 |
new_app_base equ 0x60400000 |
; PROC_BASE is a fixed offset of 0x80000 above OS_BASE. |
PROC_BASE equ OS_BASE+0x0080000 |
; Bit index tested in EDX after CPUID with EAX = 1 (SSE2 feature flag). |
CAPS_SSE2 equ 26 |
public START |
public service_proc |
public version |
79,6 → 83,42 |
mov [str.fd], eax |
mov [str.bk], eax |
; Fill mix_2_core / mix_3_core / mix_4_core with the address of the routine |
; that the mixer later reaches through `call [mix_N_core]`. |
; |
; Assembly-time override 1: FORCE_MMX stores the 64-bit MMX routines. |
if FORCE_MMX |
; Both overrides set at once: print a message during assembly. |
; `stop` presumably aborts the build - TODO confirm how it is defined. |
if FORCE_MMX_128 |
display 'Use only FORCE_MMX or FORCE_MMX_128 not both together',13,10 |
stop |
end if |
mov [mix_2_core], mmx_mix_2 |
mov [mix_3_core], mmx_mix_3 |
mov [mix_4_core], mmx_mix_4 |
end if |
; Assembly-time override 2: FORCE_MMX_128 stores the 128-bit SSE2 routines. |
if FORCE_MMX_128 |
; Same mutual-exclusion check as above, seen from the other flag. |
if FORCE_MMX |
display 'Use only FORCE_MMX or FORCE_MMX_128 not both together',13,10 |
stop |
end if |
mov [mix_2_core], mmx128_mix_2 |
mov [mix_3_core], mmx128_mix_3 |
mov [mix_4_core], mmx128_mix_4 |
end if |
; No override: query CPUID leaf 1 at run time and test the SSE2 bit of EDX. |
; NOTE(review): cpuid overwrites eax, ebx, ecx and edx - confirm the |
; surrounding routine does not need any of them preserved at this point. |
if ~(FORCE_MMX or FORCE_MMX_128) ;autodetect |
mov eax, 1 |
cpuid |
; bt copies bit CAPS_SSE2 of edx into CF; CF = 1 selects the SSE2 cores. |
bt edx, CAPS_SSE2 |
jc .mmx128 |
;old 64-bit mmx |
mov [mix_2_core], mmx_mix_2 |
mov [mix_3_core], mmx_mix_3 |
mov [mix_4_core], mmx_mix_4 |
jmp @F |
.mmx128: ;new 128-bit sse2 extensions |
mov [mix_2_core], mmx128_mix_2 |
mov [mix_3_core], mmx128_mix_3 |
mov [mix_4_core], mmx128_mix_4 |
; Join point for both autodetect branches. |
@@: |
end if |
stdcall set_handler, [hSound], new_mix |
stdcall RegService, szInfinity, service_proc |
ret |
563,14 → 603,8 |
endp |
include 'mixer.asm' |
; Both SIMD core sets are included unconditionally. The initialisation code |
; stores the addresses of mmx_mix_* and mmx128_mix_* into mix_N_core, so the |
; labels have to exist; guarding the includes with USE_MMX_128 (which is 0) |
; left them undefined. Each file wraps its own code in `if used ...`, so a |
; core set that nothing references is not assembled. |
include 'mix_mmx.inc' |
include 'mix_sse2.inc' |
;if USE_SSE |
; include 'mix_sse.inc' |
664,7 → 698,7 |
; One uninitialised dword each; the initialisation code stores eax into both. |
str.fd rd 1 |
str.bk rd 1 |
; NOTE(review): no code visible here reads or writes the mix_N_1.core slots; |
; they look superseded by mix_N_core below - confirm before removing. |
mix_2_1.core rd 1 |
mix_3_1.core rd 1 |
mix_4_1.core rd 1 |
; Address of the selected 2-, 3- and 4-stream core routine. Written once by |
; the initialisation code, then used via `call [mix_N_core]` in mixer.asm. |
mix_2_core rd 1 |
mix_3_core rd 1 |
mix_4_core rd 1 |
/kernel/trunk/drivers/mix_mmx.inc |
---|
0,0 → 1,241 |
; params |
; edi= output |
; eax= input stream 1 |
; ebx= input stream 2 |
; ecx= input stream 3 (mmx_mix_3, mmx_mix_4) |
; edx= input stream 4 (mmx_mix_4 only) |
if used mmx_mix_2 |
align 4 |
; mmx_mix_2 - mix one 128-byte block of two input streams. |
; In:  edi = output block |
;      eax = input stream 1 |
;      ebx = input stream 2 |
; Out: bytes 0..127 at edi = paddsw (signed, saturating, 16-bit) sums of the |
;      matching bytes at eax and ebx |
; Writes mm0-mm7 and the output memory only. eax, ebx and edi are left |
; unchanged, so a caller can advance them by 128 and call again. |
; No emms is issued here. |
mmx_mix_2: |
; ---- bytes 0..63 ---- |
movq mm0, [eax] |
movq mm1, [eax+8] |
movq mm2, [eax+16] |
movq mm3, [eax+24] |
movq mm4, [eax+32] |
movq mm5, [eax+40] |
movq mm6, [eax+48] |
movq mm7, [eax+56] |
paddsw mm0, [ebx] |
movq [edi], mm0 |
paddsw mm1, [ebx+8] |
movq [edi+8], mm1 |
paddsw mm2, [ebx+16] |
movq [edi+16], mm2 |
paddsw mm3, [ebx+24] |
movq [edi+24], mm3 |
paddsw mm4, [ebx+32] |
movq [edi+32], mm4 |
paddsw mm5, [ebx+40] |
movq [edi+40], mm5 |
paddsw mm6, [ebx+48] |
movq [edi+48], mm6 |
paddsw mm7, [ebx+56] |
movq [edi+56], mm7 |
; ---- bytes 64..127 ---- |
movq mm0, [eax+64] |
movq mm1, [eax+72] |
movq mm2, [eax+80] |
movq mm3, [eax+88] |
movq mm4, [eax+96] |
movq mm5, [eax+104] |
movq mm6, [eax+112] |
movq mm7, [eax+120] |
paddsw mm0, [ebx+64] |
movq [edi+64], mm0 |
paddsw mm1, [ebx+72] |
movq [edi+72], mm1 |
paddsw mm2, [ebx+80] |
movq [edi+80], mm2 |
paddsw mm3, [ebx+88] |
movq [edi+88], mm3 |
paddsw mm4, [ebx+96] |
movq [edi+96], mm4 |
; Fixed: this pair used ecx as the source and edx as the destination, |
; neither of which is an argument of the 2-stream routine. |
paddsw mm5, [ebx+104] |
movq [edi+104], mm5 |
paddsw mm6, [ebx+112] |
movq [edi+112], mm6 |
paddsw mm7, [ebx+120] |
movq [edi+120], mm7 |
ret |
; mmx_mix_3 - mix one 128-byte block of three input streams. |
; In:  edi = output block, eax / ebx / ecx = input streams 1 / 2 / 3 |
; Out: bytes 0..127 at edi = paddsw (signed, saturating, 16-bit) sums of the |
;      matching bytes of all three inputs |
; Writes mm0-mm7 and the output memory only; the pointer registers are not |
; modified. No emms is issued here. |
align 4 |
mmx_mix_3: |
; ---- bytes 0..63: load stream 1 ---- |
movq mm0, [eax] |
movq mm1, [eax+8] |
movq mm2, [eax+16] |
movq mm3, [eax+24] |
movq mm4, [eax+32] |
movq mm5, [eax+40] |
movq mm6, [eax+48] |
movq mm7, [eax+56] |
; add stream 2 |
paddsw mm0, [ebx] |
paddsw mm1, [ebx+8] |
paddsw mm2, [ebx+16] |
paddsw mm3, [ebx+24] |
paddsw mm4, [ebx+32] |
paddsw mm5, [ebx+40] |
paddsw mm6, [ebx+48] |
paddsw mm7, [ebx+56] |
; add stream 3 and store each quadword as soon as it is complete |
paddsw mm0, [ecx] |
movq [edi], mm0 |
paddsw mm1,[ecx+8] |
movq [edi+8], mm1 |
paddsw mm2, [ecx+16] |
movq [edi+16], mm2 |
paddsw mm3, [ecx+24] |
movq [edi+24], mm3 |
paddsw mm4, [ecx+32] |
movq [edi+32], mm4 |
paddsw mm5, [ecx+40] |
movq [edi+40], mm5 |
paddsw mm6, [ecx+48] |
movq [edi+48], mm6 |
paddsw mm7, [ecx+56] |
movq [edi+56], mm7 |
; ---- bytes 64..127: same three steps ---- |
movq mm0, [eax+64] |
movq mm1, [eax+72] |
movq mm2, [eax+80] |
movq mm3, [eax+88] |
movq mm4, [eax+96] |
movq mm5, [eax+104] |
movq mm6, [eax+112] |
movq mm7, [eax+120] |
paddsw mm0, [ebx+64] |
paddsw mm1, [ebx+72] |
paddsw mm2, [ebx+80] |
paddsw mm3, [ebx+88] |
paddsw mm4, [ebx+96] |
paddsw mm5, [ebx+104] |
paddsw mm6, [ebx+112] |
paddsw mm7, [ebx+120] |
paddsw mm0, [ecx+64] |
movq [edi+64], mm0 |
paddsw mm1, [ecx+72] |
movq [edi+72], mm1 |
paddsw mm2, [ecx+80] |
movq [edi+80], mm2 |
paddsw mm3, [ecx+88] |
movq [edi+88], mm3 |
paddsw mm4, [ecx+96] |
movq [edi+96], mm4 |
paddsw mm5, [ecx+104] |
movq [edi+104], mm5 |
paddsw mm6, [ecx+112] |
movq [edi+112], mm6 |
paddsw mm7, [ecx+120] |
movq [edi+120], mm7 |
ret |
align 4 |
; mmx_mix_4 - mix one 128-byte block of four input streams. |
; In:  edi = output block |
;      eax / ebx / ecx / edx = input streams 1 / 2 / 3 / 4 |
; Out: bytes 0..127 at edi = paddsw (signed, saturating, 16-bit) sums of the |
;      matching bytes of all four inputs |
; The block is handled in four 32-byte quarters. In each quarter the even |
; registers (mm0/2/4/6) hold stream 1 + stream 3, the odd registers |
; (mm1/3/5/7) hold stream 2 + stream 4, and each even/odd pair is then |
; summed and stored from the even register. |
; Writes mm0-mm7 and the output memory only; the pointer registers are not |
; modified. No emms is issued here. |
mmx_mix_4: |
; ---- bytes 0..31 ---- |
movq mm0, [eax] |
movq mm2, [eax+8] |
movq mm4, [eax+16] |
movq mm6, [eax+24] |
movq mm1, [ebx] |
movq mm3, [ebx+8] |
movq mm5, [ebx+16] |
movq mm7, [ebx+24] |
paddsw mm0, [ecx] |
paddsw mm2, [ecx+8] |
paddsw mm4, [ecx+16] |
paddsw mm6, [ecx+24] |
paddsw mm1, [edx] |
paddsw mm3, [edx+8] |
paddsw mm5, [edx+16] |
paddsw mm7, [edx+24] |
paddsw mm0, mm1 |
movq [edi], mm0 |
paddsw mm2, mm3 |
movq [edi+8], mm2 |
paddsw mm4, mm5 |
movq [edi+16], mm4 |
; Fixed: was `paddsw mm5, mm6`, which stored mm6 without streams 2 and 4. |
paddsw mm6, mm7 |
movq [edi+24], mm6 |
; ---- bytes 32..63 ---- |
movq mm0, [eax+32] |
movq mm2, [eax+40] |
movq mm4, [eax+48] |
movq mm6, [eax+56] |
movq mm1, [ebx+32] |
movq mm3, [ebx+40] |
movq mm5, [ebx+48] |
movq mm7, [ebx+56] |
paddsw mm0, [ecx+32] |
paddsw mm2, [ecx+40] |
paddsw mm4, [ecx+48] |
paddsw mm6, [ecx+56] |
paddsw mm1, [edx+32] |
paddsw mm3, [edx+40] |
paddsw mm5, [edx+48] |
paddsw mm7, [edx+56] |
paddsw mm0, mm1 |
movq [edi+32], mm0 |
; Fixed: was `paddsw mm2, mm2`, which doubled streams 1+3 and dropped 2+4. |
paddsw mm2, mm3 |
movq [edi+40], mm2 |
paddsw mm4, mm5 |
movq [edi+48], mm4 |
paddsw mm6, mm7 |
movq [edi+56], mm6 |
; ---- bytes 64..95 ---- |
movq mm0, [eax+64] |
movq mm2, [eax+72] |
movq mm4, [eax+80] |
movq mm6, [eax+88] |
movq mm1, [ebx+64] |
movq mm3, [ebx+72] |
movq mm5, [ebx+80] |
movq mm7, [ebx+88] |
paddsw mm0, [ecx+64] |
paddsw mm2, [ecx+72] |
paddsw mm4, [ecx+80] |
paddsw mm6, [ecx+88] |
paddsw mm1, [edx+64] |
paddsw mm3, [edx+72] |
paddsw mm5, [edx+80] |
paddsw mm7, [edx+88] |
paddsw mm0, mm1 |
movq [edi+64], mm0 |
paddsw mm2, mm3 |
movq [edi+72], mm2 |
paddsw mm4, mm5 |
movq [edi+80], mm4 |
; Fixed: was `paddsw mm6, mm5` followed by a store of mm7, so the output |
; held only streams 2+4 for this quadword. |
paddsw mm6, mm7 |
movq [edi+88], mm6 |
; ---- bytes 96..127 ---- |
movq mm0, [eax+96] |
movq mm2, [eax+104] |
movq mm4, [eax+112] |
movq mm6, [eax+120] |
movq mm1, [ebx+96] |
movq mm3, [ebx+104] |
movq mm5, [ebx+112] |
movq mm7, [ebx+120] |
paddsw mm0, [ecx+96] |
paddsw mm2, [ecx+104] |
paddsw mm4, [ecx+112] |
paddsw mm6, [ecx+120] |
paddsw mm1, [edx+96] |
paddsw mm3, [edx+104] |
paddsw mm5, [edx+112] |
paddsw mm7, [edx+120] |
paddsw mm0, mm1 |
; Fixed: the store went to [eax+96], overwriting input stream 1 and |
; leaving this output quadword unwritten. |
movq [edi+96], mm0 |
paddsw mm2, mm3 |
movq [edi+104], mm2 |
paddsw mm4, mm5 |
movq [edi+112], mm4 |
paddsw mm6, mm7 |
movq [edi+120], mm6 |
ret |
end if |
/kernel/trunk/drivers/mix_sse2.inc |
---|
0,0 → 1,139 |
if used mmx128_mix_2 |
; mmx128_mix_2 - SSE2 variant: mix one 128-byte block of two input streams. |
; In:  edi = output block, eax / ebx = input streams 1 / 2 |
; Out: bytes 0..127 at edi = paddsw (signed, saturating, 16-bit) sums |
; Writes xmm0-xmm7 and the output memory only; the pointer registers are not |
; modified. |
; NOTE(review): movaps and paddsw with a memory operand need 16-byte aligned |
; addresses - confirm all three buffers are always aligned. |
align 4 |
mmx128_mix_2: |
; Hint the block that follows the current one in each input. |
prefetcht1 [eax+128] |
prefetcht1 [ebx+128] |
; Load all of stream 1. |
movaps xmm0, [eax] |
movaps xmm1, [eax+16] |
movaps xmm2, [eax+32] |
movaps xmm3, [eax+48] |
movaps xmm4, [eax+64] |
movaps xmm5, [eax+80] |
movaps xmm6, [eax+96] |
movaps xmm7, [eax+112] |
; Add stream 2 and store each 16-byte lane as soon as it is complete. |
paddsw xmm0, [ebx] |
movaps [edi], xmm0 |
paddsw xmm1,[ebx+16] |
movaps [edi+16], xmm1 |
paddsw xmm2, [ebx+32] |
movaps [edi+32], xmm2 |
paddsw xmm3, [ebx+48] |
movaps [edi+48], xmm3 |
paddsw xmm4, [ebx+64] |
movaps [edi+64], xmm4 |
paddsw xmm5, [ebx+80] |
movaps [edi+80], xmm5 |
paddsw xmm6, [ebx+96] |
movaps [edi+96], xmm6 |
paddsw xmm7, [ebx+112] |
movaps [edi+112], xmm7 |
ret |
; mmx128_mix_3 - SSE2 variant: mix one 128-byte block of three input streams. |
; In:  edi = output block, eax / ebx / ecx = input streams 1 / 2 / 3 |
; Out: bytes 0..127 at edi = paddsw (signed, saturating, 16-bit) sums |
; Writes xmm0-xmm7 and the output memory only; the pointer registers are not |
; modified. |
; NOTE(review): movaps and paddsw with a memory operand need 16-byte aligned |
; addresses - confirm all four buffers are always aligned. |
align 4 |
mmx128_mix_3: |
; Hint the block that follows the current one in each input. |
prefetcht1 [eax+128] |
prefetcht1 [ebx+128] |
prefetcht1 [ecx+128] |
; Load all of stream 1. |
movaps xmm0, [eax] |
movaps xmm1, [eax+16] |
movaps xmm2, [eax+32] |
movaps xmm3, [eax+48] |
movaps xmm4, [eax+64] |
movaps xmm5, [eax+80] |
movaps xmm6, [eax+96] |
movaps xmm7, [eax+112] |
; Add stream 2. |
paddsw xmm0, [ebx] |
paddsw xmm1, [ebx+16] |
paddsw xmm2, [ebx+32] |
paddsw xmm3, [ebx+48] |
paddsw xmm4, [ebx+64] |
paddsw xmm5, [ebx+80] |
paddsw xmm6, [ebx+96] |
paddsw xmm7, [ebx+112] |
; Add stream 3 and store each 16-byte lane as soon as it is complete. |
paddsw xmm0, [ecx] |
movaps [edi], xmm0 |
paddsw xmm1, [ecx+16] |
movaps [edi+16], xmm1 |
paddsw xmm2, [ecx+32] |
movaps [edi+32], xmm2 |
paddsw xmm3, [ecx+48] |
movaps [edi+48], xmm3 |
paddsw xmm4, [ecx+64] |
movaps [edi+64], xmm4 |
paddsw xmm5, [ecx+80] |
movaps [edi+80], xmm5 |
paddsw xmm6, [ecx+96] |
movaps [edi+96], xmm6 |
paddsw xmm7, [ecx+112] |
movaps [edi+112], xmm7 |
ret |
; mmx128_mix_4 - SSE2 variant: mix one 128-byte block of four input streams. |
; In:  edi = output block |
;      eax / ebx / ecx / edx = input streams 1 / 2 / 3 / 4 |
; Out: bytes 0..127 at edi = paddsw (signed, saturating, 16-bit) sums |
; Works in two 64-byte halves. Even registers (xmm0/2/4/6) hold |
; stream 1 + stream 3, odd registers (xmm1/3/5/7) hold stream 2 + stream 4; |
; each even/odd pair is then summed and stored from the even register. |
; Writes xmm0-xmm7 and the output memory only; the pointer registers are not |
; modified. |
; NOTE(review): movaps and paddsw with a memory operand need 16-byte aligned |
; addresses - confirm all five buffers are always aligned. |
align 4 |
mmx128_mix_4: |
; Hint the block that follows the current one in each input. |
prefetcht1 [eax+128] |
prefetcht1 [ebx+128] |
prefetcht1 [ecx+128] |
prefetcht1 [edx+128] |
; ---- bytes 0..63 ---- |
movaps xmm0, [eax] |
movaps xmm2, [eax+16] |
movaps xmm4, [eax+32] |
movaps xmm6, [eax+48] |
movaps xmm1, [ebx] |
movaps xmm3, [ebx+16] |
movaps xmm5, [ebx+32] |
movaps xmm7, [ebx+48] |
paddsw xmm0, [ecx] |
paddsw xmm2, [ecx+16] |
paddsw xmm4, [ecx+32] |
paddsw xmm6, [ecx+48] |
paddsw xmm1, [edx] |
paddsw xmm3, [edx+16] |
paddsw xmm5, [edx+32] |
paddsw xmm7, [edx+48] |
paddsw xmm0, xmm1 |
movaps [edi], xmm0 |
paddsw xmm2, xmm3 |
movaps [edi+16], xmm2 |
paddsw xmm4, xmm5 |
movaps [edi+32], xmm4 |
paddsw xmm6, xmm7 |
movaps [edi+48], xmm6 |
; ---- bytes 64..127 ---- |
movaps xmm0, [eax+64] |
movaps xmm2, [eax+80] |
movaps xmm4, [eax+96] |
movaps xmm6, [eax+112] |
movaps xmm1, [ebx+64] |
movaps xmm3, [ebx+80] |
movaps xmm5, [ebx+96] |
movaps xmm7, [ebx+112] |
paddsw xmm0, [ecx+64] |
paddsw xmm2, [ecx+80] |
paddsw xmm4, [ecx+96] |
paddsw xmm6, [ecx+112] |
paddsw xmm1, [edx+64] |
paddsw xmm3, [edx+80] |
paddsw xmm5, [edx+96] |
paddsw xmm7, [edx+112] |
paddsw xmm0, xmm1 |
movaps [edi+64], xmm0 |
paddsw xmm2, xmm3 |
movaps [edi+80], xmm2 |
paddsw xmm4, xmm5 |
movaps [edi+96], xmm4 |
paddsw xmm6, xmm7 |
movaps [edi+112], xmm6 |
ret |
end if |
/kernel/trunk/drivers/mixer.asm |
---|
90,7 → 90,7 |
.m3: |
add [output],512 |
sub [main_count], 1 |
dec [main_count] |
jnz .l00 |
call update_stream |
622,6 → 622,7 |
ret |
endp |
align 4 |
proc m16_s_mmx |
movq mm0, [esi] |
777,56 → 778,59 |
ret |
endp |
align 4 |
; mix_2_1 - mix two input streams into one output buffer. |
; Args (stdcall): output = destination, str0 / str1 = the two sources. |
; Calls the routine stored in mix_2_core four times. The core routines take |
; edi = output, eax = stream 1, ebx = stream 2, process 128 bytes per call |
; and leave those registers unchanged, so the pointers are stepped by |
; esi = 128 between calls (4 x 128 = 512 bytes in total). |
; The leftover `stdcall mix_2_1_mmx` calls were removed: they mixed every |
; block a second time, clobbered eax, and left edi at output+384, so the |
; remaining core calls wrote from output+512 onwards with eax and ebx |
; out of step. |
; NOTE(review): edi, esi, eax and ebx are modified and not restored here - |
; confirm callers do not rely on them. |
proc mix_2_1 stdcall, output:dword, str0:dword, str1:dword |
mov edi, [output] |
mov eax, [str0] |
mov ebx, [str1] |
mov esi, 128 |
call [mix_2_core] ;edi, eax, ebx |
add edi, esi |
add eax, esi |
add ebx, esi |
call [mix_2_core] ;edi, eax, ebx |
add edi, esi |
add eax, esi |
add ebx, esi |
call [mix_2_core] ;edi, eax, ebx |
add edi, esi |
add eax, esi |
add ebx, esi |
call [mix_2_core] ;edi, eax, ebx |
ret |
endp |
align 4 |
; mix_3_1 - mix three input streams into one output buffer. |
; Args (stdcall): output = destination, str0 / str1 / str2 = the sources. |
; Calls the routine stored in mix_3_core four times. The core routines take |
; edi = output, eax / ebx / ecx = streams 1 / 2 / 3, process 128 bytes per |
; call and leave those registers unchanged, so the pointers are stepped by |
; esi = 128 between calls (4 x 128 = 512 bytes in total). |
; The leftover `stdcall mix_3_1_mmx` calls were removed: they mixed every |
; block a second time and left edi at output+384, so the remaining core |
; calls wrote from output+512 onwards. |
; NOTE(review): edi, esi, eax, ebx and ecx are modified and not restored |
; here - confirm callers do not rely on them. |
proc mix_3_1 stdcall, output:dword, str0:dword, str1:dword, str2:dword |
mov edi, [output] |
mov eax, [str0] |
mov ebx, [str1] |
mov ecx, [str2] |
mov esi, 128 |
call [mix_3_core] ;edi, eax, ebx, ecx |
add edi, esi |
add eax, esi |
add ebx, esi |
add ecx, esi |
call [mix_3_core] ;edi, eax, ebx, ecx |
add edi, esi |
add eax, esi |
add ebx, esi |
add ecx, esi |
call [mix_3_core] ;edi, eax, ebx, ecx |
add edi, esi |
add eax, esi |
add ebx, esi |
add ecx, esi |
call [mix_3_core] ;edi, eax, ebx, ecx |
ret |
endp |
839,29 → 843,35 |
call alloc_mix_buff |
and eax, eax |
jz .err |
mov [output], eax |
mov edi, eax |
mov eax, [str0] |
mov ebx, [str1] |
mov ecx, [str2] |
mov edx, [str3] |
mov esi, 128 |
call [mix_4_core] ;edi, eax, ebx, ecx, edx |
stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] |
add edi, 128 |
add [str0], 128 |
add [str1], 128 |
add [str2], 128 |
add [str3], 128 |
stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] |
add edi, 128 |
add [str0], 128 |
add [str1], 128 |
add [str2], 128 |
add [str3], 128 |
stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] |
add edi, 128 |
add [str0], 128 |
add [str1], 128 |
add [str2], 128 |
add [str3], 128 |
stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] |
add edi, esi |
add eax, esi |
add ebx, esi |
add ecx, esi |
add edx, esi |
call [mix_4_core] ;edi, eax, ebx, ecx, edx |
add edi, esi |
add eax, esi |
add ebx, esi |
add ecx, esi |
add edx, esi |
call [mix_4_core] ;edi, eax, ebx, ecx, edx |
add edi, esi |
add eax, esi |
add ebx, esi |
add ecx, esi |
add edx, esi |
call [mix_4_core] ;edi, eax, ebx, ecx, edx |
mov eax, [output] |
ret |
.err: |
876,322 → 886,37 |
mov edi, [output] |
stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] |
add edi, 128 |
add [str0], 128 |
add [str1], 128 |
add [str2], 128 |
add [str3], 128 |
stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] |
add edi, 128 |
add [str0], 128 |
add [str1], 128 |
add [str2], 128 |
add [str3], 128 |
stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] |
add edi, 128 |
add [str0], 128 |
add [str1], 128 |
add [str2], 128 |
add [str3], 128 |
stdcall mix_4_1_mmx, edi, [str0],[str1],[str2],[str3] |
ret |
endp |
align 4 |
; mix_2_1_mmx - MMX mix of one 128-byte block of two streams. |
; Args (stdcall): output = destination, str0 / str1 = the two sources. |
; Each quadword at offsets 0..120 is loaded from str0, added to str1 with |
; paddsw (signed, saturating, 16-bit) and stored to output. |
; Overwrites eax, ecx, edx and mm0-mm3. No emms is issued here. |
proc mix_2_1_mmx stdcall, output:dword, str0:dword, str1:dword |
; edx = destination, eax = source 1, ecx = source 2 |
mov edx, [output] |
mov eax, [str0] |
mov ecx, [str1] |
; mm0-mm3 are reused in rotation, one load/add/store triple per quadword. |
movq mm0, [eax] |
paddsw mm0, [ecx] |
movq [edx], mm0 |
movq mm1, [eax+8] |
paddsw mm1,[ecx+8] |
movq [edx+8], mm1 |
movq mm2, [eax+16] |
paddsw mm2, [ecx+16] |
movq [edx+16], mm2 |
movq mm3, [eax+24] |
paddsw mm3, [ecx+24] |
movq [edx+24], mm3 |
movq mm0, [eax+32] |
paddsw mm0, [ecx+32] |
movq [edx+32], mm0 |
movq mm1, [eax+40] |
paddsw mm1, [ecx+40] |
movq [edx+40], mm1 |
movq mm2, [eax+48] |
paddsw mm2, [ecx+48] |
movq [edx+48], mm2 |
movq mm3, [eax+56] |
paddsw mm3, [ecx+56] |
movq [edx+56], mm3 |
movq mm0, [eax+64] |
paddsw mm0, [ecx+64] |
movq [edx+64], mm0 |
movq mm1, [eax+72] |
paddsw mm1, [ecx+72] |
movq [edx+72], mm1 |
movq mm2, [eax+80] |
paddsw mm2, [ecx+80] |
movq [edx+80], mm2 |
movq mm3, [eax+88] |
paddsw mm3, [ecx+88] |
movq [edx+88], mm3 |
movq mm0, [eax+96] |
paddsw mm0, [ecx+96] |
movq [edx+96], mm0 |
movq mm1, [eax+104] |
paddsw mm1, [ecx+104] |
movq [edx+104], mm1 |
movq mm2, [eax+112] |
paddsw mm2, [ecx+112] |
movq [edx+112], mm2 |
movq mm3, [eax+120] |
paddsw mm3, [ecx+120] |
movq [edx+120], mm3 |
ret |
endp |
align 4 |
; mix_3_1_mmx - MMX mix of one 128-byte block of three streams. |
; Args (stdcall): output = destination, str0 / str1 / str2 = the sources. |
; Each quadword at offsets 0..120 is loaded from str0, added to str1 and |
; str2 with paddsw (signed, saturating, 16-bit) and stored to output. |
; Overwrites eax, ebx, ecx, edx and mm0-mm3. No emms is issued here. |
; Lines belonging to a 4-stream routine were removed from this body: |
; `mov edx, [str3]` replaced the output pointer with a name that is not a |
; parameter of this procedure, and the `call [mix_4_core]` groups ran with |
; edi never loaded. |
proc mix_3_1_mmx stdcall, output:dword, str0:dword, str1:dword, str2:dword |
; edx = destination, eax / ebx / ecx = sources 1 / 2 / 3 |
mov edx, [output] |
mov eax, [str0] |
mov ebx, [str1] |
mov ecx, [str2] |
; mm0-mm3 are reused in rotation, one load/add/add/store group per quadword. |
movq mm0, [eax] |
paddsw mm0, [ebx] |
paddsw mm0, [ecx] |
movq [edx], mm0 |
movq mm1, [eax+8] |
paddsw mm1, [ebx+8] |
paddsw mm1, [ecx+8] |
movq [edx+8], mm1 |
movq mm2, [eax+16] |
paddsw mm2, [ebx+16] |
paddsw mm2, [ecx+16] |
movq [edx+16], mm2 |
movq mm3, [eax+24] |
paddsw mm3, [ebx+24] |
paddsw mm3, [ecx+24] |
movq [edx+24], mm3 |
movq mm0, [eax+32] |
paddsw mm0, [ebx+32] |
paddsw mm0, [ecx+32] |
movq [edx+32], mm0 |
movq mm1, [eax+40] |
paddsw mm1, [ebx+40] |
paddsw mm1, [ecx+40] |
movq [edx+40], mm1 |
movq mm2, [eax+48] |
paddsw mm2, [ebx+48] |
paddsw mm2, [ecx+48] |
movq [edx+48], mm2 |
movq mm3, [eax+56] |
paddsw mm3, [ebx+56] |
paddsw mm3, [ecx+56] |
movq [edx+56], mm3 |
movq mm0, [eax+64] |
paddsw mm0, [ebx+64] |
paddsw mm0, [ecx+64] |
movq [edx+64], mm0 |
movq mm1, [eax+72] |
paddsw mm1, [ebx+72] |
paddsw mm1, [ecx+72] |
movq [edx+72], mm1 |
movq mm2, [eax+80] |
paddsw mm2, [ebx+80] |
paddsw mm2, [ecx+80] |
movq [edx+80], mm2 |
movq mm3, [eax+88] |
paddsw mm3, [ebx+88] |
paddsw mm3, [ecx+88] |
movq [edx+88], mm3 |
movq mm0, [eax+96] |
paddsw mm0, [ebx+96] |
paddsw mm0, [ecx+96] |
movq [edx+96], mm0 |
movq mm1, [eax+104] |
paddsw mm1, [ebx+104] |
paddsw mm1, [ecx+104] |
movq [edx+104], mm1 |
movq mm2, [eax+112] |
paddsw mm2, [ebx+112] |
paddsw mm2, [ecx+112] |
movq [edx+112], mm2 |
movq mm3, [eax+120] |
paddsw mm3, [ebx+120] |
paddsw mm3, [ecx+120] |
movq [edx+120], mm3 |
ret |
endp |
align 4 |
; mix_4_1_mmx - MMX mix of one 128-byte block of four streams. |
; Args (stdcall): output = destination, str0..str3 = the four sources. |
; For each quadword at offsets 0..120: one register takes str0 + str2, a |
; second takes str1 + str3, the two are added and the result is stored to |
; output. All additions are paddsw (signed, saturating, 16-bit). |
; Overwrites esi, eax, ebx, ecx, edx and mm0-mm3. No emms is issued here. |
; NOTE(review): esi and ebx are not saved or restored in this body - confirm |
; callers do not rely on them. |
proc mix_4_1_mmx stdcall, output:dword, str0:dword, str1:dword,\ |
str2:dword, str3:dword |
; edx = destination; esi / eax / ebx / ecx = sources 0 / 1 / 2 / 3 |
mov edx, [output] |
mov esi, [str0] |
mov eax, [str1] |
mov ebx, [str2] |
mov ecx, [str3] |
; Offsets 0..72 alternate between the mm0/mm1 and mm2/mm3 pairs. |
movq mm0, [esi] |
movq mm1, [eax] |
paddsw mm0, [ebx] |
paddsw mm1, [ecx] |
paddsw mm0, mm1 |
movq [edx], mm0 |
movq mm2, [esi+8] |
movq mm3, [eax+8] |
paddsw mm2, [ebx+8] |
paddsw mm3, [ecx+8] |
paddsw mm2, mm3 |
movq [edx+8], mm2 |
movq mm0, [esi+16] |
movq mm1, [eax+16] |
paddsw mm0, [ebx+16] |
paddsw mm1, [ecx+16] |
paddsw mm0, mm1 |
movq [edx+16], mm0 |
movq mm2, [esi+24] |
movq mm3, [eax+24] |
paddsw mm2, [ebx+24] |
paddsw mm3, [ecx+24] |
paddsw mm2, mm3 |
movq [edx+24], mm2 |
movq mm0, [esi+32] |
movq mm1, [eax+32] |
paddsw mm0, [ebx+32] |
paddsw mm1, [ecx+32] |
paddsw mm0, mm1 |
movq [edx+32], mm0 |
movq mm2, [esi+40] |
movq mm3, [eax+40] |
paddsw mm2, [ebx+40] |
paddsw mm3, [ecx+40] |
paddsw mm2, mm3 |
movq [edx+40], mm2 |
movq mm0, [esi+48] |
movq mm1, [eax+48] |
paddsw mm0, [ebx+48] |
paddsw mm1, [ecx+48] |
paddsw mm0, mm1 |
movq [edx+48], mm0 |
movq mm2, [esi+56] |
movq mm3, [eax+56] |
paddsw mm2, [ebx+56] |
paddsw mm3, [ecx+56] |
paddsw mm2, mm3 |
movq [edx+56], mm2 |
movq mm0, [esi+64] |
movq mm1, [eax+64] |
paddsw mm0, [ebx+64] |
paddsw mm1, [ecx+64] |
paddsw mm0, mm1 |
movq [edx+64], mm0 |
movq mm2, [esi+72] |
movq mm3, [eax+72] |
paddsw mm2, [ebx+72] |
paddsw mm3, [ecx+72] |
paddsw mm2, mm3 |
movq [edx+72], mm2 |
; From offset 80 on, only the mm2/mm3 pair is used for every quadword. |
movq mm2, [esi+80] |
movq mm3, [eax+80] |
paddsw mm2, [ebx+80] |
paddsw mm3, [ecx+80] |
paddsw mm2, mm3 |
movq [edx+80], mm2 |
movq mm2, [esi+88] |
movq mm3, [eax+88] |
paddsw mm2, [ebx+88] |
paddsw mm3, [ecx+88] |
paddsw mm2, mm3 |
movq [edx+88], mm2 |
movq mm2, [esi+96] |
movq mm3, [eax+96] |
paddsw mm2, [ebx+96] |
paddsw mm3, [ecx+96] |
paddsw mm2, mm3 |
movq [edx+96], mm2 |
movq mm2, [esi+104] |
movq mm3, [eax+104] |
paddsw mm2, [ebx+104] |
paddsw mm3, [ecx+104] |
paddsw mm2, mm3 |
movq [edx+104], mm2 |
movq mm2, [esi+112] |
movq mm3, [eax+112] |
paddsw mm2, [ebx+112] |
paddsw mm3, [ecx+112] |
paddsw mm2, mm3 |
movq [edx+112], mm2 |
movq mm2, [esi+120] |
movq mm3, [eax+120] |
paddsw mm2, [ebx+120] |
paddsw mm3, [ecx+120] |
paddsw mm2, mm3 |
movq [edx+120], mm2 |
ret |
endp |
align 4 |
proc copy_mem stdcall, output:dword, input:dword |
mov edi, [output] |