0,0 → 1,320 |
;// Fast Life generator: about 2.2 pixel*generations per clock cycle ("tact") |
|
; live_shl reg,enable |
; Emits "psllq reg,1" (shift the 64-bit MMX register left by one bit) only |
; when the second argument is the literal token yes; otherwise emits nothing. |
macro live_shl reg,enable |
{ |
    if enable eq yes |
        psllq reg,1 |
    end if |
} |
|
; live_shr reg,enable |
; Emits "psrlq reg,1" (shift the 64-bit MMX register right by one bit) only |
; when the second argument is the literal token yes; otherwise emits nothing. |
macro live_shr reg,enable |
{ |
    if enable eq yes |
        psrlq reg,1 |
    end if |
} |
|
; live_zero reg_a,reg_b |
; Clears both MMX registers to zero. |
macro live_zero reg_a,reg_b |
{ |
    pxor reg_a,reg_a |
    pxor reg_b,reg_b |
} |
|
; live_load cnt_hi,cnt_lo,nbr,tmp,do_shl,do_shr |
; Adds, bit position by bit position, the three 64-bit rows found at |
; [edi+ecx], [ebx+ecx] and [esi+ecx] (a full adder working on 64 bits at once). |
; On exit cnt_lo holds the low bit and cnt_hi the high bit of every 2-bit sum. |
; The [edi+ecx] row is shifted left by one bit first when do_shl is yes, and |
; the [esi+ecx] row is shifted right by one bit first when do_shr is yes. |
; nbr and tmp are scratch registers and are overwritten. |
macro live_load cnt_hi,cnt_lo,nbr,tmp,do_shl,do_shr |
{ |
    ; fetch the edi-side row and the row of the current strip |
    movq cnt_lo,[edi+ecx] |
    movq cnt_hi,[ebx+ecx] |
    live_shl cnt_lo,do_shl |
    ; first half adder: cnt_lo = xor of the two rows, cnt_hi = their carry |
    movq tmp,cnt_lo |
    pxor cnt_lo,cnt_hi |
    movq nbr,[esi+ecx] |
    pand cnt_hi,tmp |
    live_shr nbr,do_shr |
    ; second half adder with the esi-side row; the two carries are merged |
    movq tmp,cnt_lo |
    pxor cnt_lo,nbr |
    pand tmp,nbr |
    por cnt_hi,tmp |
} |
|
; live_operation a,A,b,B,c,C,d,D,shift |
; Computes 64 next-generation cells and stores them at [ebp+shift]. |
; a:A, b:B and c:C are the high:low bit planes of the 2-bit per-row sums that |
; live_load produces, one pair for each of three rows. The cell's own current |
; state is read from [ebx+shift+16]. d and D are scratch registers. |
; a and A are destroyed; b, B, c and C are only read. |
; With N = total of the three row sums (so the cell itself is counted), the |
; stored bit is 1 when N = 3, or when N = 4 and the bit at [ebx+shift+16] is set. |
macro live_operation a,A,b,B,c,C,d,D,shift |
{ |
; A = A xor B (weight-1 partial sum), D = A and B (carry of weight 2) |
movq D,A |
pxor A,B |
pand D,B |
; fold that carry into a: a = a xor carry, d = a and carry (weight 4) |
movq d,a |
pxor a,D |
pand d,D |
; add b: a keeps the weight-2 bit, d collects the weight-4 carry |
movq D,a |
pxor a,b |
pand D,b |
por d,D |
; add c the same way; the xor keeps d as the weight-4 bit modulo 2 |
movq D,a |
pxor a,c |
pand D,c |
pxor d,D |
; a = weight-2 bit xor weight-4 bit |
pxor a,d |
; D = (A xor B) or C, A = A xor B xor C (weight-1 bit), d = d xor D |
movq D,A |
por D,C |
pxor A,C |
pxor d,D |
; A = weight-1 bit or the cell's current state |
por A,[ebx+shift+16] |
; result = a and d and A, written to the output row |
pand a,d |
pand a,A |
movq [ebp+shift],a |
} |
|
; live_cycle shl_edi,shr_esi |
; Runs live_load / live_operation over every row of one strip. |
; In:  ebx = strip being computed; edi, esi = the two neighbour strips whose |
;      rows live_load folds in; ebp = output base used by live_operation; |
;      edx = strip size in bytes; eax = row step (both call paths in this |
;      file load 16). |
; shl_edi / shr_esi are forwarded to live_load: yes shifts the edi row left / |
; the esi row right by one bit before it is added. |
; ecx is the running row offset and mm0-mm7 are all used; the macro itself |
; does not write eax, ebx, edx, esi, edi or ebp. |
; With eax = 16 the rows are visited at offsets edx-8, edx-24, ... 8 and then |
; edx-16, edx-32, ... 0. Four rows are handled per loop pass, so the exit |
; tests rely on edx being a multiple of 64 (the caller masks it that way). |
; The four register pairs rotate roles from one live_operation to the next, |
; which is why the loop body is unrolled four times. |
macro live_cycle shl_edi,shr_esi |
{ |
local cycle |
local cycle_entry |
local last_oper |
; First row (offset edx-8): no row was visited before it, so the sums of the |
; preceding row in mm2:mm3 are zero. |
lea ecx,[edx-8] |
live_zero mm2,mm3 |
live_load mm4,mm5,mm6,mm7,shl_edi,shr_esi |
sub ecx,eax |
jmp cycle_entry |
cycle: |
; new row sums in mm4:mm5, result built from mm0:mm1, mm2:mm3, mm4:mm5 |
live_load mm4,mm5,mm6,mm7,shl_edi,shr_esi |
live_operation mm0,mm1,mm2,mm3,mm4,mm5,mm6,mm7,ecx |
sub ecx,eax |
cycle_entry: |
; new row sums in mm6:mm7, result built from mm2:mm3, mm4:mm5, mm6:mm7 |
live_load mm6,mm7,mm0,mm1,shl_edi,shr_esi |
live_operation mm2,mm3,mm4,mm5,mm6,mm7,mm0,mm1,ecx |
sub ecx,eax |
; new row sums in mm0:mm1, result built from mm4:mm5, mm6:mm7, mm0:mm1 |
live_load mm0,mm1,mm2,mm3,shl_edi,shr_esi |
live_operation mm4,mm5,mm6,mm7,mm0,mm1,mm2,mm3,ecx |
sub ecx,eax |
; new row sums in mm2:mm3, result built from mm6:mm7, mm0:mm1, mm2:mm3 |
live_load mm2,mm3,mm4,mm5,shl_edi,shr_esi |
live_operation mm6,mm7,mm0,mm1,mm2,mm3,mm4,mm5,ecx |
sub ecx,eax |
; keep looping while the row offset is still non-negative |
jnl cycle |
; ecx went negative. With eax = 16, cl = -8 means the pass over offsets |
; 8, 24, ... has just ended: continue with the remaining rows from edx-16. |
cmp cl,-8 |
jnz last_oper |
lea ecx,[edx-16] |
live_load mm4,mm5,mm6,mm7,shl_edi,shr_esi |
; shift -8: stores at [ebp-8] the result for the row read at [ebx+8] |
live_operation mm0,mm1,mm2,mm3,mm4,mm5,mm6,mm7,-8 |
sub ecx,eax |
jmp cycle_entry |
last_oper: |
; No row follows the one at offset 0: use zero sums in mm4:mm5 for it and |
; emit the final result (with eax = 16, ecx is -16 here). |
live_zero mm4,mm5 |
live_operation mm0,mm1,mm2,mm3,mm4,mm5,mm6,mm7,ecx |
} |
|
; OneGeneration_Flag12 - clear the row and/or column just past the field in |
; the source buffer before a generation is computed. |
; Internal helper of @OneGeneration$qqsiipvpxvi. It reads the caller's stack |
; arguments directly, so it must be called with exactly one extra dword pushed |
; on top of the caller's four saved registers. |
; In:  eax = number of strips - 1 |
;      ebx = source base (already rounded to 16 bytes by the caller) |
;      edx = strip size in bytes |
;      after "push edi" below: [esp+32] = width, [esp+36] = height, |
;      [esp+48] = flags |
; Flags bit 1 set and bit 3 clear: the 8-byte row number "height" is zeroed in |
;      every strip (skipped when that row does not exist in the strip). |
; Flags bit 0 set and bit 2 clear: the bit of column number "width" is cleared |
;      in every row of its strip (skipped when its bit index is 64 or more). |
; Out: esi is always overwritten; eax, ecx and mm0 are overwritten on the |
;      paths that use them; ebx, edx, edi and ebp are preserved. |
; Note: "loop" was replaced by dec/jnz; flags are not live after that loop |
;      because the next instruction is "bt". |
OneGeneration_Flag12: |
        push edi |
; esi = number of strips |
        lea esi,[eax+1] |
; ---- flag bit 1 (and not bit 3): zero row "height" ---- |
        bt dword [esp+48],1 |
        jnc OneGeneration_flag2_end |
        bt dword [esp+48],3 |
        jc OneGeneration_flag2_end |
; edi = 16*height; rows with 16*row below the strip size sit at that offset, |
; later rows sit at 16*row - stripsize + 8 |
        mov edi,[esp+36] |
        shl edi,4 |
        cmp edi,edx |
        jb OneGeneration_flag2_uphalf |
        sub edi,edx |
        cmp edi,edx |
; row "height" does not exist in the strip: nothing to clear |
        jnb OneGeneration_flag2_end |
        add edi,8 |
OneGeneration_flag2_uphalf: |
        mov ecx,esi |
        add edi,ebx |
        pxor mm0,mm0 |
; one 8-byte store per strip, strips are edx bytes apart |
OneGeneration_flag2_cycle: |
        movq [edi],mm0 |
        add edi,edx |
        dec ecx |
        jnz OneGeneration_flag2_cycle |
OneGeneration_flag2_end: |
; ---- flag bit 0 (and not bit 2): clear column "width" ---- |
        bt dword [esp+48],0 |
        jnc OneGeneration_flag1_end |
        bt dword [esp+48],2 |
        jc OneGeneration_flag1_end |
; width / strips: quotient (eax) = bit index, remainder (esi) = strip index |
        push edx |
        mov eax,[esp+36] |
        xor edx,edx |
        div esi |
        mov esi,edx |
        pop edx |
; a row holds 64 bits only |
        cmp eax,64 |
        jnb OneGeneration_flag1_end |
; esi = address of that strip |
        imul esi,edx |
        add esi,ebx |
; bit indexes 32..63 live in the upper dword of the 8-byte row |
        btr eax,5 |
        jnc OneGeneration_flag1_noadd4 |
        add esi,4 |
OneGeneration_flag1_noadd4: |
; walk all rows of the strip from the highest offset down, 8 bytes apart, |
; four rows per pass (the caller makes edx a multiple of 64) |
        lea ecx,[edx-8] |
        mov edi,8 |
OneGeneration_flag1_cycle: |
        btr dword [esi+ecx],eax |
        sub ecx,edi |
        btr dword [esi+ecx],eax |
        sub ecx,edi |
        btr dword [esi+ecx],eax |
        sub ecx,edi |
        btr dword [esi+ecx],eax |
        sub ecx,edi |
        jnl OneGeneration_flag1_cycle |
OneGeneration_flag1_end: |
        pop edi |
        ret |
|
; @OneGeneration$qqsiipvpxvi - compute one Life generation from src into dest. |
; The decorated name and the final "ret 20" indicate a callee-cleaned routine |
; with five dword arguments. After the four pushes below they are read from: |
;   [esp+20] width  - columns; (width-1)/64 + 1 strips are processed |
;   [esp+24] height - rows; a strip holds 8 bytes per row, the row count being |
;                     rounded up to a multiple of 8 |
;   [esp+28] dest   - output buffer, rounded up to a 16-byte boundary |
;   [esp+32] src    - input buffer, rounded up to a 16-byte boundary |
;   [esp+36] flags  - bits 0 and 1: see OneGeneration_Flag12 (src is modified) |
;                     bit 3: afterwards copy dest row height-2 to row 0 and |
;                            row 1 to row height-1 |
;                     bit 2: afterwards copy dest column width-2 to column 0 |
;                            and column 1 to column width-1 |
; Layout used by the code: column x is bit (x div strips) of strip |
; (x mod strips); inside a strip, row r is at byte offset 16*r while that is |
; below the strip size, otherwise at 16*r - stripsize + 8. |
; Returns immediately when width is zero or negative, when the strip size is |
; not positive, or when (strips-1)*stripsize overflows. |
; ebx, esi, edi and ebp are restored; eax, ecx, edx and mm0-mm7 are not. |
; NOTE(review): with flag bit 3 and height below 2, or flag bit 2 and width 1, |
; the wrap code forms a negative row offset / bit index - presumably callers |
; never pass such sizes; confirm. |
; Note: "loop" in the row-wrap copy was replaced by dec/jnz; flags are not |
; live after it because the next instruction is "bt". |
@OneGeneration$qqsiipvpxvi: |
        push ebp |
        push ebx |
        push esi |
        push edi |
        mov eax,[esp+20] |
        mov edx,[esp+24] |
        mov ebp,[esp+28] |
        mov ebx,[esp+32] |
; eax = width-1; nothing to do for width of zero or less |
        dec eax |
        jl OneGeneration_end |
; eax = strips-1, edx = strip size = 8 * (height rounded up to 8), |
; ebx = aligned src, ebp = aligned dest + 16 (live_operation stores at |
; [ebp+shift] the row it reads at [ebx+shift+16]) |
        add edx,7 |
        add ebp,31 |
        add ebx,15 |
        shr eax,6 |
        shl edx,3 |
        and ebp,not 15 |
        and ebx,not 15 |
        and edx,not 63 |
        jng OneGeneration_end |
        test eax,eax |
        jz OneGeneration_single |
; ---- two or more strips ---- |
; edi = address of the last strip |
        mov edi,edx |
        imul edi,eax |
        jo OneGeneration_end |
        push eax |
        add edi,ebx |
        call OneGeneration_Flag12 |
; first strip: edi side = last strip shifted left one bit, esi side = strip 1 |
        lea esi,[ebx+edx] |
; second copy of strips-1 serves as the loop counter |
        push dword [esp] |
        mov eax,16 |
        live_cycle yes,no |
        jmp OneGeneration_cycle_fin |
OneGeneration_cycle: |
; middle strips: slide the edi / ebx / esi window and the output by one strip |
        mov edi,ebx |
        mov ebx,esi |
        add ebp,edx |
        add esi,edx |
        live_cycle no,no |
OneGeneration_cycle_fin: |
        dec dword [esp] |
        jg OneGeneration_cycle |
; last strip: esi side = first strip (ebx - stripsize*(strips-1)), |
; shifted right one bit |
        mov edi,ebx |
        pop ecx |
        mov ebx,esi |
        mov esi,edx |
        add ebp,edx |
        imul esi,[esp] |
        neg esi |
        add esi,ebx |
        live_cycle no,yes |
        jmp OneGeneration_flag48 |
OneGeneration_single: |
; ---- single strip: it is its own neighbour on both sides ---- |
        push eax |
        mov edi,ebx |
        call OneGeneration_Flag12 |
        mov esi,ebx |
        mov eax,16 |
        live_cycle yes,yes |
OneGeneration_flag48: |
; ebp = number of strips (pushed strips-1, plus one) |
        pop ebp |
        inc ebp |
; ---- flag bit 3: copy rows inside dest ---- |
        bt dword [esp+36],3 |
        jnc OneGeneration_flag8_end |
        mov edi,[esp+24] |
        mov ebx,[esp+28] |
        dec edi |
        add ebx,15 |
        shl edi,4 |
        lea esi,[edi-16] |
        and ebx,not 15 |
; edi = offset of row height-1, esi = offset of row height-2 |
        cmp edi,edx |
        jb OneGeneration_flag8_uphalf |
        sub edi,edx |
        add edi,8 |
        cmp esi,edx |
        jb OneGeneration_flag8_uphalf |
        sub esi,edx |
        add esi,8 |
OneGeneration_flag8_uphalf: |
        mov ecx,ebp |
; per strip: row 0 = row height-2, row height-1 = row 1 (offset 16) |
OneGeneration_flag8_cycle: |
        movq mm0,[ebx+esi] |
        movq [ebx],mm0 |
        movq mm0,[ebx+16] |
        movq [ebx+edi],mm0 |
        add ebx,edx |
        dec ecx |
        jnz OneGeneration_flag8_cycle |
OneGeneration_flag8_end: |
; ---- flag bit 2: copy columns inside dest ---- |
        bt dword [esp+36],2 |
        jnc OneGeneration_flag4_end |
        mov eax,[esp+20] |
        push edx |
        dec eax |
        xor edx,edx |
; [esp+32] is dest here because of the push above |
        mov ebx,[esp+32] |
; (width-1) / strips: eax = bit index, edx = strip index of column width-1 |
        div ebp |
        add ebx,15 |
        mov esi,eax |
        mov edi,edx |
        and ebx,not 15 |
; column width-2: previous strip with the same bit, or the last strip with |
; the previous bit when column width-1 is in strip 0 |
        dec edx |
        jl OneGeneration_flag4_dec0 |
        mov ebp,edx |
        jmp OneGeneration_flag4_after_dec |
OneGeneration_flag4_dec0: |
        dec ebp |
        dec eax |
OneGeneration_flag4_after_dec: |
        pop edx |
; edi / esi = strip address / bit of column width-1, |
; ebp / eax = strip address / bit of column width-2 |
        imul edi,edx |
        imul ebp,edx |
        add edi,ebx |
        add ebp,ebx |
; bit indexes 32..63 live in the upper dword of the 8-byte row |
        btr esi,5 |
        jnc OneGeneration_flag4_noadd4f |
        add edi,4 |
OneGeneration_flag4_noadd4f: |
        btr eax,5 |
        jnc OneGeneration_flag4_noadd4s |
        add ebp,4 |
OneGeneration_flag4_noadd4s: |
; for every row: column 0 (bit 0 of strip 0) = column width-2 |
        mov ecx,edx |
        jmp OneGeneration_flag4_cycle0_entry |
OneGeneration_flag4_cycle0: |
        btr dword [ebx+ecx],0 |
OneGeneration_flag4_cycle0_entry: |
        sub ecx,8 |
        jl OneGeneration_flag4_cycle0_end |
        bt dword [ebp+ecx],eax |
        jnc OneGeneration_flag4_cycle0 |
        bts dword [ebx+ecx],0 |
        jmp OneGeneration_flag4_cycle0_entry |
OneGeneration_flag4_cycle0_end: |
; column 1 is bit 0 of strip 1, or bit 1 of strip 0 when width is 64 or less |
        xor eax,eax |
        cmp dword [esp+20],64 |
        jng OneGeneration_flag4_single |
        add ebx,edx |
        jmp OneGeneration_flag4_cycle1_entry |
OneGeneration_flag4_single: |
        inc eax |
        jmp OneGeneration_flag4_cycle1_entry |
; for every row: column width-1 = column 1 (edx is consumed as the row offset) |
OneGeneration_flag4_cycle1: |
        btr dword [edi+edx],esi |
OneGeneration_flag4_cycle1_entry: |
        sub edx,8 |
        jl OneGeneration_flag4_end |
        bt dword [ebx+edx],eax |
        jnc OneGeneration_flag4_cycle1 |
        bts dword [edi+edx],esi |
        jmp OneGeneration_flag4_cycle1_entry |
OneGeneration_flag4_end: |
; leave MMX state so the caller can use the FPU again |
        emms |
OneGeneration_end: |
        pop edi |
        pop esi |
        pop ebx |
        pop ebp |
        ret 20 |
|