;// fast life generator: ~2.2 pixel*generation/tact
; ("tact" presumably means CPU clock cycle; the figure is the author's and
;  is not verified here.)
;
; Syntax: flat assembler (FASM) macroinstructions, 32-bit x86 with MMX.
;
; Data layout as used by the code below (derived from the addressing only):
;   * A buffer is a sequence of rows, each EDX bytes long (the row stride).
;   * A row is a sequence of 8-byte qwords; the main loop walks the qwords at
;     odd offsets (stride-8, stride-24, ..., 8) first and then the qwords at
;     even offsets (stride-16, ..., 0), always stepping by EAX (= 16) bytes.
;   * Three row pointers EDI / EBX / ESI are combined bit-by-bit; for the
;     first and last row the EDI / ESI operand is additionally shifted by one
;     bit, which is how neighbouring rows wrap around inside a qword.

; live_shl x,do_shl
;   Emits "psllq x,1" only when do_shl is the literal symbol "yes";
;   otherwise emits nothing.
macro live_shl x,do_shl
{
if do_shl eq yes
        psllq x,1
end if
}

; live_shr x,do_shr
;   Emits "psrlq x,1" only when do_shr is the literal symbol "yes";
;   otherwise emits nothing.
macro live_shr x,do_shr
{
if do_shr eq yes
        psrlq x,1
end if
}

; live_zero x,y
;   Clears both MMX registers x and y.
macro live_zero x,y
{
        pxor x,x
        movq y,x
}

; live_load x,y,z,t,shl_edi,shr_esi
;   Loads the qwords at [edi+ecx], [ebx+ecx] and [esi+ecx] and adds them
;   bitwise (a 3-input full adder on each of the 64 bit lanes):
;       y = P xor Q xor R                  (sum bit)
;       x = (P and Q) or ((P xor Q) and R) (carry bit)
;   where P = [edi+ecx] (shifted left by 1 if shl_edi is "yes"),
;         Q = [ebx+ecx],
;         R = [esi+ecx] (shifted right by 1 if shr_esi is "yes").
;   On exit z holds R and t holds an intermediate value; both are scratch.
macro live_load x,y,z,t,shl_edi,shr_esi
{
        movq y,[edi+ecx]
        movq x,[ebx+ecx]
        live_shl y,shl_edi
        movq t,y
        pxor y,x
        movq z,[esi+ecx]
        pand x,t                ; x = P and Q
        live_shr z,shr_esi
        movq t,y
        pxor y,z                ; y = P xor Q xor R
        pand t,z                ; t = (P xor Q) and R
        por x,t                 ; x = carry out of the three inputs
}

; live_operation a,A,b,B,c,C,d,D,shift
;   Combines three (carry,sum) pairs a/A, b/B, c/C as produced by live_load
;   with the source qword at [ebx+shift+16] and stores one result qword to
;   [ebp+shift].
;   Overwrites a, A, d and D; b, B, c and C are only read.
;   NOTE(review): this is presumably the Life birth/survival rule evaluated
;   on the summed neighbour bits; the boolean derivation is not spelled out
;   in the source - confirm before changing any instruction.
macro live_operation a,A,b,B,c,C,d,D,shift
{
        movq D,A
        pxor A,B
        pand D,B
        movq d,a
        pxor a,D
        pand d,D
        movq D,a
        pxor a,b
        pand D,b
        por d,D
        movq D,a
        pxor a,c
        pand D,c
        pxor d,D
        pxor a,d
        movq D,A
        por D,C
        pxor A,C
        pxor d,D
        por A,[ebx+shift+16]    ; mix in the source qword of the middle column
        pand a,d
        pand a,A
        movq [ebp+shift],a      ; write the result qword
}

; live_cycle shl_edi,shr_esi
;   Processes one whole row.
;   In:   edi, ebx, esi = the three source row pointers used by live_load
;         ebp           = destination row pointer
;         edx           = row stride in bytes
;         eax           = byte step between processed qwords (callers in
;                         this file always pass 16)
;   Uses: ecx (running byte offset), mm0..mm7.
;   shl_edi / shr_esi are forwarded to every live_load.
;
;   The loop is unrolled four times; on each step the three register pairs
;   rotate so that the pair loaded last becomes "c", the previous one "b"
;   and the one before that "a", with the remaining pair used as scratch.
;   The row is walked in two passes (see the layout note at the top of the
;   file); "cmp cl,-8" distinguishes the end of the first pass (ecx = -8)
;   from the end of the second (ecx = -16).
macro live_cycle shl_edi,shr_esi
{
local cycle
local cycle_entry
local last_oper
        lea ecx,[edx-8]         ; start at the last qword of the row
        live_zero mm2,mm3       ; no column before the first one: use zeros
        live_load mm4,mm5,mm6,mm7,shl_edi,shr_esi
        sub ecx,eax
        jmp cycle_entry         ; first step has nothing to emit yet
cycle:
        live_load mm4,mm5,mm6,mm7,shl_edi,shr_esi
        live_operation mm0,mm1,mm2,mm3,mm4,mm5,mm6,mm7,ecx
        sub ecx,eax
cycle_entry:
        live_load mm6,mm7,mm0,mm1,shl_edi,shr_esi
        live_operation mm2,mm3,mm4,mm5,mm6,mm7,mm0,mm1,ecx
        sub ecx,eax
        live_load mm0,mm1,mm2,mm3,shl_edi,shr_esi
        live_operation mm4,mm5,mm6,mm7,mm0,mm1,mm2,mm3,ecx
        sub ecx,eax
        live_load mm2,mm3,mm4,mm5,shl_edi,shr_esi
        live_operation mm6,mm7,mm0,mm1,mm2,mm3,mm4,mm5,ecx
        sub ecx,eax
        jnl cycle               ; continue while the offset is still >= 0
        cmp cl,-8               ; -8: first pass just ended; else second
        jnz last_oper
        lea ecx,[edx-16]        ; second pass: qwords at even offsets
        live_load mm4,mm5,mm6,mm7,shl_edi,shr_esi
        ; shift is the constant -8 here: ecx already points into the second
        ; pass, but the result belongs to the qword at offset 8.
        live_operation mm0,mm1,mm2,mm3,mm4,mm5,mm6,mm7,-8
        sub ecx,eax
        jmp cycle_entry
last_oper:
        live_zero mm4,mm5       ; no column after the last one: use zeros
        live_operation mm0,mm1,mm2,mm3,mm4,mm5,mm6,mm7,ecx
}

; OneGeneration_Flag12
;   Internal helper, called from @OneGeneration$qqsiipvpxvi right after it
;   has pushed EAX, so the caller's arguments are reached through ESP with
;   the offsets noted below.
;   In:   eax = row count - 1, ebx = aligned source buffer, edx = row stride
;   Out:  esi = eax + 1 on entry to the flag-1 part; eax, ecx, esi, mm0 and
;         flags may be modified. edi is saved and restored.
;   Stack on entry after "push edi":
;         [esp+32] = 1st argument, [esp+36] = 2nd argument,
;         [esp+48] = 5th argument (flag bits).
OneGeneration_Flag12:
        push edi
        lea esi,[eax+1]                 ; esi = number of rows
        ; --- flag bit 1 set and bit 3 clear: zero one qword column ---
        bt dword [esp+48],1
        jnc OneGeneration_flag2_end
        bt dword [esp+48],3
        jc OneGeneration_flag2_end
        mov edi,[esp+36]                ; 2nd argument
        shl edi,4                       ; -> byte offset, 16 bytes per step
        cmp edi,edx
        jb OneGeneration_flag2_uphalf   ; fits into the even-offset half
        sub edi,edx
        cmp edi,edx
        jnb OneGeneration_flag2_end     ; beyond both halves: nothing to do
        add edi,8                       ; odd-offset half
OneGeneration_flag2_uphalf:
        mov ecx,esi                     ; one store per row
        add edi,ebx
        pxor mm0,mm0
OneGeneration_flag2_cycle:
        movq [edi],mm0
        add edi,edx
        loop OneGeneration_flag2_cycle
OneGeneration_flag2_end:
        ; --- flag bit 0 set and bit 2 clear: clear one bit in every qword
        ;     of one row ---
        bt dword [esp+48],0
        jnc OneGeneration_flag1_end
        bt dword [esp+48],2
        jc OneGeneration_flag1_end
        push edx
        mov eax,[esp+36]                ; 1st argument (esp moved by the push)
        xor edx,edx
        div esi                         ; eax = bit index, edx = row index
        mov esi,edx
        pop edx
        cmp eax,64
        jnb OneGeneration_flag1_end     ; bit index outside a qword
        imul esi,edx
        add esi,ebx                     ; esi -> selected row
        ; BTR with a register bit offset addresses memory relative to the
        ; operand, so reduce the index to 0..31 and move to the upper dword
        ; explicitly when bit 5 was set.
        btr eax,5
        jnc OneGeneration_flag1_noadd4
        add esi,4
OneGeneration_flag1_noadd4:
        lea ecx,[edx-8]                 ; last qword of the row
        mov edi,8                       ; step: one qword
OneGeneration_flag1_cycle:
        btr dword [esi+ecx],eax
        sub ecx,edi
        btr dword [esi+ecx],eax
        sub ecx,edi
        btr dword [esi+ecx],eax
        sub ecx,edi
        btr dword [esi+ecx],eax
        sub ecx,edi
        jnl OneGeneration_flag1_cycle
OneGeneration_flag1_end:
        pop edi
        ret

; @OneGeneration$qqsiipvpxvi
;   Exported entry point. Five dword arguments are taken from the stack and
;   removed by the callee ("ret 20").
;   NOTE(review): the decorated name looks like Borland C++ mangling for
;   "__stdcall OneGeneration(int, int, void *, const void *, int)" - confirm
;   against the C++ header.
;   Stack after the four pushes below:
;         [esp+20] = 1st argument (size along the axis packed into bits/rows)
;         [esp+24] = 2nd argument (size along the qword-column axis)
;         [esp+28] = 3rd argument (destination buffer)
;         [esp+32] = 4th argument (source buffer)
;         [esp+36] = 5th argument (flag bits 0..3)
;   Both buffer pointers are rounded up to a multiple of 16 before use; the
;   destination pointer is additionally advanced by 16 bytes in the main
;   loop, so [ebp+shift] and [ebx+shift+16] refer to the same row offset.
;   ebp, ebx, esi and edi are saved and restored; eax, ecx, edx, mm0..mm7 and
;   flags are not preserved. EMMS is executed on every path that used MMX.
@OneGeneration$qqsiipvpxvi:
        push ebp
        push ebx
        push esi
        push edi
        mov eax,[esp+20]
        mov edx,[esp+24]
        mov ebp,[esp+28]
        mov ebx,[esp+32]
        dec eax
        jl OneGeneration_end            ; 1st argument <= 0: nothing to do
        add edx,7
        add ebp,31                      ; align up to 16, then skip 16 bytes
        add ebx,15                      ; align up to 16
        shr eax,6                       ; eax = row count - 1 (64 bits/row)
        shl edx,3
        and ebp,not 15
        and ebx,not 15
        and edx,not 63                  ; edx = row stride, multiple of 64
        jng OneGeneration_end           ; stride <= 0: nothing to do
        test eax,eax
        jz OneGeneration_single         ; only one row
        mov edi,edx
        imul edi,eax
        jo OneGeneration_end            ; stride * rows overflowed
        push eax                        ; keep (row count - 1) on the stack
        add edi,ebx                     ; edi -> last source row
        call OneGeneration_Flag12
        lea esi,[ebx+edx]               ; esi -> second source row
        push dword [esp]                ; loop counter = row count - 1
        mov eax,16                      ; column step used by live_cycle
        ; first row: its "previous" row is the last row shifted left by one
        live_cycle yes,no
        jmp OneGeneration_cycle_fin
OneGeneration_cycle:
        ; middle rows: slide the three-row window down by one row
        mov edi,ebx
        mov ebx,esi
        add ebp,edx
        add esi,edx
        live_cycle no,no
OneGeneration_cycle_fin:
        dec dword [esp]
        jg OneGeneration_cycle
        ; last row: its "next" row is the first row shifted right by one
        mov edi,ebx
        pop ecx                         ; drop the exhausted loop counter
        mov ebx,esi
        mov esi,edx
        add ebp,edx
        imul esi,[esp]                  ; stride * (row count - 1)
        neg esi
        add esi,ebx                     ; esi -> first source row
        live_cycle no,yes
        jmp OneGeneration_flag48
OneGeneration_single:
        ; single row: it is its own previous and next row, both shifted
        push eax
        mov edi,ebx
        call OneGeneration_Flag12
        mov esi,ebx
        mov eax,16
        live_cycle yes,yes
OneGeneration_flag48:
        pop ebp                         ; saved (row count - 1)
        inc ebp                         ; ebp = row count
        ; --- flag bit 3: copy edge qword columns inside the destination ---
        bt dword [esp+36],3
        jnc OneGeneration_flag8_end
        mov edi,[esp+24]                ; 2nd argument
        mov ebx,[esp+28]                ; destination buffer
        dec edi
        add ebx,15
        shl edi,4                       ; offset for column (arg2 - 1)
        lea esi,[edi-16]                ; offset for column (arg2 - 2)
        and ebx,not 15
        ; offsets at or beyond the stride live in the odd-offset half
        cmp edi,edx
        jb OneGeneration_flag8_uphalf
        sub edi,edx
        add edi,8
        cmp esi,edx
        jb OneGeneration_flag8_uphalf
        sub esi,edx
        add esi,8
OneGeneration_flag8_uphalf:
        mov ecx,ebp                     ; once per row
OneGeneration_flag8_cycle:
        movq mm0,[ebx+esi]
        movq [ebx],mm0                  ; column (arg2 - 2) -> offset 0
        movq mm0,[ebx+16]
        movq [ebx+edi],mm0              ; offset 16 -> column (arg2 - 1)
        add ebx,edx
        loop OneGeneration_flag8_cycle
OneGeneration_flag8_end:
        ; --- flag bit 2: copy edge bit lines inside the destination ---
        bt dword [esp+36],2
        jnc OneGeneration_flag4_end
        mov eax,[esp+20]                ; 1st argument
        push edx                        ; save stride across the division
        dec eax
        xor edx,edx
        mov ebx,[esp+32]                ; destination buffer (esp moved by 4)
        div ebp                         ; (arg1 - 1) / rows
        add ebx,15
        mov esi,eax                     ; esi = bit index of line (arg1 - 1)
        mov edi,edx                     ; edi = row index of line (arg1 - 1)
        and ebx,not 15
        ; locate line (arg1 - 2): previous row, or last row of previous bit
        dec edx
        jl OneGeneration_flag4_dec0
        mov ebp,edx
        jmp OneGeneration_flag4_after_dec
OneGeneration_flag4_dec0:
        dec ebp                         ; last row index
        dec eax                         ; previous bit index
OneGeneration_flag4_after_dec:
        pop edx                         ; restore stride
        imul edi,edx
        imul ebp,edx
        add edi,ebx                     ; edi -> row of line (arg1 - 1)
        add ebp,ebx                     ; ebp -> row of line (arg1 - 2)
        ; reduce both bit indices to 0..31, stepping to the upper dword
        btr esi,5
        jnc OneGeneration_flag4_noadd4f
        add edi,4
OneGeneration_flag4_noadd4f:
        btr eax,5
        jnc OneGeneration_flag4_noadd4s
        add ebp,4
OneGeneration_flag4_noadd4s:
        ; loop 0: for every qword, copy bit eax of [ebp+ecx] into bit 0 of
        ; the first row
        mov ecx,edx
        jmp OneGeneration_flag4_cycle0_entry
OneGeneration_flag4_cycle0:
        btr dword [ebx+ecx],0
OneGeneration_flag4_cycle0_entry:
        sub ecx,8
        jl OneGeneration_flag4_cycle0_end
        bt dword [ebp+ecx],eax
        jnc OneGeneration_flag4_cycle0
        bts dword [ebx+ecx],0
        jmp OneGeneration_flag4_cycle0_entry
OneGeneration_flag4_cycle0_end:
        ; loop 1 source: bit 0 of the second row, or bit 1 of the only row
        ; when the 1st argument is <= 64
        xor eax,eax
        cmp dword [esp+20],64
        jng OneGeneration_flag4_single
        add ebx,edx
        jmp OneGeneration_flag4_cycle1_entry
OneGeneration_flag4_single:
        inc eax
        jmp OneGeneration_flag4_cycle1_entry
        ; loop 1: for every qword, copy bit eax of [ebx+edx] into bit esi of
        ; [edi+edx]; edx doubles as the counter and is consumed here
OneGeneration_flag4_cycle1:
        btr dword [edi+edx],esi
OneGeneration_flag4_cycle1_entry:
        sub edx,8
        jl OneGeneration_flag4_end
        bt dword [ebx+edx],eax
        jnc OneGeneration_flag4_cycle1
        bts dword [edi+edx],esi
        jmp OneGeneration_flag4_cycle1_entry
OneGeneration_flag4_end:
        emms                            ; leave MMX state before returning
OneGeneration_end:
        pop edi
        pop esi
        pop ebx
        pop ebp
        ret 20