Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
1769 | yogev_ezra | 1 | ;// fast life generator: ~2.8 pixel*generations per clock cycle |
2 | |||
;-----------------------------------------------------------------------
; live_shl reg, enable
;   Conditional assembly helper: when `enable` is the literal `yes`,
;   emits a one-bit left shift of every 64-bit lane of `reg`;
;   for any other value nothing is emitted.
;-----------------------------------------------------------------------
macro live_shl reg,enable
{
  if enable eq yes
        psllq   reg,1
  end if
}
||
9 | |||
;-----------------------------------------------------------------------
; live_shr reg, enable
;   Conditional assembly helper: when `enable` is the literal `yes`,
;   emits a one-bit logical right shift of every 64-bit lane of `reg`;
;   for any other value nothing is emitted.
;-----------------------------------------------------------------------
macro live_shr reg,enable
{
  if enable eq yes
        psrlq   reg,1
  end if
}
||
16 | |||
;-----------------------------------------------------------------------
; live_mov dst, base, mode
;   Loads one 16-byte working value into `dst` from memory relative to
;   the register `base`.  `mode` selects one of three forms:
;     low   - dst is zeroed, then the 8 bytes at [base+24] are placed in
;             its low half (high half stays zero)
;     high  - dst is zeroed, then the 8 bytes at [base+edx] are placed in
;             its high half (low half stays zero)
;     other - (including an omitted argument) aligned 16-byte load from
;             [base+ecx]
;   Reads edx / ecx as offsets depending on the mode; does not modify them.
;-----------------------------------------------------------------------
macro live_mov dst,base,mode
{
  if mode eq low
        xorps   dst,dst
        movlps  dst,[base+24]
  else if mode eq high
        xorps   dst,dst
        movhps  dst,[base+edx]
  else
        movaps  dst,[base+ecx]
  end if
}
||
29 | |||
;-----------------------------------------------------------------------
; live_load hi, lo, nxt, tmp, shl_edi, shr_esi, how
;   Fetches three 16-byte values through live_mov (bases edi, ebx, esi,
;   all with the same `how` mode) and adds them bit-by-bit:
;       lo = edi_value xor ebx_value xor esi_value      (sum bit)
;       hi = majority(edi_value, ebx_value, esi_value)  (carry bit)
;   so every bit position ends up with a 2-bit count in hi:lo.
;   The edi value is first passed through live_shl (controlled by
;   shl_edi) and the esi value through live_shr (controlled by shr_esi).
;   `nxt` and `tmp` are scratch registers and are overwritten.
;-----------------------------------------------------------------------
macro live_load hi,lo,nxt,tmp,shl_edi,shr_esi,how
{
        live_mov lo,edi,how
        live_mov hi,ebx,how
        live_shl lo,shl_edi
        movaps  tmp,lo                  ; tmp = (shifted) edi value
        xorps   lo,hi                   ; lo  = edi xor ebx
        live_mov nxt,esi,how
        andps   hi,tmp                  ; hi  = edi and ebx
        live_shr nxt,shr_esi
        movaps  tmp,lo
        xorps   lo,nxt                  ; lo  = three-way sum bit
        andps   tmp,nxt                 ; tmp = (edi xor ebx) and esi
        orps    hi,tmp                  ; hi  = three-way carry bit
}
||
45 | |||
;-----------------------------------------------------------------------
; live_operation hiA,loA, hiB,loB, hiC,loC, hiT,loT
;   Combines three 2-bit per-position counts (hiX:loX, as produced by
;   live_load) with the 16 bytes at [ebx+ecx+16] and stores the result
;   to [ebp+ecx].
;
;   Per bit position, with N = A + B + C (each 0..3) and `self` the bit
;   read from [ebx+ecx+16], tracing the boolean steps below gives
;       result = (N == 3) or (N == 4 and self)
;
;   Destroys hiA, loA, hiT, loT.  hiB, loB, hiC, loC are only read.
;-----------------------------------------------------------------------
macro live_operation hiA,loA,hiB,loB,hiC,loC,hiT,loT
{
        movaps  loT,loA
        xorps   loA,loB                 ; loA = loA xor loB
        andps   loT,loB                 ; loT = carry out of loA + loB
        movaps  hiT,hiA
        xorps   hiA,loT
        andps   hiT,loT
        movaps  loT,hiA
        xorps   hiA,hiB
        andps   loT,hiB
        orps    hiT,loT                 ; hiT = carry of hiA + carry + hiB
        movaps  loT,hiA
        xorps   hiA,hiC                 ; hiA = parity of the "twos" column
        andps   loT,hiC
        xorps   hiT,loT                 ; hiT = second bit of the "twos" sum
        xorps   hiA,hiT
        movaps  loT,loA
        orps    loT,loC
        xorps   loA,loC                 ; loA = parity of the three low bits
        xorps   hiT,loT
        orps    loA,[ebx+ecx+16]        ; merge in the current bit itself
        andps   hiA,hiT
        andps   hiA,loA
        movaps  [ebp+ecx],hiA           ; write the 16 result bytes
}
||
72 | |||
;-----------------------------------------------------------------------
; live_cycle shl_edi, shr_esi
;   Processes one block of edx bytes, walking the offset ecx downwards
;   from edx in steps of eax, with the body unrolled four times.
;   shl_edi / shr_esi are forwarded unchanged to every live_load.
;
;   Register use (as seen through live_load / live_operation):
;     edi, ebx, esi - bases of the three inputs that are summed
;     ebp           - base of the output
;     edx           - block size in bytes (read only)
;     eax           - step subtracted from ecx (read only)
;     ecx           - running offset (overwritten)
;     xmm0..xmm7    - all overwritten; the register pairs rotate so the
;                     two newest live_load results are reused by the
;                     next live_operation
;
;   The sequence starts with a `low` boundary load and ends with a
;   `high` boundary load (see live_mov).
;   NOTE(review): the unrolling appears to rely on edx being a multiple
;   of 4*eax; the visible caller passes eax = 16 and masks edx to a
;   multiple of 64 - confirm for any other caller.
;-----------------------------------------------------------------------
macro live_cycle shl_edi,shr_esi
{
  local unrolled
  local unrolled_entry
        mov     ecx,edx
        live_load xmm2,xmm3,xmm4,xmm5,shl_edi,shr_esi,low
        live_load xmm4,xmm5,xmm6,xmm7,shl_edi,shr_esi
        sub     ecx,eax
        jmp     unrolled_entry          ; first pass skips the first quarter
  unrolled:
        live_load xmm4,xmm5,xmm6,xmm7,shl_edi,shr_esi
        live_operation xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7
        sub     ecx,eax
  unrolled_entry:
        live_load xmm6,xmm7,xmm0,xmm1,shl_edi,shr_esi
        live_operation xmm2,xmm3,xmm4,xmm5,xmm6,xmm7,xmm0,xmm1
        sub     ecx,eax
        live_load xmm0,xmm1,xmm2,xmm3,shl_edi,shr_esi
        live_operation xmm4,xmm5,xmm6,xmm7,xmm0,xmm1,xmm2,xmm3
        sub     ecx,eax
        live_load xmm2,xmm3,xmm4,xmm5,shl_edi,shr_esi
        live_operation xmm6,xmm7,xmm0,xmm1,xmm2,xmm3,xmm4,xmm5
        sub     ecx,eax
        jg      unrolled                ; continue while the offset is > 0
        live_load xmm4,xmm5,xmm6,xmm7,shl_edi,shr_esi,high
        live_operation xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7
}
||
100 | |||
;-----------------------------------------------------------------------
; OneGeneration_Flag12
;   Internal helper, entered by `call` from @OneGeneration$qqsiipvpxvi
;   immediately after that routine's `push eax`.  It reads the caller's
;   stack arguments directly, so the esp offsets below are only valid
;   for that exact stack depth.
;
; In:    eax = block count - 1
;        edx = block size in bytes
;        ebx = input base; block data is addressed from ebx+16
;        [esp+48] (after `push edi`)  = caller's 5th argument (flag bits)
;        [esp+36] (after `push edi`)  = caller's 2nd argument
;        [esp+36] (after `push edx`)  = caller's 1st argument
; Does:  flag bit 1 set and bit 3 clear:
;            stores a zero qword into the slot of row index arg2 in every
;            block (skipped when that slot lies outside the block)
;        flag bit 0 set and bit 2 clear:
;            q = arg1 / blocks, r = arg1 mod blocks; when q < 64, clears
;            bit q in every qword of block r
; Clobb: eax (bit-0 branch only), ecx, esi, mm0, flags.
;        edi and edx are saved and restored; ebx, ebp are not touched.
;-----------------------------------------------------------------------
OneGeneration_Flag12:
        push    edi
        lea     esi,[eax+1]                     ; esi = number of blocks

        ; ---- flag bit 1 (only when bit 3 is clear) -------------------
        bt      dword [esp+48],1
        jnc     OneGeneration_flag2_end
        bt      dword [esp+48],3
        jc      OneGeneration_flag2_end
        mov     edi,[esp+36]                    ; row index = arg2
        shl     edi,4                           ; 16 bytes per row slot pair
        cmp     edi,edx
        jb      OneGeneration_flag2_uphalf      ; lands in the low qwords
        sub     edi,edx
        cmp     edi,edx
        jnb     OneGeneration_flag2_end         ; beyond the block: skip
        add     edi,8                           ; lands in the high qwords
OneGeneration_flag2_uphalf:
        mov     ecx,esi                         ; one store per block
        lea     edi,[edi+ebx+16]
        pxor    mm0,mm0
OneGeneration_flag2_cycle:
        movq    [edi],mm0
        add     edi,edx                         ; same slot, next block
        loop    OneGeneration_flag2_cycle
OneGeneration_flag2_end:

        ; ---- flag bit 0 (only when bit 2 is clear) -------------------
        bt      dword [esp+48],0
        jnc     OneGeneration_flag1_end
        bt      dword [esp+48],2
        jc      OneGeneration_flag1_end
        push    edx                             ; div needs edx:eax
        mov     eax,[esp+36]                    ; arg1 (offset moved by the push)
        xor     edx,edx
        div     esi                             ; eax = bit index, edx = block index
        mov     esi,edx
        pop     edx
        cmp     eax,64
        jnb     OneGeneration_flag1_end         ; bit index outside a qword
        imul    esi,edx
        lea     esi,[esi+ebx+16]                ; esi = start of that block
        btr     eax,5                           ; bit index >= 32 ...
        jnc     OneGeneration_flag1_noadd4
        add     esi,4                           ; ... use the upper dword
OneGeneration_flag1_noadd4:
        lea     ecx,[edx-8]                     ; last qword of the block
        mov     edi,8
OneGeneration_flag1_cycle:                      ; four qwords per iteration
        btr     dword [esi+ecx],eax
        sub     ecx,edi
        btr     dword [esi+ecx],eax
        sub     ecx,edi
        btr     dword [esi+ecx],eax
        sub     ecx,edi
        btr     dword [esi+ecx],eax
        sub     ecx,edi
        jnl     OneGeneration_flag1_cycle
OneGeneration_flag1_end:
        pop     edi
        ret
||
158 | |||
;-----------------------------------------------------------------------
; @OneGeneration$qqsiipvpxvi
;   Computes one generation from the input buffer into the output
;   buffer, then optionally copies edge rows / columns inside the output.
;
;   Takes five dword stack arguments and removes them itself (`ret 20`).
;   NOTE(review): the mangled name looks like a Borland-style encoding of
;   __stdcall OneGeneration(int, int, void*, const void*, int) - confirm
;   against the C/C++ declaration.
;
; Args (offsets valid after the four register pushes):
;   [esp+20] arg1 - horizontal cell count; block count = ((arg1-1) shr 6)+1
;   [esp+24] arg2 - row count; block size = ((arg2+7)*8) and not 63 bytes
;   [esp+28] arg3 - output buffer, rounded up to a 16-byte boundary
;   [esp+32] arg4 - input buffer, rounded up to a 16-byte boundary
;                   (held as aligned address - 16, data read from ebx+16);
;                   note OneGeneration_Flag12 may write into this buffer
;   [esp+36] arg5 - flag bits:
;                     bit 0 / bit 1 - handled by OneGeneration_Flag12
;                     bit 2 - copy column arg1-2 into column 0 and
;                             column 1 into column arg1-1 of the output
;                     bit 3 - copy row arg2-2 into row 0 and row 1 into
;                             row arg2-1 of the output
;
; Layout, as used by the code below:
;   * column x lives in block (x mod blocks) at bit (x div blocks) of
;     each qword, so horizontally adjacent cells sit at the same bit of
;     adjacent blocks; only the first and last block need a one-bit
;     shift of their wrapped-around neighbour (live_cycle yes/no args)
;   * each 16-byte unit of a block holds two rows: row r in the low
;     qword at r*16 while r*16 < block size, otherwise in the high
;     qword at r*16 - block size + 8
;
; Returns immediately (nothing written) when arg1 < 1, when the block
; size is <= 0, or when block size * (blocks-1) overflows.
; Saves/restores ebp, ebx, esi, edi; clobbers eax, ecx, edx, xmm0-xmm7,
; mm0, flags.  Executes emms before returning on the normal path.
;-----------------------------------------------------------------------
@OneGeneration$qqsiipvpxvi:
        push    ebp
        push    ebx
        push    esi
        push    edi
        mov     eax,[esp+20]                    ; arg1
        mov     edx,[esp+24]                    ; arg2
        mov     ebp,[esp+28]                    ; arg3: output
        mov     ebx,[esp+32]                    ; arg4: input
        dec     eax
        jl      OneGeneration_end               ; arg1 < 1
        add     edx,7
        add     ebp,15
        dec     ebx
        shr     eax,6                           ; eax = blocks - 1
        shl     edx,3
        and     ebp,not 15                      ; output rounded up to 16
        and     ebx,not 15                      ; input data starts at ebx+16
        and     edx,not 63                      ; edx = block size in bytes
        jng     OneGeneration_end               ; block size <= 0
        test    eax,eax
        jz      OneGeneration_single            ; exactly one block

        ; ---- several blocks: first block ------------------------------
        mov     edi,edx
        imul    edi,eax
        jo      OneGeneration_end               ; offset does not fit
        push    eax                             ; keep blocks - 1
        add     edi,ebx                         ; edi = last block (wrapped neighbour)
        call    OneGeneration_Flag12
        lea     esi,[ebx+edx]                   ; esi = second block
        push    dword [esp]                     ; working copy used as loop counter
        mov     eax,16                          ; step for live_cycle
        live_cycle yes,no
        jmp     OneGeneration_cycle_fin

        ; ---- interior blocks ------------------------------------------
OneGeneration_cycle:
        mov     edi,ebx                         ; slide the three-block window
        mov     ebx,esi
        add     ebp,edx
        add     esi,edx
        live_cycle no,no
OneGeneration_cycle_fin:
        dec     dword [esp]
        jg      OneGeneration_cycle

        ; ---- last block -----------------------------------------------
        mov     edi,ebx
        pop     ecx                             ; drop the spent counter
        mov     ebx,esi
        mov     esi,edx
        add     ebp,edx
        imul    esi,[esp]                       ; block size * (blocks - 1)
        neg     esi
        add     esi,ebx                         ; esi = first block (wrapped neighbour)
        live_cycle no,yes
        jmp     OneGeneration_flag48

        ; ---- exactly one block: it is its own neighbour on both sides --
OneGeneration_single:
        push    eax                             ; eax = 0 here
        mov     edi,ebx
        call    OneGeneration_Flag12
        mov     esi,ebx
        mov     eax,16                          ; step for live_cycle
        live_cycle yes,yes

OneGeneration_flag48:
        pop     ebp
        inc     ebp                             ; ebp = number of blocks

        ; ---- flag bit 3: copy rows inside the output -------------------
        bt      dword [esp+36],3
        jnc     OneGeneration_flag8_end
        mov     edi,[esp+24]                    ; arg2
        mov     ebx,[esp+28]                    ; arg3
        dec     edi
        add     ebx,15
        shl     edi,4                           ; slot of row arg2-1
        lea     esi,[edi-16]                    ; slot of row arg2-2
        and     ebx,not 15
        cmp     edi,edx
        jb      OneGeneration_flag8_uphalf      ; both rows in the low qwords
        sub     edi,edx
        add     edi,8                           ; row arg2-1 is in the high qwords
        cmp     esi,edx
        jb      OneGeneration_flag8_uphalf
        sub     esi,edx
        add     esi,8                           ; so is row arg2-2
OneGeneration_flag8_uphalf:
        mov     ecx,ebp                         ; once per block
OneGeneration_flag8_cycle:
        movq    mm0,[ebx+esi]
        movq    [ebx],mm0                       ; row 0      <- row arg2-2
        movq    mm0,[ebx+16]
        movq    [ebx+edi],mm0                   ; row arg2-1 <- row 1
        add     ebx,edx
        loop    OneGeneration_flag8_cycle
OneGeneration_flag8_end:

        ; ---- flag bit 2: copy columns inside the output ----------------
        bt      dword [esp+36],2
        jnc     OneGeneration_flag4_end
        mov     eax,[esp+20]                    ; arg1
        push    edx                             ; div needs edx:eax
        dec     eax
        xor     edx,edx
        mov     ebx,[esp+32]                    ; arg3 (offset moved by the push)
        div     ebp                             ; eax = bit, edx = block of column arg1-1
        add     ebx,15
        mov     esi,eax                         ; esi = bit of column arg1-1
        mov     edi,edx                         ; edi = block of column arg1-1
        and     ebx,not 15
        dec     edx                             ; column arg1-2: previous block ...
        jl      OneGeneration_flag4_dec0
        mov     ebp,edx
        jmp     OneGeneration_flag4_after_dec
OneGeneration_flag4_dec0:
        dec     ebp                             ; ... or last block,
        dec     eax                             ;     one bit lower
OneGeneration_flag4_after_dec:
        pop     edx
        imul    edi,edx
        imul    ebp,edx
        add     edi,ebx                         ; edi = block of column arg1-1
        add     ebp,ebx                         ; ebp = block of column arg1-2
        btr     esi,5                           ; bit >= 32: use the upper dword
        jnc     OneGeneration_flag4_noadd4f
        add     edi,4
OneGeneration_flag4_noadd4f:
        btr     eax,5
        jnc     OneGeneration_flag4_noadd4s
        add     ebp,4
OneGeneration_flag4_noadd4s:
        mov     ecx,edx
        jmp     OneGeneration_flag4_cycle0_entry

        ; column 0 (bit 0 of the first block) <- column arg1-2
OneGeneration_flag4_cycle0:
        btr     dword [ebx+ecx],0
OneGeneration_flag4_cycle0_entry:
        sub     ecx,8
        jl      OneGeneration_flag4_cycle0_end
        bt      dword [ebp+ecx],eax
        jnc     OneGeneration_flag4_cycle0
        bts     dword [ebx+ecx],0
        jmp     OneGeneration_flag4_cycle0_entry
OneGeneration_flag4_cycle0_end:

        ; locate column 1: bit 0 of the second block, or bit 1 of the
        ; only block when arg1 <= 64
        xor     eax,eax
        cmp     dword [esp+20],64
        jng     OneGeneration_flag4_single
        add     ebx,edx
        jmp     OneGeneration_flag4_cycle1_entry
OneGeneration_flag4_single:
        inc     eax
        jmp     OneGeneration_flag4_cycle1_entry

        ; column arg1-1 <- column 1
OneGeneration_flag4_cycle1:
        btr     dword [edi+edx],esi
OneGeneration_flag4_cycle1_entry:
        sub     edx,8
        jl      OneGeneration_flag4_end
        bt      dword [ebx+edx],eax
        jnc     OneGeneration_flag4_cycle1
        bts     dword [edi+edx],esi
        jmp     OneGeneration_flag4_cycle1_entry

OneGeneration_flag4_end:
        emms                                    ; leave MMX state clean
OneGeneration_end:
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret     20                              ; callee removes the 5 arguments
||
318 |