Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
1769 yogev_ezra 1
;// fast life generator: throughput ~2.2 pixel*generations per clock cycle ("tact")
2
 
3
; live_shl x,do_shl
;   Conditionally shifts the 64-bit value in x left by one bit.
;   x      - operand accepted by psllq (the callers in this file pass MMX registers)
;   do_shl - assembly-time token; the shift is emitted only when it is exactly "yes",
;            any other token makes the macro expand to nothing.
macro live_shl x,do_shl
{
	if do_shl eq yes
		psllq x,1
	end if
}
9
 
10
; live_shr x,do_shr
;   Conditionally shifts the 64-bit value in x right (logically) by one bit.
;   x      - operand accepted by psrlq (the callers in this file pass MMX registers)
;   do_shr - assembly-time token; the shift is emitted only when it is exactly "yes",
;            any other token makes the macro expand to nothing.
macro live_shr x,do_shr
{
	if do_shr eq yes
		psrlq x,1
	end if
}
16
 
17
; live_zero x,y
;   Clears both operands: x is zeroed by xor-ing it with itself, then copied to y.
;   live_cycle uses this to supply an all-zero row count pair at the two ends
;   of a strip.
macro live_zero x,y
{
	pxor     x,x		; x = 0
	movq     y,x		; y = 0
}
22
 
23
; live_load x,y,z,t,shl_edi,shr_esi
;   Loads the three qwords [edi+ecx], [ebx+ecx] and [esi+ecx] and adds them
;   bit position by bit position (a bitwise full adder), giving a 2-bit count
;   per bit position.
;
;   In : ecx            - byte offset applied to all three base registers
;        edi, ebx, esi  - base addresses of the three inputs
;        shl_edi        - "yes" to shift the [edi+ecx] qword left by one bit first
;        shr_esi        - "yes" to shift the [esi+ecx] qword right by one bit first
;   Out: y = p xor q xor r                       (low bit of the count)
;        x = (p and q) or ((p xor q) and r)      (high bit of the count)
;        where p = [edi+ecx] (after the optional shift), q = [ebx+ecx],
;              r = [esi+ecx] (after the optional shift)
;        z = r, t = clobbered scratch
macro live_load x,y,z,t,shl_edi,shr_esi
{
	movq     y,[edi+ecx]	; y = p
	movq     x,[ebx+ecx]	; x = q
	live_shl y,shl_edi	; optional one-bit left shift of p
	movq     t,y		; t = p
	pxor     y,x		; y = p xor q
	movq     z,[esi+ecx]	; z = r
	pand     x,t		; x = p and q
	live_shr z,shr_esi	; optional one-bit right shift of r
	movq     t,y		; t = p xor q
	pxor     y,z		; y = p xor q xor r        -> low bit
	pand     t,z		; t = (p xor q) and r
	por      x,t		; x = carry of p + q + r   -> high bit
}
38
 
39
; live_operation a,A,b,B,c,C,d,D,shift
;   Combines three 2-bit per-position counts, as produced by live_load
;   (lower-case operand = high bit, upper-case operand = low bit), with the
;   qword at [ebx+shift+16] and stores one result qword at [ebp+shift].
;   NOTE(review): given the file header ("life generator") this appears to be
;   the Game of Life survival/birth rule applied to 64 cells at once, with
;   [ebx+shift+16] being the current state of those cells - confirm before
;   relying on that reading.
;
;   In : (a,A), (b,B), (c,C) - the three count pairs
;        ebx+shift+16        - address of the qword or-ed into A
;        ebp+shift           - address the result qword is written to
;   Out: [ebp+shift] = result
;   Clobbers: a, A, d, D.  b, B, c and C are only read, which is what lets
;   live_cycle reuse them as the (a,A)/(b,B) pairs of the following row.
;   The instruction order is kept exactly as in the original.
macro live_operation a,A,b,B,c,C,d,D,shift
{
	movq     D,A
	pxor     A,B		; A = A xor B
	pand     D,B		; D = carry out of the two low bits A + B
	movq     d,a
	pxor     a,D		; a = a xor carry
	pand     d,D		; d = a and carry
	movq     D,a
	pxor     a,b		; a = a xor carry xor b
	pand     D,b
	por      d,D		; d = carry out of a + carry + b
	movq     D,a
	pxor     a,c		; fold in the high bit of the third pair
	pand     D,c
	pxor     d,D
	pxor     a,d
	movq     D,A
	por      D,C		; D = (A xor B) or C
	pxor     A,C		; A = A xor B xor C  (lowest bit of the total)
	pxor     d,D
	por      A,[ebx+shift+16]	; or in the qword 16 bytes above the output offset
	pand     a,d
	pand     a,A
	movq     [ebp+shift],a	; store the result qword
}
65
 
66
; live_cycle shl_edi,shr_esi
;   Processes one whole strip of edx bytes: for every qword offset it loads a
;   count pair with live_load and, one step later, emits a result with
;   live_operation.  The loop is unrolled four times and the MMX registers are
;   rotated so that the (b,B)/(c,C) pairs of one live_operation become the
;   (a,A)/(b,B) pairs of the next one without any register moves.
;
;   In : eax - step between consecutive offsets (every call site in this file
;              sets it to 16; the "cmp cl,-8" exit test below relies on that)
;        edx - strip size in bytes (call sites pass a multiple of 64)
;        edi, ebx, esi - base addresses handed to live_load
;        ebp - base address handed to live_operation for the stores
;        shl_edi, shr_esi - forwarded unchanged to live_load
;   Offsets are visited downwards in two passes: first edx-8, edx-24, ... 8,
;   then edx-16, edx-32, ... 0.  An all-zero count pair is used before the very
;   first load and after the very last one.
;   Clobbers: ecx, mm0-mm7, flags.
macro live_cycle shl_edi,shr_esi
{
	local cycle
	local cycle_entry
	local last_oper
	lea  ecx,[edx-8]		; first pass starts at the last qword of the strip
	live_zero      mm2,mm3		; zero count pair preceding the first loaded one
	live_load      mm4,mm5,mm6,mm7,shl_edi,shr_esi
	sub  ecx,eax
	jmp  cycle_entry		; enter the unrolled loop at its second stage
cycle:
	live_load      mm4,mm5,mm6,mm7,shl_edi,shr_esi
	live_operation mm0,mm1,mm2,mm3,mm4,mm5,mm6,mm7,ecx
	sub  ecx,eax
cycle_entry:
	live_load      mm6,mm7,mm0,mm1,shl_edi,shr_esi
	live_operation mm2,mm3,mm4,mm5,mm6,mm7,mm0,mm1,ecx
	sub  ecx,eax
	live_load      mm0,mm1,mm2,mm3,shl_edi,shr_esi
	live_operation mm4,mm5,mm6,mm7,mm0,mm1,mm2,mm3,ecx
	sub  ecx,eax
	live_load      mm2,mm3,mm4,mm5,shl_edi,shr_esi
	live_operation mm6,mm7,mm0,mm1,mm2,mm3,mm4,mm5,ecx
	sub  ecx,eax
	jnl  cycle			; keep going while the offset is not negative
	cmp  cl,-8			; offset -8: first pass just ended; otherwise second
	jnz  last_oper
	lea  ecx,[edx-16]		; second pass starts at the last 16-byte aligned qword
	live_load      mm4,mm5,mm6,mm7,shl_edi,shr_esi
	live_operation mm0,mm1,mm2,mm3,mm4,mm5,mm6,mm7,-8	; result for offset 8 goes to [ebp-8]
	sub  ecx,eax
	jmp  cycle_entry
last_oper:
	live_zero      mm4,mm5		; zero count pair following the last loaded one
	live_operation mm0,mm1,mm2,mm3,mm4,mm5,mm6,mm7,ecx
}
102
 
103
;-----------------------------------------------------------------------
; OneGeneration_Flag12
;   Internal helper of @OneGeneration$qqsiipvpxvi; not a stand-alone
;   function.  It reads the caller's stack arguments through fixed esp
;   offsets, so it must be called with exactly the frame that function
;   builds (four saved registers plus one pushed dword above the return
;   address).
;
;   In : eax = number of strips minus one
;        ebx = 16-byte aligned base of the buffer to patch
;        edx = strip size in bytes
;        caller's 1st, 2nd and 5th stack arguments (read via esp)
;   Does, depending on the flag bits in the 5th argument:
;     bit 1 set and bit 3 clear - zero one qword per strip, at the slot
;                                 whose index is the 2nd argument
;     bit 0 set and bit 2 clear - clear one bit in every qword of one
;                                 strip; strip = arg1 mod strips,
;                                 bit = arg1 div strips (skipped when the
;                                 bit index is 64 or more)
;   Preserves: ebx, edx, edi, ebp.
;   Clobbers : eax, ecx, esi, mm0, flags.
;-----------------------------------------------------------------------
OneGeneration_Flag12:
	push edi
	lea  esi,[eax+1]		; esi = number of strips
	bt   dword [esp+48],1		; [esp+48] = caller's 5th argument (flags)
	jnc  OneGeneration_flag2_end
	bt   dword [esp+48],3
	jc   OneGeneration_flag2_end
	mov  edi,[esp+36]		; caller's 2nd argument
	shl  edi,4			; slot offset = index*16 while it fits in the strip
	cmp  edi,edx
	jb   OneGeneration_flag2_uphalf
	sub  edi,edx			; otherwise wrap into the slots at offset 8 mod 16
	cmp  edi,edx
	jnb  OneGeneration_flag2_end	; index lies beyond the strip: nothing to clear
	add  edi,8
OneGeneration_flag2_uphalf:
	mov  ecx,esi			; one qword per strip
	add  edi,ebx
	pxor mm0,mm0
OneGeneration_flag2_cycle:
	movq [edi],mm0
	add  edi,edx			; same slot in the next strip
	loop OneGeneration_flag2_cycle
OneGeneration_flag2_end:
	bt   dword [esp+48],0
	jnc  OneGeneration_flag1_end
	bt   dword [esp+48],2
	jc   OneGeneration_flag1_end
	push edx
	mov  eax,[esp+36]		; caller's 1st argument (offsets moved by the push above)
	xor  edx,edx
	div  esi			; eax = arg1 div strips, edx = arg1 mod strips
	mov  esi,edx
	pop  edx
	cmp  eax,64
	jnb  OneGeneration_flag1_end	; bit index does not fit in a qword
	imul esi,edx
	add  esi,ebx			; esi = base of the selected strip
	btr  eax,5			; bit index 32..63 -> use the upper dword
	jnc  OneGeneration_flag1_noadd4
	add  esi,4
OneGeneration_flag1_noadd4:
	lea  ecx,[edx-8]		; walk the strip from its last qword down to offset 0
	mov  edi,8
OneGeneration_flag1_cycle:
	btr  dword [esi+ecx],eax
	sub  ecx,edi
	btr  dword [esi+ecx],eax
	sub  ecx,edi
	btr  dword [esi+ecx],eax
	sub  ecx,edi
	btr  dword [esi+ecx],eax
	sub  ecx,edi
	jnl  OneGeneration_flag1_cycle
OneGeneration_flag1_end:
	pop  edi
	ret
160
 
161
;-----------------------------------------------------------------------
; @OneGeneration$qqsiipvpxvi
;   Computes one generation of the cellular automaton with MMX.
;   Takes five dword stack arguments and removes them itself ("ret 20").
;   NOTE(review): the mangled name suggests a Borland __stdcall function
;   (int, int, void*, const void*, int) - confirm against the C++ caller.
;
;   Stack arguments as used by the code (offsets valid after the four pushes):
;     [esp+20] arg1 - cell count along the bit direction; (arg1-1) shr 6 + 1
;                     strips are processed
;     [esp+24] arg2 - cell count along the strip; strip size in bytes is
;                     ((arg2+7)*8) and not 63
;     [esp+28] arg3 - output buffer (rounded up to 16 bytes before use)
;     [esp+32] arg4 - input buffer (rounded up to 16 bytes before use)
;     [esp+36] arg5 - flag bits: bits 0 and 1 are handled by
;                     OneGeneration_Flag12 before the generation, bits 2 and 3
;                     by the copy passes after it
;   Returns immediately when arg1 <= 0, when the strip size is not positive,
;   or when strip size * (strips-1) overflows.
;   Preserves ebx, esi, edi, ebp; clobbers eax, ecx, edx, mm0-mm7, flags.
;   Executes emms before returning on every path that used MMX.
;
;   NOTE(review): with bit 3 set and arg2 = 1, esi below becomes -16 and the
;   first load of the bit-3 pass reads below ebx; with bit 2 set and arg1 = 1,
;   eax in the bit-2 pass becomes negative and the bt addresses memory below
;   ebp.  Confirm callers never pass such sizes.
;-----------------------------------------------------------------------
@OneGeneration$qqsiipvpxvi:
	push ebp
	push ebx
	push esi
	push edi
	mov  eax,[esp+20]		; arg1
	mov  edx,[esp+24]		; arg2
	mov  ebp,[esp+28]		; arg3 (output)
	mov  ebx,[esp+32]		; arg4 (input)
	dec  eax
	jl   OneGeneration_end		; arg1 <= 0
	add  edx,7
	add  ebp,31			; output base = align16(arg3) + 16
	add  ebx,15			; input base  = align16(arg4)
	shr  eax,6			; eax = number of strips - 1
	shl  edx,3
	and  ebp,not 15
	and  ebx,not 15
	and  edx,not 63			; edx = strip size in bytes, multiple of 64
	jng  OneGeneration_end		; zero or negative size
	test eax,eax
	jz   OneGeneration_single	; only one strip
	mov  edi,edx
	imul edi,eax
	jo   OneGeneration_end
	push eax			; kept on the stack: strips - 1
	add  edi,ebx			; edi = last strip, neighbour of the first one
	call OneGeneration_Flag12
	lea  esi,[ebx+edx]		; esi = second strip
	push dword [esp]		; loop counter = strips - 1
	mov  eax,16			; offset step expected by live_cycle
	live_cycle yes,no		; first strip: last strip taken shifted left by one bit
	jmp  OneGeneration_cycle_fin
OneGeneration_cycle:
	mov  edi,ebx			; advance the three strip pointers by one strip
	mov  ebx,esi
	add  ebp,edx
	add  esi,edx
	live_cycle no,no		; inner strips: neighbours used unshifted
OneGeneration_cycle_fin:
	dec  dword [esp]
	jg   OneGeneration_cycle
	mov  edi,ebx
	pop  ecx			; drop the exhausted loop counter
	mov  ebx,esi
	mov  esi,edx
	add  ebp,edx
	imul esi,[esp]			; strip size * (strips - 1)
	neg  esi
	add  esi,ebx			; esi = first strip
	live_cycle no,yes		; last strip: first strip taken shifted right by one bit
	jmp  OneGeneration_flag48
OneGeneration_single:
	push eax			; 0, popped again at OneGeneration_flag48
	mov  edi,ebx
	call OneGeneration_Flag12
	mov  esi,ebx
	mov  eax,16
	live_cycle yes,yes		; the single strip is its own neighbour on both sides
OneGeneration_flag48:
	pop  ebp
	inc  ebp			; ebp = number of strips
	bt   dword [esp+36],3
	jnc  OneGeneration_flag8_end
	; bit 3: in every strip of the output copy slot arg2-2 to slot 0 and
	; slot 1 to slot arg2-1
	mov  edi,[esp+24]
	mov  ebx,[esp+28]
	dec  edi
	add  ebx,15
	shl  edi,4			; edi = offset of slot arg2-1
	lea  esi,[edi-16]		; esi = offset of slot arg2-2
	and  ebx,not 15
	cmp  edi,edx
	jb   OneGeneration_flag8_uphalf
	sub  edi,edx			; slot lives in the half at offset 8 mod 16
	add  edi,8
	cmp  esi,edx
	jb   OneGeneration_flag8_uphalf
	sub  esi,edx
	add  esi,8
OneGeneration_flag8_uphalf:
	mov  ecx,ebp
OneGeneration_flag8_cycle:
	movq mm0,[ebx+esi]
	movq [ebx],mm0
	movq mm0,[ebx+16]
	movq [ebx+edi],mm0
	add  ebx,edx
	loop OneGeneration_flag8_cycle
OneGeneration_flag8_end:
	bt   dword [esp+36],2
	jnc  OneGeneration_flag4_end
	; bit 2: in the output copy cell arg1-2 to cell 0 and cell 1 to cell
	; arg1-1 for every qword slot; cell k lives in strip (k mod strips),
	; bit (k div strips)
	mov  eax,[esp+20]
	push edx
	dec  eax
	xor  edx,edx
	mov  ebx,[esp+32]		; arg3 (offsets moved by the push above)
	div  ebp			; eax = (arg1-1) div strips, edx = (arg1-1) mod strips
	add  ebx,15
	mov  esi,eax			; esi = bit index of cell arg1-1
	mov  edi,edx			; edi = strip index of cell arg1-1
	and  ebx,not 15
	dec  edx
	jl   OneGeneration_flag4_dec0
	mov  ebp,edx			; cell arg1-2: previous strip, same bit
	jmp  OneGeneration_flag4_after_dec
OneGeneration_flag4_dec0:
	dec  ebp			; cell arg1-2: last strip, previous bit
	dec  eax
OneGeneration_flag4_after_dec:
	pop  edx
	imul edi,edx
	imul ebp,edx
	add  edi,ebx			; edi = strip holding cell arg1-1
	add  ebp,ebx			; ebp = strip holding cell arg1-2
	btr  esi,5			; bit index 32..63 -> upper dword
	jnc  OneGeneration_flag4_noadd4f
	add  edi,4
OneGeneration_flag4_noadd4f:
	btr  eax,5
	jnc  OneGeneration_flag4_noadd4s
	add  ebp,4
OneGeneration_flag4_noadd4s:
	mov  ecx,edx
	jmp  OneGeneration_flag4_cycle0_entry
OneGeneration_flag4_cycle0:
	btr  dword [ebx+ecx],0		; source bit clear -> clear bit 0 of the first strip
OneGeneration_flag4_cycle0_entry:
	sub  ecx,8
	jl   OneGeneration_flag4_cycle0_end
	bt   dword [ebp+ecx],eax
	jnc  OneGeneration_flag4_cycle0
	bts  dword [ebx+ecx],0		; source bit set -> set bit 0 of the first strip
	jmp  OneGeneration_flag4_cycle0_entry
OneGeneration_flag4_cycle0_end:
	xor  eax,eax
	cmp  dword [esp+20],64
	jng  OneGeneration_flag4_single
	add  ebx,edx			; several strips: cell 1 is bit 0 of the second strip
	jmp  OneGeneration_flag4_cycle1_entry
OneGeneration_flag4_single:
	inc  eax			; one strip: cell 1 is bit 1 of that strip
	jmp  OneGeneration_flag4_cycle1_entry
OneGeneration_flag4_cycle1:
	btr  dword [edi+edx],esi
OneGeneration_flag4_cycle1_entry:
	sub  edx,8
	jl   OneGeneration_flag4_end
	bt   dword [ebx+edx],eax
	jnc  OneGeneration_flag4_cycle1
	bts  dword [edi+edx],esi
	jmp  OneGeneration_flag4_cycle1_entry
OneGeneration_flag4_end:
	emms				; leave MMX state so x87 code can run afterwards
OneGeneration_end:
	pop  edi
	pop  esi
	pop  ebx
	pop  ebp
	ret  20				; callee removes the five dword arguments
320