Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
1769 yogev_ezra 1
;// Fast Life generator: ~2.8 pixel*generations per clock cycle
2
 
3
; live_shl x,do_shl
;   Conditionally emits "psllq x,1", a one-bit logical left shift of each
;   64-bit lane of x.
;     x      - register operand to shift (the callers in this file pass xmm registers)
;     do_shl - assembly-time switch: the instruction is emitted only when this
;              argument is the symbol "yes"; any other value emits nothing.
macro live_shl x,do_shl
{
	if do_shl eq yes
		psllq x,1
	end if
}
9
 
10
; live_shr x,do_shr
;   Conditionally emits "psrlq x,1", a one-bit logical right shift of each
;   64-bit lane of x.
;     x      - register operand to shift (the callers in this file pass xmm registers)
;     do_shr - assembly-time switch: the instruction is emitted only when this
;              argument is the symbol "yes"; any other value emits nothing.
macro live_shr x,do_shr
{
	if do_shr eq yes
		psrlq x,1
	end if
}
16
 
17
; live_mov x,reg,how
;   Loads the 128-bit register x from the buffer whose base is in reg.
;     how = low   : x is zeroed, then its low  qword is loaded from [reg+24]
;     how = high  : x is zeroed, then its high qword is loaded from [reg+edx]
;     otherwise   : all 16 bytes are loaded from [reg+ecx] with movaps, so that
;                   address must be 16-byte aligned
;   edx and ecx are read implicitly and are not modified.
;   The callers in this file omit "how" to select the full 16-byte load.
macro live_mov  x,reg,how
{
	if how eq low
		xorps     x,x			; clear the half that is not loaded
		movlps    x,[reg+24]
	else if how eq high
		xorps     x,x			; clear the half that is not loaded
		movhps    x,[reg+edx]
	else
		movaps    x,[reg+ecx]
	end if
}
29
 
30
; live_load x,y,z,t,shl_edi,shr_esi,how
;   Loads the same position (selected by "how", see live_mov) from the three
;   buffers addressed by edi, ebx and esi and adds them bit by bit:
;       P = [edi..] (shifted left  by one bit when shl_edi is "yes")
;       Q = [ebx..]
;       R = [esi..] (shifted right by one bit when shr_esi is "yes")
;   Result:
;       y = P xor Q xor R                      (sum bit)
;       x = (P and Q) or ((P xor Q) and R)     (carry bit)
;   z is left holding R and t is scratch; both are overwritten.
;   NOTE(review): edi/ebx/esi presumably address the left, centre and right
;   neighbour columns of the Life field - confirm against the data layout.
macro live_load x,y,z,t,shl_edi,shr_esi,how
{
	live_mov  y,edi,how
	live_mov  x,ebx,how
	live_shl  y,shl_edi
	movaps    t,y			; t = P
	xorps     y,x			; y = P xor Q
	live_mov  z,esi,how
	andps     x,t			; x = P and Q
	live_shr  z,shr_esi
	movaps    t,y			; t = P xor Q
	xorps     y,z			; y = P xor Q xor R
	andps     t,z			; t = (P xor Q) and R
	orps      x,t			; x = carry of P+Q+R
}
45
 
46
; live_operation a,A,b,B,c,C,d,D
;   Combines three register pairs (a,A), (b,B), (c,C) as produced by live_load
;   with the 16 bytes at [ebx+ecx+16] and stores the 16-byte result at
;   [ebp+ecx].
;     a, A       - first pair; both are overwritten (a holds the stored result)
;     b, B, c, C - second and third pair; only read, never written, so the
;                  caller can reuse them in the next step
;     d, D       - scratch registers; both are overwritten
;   Both memory operands are accessed with aligned SSE instructions, so
;   ebx+ecx+16 and ebp+ecx must be 16-byte aligned.
;   NOTE(review): presumably this evaluates the Life rule from the neighbour
;   counts of three adjacent rows, with [ebx+ecx+16] being the current cells -
;   confirm before relying on it.
macro live_operation a,A,b,B,c,C,d,D
{
	movaps    D,A
	xorps     A,B
	andps     D,B
	movaps    d,a
	xorps     a,D
	andps     d,D
	movaps    D,a
	xorps     a,b
	andps     D,b
	orps      d,D
	movaps    D,a
	xorps     a,c
	andps     D,c
	xorps     d,D
	xorps     a,d
	movaps    D,A
	orps      D,C
	xorps     A,C
	xorps     d,D
	orps      A,[ebx+ecx+16]	; merge in the data at the current position
	andps     a,d
	andps     a,A
	movaps    [ebp+ecx],a		; write the result
}
72
 
73
; live_cycle shl_edi,shr_esi
;   Processes one block of edx bytes: walks ecx from edx downwards in steps of
;   eax, calling live_load for every position and live_operation for every
;   group of three consecutive loads. The loop body is unrolled four times and
;   the roles of xmm0..xmm7 rotate from step to step, so each loaded pair is
;   reused by three consecutive live_operation calls without register moves.
;
;   Register contract (taken from the macro body and its callers in this file):
;     edx           - block size in bytes (the caller makes it a multiple of 64)
;     eax           - step; the callers set it to 16
;     edi, ebx, esi - source buffers read by live_load
;     ebp           - destination written by live_operation
;     ecx           - clobbered (running offset)
;     xmm0..xmm7    - clobbered
;   shl_edi / shr_esi are passed through to live_load ("yes" enables the shift).
;   The first load uses the "low" mode and the last load the "high" mode of
;   live_mov; all others are full 16-byte loads at [reg+ecx].
macro live_cycle shl_edi,shr_esi
{
	local cycle
	local cycle_entry
	mov  ecx,edx
	live_load      xmm2,xmm3,xmm4,xmm5,shl_edi,shr_esi,low
	live_load      xmm4,xmm5,xmm6,xmm7,shl_edi,shr_esi
	sub  ecx,eax
	jmp  cycle_entry		; enter the unrolled loop at its second step
cycle:
	live_load      xmm4,xmm5,xmm6,xmm7,shl_edi,shr_esi
	live_operation xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7
	sub  ecx,eax
cycle_entry:
	live_load      xmm6,xmm7,xmm0,xmm1,shl_edi,shr_esi
	live_operation xmm2,xmm3,xmm4,xmm5,xmm6,xmm7,xmm0,xmm1
	sub  ecx,eax
	live_load      xmm0,xmm1,xmm2,xmm3,shl_edi,shr_esi
	live_operation xmm4,xmm5,xmm6,xmm7,xmm0,xmm1,xmm2,xmm3
	sub  ecx,eax
	live_load      xmm2,xmm3,xmm4,xmm5,shl_edi,shr_esi
	live_operation xmm6,xmm7,xmm0,xmm1,xmm2,xmm3,xmm4,xmm5
	sub  ecx,eax
	jg   cycle			; repeat while the offset is still positive
	live_load      xmm4,xmm5,xmm6,xmm7,shl_edi,shr_esi,high
	live_operation xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6,xmm7
}
100
 
101
; OneGeneration_Flag12
;   Internal helper of @OneGeneration$qqsiipvpxvi. It handles bits 0 and 1 of
;   the caller's flag word by clearing data in the source buffer before the
;   generation is computed.
;
;   It is not a stand-alone routine: it reads the caller's stack arguments
;   through fixed esp offsets, which are valid only when it is called exactly
;   as OneGeneration does (four saved registers, then one extra "push eax",
;   then "call").
;     on entry   eax = (arg1-1) shr 6, edx = block size in bytes,
;                ebx = source base rounded down as computed by the caller
;     [esp+48]   (after "push edi") = arg5, the flag word
;     [esp+36]   (after "push edi") = arg2; after the additional "push edx"
;                the same offset addresses arg1
;   Preserves edi, ebx, edx, ebp. Clobbers eax, ecx, esi, mm0 and the flags.
;   mm0 is used, so the caller is responsible for the final emms.
;   NOTE(review): arg1/arg2 look like the field width/height and bits 2/3 like
;   "wrap around" options that make this clearing unnecessary - confirm.
OneGeneration_Flag12:
	push edi
	lea  esi,[eax+1]		; esi = number of edx-sized blocks
	; --- flag bit 1 set and bit 3 clear: zero one qword in every block ---
	bt   dword [esp+48],1
	jnc  OneGeneration_flag2_end
	bt   dword [esp+48],3
	jc   OneGeneration_flag2_end
	mov  edi,[esp+36]		; edi = arg2
	shl  edi,4			; 16 bytes per position
	cmp  edi,edx
	jb   OneGeneration_flag2_uphalf
	sub  edi,edx			; offset lies past the block: wrap once ...
	cmp  edi,edx
	jnb  OneGeneration_flag2_end	; ... still outside, nothing to clear
	add  edi,8			; wrapped offsets use the upper qword
OneGeneration_flag2_uphalf:
	mov  ecx,esi			; one store per block
	lea  edi,[edi+ebx+16]
	pxor mm0,mm0
OneGeneration_flag2_cycle:
	movq [edi],mm0
	add  edi,edx
	loop OneGeneration_flag2_cycle
OneGeneration_flag2_end:
	; --- flag bit 0 set and bit 2 clear: clear one bit in every qword of one block ---
	bt   dword [esp+48],0
	jnc  OneGeneration_flag1_end
	bt   dword [esp+48],2
	jc   OneGeneration_flag1_end
	push edx
	mov  eax,[esp+36]		; eax = arg1 (offset shifted by the push above)
	xor  edx,edx
	div  esi			; eax = bit index, edx = block index
	mov  esi,edx
	pop  edx
	cmp  eax,64
	jnb  OneGeneration_flag1_end	; bit index outside the 64-bit word
	imul esi,edx
	lea  esi,[esi+ebx+16]		; esi = start of the selected block
	btr  eax,5			; bit index 32..63 -> upper dword, index-32
	jnc  OneGeneration_flag1_noadd4
	add  esi,4
OneGeneration_flag1_noadd4:
	lea  ecx,[edx-8]		; start at the last qword of the block
	mov  edi,8
OneGeneration_flag1_cycle:		; unrolled 4x, walking down in qword steps
	btr  dword [esi+ecx],eax
	sub  ecx,edi
	btr  dword [esi+ecx],eax
	sub  ecx,edi
	btr  dword [esi+ecx],eax
	sub  ecx,edi
	btr  dword [esi+ecx],eax
	sub  ecx,edi
	jnl  OneGeneration_flag1_cycle
OneGeneration_flag1_end:
	pop  edi
	ret
158
 
159
; @OneGeneration$qqsiipvpxvi
;   Computes one generation from a source buffer into a destination buffer.
;   Five dword stack arguments, removed by the callee ("ret 20").
;   NOTE(review): the decorated name suggests the Borland C++ prototype
;   "void __stdcall OneGeneration(int, int, void*, const void*, int)" with
;   (width, height, dest, src, flags) - confirm against the C++ declaration.
;
;     arg1 [esp+20]  reduced to eax = (arg1-1) shr 6; returns at once if arg1 <= 0
;     arg2 [esp+24]  reduced to edx = ((arg2+7)*8) and not 63, the size in bytes
;                    of one block; returns at once if that is <= 0
;     arg3 [esp+28]  destination, rounded up to 16 bytes   -> ebp
;     arg4 [esp+32]  source; ebx = (arg4-1) and not 15, data is read at ebx+16
;     arg5 [esp+36]  flag word: bits 0/1 are handled by OneGeneration_Flag12
;                    before the pass, bits 3/2 by the two post-passes below
;   (offsets are relative to esp after the four register pushes)
;
;   The buffer is processed as eax+1 consecutive blocks of edx bytes, one
;   live_cycle per block, with edi/esi pointing at the previous/next block.
;   The first block pairs with the last one (shifted left by one bit) and the
;   last block with the first one (shifted right by one bit).
;   Preserves ebp, ebx, esi, edi. Uses xmm0..xmm7 and mm0; emms is executed on
;   every path that touched MMX state.
@OneGeneration$qqsiipvpxvi:
	push ebp
	push ebx
	push esi
	push edi
	mov  eax,[esp+20]
	mov  edx,[esp+24]
	mov  ebp,[esp+28]
	mov  ebx,[esp+32]
	dec  eax
	jl   OneGeneration_end		; arg1 <= 0
	add  edx,7
	add  ebp,15
	dec  ebx
	shr  eax,6			; eax = index of the last block
	shl  edx,3
	and  ebp,not 15
	and  ebx,not 15
	and  edx,not 63
	jng  OneGeneration_end		; block size <= 0
	test eax,eax
	jz   OneGeneration_single
	; --- more than one block ---
	mov  edi,edx
	imul edi,eax
	jo   OneGeneration_end		; total size overflows 32 bits
	push eax			; kept on the stack until OneGeneration_flag48
	add  edi,ebx			; edi = last block (left neighbour of the first)
	call OneGeneration_Flag12
	lea  esi,[ebx+edx]		; esi = second block
	push dword [esp]		; loop counter = copy of the pushed eax
	mov  eax,16			; step used by live_cycle
	live_cycle yes,no
	jmp  OneGeneration_cycle_fin
OneGeneration_cycle:			; middle blocks: advance all pointers by one block
	mov  edi,ebx
	mov  ebx,esi
	add  ebp,edx
	add  esi,edx
	live_cycle no,no
OneGeneration_cycle_fin:
	dec  dword [esp]
	jg   OneGeneration_cycle
	; --- last block: its right neighbour is the first block ---
	mov  edi,ebx
	pop  ecx			; drop the loop counter
	mov  ebx,esi
	mov  esi,edx
	add  ebp,edx
	imul esi,[esp]			; edx * index of the last block
	neg  esi
	add  esi,ebx			; esi = first block
	live_cycle no,yes
	jmp  OneGeneration_flag48
OneGeneration_single:
	; --- exactly one block: it is its own left and right neighbour ---
	push eax
	mov  edi,ebx
	call OneGeneration_Flag12
	mov  esi,ebx
	mov  eax,16			; step used by live_cycle
	live_cycle yes,yes
OneGeneration_flag48:
	pop  ebp			; index of the last block ...
	inc  ebp			; ... ebp = number of blocks
	; --- flag bit 3: copy two qwords inside every destination block ---
	; NOTE(review): presumably the vertical wrap-around - confirm.
	bt   dword [esp+36],3
	jnc  OneGeneration_flag8_end
	mov  edi,[esp+24]		; arg2
	mov  ebx,[esp+28]		; arg3
	dec  edi
	add  ebx,15
	shl  edi,4			; edi = (arg2-1)*16
	lea  esi,[edi-16]		; esi = (arg2-2)*16
	and  ebx,not 15			; ebx = aligned destination
	cmp  edi,edx
	jb   OneGeneration_flag8_uphalf
	sub  edi,edx			; offsets past the block wrap to the upper qword
	add  edi,8
	cmp  esi,edx
	jb   OneGeneration_flag8_uphalf
	sub  esi,edx
	add  esi,8
OneGeneration_flag8_uphalf:
	mov  ecx,ebp			; once per block
OneGeneration_flag8_cycle:
	movq mm0,[ebx+esi]
	movq [ebx],mm0
	movq mm0,[ebx+16]
	movq [ebx+edi],mm0
	add  ebx,edx
	loop OneGeneration_flag8_cycle
OneGeneration_flag8_end:
	; --- flag bit 2: copy two bit columns inside the destination ---
	; NOTE(review): presumably the horizontal wrap-around - confirm.
	bt   dword [esp+36],2
	jnc  OneGeneration_flag4_end
	mov  eax,[esp+20]		; arg1
	push edx
	dec  eax
	xor  edx,edx
	mov  ebx,[esp+32]		; arg3 (offset shifted by the push above)
	div  ebp			; eax = bit index, edx = block index of arg1-1
	add  ebx,15
	mov  esi,eax
	mov  edi,edx
	and  ebx,not 15			; ebx = aligned destination
	dec  edx
	jl   OneGeneration_flag4_dec0
	mov  ebp,edx			; previous block, same bit
	jmp  OneGeneration_flag4_after_dec
OneGeneration_flag4_dec0:
	dec  ebp			; block 0: step back to the last block ...
	dec  eax			; ... and to the previous bit
OneGeneration_flag4_after_dec:
	pop  edx
	imul edi,edx
	imul ebp,edx
	add  edi,ebx			; edi = block holding bit esi (written below)
	add  ebp,ebx			; ebp = block holding bit eax (read below)
	btr  esi,5			; bit index 32..63 -> upper dword, index-32
	jnc  OneGeneration_flag4_noadd4f
	add  edi,4
OneGeneration_flag4_noadd4f:
	btr  eax,5
	jnc  OneGeneration_flag4_noadd4s
	add  ebp,4
OneGeneration_flag4_noadd4s:
	mov  ecx,edx
	jmp  OneGeneration_flag4_cycle0_entry
	; first copy: bit eax of [ebp+..] -> bit 0 of [ebx+..], for every qword
OneGeneration_flag4_cycle0:
	btr  dword [ebx+ecx],0
OneGeneration_flag4_cycle0_entry:
	sub  ecx,8
	jl   OneGeneration_flag4_cycle0_end
	bt   dword [ebp+ecx],eax
	jnc  OneGeneration_flag4_cycle0
	bts  dword [ebx+ecx],0
	jmp  OneGeneration_flag4_cycle0_entry
OneGeneration_flag4_cycle0_end:
	xor  eax,eax
	cmp  dword [esp+20],64
	jng  OneGeneration_flag4_single
	add  ebx,edx			; arg1 > 64: read bit 0 of the second block
	jmp  OneGeneration_flag4_cycle1_entry
OneGeneration_flag4_single:
	inc  eax			; arg1 <= 64: read bit 1 of the first block
	jmp  OneGeneration_flag4_cycle1_entry
	; second copy: bit eax of [ebx+..] -> bit esi of [edi+..], for every qword
OneGeneration_flag4_cycle1:
	btr  dword [edi+edx],esi
OneGeneration_flag4_cycle1_entry:
	sub  edx,8
	jl   OneGeneration_flag4_end
	bt   dword [ebx+edx],eax
	jnc  OneGeneration_flag4_cycle1
	bts  dword [edi+edx],esi
	jmp  OneGeneration_flag4_cycle1_entry
OneGeneration_flag4_end:
	emms				; leave MMX state clean for x87 code in the caller
OneGeneration_end:
	pop  edi
	pop  esi
	pop  ebx
	pop  ebp
	ret  20				; callee removes the five dword arguments
318