Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
3960 | Serge | 1 | /* |
2 | dct64_3dnow.s: Replacement of dct64() with AMD's 3DNow! SIMD operations support |
||
3 | |||
4 | copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1 |
||
5 | see COPYING and AUTHORS files in distribution or http://mpg123.org |
||
6 | initially written by Syuuhei Kashiyama |
||
7 | |||
8 | Original "license" statement: |
||
9 | The author of this program disclaim whole expressed or implied |
||
10 | warranties with regard to this program, and in no event shall the |
||
11 | author of this program liable to whatever resulted from the use of |
||
12 | this program. Use it at your own risk. |
||
13 | */ |
||
14 | |||
15 | #include "mangle.h" |
||
16 | |||
17 | .globl ASM_NAME(dct64_3dnow) |
||
18 | /* .type ASM_NAME(dct64_3dnow),@function */ |
||
19 | ASM_NAME(dct64_3dnow): /* entry point; reads three 32-bit stack arguments (loaded below); modifies %eax, %edx and %mm0-%mm7 */ |
||
20 | subl $256,%esp /* reserve a 256-byte scratch area: two 128-byte (32-float) work buffers */ |
||
21 | pushl %ebp /* save the four registers that are popped again before ret */ |
||
22 | pushl %edi |
||
23 | pushl %esi |
||
24 | pushl %ebx |
||
25 | leal 16(%esp),%ebx /* %ebx = first work buffer (the scratch area starts right above the 4 saved registers) */ |
||
26 | movl 284(%esp),%edi /* %edi = third stack argument (the return address now sits at 272(%esp)); the 32 floats read in phase 1 */ |
||
27 | movl 276(%esp),%ebp /* %ebp = first stack argument; output pointer written in phase 6 */ |
||
28 | movl 280(%esp),%edx /* %edx = second stack argument; second output pointer written in phase 6 */ |
||
29 | leal 128(%ebx),%esi /* %esi = second work buffer (upper 128 bytes of the scratch area) */ |
||
30 | |||
31 | /* femms: left disabled here; presumably the caller is responsible for resetting the MMX/3DNow! state - TODO confirm */ |
||
32 | |||
33 | /* 1: first pass, 32 input floats at %edi -> work buffer at %ebx, coefficients from the table pointed to by the first word of pnts. For i = 0..15: out[i] = in[i] + in[31-i] and out[31-i] = (in[i] - in[31-i]) * tab[i]. Each group below handles two values of i; the seven groups after the first repeat the same pattern at the next offsets. */ |
||
34 | movl ASM_NAME(pnts),%eax /* %eax = first pointer stored in pnts */ |
||
35 | movq 0(%edi),%mm0 /* mm0 = { in[0], in[1] } */ |
||
36 | movq %mm0,%mm1 |
||
37 | movd 124(%edi),%mm2 /* mm2 low = in[31] */ |
||
38 | punpckldq 120(%edi),%mm2 /* mm2 = { in[31], in[30] }: the mirrored partner pair */ |
||
39 | movq 0(%eax),%mm3 /* mm3 = { tab[0], tab[1] } */ |
||
40 | pfadd %mm2,%mm0 /* sums */ |
||
41 | movq %mm0,0(%ebx) |
||
42 | pfsub %mm2,%mm1 /* differences in[i] - in[31-i] */ |
||
43 | pfmul %mm3,%mm1 |
||
44 | movd %mm1,124(%ebx) /* out[31] */ |
||
45 | psrlq $32,%mm1 /* move the high float into the low half */ |
||
46 | movd %mm1,120(%ebx) /* out[30] */ |
||
47 | movq 8(%edi),%mm4 |
||
48 | movq %mm4,%mm5 |
||
49 | movd 116(%edi),%mm6 |
||
50 | punpckldq 112(%edi),%mm6 |
||
51 | movq 8(%eax),%mm7 |
||
52 | pfadd %mm6,%mm4 |
||
53 | movq %mm4,8(%ebx) |
||
54 | pfsub %mm6,%mm5 |
||
55 | pfmul %mm7,%mm5 |
||
56 | movd %mm5,116(%ebx) |
||
57 | psrlq $32,%mm5 |
||
58 | movd %mm5,112(%ebx) |
||
59 | movq 16(%edi),%mm0 |
||
60 | movq %mm0,%mm1 |
||
61 | movd 108(%edi),%mm2 |
||
62 | punpckldq 104(%edi),%mm2 |
||
63 | movq 16(%eax),%mm3 |
||
64 | pfadd %mm2,%mm0 |
||
65 | movq %mm0,16(%ebx) |
||
66 | pfsub %mm2,%mm1 |
||
67 | pfmul %mm3,%mm1 |
||
68 | movd %mm1,108(%ebx) |
||
69 | psrlq $32,%mm1 |
||
70 | movd %mm1,104(%ebx) |
||
71 | movq 24(%edi),%mm4 |
||
72 | movq %mm4,%mm5 |
||
73 | movd 100(%edi),%mm6 |
||
74 | punpckldq 96(%edi),%mm6 |
||
75 | movq 24(%eax),%mm7 |
||
76 | pfadd %mm6,%mm4 |
||
77 | movq %mm4,24(%ebx) |
||
78 | pfsub %mm6,%mm5 |
||
79 | pfmul %mm7,%mm5 |
||
80 | movd %mm5,100(%ebx) |
||
81 | psrlq $32,%mm5 |
||
82 | movd %mm5,96(%ebx) |
||
83 | movq 32(%edi),%mm0 |
||
84 | movq %mm0,%mm1 |
||
85 | movd 92(%edi),%mm2 |
||
86 | punpckldq 88(%edi),%mm2 |
||
87 | movq 32(%eax),%mm3 |
||
88 | pfadd %mm2,%mm0 |
||
89 | movq %mm0,32(%ebx) |
||
90 | pfsub %mm2,%mm1 |
||
91 | pfmul %mm3,%mm1 |
||
92 | movd %mm1,92(%ebx) |
||
93 | psrlq $32,%mm1 |
||
94 | movd %mm1,88(%ebx) |
||
95 | movq 40(%edi),%mm4 |
||
96 | movq %mm4,%mm5 |
||
97 | movd 84(%edi),%mm6 |
||
98 | punpckldq 80(%edi),%mm6 |
||
99 | movq 40(%eax),%mm7 |
||
100 | pfadd %mm6,%mm4 |
||
101 | movq %mm4,40(%ebx) |
||
102 | pfsub %mm6,%mm5 |
||
103 | pfmul %mm7,%mm5 |
||
104 | movd %mm5,84(%ebx) |
||
105 | psrlq $32,%mm5 |
||
106 | movd %mm5,80(%ebx) |
||
107 | movq 48(%edi),%mm0 |
||
108 | movq %mm0,%mm1 |
||
109 | movd 76(%edi),%mm2 |
||
110 | punpckldq 72(%edi),%mm2 |
||
111 | movq 48(%eax),%mm3 |
||
112 | pfadd %mm2,%mm0 |
||
113 | movq %mm0,48(%ebx) |
||
114 | pfsub %mm2,%mm1 |
||
115 | pfmul %mm3,%mm1 |
||
116 | movd %mm1,76(%ebx) |
||
117 | psrlq $32,%mm1 |
||
118 | movd %mm1,72(%ebx) |
||
119 | movq 56(%edi),%mm4 |
||
120 | movq %mm4,%mm5 |
||
121 | movd 68(%edi),%mm6 |
||
122 | punpckldq 64(%edi),%mm6 |
||
123 | movq 56(%eax),%mm7 |
||
124 | pfadd %mm6,%mm4 |
||
125 | movq %mm4,56(%ebx) |
||
126 | pfsub %mm6,%mm5 |
||
127 | pfmul %mm7,%mm5 |
||
128 | movd %mm5,68(%ebx) |
||
129 | psrlq $32,%mm5 |
||
130 | movd %mm5,64(%ebx) |
||
131 | |||
132 | /* 2: second pass, work buffer at %ebx -> work buffer at %esi, coefficients from the table pointed to by the second word of pnts (8 floats are read). Each 16-float half is folded onto itself. Lower half, i = 0..7: out[i] = b[i] + b[15-i], out[15-i] = (b[i] - b[15-i]) * tab[i]. Upper half: out[16+i] = b[16+i] + b[31-i], out[31-i] = (b[31-i] - b[16+i]) * tab[i]; pfsubr takes the difference the other way round. The labels give the first float index of the two register pairs being combined. */ |
||
133 | movl ASM_NAME(pnts)+4,%eax |
||
134 | /* 0,14: floats 0-1 with the mirrored floats 15-14 (lower half, pfsub) */ |
||
135 | movq 0(%ebx),%mm0 |
||
136 | movq %mm0,%mm1 |
||
137 | movd 60(%ebx),%mm2 |
||
138 | punpckldq 56(%ebx),%mm2 |
||
139 | movq 0(%eax),%mm3 |
||
140 | pfadd %mm2,%mm0 |
||
141 | movq %mm0,0(%esi) |
||
142 | pfsub %mm2,%mm1 |
||
143 | pfmul %mm3,%mm1 |
||
144 | movd %mm1,60(%esi) |
||
145 | psrlq $32,%mm1 |
||
146 | movd %mm1,56(%esi) |
||
147 | /* 16,30: upper half, floats 16-17 with mirrored 31-30; reuses the coefficients still held in mm3 and subtracts with pfsubr */ |
||
148 | movq 64(%ebx),%mm0 |
||
149 | movq %mm0,%mm1 |
||
150 | movd 124(%ebx),%mm2 |
||
151 | punpckldq 120(%ebx),%mm2 |
||
152 | pfadd %mm2,%mm0 |
||
153 | movq %mm0,64(%esi) |
||
154 | pfsubr %mm2,%mm1 |
||
155 | pfmul %mm3,%mm1 |
||
156 | movd %mm1,124(%esi) |
||
157 | psrlq $32,%mm1 |
||
158 | movd %mm1,120(%esi) |
||
159 | /* 2,12: lower half, next coefficient pair loaded into mm7 */ |
||
160 | movq 8(%ebx),%mm4 |
||
161 | movq %mm4,%mm5 |
||
162 | movd 52(%ebx),%mm6 |
||
163 | punpckldq 48(%ebx),%mm6 |
||
164 | movq 8(%eax),%mm7 |
||
165 | pfadd %mm6,%mm4 |
||
166 | movq %mm4,8(%esi) |
||
167 | pfsub %mm6,%mm5 |
||
168 | pfmul %mm7,%mm5 |
||
169 | movd %mm5,52(%esi) |
||
170 | psrlq $32,%mm5 |
||
171 | movd %mm5,48(%esi) |
||
172 | /* 18,28: upper half, reuses mm7 */ |
||
173 | movq 72(%ebx),%mm4 |
||
174 | movq %mm4,%mm5 |
||
175 | movd 116(%ebx),%mm6 |
||
176 | punpckldq 112(%ebx),%mm6 |
||
177 | pfadd %mm6,%mm4 |
||
178 | movq %mm4,72(%esi) |
||
179 | pfsubr %mm6,%mm5 |
||
180 | pfmul %mm7,%mm5 |
||
181 | movd %mm5,116(%esi) |
||
182 | psrlq $32,%mm5 |
||
183 | movd %mm5,112(%esi) |
||
184 | /* 4,10: lower half, next coefficient pair loaded into mm3 */ |
||
185 | movq 16(%ebx),%mm0 |
||
186 | movq %mm0,%mm1 |
||
187 | movd 44(%ebx),%mm2 |
||
188 | punpckldq 40(%ebx),%mm2 |
||
189 | movq 16(%eax),%mm3 |
||
190 | pfadd %mm2,%mm0 |
||
191 | movq %mm0,16(%esi) |
||
192 | pfsub %mm2,%mm1 |
||
193 | pfmul %mm3,%mm1 |
||
194 | movd %mm1,44(%esi) |
||
195 | psrlq $32,%mm1 |
||
196 | movd %mm1,40(%esi) |
||
197 | /* 20,26: upper half, reuses mm3 */ |
||
198 | movq 80(%ebx),%mm0 |
||
199 | movq %mm0,%mm1 |
||
200 | movd 108(%ebx),%mm2 |
||
201 | punpckldq 104(%ebx),%mm2 |
||
202 | pfadd %mm2,%mm0 |
||
203 | movq %mm0,80(%esi) |
||
204 | pfsubr %mm2,%mm1 |
||
205 | pfmul %mm3,%mm1 |
||
206 | movd %mm1,108(%esi) |
||
207 | psrlq $32,%mm1 |
||
208 | movd %mm1,104(%esi) |
||
209 | /* 6,8: lower half, last coefficient pair loaded into mm7 */ |
||
210 | movq 24(%ebx),%mm4 |
||
211 | movq %mm4,%mm5 |
||
212 | movd 36(%ebx),%mm6 |
||
213 | punpckldq 32(%ebx),%mm6 |
||
214 | movq 24(%eax),%mm7 |
||
215 | pfadd %mm6,%mm4 |
||
216 | movq %mm4,24(%esi) |
||
217 | pfsub %mm6,%mm5 |
||
218 | pfmul %mm7,%mm5 |
||
219 | movd %mm5,36(%esi) |
||
220 | psrlq $32,%mm5 |
||
221 | movd %mm5,32(%esi) |
||
222 | /* 22,24: upper half, reuses mm7 */ |
||
223 | movq 88(%ebx),%mm4 |
||
224 | movq %mm4,%mm5 |
||
225 | movd 100(%ebx),%mm6 |
||
226 | punpckldq 96(%ebx),%mm6 |
||
227 | pfadd %mm6,%mm4 |
||
228 | movq %mm4,88(%esi) |
||
229 | pfsubr %mm6,%mm5 |
||
230 | pfmul %mm7,%mm5 |
||
231 | movd %mm5,100(%esi) |
||
232 | psrlq $32,%mm5 |
||
233 | movd %mm5,96(%esi) |
||
234 | |||
235 | /* 3: third pass, work buffer at %esi -> work buffer at %ebx. Four coefficients from the table pointed to by the third word of pnts are kept in mm0 and mm1 for the whole pass. Each group of 8 floats is folded onto itself: sums go to the first half of the group, scaled differences are stored mirrored in the second half. Groups starting at floats 0 and 16 subtract with pfsub, groups starting at floats 8 and 24 with pfsubr (reversed difference). */ |
||
236 | movl ASM_NAME(pnts)+8,%eax |
||
237 | movq 0(%eax),%mm0 |
||
238 | movq 8(%eax),%mm1 |
||
239 | /* 0,6: floats 0-1 with the mirrored floats 7-6, coefficients in mm0 */ |
||
240 | movq 0(%esi),%mm2 |
||
241 | movq %mm2,%mm3 |
||
242 | movd 28(%esi),%mm4 |
||
243 | punpckldq 24(%esi),%mm4 |
||
244 | pfadd %mm4,%mm2 |
||
245 | pfsub %mm4,%mm3 |
||
246 | pfmul %mm0,%mm3 |
||
247 | movq %mm2,0(%ebx) |
||
248 | movd %mm3,28(%ebx) |
||
249 | psrlq $32,%mm3 |
||
250 | movd %mm3,24(%ebx) |
||
251 | /* 2,4: floats 2-3 with the mirrored floats 5-4, coefficients in mm1 */ |
||
252 | movq 8(%esi),%mm5 |
||
253 | movq %mm5,%mm6 |
||
254 | movd 20(%esi),%mm7 |
||
255 | punpckldq 16(%esi),%mm7 |
||
256 | pfadd %mm7,%mm5 |
||
257 | pfsub %mm7,%mm6 |
||
258 | pfmul %mm1,%mm6 |
||
259 | movq %mm5,8(%ebx) |
||
260 | movd %mm6,20(%ebx) |
||
261 | psrlq $32,%mm6 |
||
262 | movd %mm6,16(%ebx) |
||
263 | /* 8,14: second group of 8, reversed difference (pfsubr) */ |
||
264 | movq 32(%esi),%mm2 |
||
265 | movq %mm2,%mm3 |
||
266 | movd 60(%esi),%mm4 |
||
267 | punpckldq 56(%esi),%mm4 |
||
268 | pfadd %mm4,%mm2 |
||
269 | pfsubr %mm4,%mm3 |
||
270 | pfmul %mm0,%mm3 |
||
271 | movq %mm2,32(%ebx) |
||
272 | movd %mm3,60(%ebx) |
||
273 | psrlq $32,%mm3 |
||
274 | movd %mm3,56(%ebx) |
||
275 | /* 10,12 */ |
||
276 | movq 40(%esi),%mm5 |
||
277 | movq %mm5,%mm6 |
||
278 | movd 52(%esi),%mm7 |
||
279 | punpckldq 48(%esi),%mm7 |
||
280 | pfadd %mm7,%mm5 |
||
281 | pfsubr %mm7,%mm6 |
||
282 | pfmul %mm1,%mm6 |
||
283 | movq %mm5,40(%ebx) |
||
284 | movd %mm6,52(%ebx) |
||
285 | psrlq $32,%mm6 |
||
286 | movd %mm6,48(%ebx) |
||
287 | /* 16,22: third group of 8, plain difference (pfsub) */ |
||
288 | movq 64(%esi),%mm2 |
||
289 | movq %mm2,%mm3 |
||
290 | movd 92(%esi),%mm4 |
||
291 | punpckldq 88(%esi),%mm4 |
||
292 | pfadd %mm4,%mm2 |
||
293 | pfsub %mm4,%mm3 |
||
294 | pfmul %mm0,%mm3 |
||
295 | movq %mm2,64(%ebx) |
||
296 | movd %mm3,92(%ebx) |
||
297 | psrlq $32,%mm3 |
||
298 | movd %mm3,88(%ebx) |
||
299 | /* 18,20 */ |
||
300 | movq 72(%esi),%mm5 |
||
301 | movq %mm5,%mm6 |
||
302 | movd 84(%esi),%mm7 |
||
303 | punpckldq 80(%esi),%mm7 |
||
304 | pfadd %mm7,%mm5 |
||
305 | pfsub %mm7,%mm6 |
||
306 | pfmul %mm1,%mm6 |
||
307 | movq %mm5,72(%ebx) |
||
308 | movd %mm6,84(%ebx) |
||
309 | psrlq $32,%mm6 |
||
310 | movd %mm6,80(%ebx) |
||
311 | /* 24,30: fourth group of 8, reversed difference (pfsubr) */ |
||
312 | movq 96(%esi),%mm2 |
||
313 | movq %mm2,%mm3 |
||
314 | movd 124(%esi),%mm4 |
||
315 | punpckldq 120(%esi),%mm4 |
||
316 | pfadd %mm4,%mm2 |
||
317 | pfsubr %mm4,%mm3 |
||
318 | pfmul %mm0,%mm3 |
||
319 | movq %mm2,96(%ebx) |
||
320 | movd %mm3,124(%ebx) |
||
321 | psrlq $32,%mm3 |
||
322 | movd %mm3,120(%ebx) |
||
323 | /* 26,28 */ |
||
324 | movq 104(%esi),%mm5 |
||
325 | movq %mm5,%mm6 |
||
326 | movd 116(%esi),%mm7 |
||
327 | punpckldq 112(%esi),%mm7 |
||
328 | pfadd %mm7,%mm5 |
||
329 | pfsubr %mm7,%mm6 |
||
330 | pfmul %mm1,%mm6 |
||
331 | movq %mm5,104(%ebx) |
||
332 | movd %mm6,116(%ebx) |
||
333 | psrlq $32,%mm6 |
||
334 | movd %mm6,112(%ebx) |
||
335 | |||
336 | /* 4: fourth pass, work buffer at %ebx -> work buffer at %esi. Two coefficients { t0, t1 } from the table pointed to by the fourth word of pnts stay in mm0. Each group of 4 floats { a, b, c, d } becomes { a+d, b+c, (b-c)*t1, (a-d)*t0 } in the pfsub groups (labels 0, 8, 16, 24) and { a+d, b+c, (c-b)*t1, (d-a)*t0 } in the pfsubr groups (labels 4, 12, 20, 28). The labels are the first float index of each group. */ |
||
337 | movl ASM_NAME(pnts)+12,%eax |
||
338 | movq 0(%eax),%mm0 |
||
339 | /* 0 */ |
||
340 | movq 0(%ebx),%mm1 |
||
341 | movq %mm1,%mm2 |
||
342 | movd 12(%ebx),%mm3 |
||
343 | punpckldq 8(%ebx),%mm3 |
||
344 | pfadd %mm3,%mm1 |
||
345 | pfsub %mm3,%mm2 |
||
346 | pfmul %mm0,%mm2 |
||
347 | movq %mm1,0(%esi) |
||
348 | movd %mm2,12(%esi) |
||
349 | psrlq $32,%mm2 |
||
350 | movd %mm2,8(%esi) |
||
351 | /* 4 */ |
||
352 | movq 16(%ebx),%mm4 |
||
353 | movq %mm4,%mm5 |
||
354 | movd 28(%ebx),%mm6 |
||
355 | punpckldq 24(%ebx),%mm6 |
||
356 | pfadd %mm6,%mm4 |
||
357 | pfsubr %mm6,%mm5 |
||
358 | pfmul %mm0,%mm5 |
||
359 | movq %mm4,16(%esi) |
||
360 | movd %mm5,28(%esi) |
||
361 | psrlq $32,%mm5 |
||
362 | movd %mm5,24(%esi) |
||
363 | /* 8 */ |
||
364 | movq 32(%ebx),%mm1 |
||
365 | movq %mm1,%mm2 |
||
366 | movd 44(%ebx),%mm3 |
||
367 | punpckldq 40(%ebx),%mm3 |
||
368 | pfadd %mm3,%mm1 |
||
369 | pfsub %mm3,%mm2 |
||
370 | pfmul %mm0,%mm2 |
||
371 | movq %mm1,32(%esi) |
||
372 | movd %mm2,44(%esi) |
||
373 | psrlq $32,%mm2 |
||
374 | movd %mm2,40(%esi) |
||
375 | /* 12 */ |
||
376 | movq 48(%ebx),%mm4 |
||
377 | movq %mm4,%mm5 |
||
378 | movd 60(%ebx),%mm6 |
||
379 | punpckldq 56(%ebx),%mm6 |
||
380 | pfadd %mm6,%mm4 |
||
381 | pfsubr %mm6,%mm5 |
||
382 | pfmul %mm0,%mm5 |
||
383 | movq %mm4,48(%esi) |
||
384 | movd %mm5,60(%esi) |
||
385 | psrlq $32,%mm5 |
||
386 | movd %mm5,56(%esi) |
||
387 | /* 16 */ |
||
388 | movq 64(%ebx),%mm1 |
||
389 | movq %mm1,%mm2 |
||
390 | movd 76(%ebx),%mm3 |
||
391 | punpckldq 72(%ebx),%mm3 |
||
392 | pfadd %mm3,%mm1 |
||
393 | pfsub %mm3,%mm2 |
||
394 | pfmul %mm0,%mm2 |
||
395 | movq %mm1,64(%esi) |
||
396 | movd %mm2,76(%esi) |
||
397 | psrlq $32,%mm2 |
||
398 | movd %mm2,72(%esi) |
||
399 | /* 20 */ |
||
400 | movq 80(%ebx),%mm4 |
||
401 | movq %mm4,%mm5 |
||
402 | movd 92(%ebx),%mm6 |
||
403 | punpckldq 88(%ebx),%mm6 |
||
404 | pfadd %mm6,%mm4 |
||
405 | pfsubr %mm6,%mm5 |
||
406 | pfmul %mm0,%mm5 |
||
407 | movq %mm4,80(%esi) |
||
408 | movd %mm5,92(%esi) |
||
409 | psrlq $32,%mm5 |
||
410 | movd %mm5,88(%esi) |
||
411 | /* 24 */ |
||
412 | movq 96(%ebx),%mm1 |
||
413 | movq %mm1,%mm2 |
||
414 | movd 108(%ebx),%mm3 |
||
415 | punpckldq 104(%ebx),%mm3 |
||
416 | pfadd %mm3,%mm1 |
||
417 | pfsub %mm3,%mm2 |
||
418 | pfmul %mm0,%mm2 |
||
419 | movq %mm1,96(%esi) |
||
420 | movd %mm2,108(%esi) |
||
421 | psrlq $32,%mm2 |
||
422 | movd %mm2,104(%esi) |
||
423 | /* 28 */ |
||
424 | movq 112(%ebx),%mm4 |
||
425 | movq %mm4,%mm5 |
||
426 | movd 124(%ebx),%mm6 |
||
427 | punpckldq 120(%ebx),%mm6 |
||
428 | pfadd %mm6,%mm4 |
||
429 | pfsubr %mm6,%mm5 |
||
430 | pfmul %mm0,%mm5 |
||
431 | movq %mm4,112(%esi) |
||
432 | movd %mm5,124(%esi) |
||
433 | psrlq $32,%mm5 |
||
434 | movd %mm5,120(%esi) |
||
435 | |||
436 | /* 5: fifth pass, work buffer at %esi -> work buffer at %ebx, working on adjacent float pairs. Two constants are built first: mm0 = { 1.0, -1.0 } and mm1 = { 1.0, cos0 }, where cos0 is the first float of the table pointed to by the fifth word of pnts. Groups labelled 0, 8, 16, 24 share one shape and groups labelled 4, 12, 20, 28 another; both are spelled out at their first occurrence. */ |
||
437 | movl $-1,%eax |
||
438 | movd %eax,%mm1 |
||
439 | movl $1,%eax |
||
440 | /* L | H: the notes below list the low float first, then the high float */ |
||
441 | movd %eax,%mm0 |
||
442 | punpckldq %mm1,%mm0 |
||
443 | /* 1.0 | -1.0: pi2fd converts the integer pair { 1, -1 } in mm0 to floats */ |
||
444 | pi2fd %mm0,%mm0 |
||
445 | movd %eax,%mm1 |
||
446 | pi2fd %mm1,%mm1 |
||
447 | movl ASM_NAME(pnts)+16,%eax |
||
448 | movd 0(%eax),%mm2 |
||
449 | /* 1.0 | cos0: the low float of mm1 stays 1.0, the high float becomes the coefficient just loaded */ |
||
450 | punpckldq %mm2,%mm1 |
||
451 | /* 0: first pair { a, b } -> { a+b, (a-b)*cos0 }, stored unchanged; second pair { c, d } -> { (c+d) + (d-c)*cos0, (d-c)*cos0 } */ |
||
452 | movq 0(%esi),%mm2 |
||
453 | movq %mm2,%mm3 |
||
454 | pfmul %mm0,%mm3 |
||
455 | pfacc %mm3,%mm2 |
||
456 | pfmul %mm1,%mm2 |
||
457 | movq %mm2,0(%ebx) |
||
458 | movq 8(%esi),%mm4 |
||
459 | movq %mm4,%mm5 |
||
460 | pfmul %mm0,%mm5 |
||
461 | pfacc %mm5,%mm4 |
||
462 | pfmul %mm0,%mm4 |
||
463 | pfmul %mm1,%mm4 |
||
464 | movq %mm4,%mm5 |
||
465 | psrlq $32,%mm5 |
||
466 | pfacc %mm5,%mm4 |
||
467 | movq %mm4,8(%ebx) |
||
468 | /* 4: computes P = { p0, p1 } and Q = { q0, q1 } exactly as the two pairs of group 0, then stores { p0+q0, p1+q1 } followed by { q0+p1, q1 } */ |
||
469 | movq 16(%esi),%mm2 |
||
470 | movq %mm2,%mm3 |
||
471 | pfmul %mm0,%mm3 |
||
472 | pfacc %mm3,%mm2 |
||
473 | pfmul %mm1,%mm2 |
||
474 | movq 24(%esi),%mm4 |
||
475 | movq %mm4,%mm5 |
||
476 | pfmul %mm0,%mm5 |
||
477 | pfacc %mm5,%mm4 |
||
478 | pfmul %mm0,%mm4 |
||
479 | pfmul %mm1,%mm4 |
||
480 | movq %mm4,%mm5 |
||
481 | psrlq $32,%mm5 |
||
482 | pfacc %mm5,%mm4 |
||
483 | movq %mm2,%mm3 |
||
484 | psrlq $32,%mm3 |
||
485 | pfadd %mm4,%mm2 |
||
486 | pfadd %mm3,%mm4 |
||
487 | movq %mm2,16(%ebx) |
||
488 | movq %mm4,24(%ebx) |
||
489 | /* 8: same shape as group 0 */ |
||
490 | movq 32(%esi),%mm2 |
||
491 | movq %mm2,%mm3 |
||
492 | pfmul %mm0,%mm3 |
||
493 | pfacc %mm3,%mm2 |
||
494 | pfmul %mm1,%mm2 |
||
495 | movq %mm2,32(%ebx) |
||
496 | movq 40(%esi),%mm4 |
||
497 | movq %mm4,%mm5 |
||
498 | pfmul %mm0,%mm5 |
||
499 | pfacc %mm5,%mm4 |
||
500 | pfmul %mm0,%mm4 |
||
501 | pfmul %mm1,%mm4 |
||
502 | movq %mm4,%mm5 |
||
503 | psrlq $32,%mm5 |
||
504 | pfacc %mm5,%mm4 |
||
505 | movq %mm4,40(%ebx) |
||
506 | /* 12: same shape as group 4 */ |
||
507 | movq 48(%esi),%mm2 |
||
508 | movq %mm2,%mm3 |
||
509 | pfmul %mm0,%mm3 |
||
510 | pfacc %mm3,%mm2 |
||
511 | pfmul %mm1,%mm2 |
||
512 | movq 56(%esi),%mm4 |
||
513 | movq %mm4,%mm5 |
||
514 | pfmul %mm0,%mm5 |
||
515 | pfacc %mm5,%mm4 |
||
516 | pfmul %mm0,%mm4 |
||
517 | pfmul %mm1,%mm4 |
||
518 | movq %mm4,%mm5 |
||
519 | psrlq $32,%mm5 |
||
520 | pfacc %mm5,%mm4 |
||
521 | movq %mm2,%mm3 |
||
522 | psrlq $32,%mm3 |
||
523 | pfadd %mm4,%mm2 |
||
524 | pfadd %mm3,%mm4 |
||
525 | movq %mm2,48(%ebx) |
||
526 | movq %mm4,56(%ebx) |
||
527 | /* 16: same shape as group 0 */ |
||
528 | movq 64(%esi),%mm2 |
||
529 | movq %mm2,%mm3 |
||
530 | pfmul %mm0,%mm3 |
||
531 | pfacc %mm3,%mm2 |
||
532 | pfmul %mm1,%mm2 |
||
533 | movq %mm2,64(%ebx) |
||
534 | movq 72(%esi),%mm4 |
||
535 | movq %mm4,%mm5 |
||
536 | pfmul %mm0,%mm5 |
||
537 | pfacc %mm5,%mm4 |
||
538 | pfmul %mm0,%mm4 |
||
539 | pfmul %mm1,%mm4 |
||
540 | movq %mm4,%mm5 |
||
541 | psrlq $32,%mm5 |
||
542 | pfacc %mm5,%mm4 |
||
543 | movq %mm4,72(%ebx) |
||
544 | /* 20: same shape as group 4 */ |
||
545 | movq 80(%esi),%mm2 |
||
546 | movq %mm2,%mm3 |
||
547 | pfmul %mm0,%mm3 |
||
548 | pfacc %mm3,%mm2 |
||
549 | pfmul %mm1,%mm2 |
||
550 | movq 88(%esi),%mm4 |
||
551 | movq %mm4,%mm5 |
||
552 | pfmul %mm0,%mm5 |
||
553 | pfacc %mm5,%mm4 |
||
554 | pfmul %mm0,%mm4 |
||
555 | pfmul %mm1,%mm4 |
||
556 | movq %mm4,%mm5 |
||
557 | psrlq $32,%mm5 |
||
558 | pfacc %mm5,%mm4 |
||
559 | movq %mm2,%mm3 |
||
560 | psrlq $32,%mm3 |
||
561 | pfadd %mm4,%mm2 |
||
562 | pfadd %mm3,%mm4 |
||
563 | movq %mm2,80(%ebx) |
||
564 | movq %mm4,88(%ebx) |
||
565 | /* 24: same shape as group 0 */ |
||
566 | movq 96(%esi),%mm2 |
||
567 | movq %mm2,%mm3 |
||
568 | pfmul %mm0,%mm3 |
||
569 | pfacc %mm3,%mm2 |
||
570 | pfmul %mm1,%mm2 |
||
571 | movq %mm2,96(%ebx) |
||
572 | movq 104(%esi),%mm4 |
||
573 | movq %mm4,%mm5 |
||
574 | pfmul %mm0,%mm5 |
||
575 | pfacc %mm5,%mm4 |
||
576 | pfmul %mm0,%mm4 |
||
577 | pfmul %mm1,%mm4 |
||
578 | movq %mm4,%mm5 |
||
579 | psrlq $32,%mm5 |
||
580 | pfacc %mm5,%mm4 |
||
581 | movq %mm4,104(%ebx) |
||
582 | /* 28: same shape as group 4 */ |
||
583 | movq 112(%esi),%mm2 |
||
584 | movq %mm2,%mm3 |
||
585 | pfmul %mm0,%mm3 |
||
586 | pfacc %mm3,%mm2 |
||
587 | pfmul %mm1,%mm2 |
||
588 | movq 120(%esi),%mm4 |
||
589 | movq %mm4,%mm5 |
||
590 | pfmul %mm0,%mm5 |
||
591 | pfacc %mm5,%mm4 |
||
592 | pfmul %mm0,%mm4 |
||
593 | pfmul %mm1,%mm4 |
||
594 | movq %mm4,%mm5 |
||
595 | psrlq $32,%mm5 |
||
596 | pfacc %mm5,%mm4 |
||
597 | movq %mm2,%mm3 |
||
598 | psrlq $32,%mm3 |
||
599 | pfadd %mm4,%mm2 |
||
600 | pfadd %mm3,%mm4 |
||
601 | movq %mm2,112(%ebx) |
||
602 | movq %mm4,120(%ebx) |
||
603 | |||
604 | /* Phase6: output stage. Floats from the work buffer at %ebx, some copied directly and some summed with pfadd first, are scattered through the two output pointers %ebp and %edx. Every store lands on a byte offset that is a multiple of 64 (every 16th float): offsets 0..1024 through %ebp and 0..960 through %edx. In the pfadd sections the low float of each result goes out through %ebp and the high float through %edx. */ |
||
605 | movl 0(%ebx),%eax |
||
606 | movl %eax,1024(%ebp) |
||
607 | movl 4(%ebx),%eax |
||
608 | movl %eax,0(%ebp) |
||
609 | movl %eax,0(%edx) /* the same value is written to offset 0 of both outputs */ |
||
610 | movl 8(%ebx),%eax |
||
611 | movl %eax,512(%ebp) |
||
612 | movl 12(%ebx),%eax |
||
613 | movl %eax,512(%edx) |
||
||
614 | |||
615 | movl 16(%ebx),%eax |
||
616 | movl %eax,768(%ebp) |
||
617 | movl 20(%ebx),%eax |
||
618 | movl %eax,256(%edx) |
||
||
619 | |||
620 | movl 24(%ebx),%eax |
||
621 | movl %eax,256(%ebp) |
||
622 | movl 28(%ebx),%eax |
||
623 | movl %eax,768(%edx) |
||
||
624 | |||
625 | movq 32(%ebx),%mm0 |
||
626 | movq 48(%ebx),%mm1 |
||
627 | pfadd %mm1,%mm0 |
||
628 | movd %mm0,896(%ebp) |
||
629 | psrlq $32,%mm0 |
||
630 | movd %mm0,128(%edx) |
||
631 | movq 40(%ebx),%mm2 |
||
632 | pfadd %mm2,%mm1 |
||
633 | movd %mm1,640(%ebp) |
||
634 | psrlq $32,%mm1 |
||
635 | movd %mm1,384(%edx) |
||
||
636 | |||
637 | movq 56(%ebx),%mm3 |
||
638 | pfadd %mm3,%mm2 |
||
639 | movd %mm2,384(%ebp) |
||
640 | psrlq $32,%mm2 |
||
641 | movd %mm2,640(%edx) |
||
||
642 | |||
643 | movd 36(%ebx),%mm4 /* movd loads a single float and zeroes the high half, so only the low float of mm3 gets a non-zero addend */ |
||
644 | pfadd %mm4,%mm3 |
||
645 | movd %mm3,128(%ebp) |
||
646 | psrlq $32,%mm3 |
||
647 | movd %mm3,896(%edx) |
||
648 | movq 96(%ebx),%mm0 |
||
649 | movq 64(%ebx),%mm1 |
||
||
650 | |||
651 | movq 112(%ebx),%mm2 |
||
652 | pfadd %mm2,%mm0 |
||
653 | movq %mm0,%mm3 |
||
654 | pfadd %mm1,%mm3 |
||
655 | movd %mm3,960(%ebp) |
||
656 | psrlq $32,%mm3 |
||
657 | movd %mm3,64(%edx) |
||
658 | movq 80(%ebx),%mm1 |
||
659 | pfadd %mm1,%mm0 |
||
660 | movd %mm0,832(%ebp) |
||
661 | psrlq $32,%mm0 |
||
662 | movd %mm0,192(%edx) |
||
663 | movq 104(%ebx),%mm3 |
||
664 | pfadd %mm3,%mm2 |
||
665 | movq %mm2,%mm4 |
||
666 | pfadd %mm1,%mm4 |
||
667 | movd %mm4,704(%ebp) |
||
668 | psrlq $32,%mm4 |
||
669 | movd %mm4,320(%edx) |
||
670 | movq 72(%ebx),%mm1 |
||
671 | pfadd %mm1,%mm2 |
||
672 | movd %mm2,576(%ebp) |
||
673 | psrlq $32,%mm2 |
||
674 | movd %mm2,448(%edx) |
||
||
675 | |||
676 | movq 120(%ebx),%mm4 |
||
677 | pfadd %mm4,%mm3 |
||
678 | movq %mm3,%mm5 |
||
679 | pfadd %mm1,%mm5 |
||
680 | movd %mm5,448(%ebp) |
||
681 | psrlq $32,%mm5 |
||
682 | movd %mm5,576(%edx) |
||
683 | movq 88(%ebx),%mm1 |
||
684 | pfadd %mm1,%mm3 |
||
685 | movd %mm3,320(%ebp) |
||
686 | psrlq $32,%mm3 |
||
687 | movd %mm3,704(%edx) |
||
||
688 | |||
689 | movd 100(%ebx),%mm5 |
||
690 | pfadd %mm5,%mm4 |
||
691 | movq %mm4,%mm6 |
||
692 | pfadd %mm1,%mm6 |
||
693 | movd %mm6,192(%ebp) |
||
694 | psrlq $32,%mm6 |
||
695 | movd %mm6,832(%edx) |
||
696 | movd 68(%ebx),%mm1 |
||
697 | pfadd %mm1,%mm4 |
||
698 | movd %mm4,64(%ebp) |
||
699 | psrlq $32,%mm4 |
||
700 | movd %mm4,960(%edx) |
||
701 | |||
702 | /* femms: also left disabled on exit, so this routine returns without clearing the MMX/3DNow! register state */ |
||
703 | |||
704 | popl %ebx /* restore the saved registers in reverse push order */ |
||
705 | popl %esi |
||
706 | popl %edi |
||
707 | popl %ebp |
||
708 | addl $256,%esp /* release the 256-byte scratch area reserved on entry */ |
||
||
709 | |||
710 | ret |
||
711 | |||
712 | NONEXEC_STACK |