Rev 2210 | Rev 3474 | Go to most recent revision | Show entire file | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 2210 | Rev 2215 | ||
---|---|---|---|
Line 183... | Line 183... | ||
183 | add ebx, 32 |
183 | add ebx, 32 |
184 | cmp ebx, esi |
184 | cmp ebx, esi |
185 | jnz .loop |
185 | jnz .loop |
186 | ret |
186 | ret |
Line -... | Line 187... | ||
- | 187 | ||
- | 188 | ;================================================================= |
|
- | 189 | ; SSE3 version of Step1 (uses movddup and addsubpd) |
|
- | 190 | ; |
|
- | 191 | ;========================== |
|
- | 192 | ||
- | 193 | align 4 |
|
- | 194 | step1_sse: |
|
- | 195 | mov ebx, [esp+8] |
|
- | 196 | mov esi, [esp+4] |
|
- | 197 | shl esi, 3 |
|
- | 198 | add esi, ebx |
|
- | 199 | ||
- | 200 | .loop: |
|
- | 201 | movddup xmm0, [ebx] ; xmm0: f0 ; f0 |
|
- | 202 | movddup xmm1, [ebx+8] ; xmm1: f1 ; f1 |
|
- | 203 | addsubpd xmm0, xmm1 ; xmm0: t1 ; t2 (high = f0+f1 ; low = f0-f1) |
|
- | 204 | movddup xmm1, [ebx+16] ; xmm1: f2 ; f2 |
|
- | 205 | movddup xmm2, [ebx+24] ; xmm2: f3 ; f3 |
|
- | 206 | addsubpd xmm1, xmm2 ; xmm1: t3 ; t4 (high = f2+f3 ; low = f2-f3) |
|
- | 207 | ||
- | 208 | movddup xmm2, xmm0 ; xmm2: t2 ; t2 |
|
- | 209 | movddup xmm3, xmm1 ; xmm3: t4 ; t4 |
|
- | 210 | addsubpd xmm2, xmm3 ; xmm2: 2+4; 2-4 |
|
- | 211 | shufpd xmm2, xmm2, 1 ; xmm2: 2-4; 2+4 |
|
- | 212 | movapd [ebx+16], xmm2 |
|
- | 213 | ||
- | 214 | shufpd xmm0, xmm0, 1 ; xmm0: t2 ; t1 |
|
- | 215 | shufpd xmm1, xmm1, 1 ; xmm1: t4 ; t3 |
|
- | 216 | movddup xmm2, xmm0 ; xmm2: t1 ; t1 |
|
- | 217 | movddup xmm3, xmm1 ; xmm3: t3 ; t3 |
|
- | 218 | addsubpd xmm2, xmm3 ; xmm2: 1+3; 1-3 |
|
- | 219 | shufpd xmm2, xmm2, 1 ; xmm2: 1-3; 1+3 |
|
- | 220 | movapd [ebx], xmm2 |
|
- | 221 | ||
- | 222 | add ebx, 32 |
|
- | 223 | cmp ebx, esi |
|
- | 224 | jnz .loop |
|
- | 225 | ret |
|
187 | 226 | ||
188 | ; local stack definitions |
227 | ; local stack definitions |
189 | ;=========================================================================== |
228 | ;=========================================================================== |
190 | _t0 equ dword [esp] |
229 | _t0 equ dword [esp] |
191 | _t1 equ dword[esp+4] |
230 | _t1 equ dword[esp+4] |