Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
3960 | Serge | 1 | /* |
2 | synth_sse_float: SSE optimized synth (float output version) |
||
3 | |||
4 | copyright 1995-2009 by the mpg123 project - free software under the terms of the LGPL 2.1 |
||
5 | see COPYING and AUTHORS files in distribution or http://mpg123.org |
||
6 | initially written by Taihei Monma |
||
7 | */ |
||
8 | |||
9 | #include "mangle.h" |
||
10 | |||
/* Register aliases used throughout the routine. Each alias is preceded by the
   C declaration of the pointer argument it holds. WINDOW (%ebx) and SAMPLES
   (%esi) are pushed in the prologue and popped in the epilogue; B0 (%edx) is
   not saved. */
11 | /* real *window; */ |
||
12 | #define WINDOW %ebx |
||
13 | /* real *b0; */ |
||
14 | #define B0 %edx |
||
15 | /* real *samples; */ |
||
16 | #define SAMPLES %esi |
||
17 | |||
18 | /* |
||
19 | int synth_1to1_real_sse_asm(real *window, real *b0, real *samples, int bo1); |
||
20 | return value: number of clipped samples (0) |
||
21 | */ |
||
22 | |||
/* scale_sse: four identical 32-bit words (16 bytes) that the synth routine
   uses as the memory operand of a packed-single multiply (mulps).
   939524096 == 0x38000000, which is the IEEE-754 single-precision bit
   pattern of 2^-15 (1/32768), so every result lane is scaled by that factor. */
/* Section choice: .rodata everywhere except Apple targets, which use .data. */
23 | #ifndef __APPLE__ |
||
24 | .section .rodata |
||
25 | #else |
||
26 | .data |
||
27 | #endif |
||
/* ALIGN32 is not defined in this file (presumably supplied by mangle.h).
   NOTE(review): mulps requires its memory operand to be 16-byte aligned;
   confirm the macro guarantees at least that. */
28 | ALIGN32 |
||
29 | ASM_NAME(scale_sse): |
||
30 | .long 939524096 |
||
31 | .long 939524096 |
||
32 | .long 939524096 |
||
33 | .long 939524096 |
||
/*
 * synth_1to1_real_sse_asm(window, b0, samples, bo1)
 *
 * Arguments are read from the stack relative to %ebp after the prologue:
 *   8(%ebp)  -> WINDOW  (%ebx)
 *   12(%ebp) -> B0      (%edx)
 *   16(%ebp) -> SAMPLES (%esi)
 *   20(%ebp) -> %eax    (bo1)
 *
 * Work done: two loops of 4 iterations each; every iteration computes 4
 * results, so 32 results in total. Each result is a 16-element dot product
 * of a window row and a b0 row, scaled by scale_sse. The first loop combines
 * the four partial-sum lanes with alternating sign (+ - + -) and walks b0
 * forwards; the second loop adds all lanes and walks b0 backwards.
 * Results are written to every other 4-byte slot of samples (even slots);
 * the odd slots are read and written back unchanged.
 *
 * Returns: %eax = 0.
 * Preserved: %ebx, %esi, %ebp (saved/restored on the stack).
 * Overwritten: %eax, %ecx, %edx, %xmm0-%xmm7, flags.
 * Alignment: window and samples are accessed with movups (no alignment
 * needed). b0 and scale_sse are used directly as mulps memory operands,
 * which requires them to be 16-byte aligned.
 * ASM_NAME, ALIGN16 and NONEXEC_STACK are not defined in this file
 * (presumably they come from mangle.h -- confirm there).
 */
34 | .text |
||
35 | ALIGN16 |
||
36 | .globl ASM_NAME(synth_1to1_real_sse_asm) |
||
37 | ASM_NAME(synth_1to1_real_sse_asm): |
||
/* Prologue: build an %ebp frame, then save the two registers used as WINDOW and SAMPLES. */
38 | pushl %ebp |
||
39 | movl %esp, %ebp |
||
40 | pushl %ebx |
||
41 | pushl %esi |
||
42 | |||
/* Fetch the four stack arguments; bo1 is turned into a byte offset (bo1 * 4). */
43 | movl 8(%ebp), WINDOW |
||
44 | movl 12(%ebp), B0 |
||
45 | movl 16(%ebp), SAMPLES |
||
46 | movl 20(%ebp), %eax |
||
47 | shll $2, %eax |
||
48 | |||
/* WINDOW = window + 64 bytes - bo1*4 bytes (window + 16 - bo1 in 4-byte elements). */
49 | leal 64(WINDOW), WINDOW |
||
50 | subl %eax, WINDOW |
||
51 | |||
/* First loop: %ecx counts 4 iterations. */
52 | movl $4, %ecx |
||
53 | |||
54 | ALIGN16 |
||
55 | Loop_start_1: |
||
/* Rows 0 and 1 of this iteration: 16 window values at WINDOW+0 (xmm0-xmm3)
   and 16 at WINDOW+128 bytes (xmm4-xmm7). */
56 | movups (WINDOW), %xmm0 |
||
57 | movups 16(WINDOW), %xmm1 |
||
58 | movups 32(WINDOW), %xmm2 |
||
59 | movups 48(WINDOW), %xmm3 |
||
60 | movups 128(WINDOW), %xmm4 |
||
61 | movups 144(WINDOW), %xmm5 |
||
62 | movups 160(WINDOW), %xmm6 |
||
63 | movups 176(WINDOW), %xmm7 |
||
/* Element-wise products with 16 b0 values at B0+0 (row 0) and 16 at B0+64 bytes (row 1). */
64 | mulps 0(B0), %xmm0 |
||
65 | mulps 16(B0), %xmm1 |
||
66 | mulps 32(B0), %xmm2 |
||
67 | mulps 48(B0), %xmm3 |
||
68 | mulps 64(B0), %xmm4 |
||
69 | mulps 80(B0), %xmm5 |
||
70 | mulps 96(B0), %xmm6 |
||
71 | mulps 112(B0), %xmm7 |
||
/* Fold each row's four product registers into one register of 4 partial sums
   (lane k accumulates elements k, k+4, k+8, k+12): row 0 -> xmm0, row 1 -> xmm4. */
72 | addps %xmm1, %xmm0 |
||
73 | addps %xmm3, %xmm2 |
||
74 | addps %xmm5, %xmm4 |
||
75 | addps %xmm7, %xmm6 |
||
76 | addps %xmm2, %xmm0 |
||
77 | addps %xmm6, %xmm4 |
||
/* Park the partial sums so xmm0-xmm3 are free again: xmm4 = row 0, xmm5 = row 1. */
78 | movaps %xmm4, %xmm5 |
||
79 | movaps %xmm0, %xmm4 |
||
80 | |||
/* Step to the next row pair: WINDOW += 256 bytes, B0 += 128 bytes. */
81 | leal 256(WINDOW), WINDOW |
||
82 | leal 128(B0), B0 |
||
83 | |||
/* Rows 2 and 3: same computation, but only xmm0-xmm3, xmm6 and xmm7 are
   available, so the second row's last two loads are deferred and reuse xmm1/xmm3. */
84 | movups (WINDOW), %xmm0 |
||
85 | movups 16(WINDOW), %xmm1 |
||
86 | movups 32(WINDOW), %xmm2 |
||
87 | movups 48(WINDOW), %xmm3 |
||
88 | movups 128(WINDOW), %xmm6 |
||
89 | movups 144(WINDOW), %xmm7 |
||
90 | mulps (B0), %xmm0 |
||
91 | mulps 16(B0), %xmm1 |
||
92 | mulps 32(B0), %xmm2 |
||
93 | mulps 48(B0), %xmm3 |
||
94 | mulps 64(B0), %xmm6 |
||
95 | mulps 80(B0), %xmm7 |
||
96 | addps %xmm1, %xmm0 |
||
97 | addps %xmm3, %xmm2 |
||
98 | addps %xmm7, %xmm6 |
||
/* xmm1/xmm3 have been consumed above; reuse them for the rest of row 3. */
99 | movups 160(WINDOW), %xmm1 |
||
100 | movups 176(WINDOW), %xmm3 |
||
101 | mulps 96(B0), %xmm1 |
||
102 | mulps 112(B0), %xmm3 |
||
103 | addps %xmm2, %xmm0 |
||
104 | addps %xmm3, %xmm1 |
||
105 | addps %xmm1, %xmm6 |
||
/* Park: xmm6 = row 2 partial sums, xmm7 = row 3 partial sums. */
106 | movaps %xmm6, %xmm7 |
||
107 | movaps %xmm0, %xmm6 |
||
108 | |||
/* Step past rows 2 and 3: WINDOW += 256 bytes, B0 += 128 bytes. */
109 | leal 256(WINDOW), WINDOW |
||
110 | leal 128(B0), B0 |
||
111 | |||
/* 4x4 transpose of the row registers xmm4..xmm7. Afterwards xmm4, xmm6, xmm0
   and xmm1 hold lane 0, lane 1, lane 2 and lane 3 of all four rows. */
112 | movaps %xmm4, %xmm0 |
||
113 | movaps %xmm6, %xmm1 |
||
114 | unpcklps %xmm5, %xmm4 |
||
115 | unpcklps %xmm7, %xmm6 |
||
116 | unpckhps %xmm5, %xmm0 |
||
117 | unpckhps %xmm7, %xmm1 |
||
118 | movaps %xmm4, %xmm2 |
||
119 | movaps %xmm0, %xmm3 |
||
120 | movlhps %xmm6, %xmm4 |
||
121 | movhlps %xmm2, %xmm6 |
||
122 | movlhps %xmm1, %xmm0 |
||
123 | movhlps %xmm3, %xmm1 |
||
/* Alternating-sign reduction: xmm0 = (lane0 - lane1) + (lane2 - lane3), one result per row. */
124 | subps %xmm6, %xmm4 |
||
125 | subps %xmm1, %xmm0 |
||
126 | addps %xmm4, %xmm0 |
||
127 | |||
/* Output merge. Read the 8 values already at SAMPLES, scale the 4 new results,
   keep the existing odd-indexed values (shufps $0xdd picks elements 1 and 3 of
   each source), and interleave so new results land in the even slots.
   NOTE(review): the odd slots presumably belong to the other channel of an
   interleaved buffer -- confirm against the caller. */
128 | movups (SAMPLES), %xmm1 |
||
129 | movups 16(SAMPLES), %xmm2 |
||
130 | mulps ASM_NAME(scale_sse), %xmm0 |
||
131 | shufps $0xdd, %xmm2, %xmm1 |
||
132 | movaps %xmm0, %xmm2 |
||
133 | unpcklps %xmm1, %xmm0 |
||
134 | unpckhps %xmm1, %xmm2 |
||
135 | movups %xmm0, (SAMPLES) |
||
136 | movups %xmm2, 16(SAMPLES) |
||
137 | |||
/* Advance the output by 32 bytes (8 slots) and loop. */
138 | leal 32(SAMPLES), SAMPLES |
||
139 | decl %ecx |
||
140 | jnz Loop_start_1 |
||
141 | |||
/* Second loop: another 4 iterations. WINDOW keeps moving forwards, while B0
   (now 1024 bytes past its initial value) is walked backwards. */
142 | movl $4, %ecx |
||
143 | |||
144 | ALIGN16 |
||
145 | Loop_start_2: |
||
/* Rows 0 and 1: window values at WINDOW+0 and WINDOW+128 bytes, as in the first loop. */
146 | movups (WINDOW), %xmm0 |
||
147 | movups 16(WINDOW), %xmm1 |
||
148 | movups 32(WINDOW), %xmm2 |
||
149 | movups 48(WINDOW), %xmm3 |
||
150 | movups 128(WINDOW), %xmm4 |
||
151 | movups 144(WINDOW), %xmm5 |
||
152 | movups 160(WINDOW), %xmm6 |
||
153 | movups 176(WINDOW), %xmm7 |
||
/* Row 0 uses the b0 values at B0+0; row 1 uses the 16 values immediately
   below B0 (B0-64 bytes), because b0 is traversed in descending order here. */
154 | mulps 0(B0), %xmm0 |
||
155 | mulps 16(B0), %xmm1 |
||
156 | mulps 32(B0), %xmm2 |
||
157 | mulps 48(B0), %xmm3 |
||
158 | mulps -64(B0), %xmm4 |
||
159 | mulps -48(B0), %xmm5 |
||
160 | mulps -32(B0), %xmm6 |
||
161 | mulps -16(B0), %xmm7 |
||
/* Reduce to 4 partial sums per row and park them: xmm4 = row 0, xmm5 = row 1. */
162 | addps %xmm1, %xmm0 |
||
163 | addps %xmm3, %xmm2 |
||
164 | addps %xmm5, %xmm4 |
||
165 | addps %xmm7, %xmm6 |
||
166 | addps %xmm2, %xmm0 |
||
167 | addps %xmm6, %xmm4 |
||
168 | movaps %xmm4, %xmm5 |
||
169 | movaps %xmm0, %xmm4 |
||
170 | |||
/* Next row pair: WINDOW += 256 bytes, B0 -= 128 bytes. */
171 | leal 256(WINDOW), WINDOW |
||
172 | leal -128(B0), B0 |
||
173 | |||
/* Rows 2 and 3, with the same deferred-load register reuse as in the first loop. */
174 | movups (WINDOW), %xmm0 |
||
175 | movups 16(WINDOW), %xmm1 |
||
176 | movups 32(WINDOW), %xmm2 |
||
177 | movups 48(WINDOW), %xmm3 |
||
178 | movups 128(WINDOW), %xmm6 |
||
179 | movups 144(WINDOW), %xmm7 |
||
180 | mulps (B0), %xmm0 |
||
181 | mulps 16(B0), %xmm1 |
||
182 | mulps 32(B0), %xmm2 |
||
183 | mulps 48(B0), %xmm3 |
||
184 | mulps -64(B0), %xmm6 |
||
185 | mulps -48(B0), %xmm7 |
||
186 | addps %xmm1, %xmm0 |
||
187 | addps %xmm3, %xmm2 |
||
188 | addps %xmm7, %xmm6 |
||
189 | movups 160(WINDOW), %xmm1 |
||
190 | movups 176(WINDOW), %xmm3 |
||
191 | mulps -32(B0), %xmm1 |
||
192 | mulps -16(B0), %xmm3 |
||
193 | addps %xmm2, %xmm0 |
||
194 | addps %xmm3, %xmm1 |
||
195 | addps %xmm1, %xmm6 |
||
/* Park: xmm6 = row 2 partial sums, xmm7 = row 3 partial sums. */
196 | movaps %xmm6, %xmm7 |
||
197 | movaps %xmm0, %xmm6 |
||
198 | |||
/* Step past rows 2 and 3: WINDOW += 256 bytes, B0 -= 128 bytes. */
199 | leal 256(WINDOW), WINDOW |
||
200 | leal -128(B0), B0 |
||
201 | |||
/* Same 4x4 transpose as in the first loop: xmm4, xmm6, xmm0, xmm1 end up
   holding lane 0, 1, 2, 3 of all four rows. */
202 | movaps %xmm4, %xmm0 |
||
203 | movaps %xmm6, %xmm1 |
||
204 | unpcklps %xmm5, %xmm4 |
||
205 | unpcklps %xmm7, %xmm6 |
||
206 | unpckhps %xmm5, %xmm0 |
||
207 | unpckhps %xmm7, %xmm1 |
||
208 | movaps %xmm4, %xmm2 |
||
209 | movaps %xmm0, %xmm3 |
||
210 | movlhps %xmm6, %xmm4 |
||
211 | movhlps %xmm2, %xmm6 |
||
212 | movlhps %xmm1, %xmm0 |
||
213 | movhlps %xmm3, %xmm1 |
||
/* Unlike the first loop, all four lanes are added (no sign alternation). */
214 | addps %xmm6, %xmm4 |
||
215 | addps %xmm1, %xmm0 |
||
216 | addps %xmm4, %xmm0 |
||
217 | |||
/* Output merge, identical to the first loop: scale, keep the existing
   odd-indexed slots, write the new results into the even slots. */
218 | movups (SAMPLES), %xmm1 |
||
219 | movups 16(SAMPLES), %xmm2 |
||
220 | mulps ASM_NAME(scale_sse), %xmm0 |
||
221 | shufps $0xdd, %xmm2, %xmm1 |
||
222 | movaps %xmm0, %xmm2 |
||
223 | unpcklps %xmm1, %xmm0 |
||
224 | unpckhps %xmm1, %xmm2 |
||
225 | movups %xmm0, (SAMPLES) |
||
226 | movups %xmm2, 16(SAMPLES) |
||
227 | |||
228 | leal 32(SAMPLES), SAMPLES |
||
229 | decl %ecx |
||
230 | jnz Loop_start_2 |
||
231 | |||
/* Return value is always 0 (the header comment calls it the number of clipped samples). */
232 | xorl %eax, %eax |
||
233 | |||
/* Epilogue: restore the saved registers in reverse push order and drop the frame. */
234 | popl %esi |
||
235 | popl %ebx |
||
236 | movl %ebp, %esp |
||
237 | popl %ebp |
||
238 | |||
239 | ret |
||
240 | |||
/* NONEXEC_STACK is a macro not defined in this file; presumably it emits the
   marker for a non-executable stack -- confirm in mangle.h. */
241 | NONEXEC_STACK |