Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
3960 Serge 1
/*
	synth_sse_float: SSE optimized synth (float output version)

	copyright 1995-2009 by the mpg123 project - free software under the terms of the LGPL 2.1
	see COPYING and AUTHORS files in distribution or http://mpg123.org
	initially written by Taihei Monma
*/
 
9
#include "mangle.h"
10
 
11
/*
	Register aliases for the pointer arguments of synth_1to1_real_sse_asm.
	%ebx and %esi are pushed in that function's prologue and popped before
	it returns; %edx is used without being saved.
*/
/* real *window; loaded from 8(%ebp) */
#define WINDOW %ebx
/* real *b0; loaded from 12(%ebp) */
#define B0 %edx
/* real *samples; loaded from 16(%ebp) */
#define SAMPLES %esi

/*
	int synth_1to1_real_sse_asm(real *window, real *b0, real *samples, int bo1);
	return value: number of clipped samples (always 0; %eax is cleared before returning)
*/

/*
	scale_sse: four identical 32-bit words used as a packed-single mulps
	operand by synth_1to1_real_sse_asm.
	0x38000000 (= 939524096) is the IEEE-754 single-precision bit pattern
	of 2^-15 (1/32768), so every result lane is multiplied by 1/32768.
	NOTE(review): presumably this rescales 16-bit-range synth output to
	roughly [-1, 1) float samples - confirm against the C float synth.
	ALIGN32 precedes the label; mulps with a memory operand needs at least
	16-byte alignment.
*/
#ifndef __APPLE__
	.section	.rodata
#else
	.data
#endif
	ALIGN32
ASM_NAME(scale_sse):
	.long   0x38000000
	.long   0x38000000
	.long   0x38000000
	.long   0x38000000
	.text
	ALIGN16
/*
	int synth_1to1_real_sse_asm(real *window, real *b0, real *samples, int bo1);

	Arguments are read from the stack after the frame is set up:
	   8(%ebp)  window   -> WINDOW
	  12(%ebp)  b0       -> B0
	  16(%ebp)  samples  -> SAMPLES
	  20(%ebp)  bo1      -> %eax, shifted left by 2 (4-byte elements)

	Work done:
	  Two loops of 4 iterations each. Every iteration forms 4 results; each
	  result is built from 16 window values multiplied element-wise with
	  16 b0 values and reduced to 4 lane sums L0..L3.
	    Loop_start_1: result = L0 - L1 + L2 - L3, b0 is walked upwards.
	    Loop_start_2: result = L0 + L1 + L2 + L3, b0 is walked downwards.
	  Each result is multiplied by scale_sse and stored into every second
	  4-byte slot of samples[] (slots 0, 2, 4, ...); the odd slots are read
	  and written back unchanged. 32 results / 256 bytes are covered in total.

	Return value: %eax = 0.
	Preserved: %ebp, %ebx, %esi (pushed/popped here).
	Overwritten: %eax, %ecx, %edx, %xmm0-%xmm7, flags.

	Alignment: b0 is used directly as a mulps memory operand, so it must be
	16-byte aligned. window and samples are only accessed through movups.
*/
.globl ASM_NAME(synth_1to1_real_sse_asm)
ASM_NAME(synth_1to1_real_sse_asm):
	/* Prologue: frame pointer plus the two callee-saved registers used below. */
	pushl		%ebp
	movl		%esp, %ebp
	pushl		%ebx
	pushl		%esi

	/* Fetch the four stack arguments. */
	movl		8(%ebp), WINDOW
	movl		12(%ebp), B0
	movl		16(%ebp), SAMPLES
	movl		20(%ebp), %eax
	shll		$2, %eax			/* bo1 -> byte offset (4 bytes per element) */

	/* WINDOW = window + 64 bytes - 4*bo1 */
	leal		64(WINDOW), WINDOW
	subl		%eax, WINDOW

	movl		$4, %ecx			/* first loop: 4 iterations, 4 results each */

	ALIGN16
Loop_start_1:
	/* Rows 0 and 1: 16 window values at WINDOW+0 and at WINDOW+128 ... */
	movups		(WINDOW), %xmm0
	movups		16(WINDOW), %xmm1
	movups		32(WINDOW), %xmm2
	movups		48(WINDOW), %xmm3
	movups		128(WINDOW), %xmm4
	movups		144(WINDOW), %xmm5
	movups		160(WINDOW), %xmm6
	movups		176(WINDOW), %xmm7
	/* ... times 16 b0 values at B0+0 and at B0+64. */
	mulps		0(B0), %xmm0
	mulps		16(B0), %xmm1
	mulps		32(B0), %xmm2
	mulps		48(B0), %xmm3
	mulps		64(B0), %xmm4
	mulps		80(B0), %xmm5
	mulps		96(B0), %xmm6
	mulps		112(B0), %xmm7
	/* Reduce each row to 4 lane sums. */
	addps		%xmm1, %xmm0
	addps		%xmm3, %xmm2
	addps		%xmm5, %xmm4
	addps		%xmm7, %xmm6
	addps		%xmm2, %xmm0
	addps		%xmm6, %xmm4
	movaps		%xmm4, %xmm5		/* xmm5 = row 1 lane sums */
	movaps		%xmm0, %xmm4		/* xmm4 = row 0 lane sums */

	leal		256(WINDOW), WINDOW
	leal		128(B0), B0

	/* Rows 2 and 3, same pattern; xmm1/xmm3 are reused for the last two loads. */
	movups		(WINDOW), %xmm0
	movups		16(WINDOW), %xmm1
	movups		32(WINDOW), %xmm2
	movups		48(WINDOW), %xmm3
	movups		128(WINDOW), %xmm6
	movups		144(WINDOW), %xmm7
	mulps		(B0), %xmm0
	mulps		16(B0), %xmm1
	mulps		32(B0), %xmm2
	mulps		48(B0), %xmm3
	mulps		64(B0), %xmm6
	mulps		80(B0), %xmm7
	addps		%xmm1, %xmm0
	addps		%xmm3, %xmm2
	addps		%xmm7, %xmm6
	movups		160(WINDOW), %xmm1
	movups		176(WINDOW), %xmm3
	mulps		96(B0), %xmm1
	mulps		112(B0), %xmm3
	addps		%xmm2, %xmm0
	addps		%xmm3, %xmm1
	addps		%xmm1, %xmm6
	movaps		%xmm6, %xmm7		/* xmm7 = row 3 lane sums */
	movaps		%xmm0, %xmm6		/* xmm6 = row 2 lane sums */

	leal		256(WINDOW), WINDOW
	leal		128(B0), B0

	/*
		4x4 transpose of rows xmm4..xmm7. Afterwards:
		xmm4 = lane 0 of rows 0..3, xmm6 = lane 1,
		xmm0 = lane 2 of rows 0..3, xmm1 = lane 3.
	*/
	movaps		%xmm4, %xmm0
	movaps		%xmm6, %xmm1
	unpcklps	%xmm5, %xmm4
	unpcklps	%xmm7, %xmm6
	unpckhps	%xmm5, %xmm0
	unpckhps	%xmm7, %xmm1
	movaps		%xmm4, %xmm2
	movaps		%xmm0, %xmm3
	movlhps		%xmm6, %xmm4
	movhlps		%xmm2, %xmm6
	movlhps		%xmm1, %xmm0
	movhlps		%xmm3, %xmm1
	/* xmm0 = L0 - L1 + L2 - L3 for each of the 4 rows. */
	subps		%xmm6, %xmm4
	subps		%xmm1, %xmm0
	addps		%xmm4, %xmm0

	/* Merge the 4 scaled results into the even slots of 8 sample slots. */
	movups		(SAMPLES), %xmm1
	movups		16(SAMPLES), %xmm2
	mulps		ASM_NAME(scale_sse), %xmm0
	shufps		$0xdd, %xmm2, %xmm1	/* xmm1 = existing slots 1, 3, 5, 7 */
	movaps		%xmm0, %xmm2
	unpcklps	%xmm1, %xmm0		/* r0, s1, r1, s3 */
	unpckhps	%xmm1, %xmm2		/* r2, s5, r3, s7 */
	movups		%xmm0, (SAMPLES)
	movups		%xmm2, 16(SAMPLES)

	leal		32(SAMPLES), SAMPLES
	decl		%ecx
	jnz			Loop_start_1

	movl		$4, %ecx			/* second loop: 4 more iterations */

	ALIGN16
Loop_start_2:
	/* Rows 0 and 1: window still advances; b0 rows are taken at B0+0 and B0-64. */
	movups		(WINDOW), %xmm0
	movups		16(WINDOW), %xmm1
	movups		32(WINDOW), %xmm2
	movups		48(WINDOW), %xmm3
	movups		128(WINDOW), %xmm4
	movups		144(WINDOW), %xmm5
	movups		160(WINDOW), %xmm6
	movups		176(WINDOW), %xmm7
	mulps		0(B0), %xmm0
	mulps		16(B0), %xmm1
	mulps		32(B0), %xmm2
	mulps		48(B0), %xmm3
	mulps		-64(B0), %xmm4
	mulps		-48(B0), %xmm5
	mulps		-32(B0), %xmm6
	mulps		-16(B0), %xmm7
	addps		%xmm1, %xmm0
	addps		%xmm3, %xmm2
	addps		%xmm5, %xmm4
	addps		%xmm7, %xmm6
	addps		%xmm2, %xmm0
	addps		%xmm6, %xmm4
	movaps		%xmm4, %xmm5		/* xmm5 = row 1 lane sums */
	movaps		%xmm0, %xmm4		/* xmm4 = row 0 lane sums */

	leal		256(WINDOW), WINDOW
	leal		-128(B0), B0		/* b0 is walked downwards in this loop */

	/* Rows 2 and 3, same pattern. */
	movups		(WINDOW), %xmm0
	movups		16(WINDOW), %xmm1
	movups		32(WINDOW), %xmm2
	movups		48(WINDOW), %xmm3
	movups		128(WINDOW), %xmm6
	movups		144(WINDOW), %xmm7
	mulps		(B0), %xmm0
	mulps		16(B0), %xmm1
	mulps		32(B0), %xmm2
	mulps		48(B0), %xmm3
	mulps		-64(B0), %xmm6
	mulps		-48(B0), %xmm7
	addps		%xmm1, %xmm0
	addps		%xmm3, %xmm2
	addps		%xmm7, %xmm6
	movups		160(WINDOW), %xmm1
	movups		176(WINDOW), %xmm3
	mulps		-32(B0), %xmm1
	mulps		-16(B0), %xmm3
	addps		%xmm2, %xmm0
	addps		%xmm3, %xmm1
	addps		%xmm1, %xmm6
	movaps		%xmm6, %xmm7		/* xmm7 = row 3 lane sums */
	movaps		%xmm0, %xmm6		/* xmm6 = row 2 lane sums */

	leal		256(WINDOW), WINDOW
	leal		-128(B0), B0

	/* Same 4x4 transpose as in the first loop. */
	movaps		%xmm4, %xmm0
	movaps		%xmm6, %xmm1
	unpcklps	%xmm5, %xmm4
	unpcklps	%xmm7, %xmm6
	unpckhps	%xmm5, %xmm0
	unpckhps	%xmm7, %xmm1
	movaps		%xmm4, %xmm2
	movaps		%xmm0, %xmm3
	movlhps		%xmm6, %xmm4
	movhlps		%xmm2, %xmm6
	movlhps		%xmm1, %xmm0
	movhlps		%xmm3, %xmm1
	/* xmm0 = L0 + L1 + L2 + L3 for each of the 4 rows (all lanes added here). */
	addps		%xmm6, %xmm4
	addps		%xmm1, %xmm0
	addps		%xmm4, %xmm0

	/* Merge the 4 scaled results into the even slots of 8 sample slots. */
	movups		(SAMPLES), %xmm1
	movups		16(SAMPLES), %xmm2
	mulps		ASM_NAME(scale_sse), %xmm0
	shufps		$0xdd, %xmm2, %xmm1	/* xmm1 = existing slots 1, 3, 5, 7 */
	movaps		%xmm0, %xmm2
	unpcklps	%xmm1, %xmm0		/* r0, s1, r1, s3 */
	unpckhps	%xmm1, %xmm2		/* r2, s5, r3, s7 */
	movups		%xmm0, (SAMPLES)
	movups		%xmm2, 16(SAMPLES)

	leal		32(SAMPLES), SAMPLES
	decl		%ecx
	jnz			Loop_start_2

	xorl		%eax, %eax			/* return 0 */

	/* Epilogue: restore in reverse order of the prologue pushes. */
	popl		%esi
	popl		%ebx
	movl		%ebp, %esp
	popl		%ebp

	ret

/* NOTE(review): NONEXEC_STACK is not defined in this file (presumably a mangle.h macro); going by its name it marks this object as not needing an executable stack - confirm. */
NONEXEC_STACK