Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
3960 Serge 1
/*
2
 decode_i586: asm synth
3
 
4
 copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
5
 see COPYING and AUTHORS files in distribution or http://mpg123.org
6
 initially written by Stefan Bieschewski
7
 
8
 synth_1to1 works the same way as the c version of this
9
 file.  only two types of changes have been made:
10
 - reordered floating point instructions to
11
   prevent pipeline stalls
12
 - made WRITE_SAMPLE use integer instead of
13
   (slower) floating point
14
 all kinds of x86 processors should benefit from these
15
 modifications.
16
 
17
 useful sources of information on optimizing x86 code include:
18
 
19
     Intel Architecture Optimization Manual
20
     http://www.intel.com/design/pentium/manuals/242816.htm
21
 
22
     Cyrix 6x86 Instruction Set Summary
23
     ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf
24
 
25
     AMD-K5 Processor Software Development
26
     http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf
27
 
28
 Stefan Bieschewski 
29
 
30
 $Id: decode_i586.s 1 2004-09-18 13:30:08Z thomas $
31
*/
32
 
33
#include "mangle.h"
34
 
35
/*
 * Constant pool.
 *
 * On non-Apple targets the constants go into .rodata; on Apple targets the
 * #ifndef leaves them in .data.
 *
 * Each constant is two 32-bit words, low word first.  Read as a
 * little-endian IEEE-754 double they are:
 *   .LC0 = 0x40dfffc0_00000000 =  32767.0
 *   .LC1 = 0xc0e00000_00000000 = -32768.0
 * i.e. the signed 16-bit sample limits.
 *
 * NOTE(review): no instruction in the synth_1to1_i586_asm routine visible in
 * this file references .LC0 or .LC1 (clipping there is done with integer
 * compares) - presumably left over from a floating-point WRITE_SAMPLE;
 * confirm nothing else uses them before removing.
 *
 * ALIGN8 is a macro that is not defined in this file (presumably supplied by
 * mangle.h as an 8-byte alignment directive - confirm there).
 */
.data
#ifndef __APPLE__
.section .rodata
#endif
	ALIGN8
.LC0:
	.long 0x0,0x40dfffc0
	ALIGN8
.LC1:
	.long 0x0,0xc0e00000
	ALIGN8
.text
47
/*
 * int synth_1to1_i586_asm(real *bandPtr, int channel, unsigned char *out,
 *                         unsigned char *buffs, int *bo, real *decwin);
 *
 * 32-bit x86, AT&T syntax.  All six arguments are read from the stack.
 *
 * What the code does:
 *   1. For channel == 0 only: *bo = (*bo - 1) & 15, written back to the caller.
 *      For channel != 0 the stored *bo is used unchanged, the output pointer
 *      is advanced by 2 bytes and the buffer base by 2176 bytes.
 *   2. Calls dct64_i386(dstA, dstB, bandPtr), with the two destinations
 *      chosen from the halves of the channel buffer (second half starts at
 *      byte offset 1088) depending on whether bo is odd or even.
 *   3. Writes 16 + 1 + 15 = 32 samples.  Each sample is a sum of products of
 *      32-bit floats (flds/fmuls) from the window (%ecx) and the dct output
 *      (%ebx), converted to integer with fistpl, clamped to [-32768, 32767]
 *      and stored as a 16-bit word.  The output pointer advances 4 bytes per
 *      sample.
 *
 * Returns (%eax): the number of samples that had to be clamped.
 *
 * Register roles once setup is complete:
 *   %ebx      current position in the dct64 output ("b0")
 *   %ecx      current position in the window table (derived from decwin)
 *   %esi      output pointer
 *   %edi      clip counter (this is the return value)
 *   %ebp      bo during setup, loop counter afterwards
 *   16(%esp)  local "bo1": bo if bo is odd, bo + 1 if bo is even
 *
 * %ebx, %esi and %edi are expected to survive the call to dct64_i386; %eax,
 * %ecx and %edx are recomputed after it.  The x87 register stack is left as
 * it was found: every sample ends with a popping fistpl.
 */
.globl ASM_NAME(synth_1to1_i586_asm)
ASM_NAME(synth_1to1_i586_asm):
	/* 12 bytes of locals (only the slot at 16(%esp) is used), then save the
	   callee-saved registers. */
	subl $12,%esp
	pushl %ebp
	pushl %edi
	pushl %esi
	pushl %ebx
/* stack: 0=ebx, 4=esi, 8=edi, 12=ebp, 16,20,24=local, 28=back, 32=bandPtr, 36=channel, 40=out, 44=buffs, 48=bo, 52=decwin */
	movl 32(%esp),%eax /* eax = bandPtr */
	movl 40(%esp),%esi /* esi = out */
	movl 48(%esp),%edi /* edi = bo (pointer) */
	movl (%edi),%ebp   /* ebp = *bo */
	xorl %edi,%edi     /* edi = 0: doubles as the zero to compare against and as the clip counter */
	cmpl %edi,36(%esp)
	jne .L48           /* channel != 0: skip the bo update */

	/* channel == 0: step bo and store it back for the caller */
	decl %ebp          /* bo-- */
	andl $15,%ebp      /* bo &= 0xf */
	movl 48(%esp),	%edi /* edi = bo (pointer) again */
	movl %ebp,(%edi)   /* *bo = new value */
	xorl %edi,%edi     /* restore %edi to 0; it is the clip counter from here on */
	movl 44(%esp),%ecx /* ecx = buffs (first channel buffer) */
	jmp .L49

.L48: /* channel != 0: odd output slot, second channel buffer */
	addl $2,%esi       /* out += 2 bytes (one 16-bit sample) */
	movl 44(%esp),%ecx /* ecx = buffs */
	addl $2176,%ecx    /* ecx = buffs + 2176 bytes (2 * 1088) */

.L49:
	/* Pick dct64 destinations by parity of bo.  Arguments are pushed last to
	   first, so the final push (at .L74) is the first argument. */
	testl $1,%ebp
	je .L50

	/* bo odd: b0 = first half, bo1 = bo,
	   dct64(second_half + ((bo+1)&15), first_half + bo, bandPtr) */
	movl %ecx,%ebx
	movl %ebp,16(%esp)      /* bo1 = bo */
	pushl %eax              /* arg 3: bandPtr */
	movl 20(%esp),%edx      /* edx = bo1 (slot moved up 4 by the push) */
	leal (%ebx,%edx,4),%eax /* first_half + bo floats */
	pushl %eax              /* arg 2 */
	movl 24(%esp),%eax      /* eax = bo1 (slot moved up another 4) */
	incl %eax
	andl $15,%eax
	leal 1088(,%eax,4),%eax /* byte offset of second_half[(bo+1)&15] */
	addl %ebx,%eax          /* arg 1, pushed at .L74 */
	jmp .L74

.L50:
	/* bo even: b0 = second half, bo1 = bo + 1,
	   dct64(first_half + bo, second_half + bo + 1, bandPtr) */
	leal 1088(%ecx),%ebx
	leal 1(%ebp),%edx
	movl %edx,16(%esp)           /* bo1 = bo + 1 */
	pushl %eax                   /* arg 3: bandPtr */
	leal 1092(%ecx,%ebp,4),%eax  /* second_half + (bo + 1) floats */
	pushl %eax                   /* arg 2 */
	leal (%ecx,%ebp,4),%eax      /* first_half + bo floats: arg 1 */
.L74:
	pushl %eax
	call ASM_NAME(dct64_i386)
	addl $12,%esp                /* caller removes the three arguments */
/* stack now back on track */

	/* window = decwin + 64 bytes - bo1 * 4 bytes */
	movl 16(%esp),%edx
	leal 0(,%edx,4),%edx
	movl 52(%esp),%eax /* decwin */
	addl $64,%eax
	movl %eax,%ecx
	subl %edx,%ecx
	movl $16,%ebp      /* first loop: 16 samples */

/*
 * Loop 1 (16 iterations).  Per sample: 16 products window[i] * b0[i] for byte
 * offsets 0..60, combined with alternating fsubrp / faddp.  Loads, multiplies
 * and fxch are interleaved so a new multiply starts while the previous one is
 * still in flight - do not reorder.
 * NOTE(review): presumably this evaluates w[0]*b0[0] - w[1]*b0[1] + w[2]*b0[2]
 * - ... like the C synth mentioned in the file header; the reversed-operand
 * behaviour of AT&T fsubrp makes the sign hard to read off - verify against
 * the C version before changing anything here.
 */
.L55:
	flds (%ecx)
	fmuls (%ebx)
	flds 4(%ecx)
	fmuls 4(%ebx)
	fxch %st(1)
	flds 8(%ecx)
	fmuls 8(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds 12(%ecx)
	fmuls 12(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 16(%ecx)
	fmuls 16(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds 20(%ecx)
	fmuls 20(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 24(%ecx)
	fmuls 24(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds 28(%ecx)
	fmuls 28(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 32(%ecx)
	fmuls 32(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds 36(%ecx)
	fmuls 36(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 40(%ecx)
	fmuls 40(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds 44(%ecx)
	fmuls 44(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 48(%ecx)
	fmuls 48(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds 52(%ecx)
	fmuls 52(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 56(%ecx)
	fmuls 56(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds 60(%ecx)
	fmuls 60(%ebx)
	fxch %st(2)
	subl $4,%esp       /* scratch slot for the integer result, reserved between FPU ops */
	faddp %st,%st(1)
	fxch %st(1)
	fsubrp %st,%st(1)
	fistpl (%esp)      /* float -> int32 (current x87 rounding mode), pops the FPU stack */
	popl %eax          /* eax = sample; releases the scratch slot */
	/* clamp to signed 16 bit; a clamped sample bumps the clip counter */
	cmpl $32767,%eax
	jg 1f
	cmpl $-32768,%eax
	jl 2f
	movw %ax,(%esi)
	jmp 4f
1:	movw $32767,(%esi)
	jmp 3f
2:	movw $-32768,(%esi)
3:	incl %edi
4:
.L54:
	addl $64,%ebx      /* b0 += 64 bytes */
	subl $-128,%ecx    /* window += 128 bytes (-128 fits a sign-extended 8-bit immediate, +128 does not) */
	addl $4,%esi       /* out += 4 bytes */
	decl %ebp
	jnz .L55

/*
 * Middle sample: plain sum of the eight products at byte offsets
 * 0, 8, 16, ..., 56 (every second float); all combines are faddp.
 */
	flds (%ecx)
	fmuls (%ebx)
	flds 8(%ecx)
	fmuls 8(%ebx)
	flds 16(%ecx)
	fmuls 16(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 24(%ecx)
	fmuls 24(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 32(%ecx)
	fmuls 32(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 40(%ecx)
	fmuls 40(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 48(%ecx)
	fmuls 48(%ebx)
	fxch %st(2)
	faddp %st,%st(1)
	flds 56(%ecx)
	fmuls 56(%ebx)
	fxch %st(2)
	subl $4,%esp       /* scratch slot for fistpl */
	faddp %st,%st(1)
	fxch %st(1)
	faddp %st,%st(1)
	fistpl (%esp)
	popl %eax
	/* clamp to signed 16 bit, counting clips */
	cmpl $32767,%eax
	jg 1f
	cmpl $-32768,%eax
	jl 2f
	movw %ax,(%esi)
	jmp 4f
1:	movw $32767,(%esi)
	jmp 3f
2:	movw $-32768,(%esi)
3:	incl %edi
4:
.L62:
	addl $-64,%ebx               /* b0 -= 64 bytes: start walking b0 backwards */
	addl $4,%esi                 /* out += 4 bytes */
	movl 16(%esp),%edx           /* edx = bo1 */
	leal -128(%ecx,%edx,8),%ecx  /* window = window - 128 bytes + bo1 * 8 bytes */
	movl $15,%ebp                /* second loop: 15 samples */

/*
 * Loop 2 (15 iterations).  The window is read backwards (-4, -8, ..., -60,
 * then offset 0) while b0 is read forwards (0..60).  The first product is
 * negated with fchs and every combine is fsubrp.
 * NOTE(review): presumably -w[-1]*b0[0] - w[-2]*b0[1] - ... - w[0]*b0[15] as
 * in the C synth; verify against the C version.
 */
.L68:
	flds -4(%ecx)
	fchs
	fmuls (%ebx)
	flds -8(%ecx)
	fmuls 4(%ebx)
	fxch %st(1)
	flds -12(%ecx)
	fmuls 8(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -16(%ecx)
	fmuls 12(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -20(%ecx)
	fmuls 16(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -24(%ecx)
	fmuls 20(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -28(%ecx)
	fmuls 24(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -32(%ecx)
	fmuls 28(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -36(%ecx)
	fmuls 32(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -40(%ecx)
	fmuls 36(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -44(%ecx)
	fmuls 40(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -48(%ecx)
	fmuls 44(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -52(%ecx)
	fmuls 48(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -56(%ecx)
	fmuls 52(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds -60(%ecx)
	fmuls 56(%ebx)
	fxch %st(2)
	fsubrp %st,%st(1)
	flds (%ecx)
	fmuls 60(%ebx)
	fxch %st(2)
	subl $4,%esp       /* scratch slot for fistpl */
	fsubrp %st,%st(1)
	fxch %st(1)
	fsubrp %st,%st(1)
	fistpl (%esp)
	popl %eax
	/* clamp to signed 16 bit, counting clips */
	cmpl $32767,%eax
	jg 1f
	cmpl $-32768,%eax
	jl 2f
	movw %ax,(%esi)
	jmp 4f
1:	movw $32767,(%esi)
	jmp 3f
2:	movw $-32768,(%esi)
3:	incl %edi
4:
.L67:
	addl $-64,%ebx     /* b0 -= 64 bytes */
	addl $-128,%ecx    /* window -= 128 bytes */
	addl $4,%esi       /* out += 4 bytes */
	decl %ebp
	jnz .L68

	/* return the clip count; restore callee-saved registers and drop locals */
	movl %edi,%eax
	popl %ebx
	popl %esi
	popl %edi
	popl %ebp
	addl $12,%esp
	ret

/* NONEXEC_STACK is a macro that is not defined in this file (presumably from
   mangle.h, marking the object as not needing an executable stack - confirm
   there). */
NONEXEC_STACK