Subversion Repositories Kolibri OS

Rev

Rev 1905 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
1905 serge 1
/*
	decode_sse3d: Synth for SSE and extended 3DNow (yeah, the name is a relic)

	copyright 2006-2007 by Zuxy Meng/the mpg123 project - free software under the terms of the LGPL 2.1
	see COPYING and AUTHORS files in distribution or http://mpg123.org
	initially written by the mysterious higway for MMX (apparently)
	then developed into SSE opt by Zuxy Meng, also building on Romain Dolbeau's AltiVec
	Both have agreed to distribution under LGPL 2.1 .

	Transformed back into standalone asm, with help of
	gcc -S -DHAVE_CONFIG_H -I.  -march=pentium -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o decode_mmxsse.{S,c}

	The difference between SSE and 3DNowExt is the dct64 function and the synth function name.
	This template here uses the SYNTH_NAME and MPL_DCT64 macros for this - see decode_sse.S and decode_3dnowext.S...
	That's not memory efficient since there's doubled code, but it's easier than giving another function pointer.
	Maybe I'll change it in future, but now I need something that works.

	Original comment from MPlayer source follows:
*/

/*
 * this code comes under GPL
 * This code was taken from http://www.mpg123.org
 * See ChangeLog of mpg123-0.59s-pre.1 for detail
 * Applied to mplayer by Nick Kurshev
 *
 * Local ChangeLog:
 * - Partial loops unrolling and removing MOVW insn from loops
*/

#include "mangle.h"
	.data
/*
	64-bit word masks used by the synth routine below when it merges freshly
	computed 16-bit samples into the output buffer with pand/por:

	  one_null = 0xFFFF0000 in each dword: applied to the qword already in
	             memory, it keeps the upper 16-bit word of each dword.
	  null_one = 0x0000FFFF in each dword: applied to the new results, it
	             keeps the lower 16-bit word of each dword.

	Both are only ever read in this file; they are referenced by absolute
	address (no GOT/PIC indirection).
*/
	ALIGN8
one_null:
	.long	-65536
	.long	-65536
	ALIGN8
null_one:
	.long	65535
	.long	65535

	.text
	ALIGN16
	/* void SYNTH_NAME(real *bandPtr, int channel, short *samples, short *buffs, int *bo, float *decwins) */
/*
	Synthesis step: runs MPL_DCT64 on bandPtr into the buffs work area, then
	writes 32 16-bit output samples, one every 4 bytes, starting at
	(char *)samples + 2*channel. The other 16-bit word of every 4-byte slot
	touched by the 8-byte stores is read back and preserved.

	Calling convention: 32-bit, all arguments on the stack, caller cleans up.
	%ebp, %edi, %esi and %ebx are saved and restored here; %eax, %ecx, %edx
	and %mm0-%mm7 are clobbered. emms is executed before returning.
	No return value is set up (the prototype is void).

	Side effect on *bo: when channel != 1 the value is replaced by
	(*bo - 1) & 15; when channel == 1 it is only read.

	Requirements visible from the code:
	  - MPL_DCT64 takes three stack arguments and must preserve %ebx, %esi,
	    %edi and %ebp, since all four are used after the call without reload.
	  - The window data reached through decwins is consumed by pmaddwd, i.e.
	    as packed signed 16-bit words.
	    NOTE(review): the prototype says float *decwins - confirm the caller
	    really passes a 16-bit integer table.

	Register roles after the call to MPL_DCT64:
	  %esi = cursor into buffs (16-bit operands for pmaddwd)
	  %edx = cursor into the window table (16-bit operands for pmaddwd)
	  %edi = output cursor
	  %ecx = loop counter
*/
.globl SYNTH_NAME
SYNTH_NAME:
	pushl	%ebp
/* stack:0=ebp 4=back 8=bandptr 12=channel 16=samples 20=buffs 24=bo 28=decwins */
	movl	%esp, %ebp
/* Now the old stack addresses are preserved via %ebp. */
	subl  $4,%esp /* What has been called temp before. */
	pushl	%edi
	pushl	%esi
	pushl	%ebx
/*
	TEMP is the 4-byte local reserved above; it sits 12 bytes over %esp because
	three registers were pushed after it. The offset is only valid while %esp
	is at this post-prologue level, so TEMP must not be used while extra words
	are on the stack.
*/
#define TEMP 12(%esp)
/* APP */
	/* Load arguments: %ecx=channel, %edi=samples, %edx=bo, %esi=buffs, %eax=*bo. */
	movl 12(%ebp),%ecx
	movl 16(%ebp),%edi
	movl $15,%ebx	/* mask for the 0..15 wrap-around of the bo index */
	movl 24(%ebp),%edx
	leal (%edi,%ecx,2),%edi	/* output cursor = samples + channel 16-bit words */
	decl %ecx
	movl 20(%ebp),%esi
	movl (%edx),%eax
	jecxz .L01	/* %ecx = channel-1: skip the next four lines when channel == 1 */
	/* channel != 1: step bo back by one (mod 16), store it, use buffs+1088 bytes. */
	decl %eax
	andl %ebx,%eax
	leal 1088(%esi),%esi
	movl %eax,(%edx)
	.L01:
	/* Pick the two DCT output addresses from the current bo value in %eax. */
	leal (%esi,%eax,2),%edx	/* %edx = base + bo*2 bytes */
	movl %eax,TEMP	/* remember bo for the window offset below */
	incl %eax
	andl %ebx,%eax	/* %eax = (bo+1) & 15 */
	leal 544(%esi,%eax,2),%ecx	/* %ecx = base + 544 + ((bo+1)&15)*2 bytes */
	incl %ebx	/* %ebx = 16 from here on */
	testl $1, %eax
	jnz .L02
	/* (bo+1)&15 is even: swap the two targets, use bo+1 and the half at +544. */
	xchgl %edx,%ecx
	incl TEMP
	leal 544(%esi),%esi
	.L02:
	/* MPL_DCT64(%ecx, %edx, bandPtr): arguments pushed right to left. */
	pushl 8(%ebp)
	pushl %edx
	pushl %ecx
	call MPL_DCT64
	addl $12, %esp	/* drop the three arguments; TEMP is addressable again */
	leal 1(%ebx), %ecx	/* %ecx = 17 (given %ebx still holds 16 after the call) */
	subl TEMP,%ebx	/* %ebx = 16 - TEMP */
	pushl %ecx	/* keep the count on the stack; TEMP is not used past this point */
	/* leal ASM_NAME(decwins)(%ebx,%ebx,1), %edx */
	movl 28(%ebp),%ecx
	leal (%ecx,%ebx,2), %edx	/* window cursor = decwins + (16-TEMP)*2 bytes */
	movl (%esp),%ecx /* restore, but leave value on stack */
	shrl $1, %ecx	/* number of two-sample iterations: 17 >> 1 = 8 */
	ALIGN16
	.L03:
	/*
	   First pass, two samples per iteration. Each sample is the sum of 16
	   signed 16-bit products: 32 bytes of window times 32 bytes of buffer.
	   %mm0-%mm3 hold the first sample (window +0, buffer +0),
	   %mm4-%mm7 the second one (window +64, buffer +32).
	*/
	movq  (%edx),%mm0
	movq  64(%edx),%mm4
	pmaddwd (%esi),%mm0
	pmaddwd 32(%esi),%mm4
	movq  8(%edx),%mm1
	movq  72(%edx),%mm5
	pmaddwd 8(%esi),%mm1
	pmaddwd 40(%esi),%mm5
	movq  16(%edx),%mm2
	movq  80(%edx),%mm6
	pmaddwd 16(%esi),%mm2
	pmaddwd 48(%esi),%mm6
	movq  24(%edx),%mm3
	movq  88(%edx),%mm7
	pmaddwd 24(%esi),%mm3
	pmaddwd 56(%esi),%mm7
	/* Add the four partial results of each sample together. */
	paddd %mm1,%mm0
	paddd %mm5,%mm4
	paddd %mm2,%mm0
	paddd %mm6,%mm4
	paddd %mm3,%mm0
	paddd %mm7,%mm4
	/* Fold the upper dword into the lower one: low dword = full sum. */
	movq  %mm0,%mm1
	movq  %mm4,%mm5
	psrlq $32,%mm1
	psrlq $32,%mm5
	paddd %mm1,%mm0
	paddd %mm5,%mm4
	/* Scale down by 2^13 (arithmetic shift) and saturate to signed 16 bit. */
	psrad $13,%mm0
	psrad $13,%mm4
	packssdw %mm0,%mm0
	packssdw %mm4,%mm4
	/*
	   Merge into the output: sample 1 goes to the low word of the first
	   dword, sample 2 to the low word of the second dword; the high word of
	   each dword already in memory is kept.
	*/
	movq	(%edi), %mm1
	punpckldq %mm4, %mm0
	pand   one_null, %mm1
	pand   null_one, %mm0
	por    %mm0, %mm1
	movq   %mm1,(%edi)
	/* Advance: 64 bytes of buffer, 128 bytes of window, 8 bytes of output. */
	leal 64(%esi),%esi
	leal 128(%edx),%edx
	leal 8(%edi),%edi
	decl %ecx
	jnz  .L03
	/* Odd count left over? The saved count is 17, so bit 0 is set. */
	popl %ecx
	andl $1, %ecx
	jecxz .next_loop
	/* Single remaining sample of the first pass, stored as one 16-bit word. */
	movq  (%edx),%mm0
	pmaddwd (%esi),%mm0
	movq  8(%edx),%mm1
	pmaddwd 8(%esi),%mm1
	movq  16(%edx),%mm2
	pmaddwd 16(%esi),%mm2
	movq  24(%edx),%mm3
	pmaddwd 24(%esi),%mm3
	paddd %mm1,%mm0
	paddd %mm2,%mm0
	paddd %mm3,%mm0
	movq  %mm0,%mm1
	psrlq $32,%mm1
	paddd %mm1,%mm0
	psrad $13,%mm0
	packssdw %mm0,%mm0
	movd %mm0,%eax
	movw %ax, (%edi)
	leal 32(%esi),%esi
	leal 64(%edx),%edx
	leal 4(%edi),%edi
	.next_loop:
	/*
	   Second pass: the buffer cursor now walks backwards (64 bytes per
	   iteration) while the window cursor keeps moving forwards, and every
	   result is negated before it is stored. Seven two-sample iterations.
	*/
	subl $64,%esi
	movl $7,%ecx
	ALIGN16
	.L04:
	/* First sample uses buffer +0..+31, second sample buffer -32..-1. */
	movq  (%edx),%mm0
	movq  64(%edx),%mm4
	pmaddwd (%esi),%mm0
	pmaddwd -32(%esi),%mm4
	movq  8(%edx),%mm1
	movq  72(%edx),%mm5
	pmaddwd 8(%esi),%mm1
	pmaddwd -24(%esi),%mm5
	movq  16(%edx),%mm2
	movq  80(%edx),%mm6
	pmaddwd 16(%esi),%mm2
	pmaddwd -16(%esi),%mm6
	movq  24(%edx),%mm3
	movq  88(%edx),%mm7
	pmaddwd 24(%esi),%mm3
	pmaddwd -8(%esi),%mm7
	paddd %mm1,%mm0
	paddd %mm5,%mm4
	paddd %mm2,%mm0
	paddd %mm6,%mm4
	paddd %mm3,%mm0
	paddd %mm7,%mm4
	/* Horizontal add as above, but the totals end up in %mm1 / %mm5. */
	movq  %mm0,%mm1
	movq  %mm4,%mm5
	psrlq $32,%mm1
	psrlq $32,%mm5
	paddd %mm0,%mm1
	paddd %mm4,%mm5
	psrad $13,%mm1
	psrad $13,%mm5
	packssdw %mm1,%mm1
	packssdw %mm5,%mm5
	/* Negate with signed saturation: clear %mm0 / %mm4, then compute 0 - x. */
	psubd %mm0,%mm0
	psubd %mm4,%mm4
	psubsw %mm1,%mm0
	psubsw %mm5,%mm4
	/* Same masked merge into the output as in the first pass. */
	movq	(%edi), %mm1
	punpckldq %mm4, %mm0
	pand   one_null, %mm1
	pand   null_one, %mm0
	por    %mm0, %mm1
	movq   %mm1,(%edi)
	subl $64,%esi
	addl $128,%edx
	leal 8(%edi),%edi
	decl %ecx
	jnz  .L04
	/* Final single sample of the second pass: negated, stored as one word. */
	movq  (%edx),%mm0
	pmaddwd (%esi),%mm0
	movq  8(%edx),%mm1
	pmaddwd 8(%esi),%mm1
	movq  16(%edx),%mm2
	pmaddwd 16(%esi),%mm2
	movq  24(%edx),%mm3
	pmaddwd 24(%esi),%mm3
	paddd %mm1,%mm0
	paddd %mm2,%mm0
	paddd %mm3,%mm0
	movq  %mm0,%mm1
	psrlq $32,%mm1
	paddd %mm0,%mm1
	psrad $13,%mm1
	packssdw %mm1,%mm1
	psubd %mm0,%mm0
	psubsw %mm1,%mm0
	movd %mm0,%eax
	movw %ax,(%edi)
	emms	/* leave MMX state so the caller can use the x87 FPU again */

/* NO_APP */
	/* Epilogue: restore saved registers, release TEMP, restore the frame. */
	popl	%ebx
	popl	%esi
	popl	%edi
	addl $4,%esp
	popl	%ebp
	ret