Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
576 | serge | 1 | /* |
2 | decode_i386.c: decode for i386 (really faster?) |
||
3 | |||
4 | copyright 1995-2006 by the mpg123 project - free software under the terms of the LGPL 2.1 |
||
5 | see COPYING and AUTHORS files in distribution or http://mpg123.de |
||
6 | initially written by Michael Hipp |
||
7 | |||
8 | slightly optimized for machines without autoincrement/decrement. |
||
9 | The performance is highly compiler dependent. Maybe |
||
10 | the decode.c version for 'normal' processor may be faster |
||
11 | even for Intel processors. |
||
12 | */ |
||
13 | |||
14 | //#include |
||
15 | #include |
||
16 | //#include |
||
17 | |||
18 | //#include "config.h" |
||
19 | #include "mpg123.h" |
||
20 | |||
#if 0
/* old WRITE_SAMPLE: plain comparison-based clipping, kept for reference */
#define WRITE_SAMPLE(samples,sum,clip) \
  if( (sum) > 32767.0) { *(samples) = 0x7fff; (clip)++; } \
  else if( (sum) < -32768.0) { *(samples) = -0x8000; (clip)++; } \
  else { *(samples) = sum; }
#else
/*
 * new WRITE_SAMPLE
 *
 * Rounds (sum) to an integer, clamps it to [-32768, 32767], stores it through
 * (samples) and increments (clip) whenever clamping happened.
 *
 * The rounding uses the "magic number" trick: adding 2^52 + 2^31 to the value
 * makes the FPU leave round(sum) + 2^31 in the low 32 bits of the double's
 * bit pattern.  This assumes a 64-bit IEEE-754 double and a 64-bit
 * unsigned long long sharing the same byte order.
 *
 * Compared to the pointer-cast version this
 *  - reads the bits through a union instead of *(int *)&double
 *    (no strict-aliasing violation),
 *  - masks the low word out of the full 64-bit pattern, so it no longer
 *    depends on which half of the double comes first in memory,
 *  - does the 2^31 subtraction in signed 64-bit arithmetic instead of relying
 *    on unsigned wrap-around followed by an unsigned->int conversion,
 *  - is wrapped in do { } while(0) so it behaves as a single statement.
 */
#define WRITE_SAMPLE(samples,sum,clip) do { \
  union { double d; unsigned long long bits; } ws_pun; \
  int ws_v; \
  ws_pun.d = ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) + (sum); \
  ws_v = (int)((long long)(ws_pun.bits & 0xffffffffULL) - 0x80000000LL); \
  if( ws_v > 32767) { *(samples) = 0x7fff; (clip)++; } \
  else if( ws_v < -32768) { *(samples) = -0x8000; (clip)++; } \
  else { *(samples) = ws_v; } \
} while(0)
#endif
||
39 | |||
40 | #if 0 |
||
41 | int synth_1to1_8bit(real *bandPtr,int channel,unsigned char *samples,int *pnt) |
||
42 | { |
||
43 | short samples_tmp[64]; |
||
44 | short *tmp1 = samples_tmp + channel; |
||
45 | int i,ret; |
||
46 | int pnt1 = 0; |
||
47 | |||
48 | ret = synth_1to1(bandPtr,channel,(unsigned char *)samples_tmp,&pnt1); |
||
49 | samples += channel + *pnt; |
||
50 | |||
51 | for(i=0;i<32;i++) { |
||
52 | *samples = conv16to8[*tmp1>>AUSHIFT]; |
||
53 | samples += 2; |
||
54 | tmp1 += 2; |
||
55 | } |
||
56 | *pnt += 64; |
||
57 | |||
58 | return ret; |
||
59 | } |
||
60 | |||
61 | int synth_1to1_8bit_mono(real *bandPtr,unsigned char *samples,int *pnt) |
||
62 | { |
||
63 | short samples_tmp[64]; |
||
64 | short *tmp1 = samples_tmp; |
||
65 | int i,ret; |
||
66 | int pnt1 = 0; |
||
67 | |||
68 | ret = synth_1to1(bandPtr,0,(unsigned char *)samples_tmp,&pnt1); |
||
69 | samples += *pnt; |
||
70 | |||
71 | for(i=0;i<32;i++) { |
||
72 | *samples++ = conv16to8[*tmp1>>AUSHIFT]; |
||
73 | tmp1+=2; |
||
74 | } |
||
75 | *pnt += 32; |
||
76 | |||
77 | return ret; |
||
78 | } |
||
79 | |||
80 | int synth_1to1_8bit_mono2stereo(real *bandPtr,unsigned char *samples,int *pnt) |
||
81 | { |
||
82 | short samples_tmp[64]; |
||
83 | short *tmp1 = samples_tmp; |
||
84 | int i,ret; |
||
85 | int pnt1 = 0; |
||
86 | |||
87 | ret = synth_1to1(bandPtr,0,(unsigned char *)samples_tmp,&pnt1); |
||
88 | samples += *pnt; |
||
89 | |||
90 | for(i=0;i<32;i++) { |
||
91 | *samples++ = conv16to8[*tmp1>>AUSHIFT]; |
||
92 | *samples++ = conv16to8[*tmp1>>AUSHIFT]; |
||
93 | tmp1 += 2; |
||
94 | } |
||
95 | *pnt += 64; |
||
96 | |||
97 | return ret; |
||
98 | } |
||
99 | |||
100 | int synth_1to1_mono(real *bandPtr,unsigned char *samples,int *pnt) |
||
101 | { |
||
102 | short samples_tmp[64]; |
||
103 | short *tmp1 = samples_tmp; |
||
104 | int i,ret; |
||
105 | int pnt1 = 0; |
||
106 | |||
107 | ret = synth_1to1(bandPtr,0,(unsigned char *) samples_tmp,&pnt1); |
||
108 | samples += *pnt; |
||
109 | |||
110 | for(i=0;i<32;i++) { |
||
111 | *( (short *) samples) = *tmp1; |
||
112 | samples += 2; |
||
113 | tmp1 += 2; |
||
114 | } |
||
115 | *pnt += 64; |
||
116 | |||
117 | return ret; |
||
118 | } |
||
119 | #endif |
||
120 | |||
121 | int synth_1to1_mono2stereo(real *bandPtr,unsigned char *samples,int *pnt) |
||
122 | { |
||
123 | int i,ret; |
||
124 | |||
125 | ret = synth_1to1(bandPtr,0,samples,pnt); |
||
126 | samples = samples + *pnt - 128; |
||
127 | |||
128 | for(i=0;i<32;i++) { |
||
129 | ((short *)samples)[1] = ((short *)samples)[0]; |
||
130 | samples+=4; |
||
131 | } |
||
132 | |||
133 | return ret; |
||
134 | } |
||
135 | |||
136 | |||
/*
 * State of the generic (non-PENTIUM_OPT) synth_1to1() path.
 * buffs[0] is used when channel == 0, buffs[1] otherwise; each holds two
 * 0x110-element halves that are handed to dct64() and read back by the
 * windowing loops.
 */
static real buffs[2][2][0x110];
/* Distance, in shorts, between two output samples written by synth_1to1(). */
static const int step = 2;
/*
 * Rolling offset into the buffs halves.  synth_1to1() decrements it and masks
 * it with 0xf on every channel-0 call; init_dct() resets it to 1.
 */
static int bo = 1;
||
140 | |||
141 | void init_dct() |
||
142 | { |
||
143 | bo = 1; |
||
144 | memset(buffs,0, sizeof(buffs)); |
||
145 | |||
146 | }; |
||
147 | |||
148 | |||
149 | int synth_1to1(real *bandPtr,int channel,unsigned char *out,int *pnt) |
||
150 | { |
||
151 | #ifndef PENTIUM_OPT |
||
152 | short *samples = (short *) (out + *pnt); |
||
153 | |||
154 | real *b0,(*buf)[0x110]; |
||
155 | int clip = 0; |
||
156 | int bo1; |
||
157 | #endif |
||
158 | |||
159 | // if(have_eq_settings) |
||
160 | // do_equalizer(bandPtr,channel); |
||
161 | |||
162 | #ifndef PENTIUM_OPT |
||
163 | if(!channel) { |
||
164 | bo--; |
||
165 | bo &= 0xf; |
||
166 | buf = buffs[0]; |
||
167 | } |
||
168 | else { |
||
169 | samples++; |
||
170 | buf = buffs[1]; |
||
171 | } |
||
172 | |||
173 | if(bo & 0x1) { |
||
174 | b0 = buf[0]; |
||
175 | bo1 = bo; |
||
176 | dct64(buf[1]+((bo+1)&0xf),buf[0]+bo,bandPtr); |
||
177 | } |
||
178 | else { |
||
179 | b0 = buf[1]; |
||
180 | bo1 = bo+1; |
||
181 | dct64(buf[0]+bo,buf[1]+bo+1,bandPtr); |
||
182 | } |
||
183 | |||
184 | { |
||
185 | register int j; |
||
186 | real *window = decwin + 16 - bo1; |
||
187 | |||
188 | for (j=16;j;j--,b0+=0x10,window+=0x20,samples+=step) |
||
189 | { |
||
190 | real sum; |
||
191 | sum = window[0x0] * b0[0x0]; |
||
192 | sum -= window[0x1] * b0[0x1]; |
||
193 | sum += window[0x2] * b0[0x2]; |
||
194 | sum -= window[0x3] * b0[0x3]; |
||
195 | sum += window[0x4] * b0[0x4]; |
||
196 | sum -= window[0x5] * b0[0x5]; |
||
197 | sum += window[0x6] * b0[0x6]; |
||
198 | sum -= window[0x7] * b0[0x7]; |
||
199 | sum += window[0x8] * b0[0x8]; |
||
200 | sum -= window[0x9] * b0[0x9]; |
||
201 | sum += window[0xA] * b0[0xA]; |
||
202 | sum -= window[0xB] * b0[0xB]; |
||
203 | sum += window[0xC] * b0[0xC]; |
||
204 | sum -= window[0xD] * b0[0xD]; |
||
205 | sum += window[0xE] * b0[0xE]; |
||
206 | sum -= window[0xF] * b0[0xF]; |
||
207 | |||
208 | WRITE_SAMPLE(samples,sum,clip); |
||
209 | } |
||
210 | |||
211 | { |
||
212 | real sum; |
||
213 | sum = window[0x0] * b0[0x0]; |
||
214 | sum += window[0x2] * b0[0x2]; |
||
215 | sum += window[0x4] * b0[0x4]; |
||
216 | sum += window[0x6] * b0[0x6]; |
||
217 | sum += window[0x8] * b0[0x8]; |
||
218 | sum += window[0xA] * b0[0xA]; |
||
219 | sum += window[0xC] * b0[0xC]; |
||
220 | sum += window[0xE] * b0[0xE]; |
||
221 | WRITE_SAMPLE(samples,sum,clip); |
||
222 | b0-=0x10,window-=0x20,samples+=step; |
||
223 | } |
||
224 | window += bo1<<1; |
||
225 | |||
226 | for (j=15;j;j--,b0-=0x10,window-=0x20,samples+=step) |
||
227 | { |
||
228 | real sum; |
||
229 | sum = -window[-0x1] * b0[0x0]; |
||
230 | sum -= window[-0x2] * b0[0x1]; |
||
231 | sum -= window[-0x3] * b0[0x2]; |
||
232 | sum -= window[-0x4] * b0[0x3]; |
||
233 | sum -= window[-0x5] * b0[0x4]; |
||
234 | sum -= window[-0x6] * b0[0x5]; |
||
235 | sum -= window[-0x7] * b0[0x6]; |
||
236 | sum -= window[-0x8] * b0[0x7]; |
||
237 | sum -= window[-0x9] * b0[0x8]; |
||
238 | sum -= window[-0xA] * b0[0x9]; |
||
239 | sum -= window[-0xB] * b0[0xA]; |
||
240 | sum -= window[-0xC] * b0[0xB]; |
||
241 | sum -= window[-0xD] * b0[0xC]; |
||
242 | sum -= window[-0xE] * b0[0xD]; |
||
243 | sum -= window[-0xF] * b0[0xE]; |
||
244 | sum -= window[-0x0] * b0[0xF]; |
||
245 | |||
246 | WRITE_SAMPLE(samples,sum,clip); |
||
247 | } |
||
248 | } |
||
249 | *pnt += 128; |
||
250 | |||
251 | return clip; |
||
252 | #elif defined(USE_MMX) |
||
253 | { |
||
254 | static short buffs[2][2][0x110]; |
||
255 | static int bo = 1; |
||
256 | short *samples = (short *) (out + *pnt); |
||
257 | synth_1to1_MMX(bandPtr, channel, samples, (short *) buffs, &bo); |
||
258 | *pnt += 128; |
||
259 | return 0; |
||
260 | } |
||
261 | #else |
||
262 | { |
||
263 | int ret; |
||
264 | ret = synth_1to1_pent(bandPtr,channel,out+*pnt); |
||
265 | *pnt += 128; |
||
266 | return ret; |
||
267 | } |
||
268 | #endif |
||
269 | }1; |