Rev 1905 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
1905 | serge | 1 | /* |
2 | optimize: get a grip on the different optimizations |
||
3 | |||
4 | copyright 2006-9 by the mpg123 project - free software under the terms of the LGPL 2.1 |
||
5 | see COPYING and AUTHORS files in distribution or http://mpg123.org |
||
6 | initially written by Thomas Orgis, inspired by 3DNow stuff in mpg123.[hc] |
||
7 | |||
8 | Currently, this file contains the struct and function to choose an optimization variant and works only when OPT_MULTI is in effect. |
||
9 | */ |
||
10 | |||
11 | #include "mpg123lib_intern.h" /* includes optimize.h */ |
||
12 | #include "debug.h" |
||
13 | |||
14 | /* Must match the enum dectype! */ |
||
15 | |||
16 | /* |
||
17 | It SUCKS having to define these names that way, but compile-time initialization of string arrays is a bitch. |
||
18 | GCC doesn't see constant stuff when it's wiggling in front of it! |
||
19 | Anyhow: Have a script for that: |
||
20 | names="generic generic_dither i386 i486 i586 i586_dither MMX 3DNow 3DNowExt AltiVec SSE x86-64 ARM NEON" |
||
21 | for i in $names; do echo "##define dn_${i/-/_} \"$i\""; done |
||
22 | echo -n "static const char* decname[] = |
||
23 | { |
||
24 | \"auto\" |
||
25 | " |
||
26 | for i in $names; do echo -n ", dn_${i/-/_}"; done |
||
27 | echo " |
||
28 | , \"nodec\" |
||
29 | };" |
||
30 | */ |
||
/* Printable decoder names. The dn_* macros are shared between decname[]
   below and mpg123_decoder_list[] further down in this file. */
#define dn_generic        "generic"
#define dn_generic_dither "generic_dither"
#define dn_i386           "i386"
#define dn_i486           "i486"
#define dn_i586           "i586"
#define dn_i586_dither    "i586_dither"
#define dn_MMX            "MMX"
#define dn_3DNow          "3DNow"
#define dn_3DNowExt       "3DNowExt"
#define dn_AltiVec        "AltiVec"
#define dn_SSE            "SSE"
#define dn_x86_64         "x86-64"
#define dn_ARM            "ARM"
#define dn_NEON           "NEON"

/* Name lookup table. It is indexed with enum optdec values throughout this
   file, so the order of the entries must follow that enum exactly. */
static const char* decname[] =
{
	"auto",
	dn_generic,
	dn_generic_dither,
	dn_i386,
	dn_i486,
	dn_i586,
	dn_i586_dither,
	dn_MMX,
	dn_3DNow,
	dn_3DNowExt,
	dn_AltiVec,
	dn_SSE,
	dn_x86_64,
	dn_ARM,
	dn_NEON,
	"nodec"
};
||
51 | |||
#if defined(OPT_X86) && defined(OPT_MULTI)
/* Multi-decoder x86 build: keep real CPU feature flags around.
   check_decoders() fills this structure via getcpuflags(). */
#include "getcpuflags.h"
static struct cpuflags cpu_flags;
#else
/* Any other build: there is nothing to query at runtime. The cpu_*() tests
   collapse to the constant 1 so that the synth selection code below can be
   shared verbatim; its outcome then depends on the compile-time OPT_*
   switches only. */
#define cpu_flags nothing
#define cpu_i586(s)     1
#define cpu_fpu(s)      1
#define cpu_mmx(s)      1
#define cpu_3dnow(s)    1
#define cpu_3dnowext(s) 1
#define cpu_sse(s)      1
#define cpu_sse2(s)     1
#define cpu_sse3(s)     1
#endif
||
68 | |||
69 | /* Ugly macros to build conditional synth function array values. */ |
||
70 | |||
/* Each IFx() macro yields its argument as an initializer list element when
   the corresponding output format is compiled in, and nothing otherwise.
   IF8 and IFREAL append the separating comma themselves; IF32 is always
   the last element and therefore adds none. */
#ifdef NO_8BIT
#define IF8(synth)
#else
#define IF8(synth) synth,
#endif

#ifdef NO_REAL
#define IFREAL(synth)
#else
#define IFREAL(synth) synth,
#endif

#ifdef NO_32BIT
#define IF32(synth)
#else
#define IF32(synth) synth
#endif

/* Build one brace-enclosed row of synth functions in the order
   16bit, 8bit, real, 32bit, leaving out the formats that are disabled. */
#ifdef NO_16BIT
#	define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) { IF8(synth_8) IFREAL(synth_real) IF32(synth_32) }
#else
#	define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) { synth_16, IF8(synth_8) IFREAL(synth_real) IF32(synth_32) }
#endif
||
94 | |||
3960 | Serge | 95 | /* The call of left and right plain synth, wrapped. |
96 | This may be replaced by a direct stereo optimized synth. */ |
||
97 | static int synth_stereo_wrap(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr) |
||
1905 | serge | 98 | { |
3960 | Serge | 99 | int clip; |
100 | clip = (fr->synth)(bandPtr_l, 0, fr, 0); |
||
101 | clip += (fr->synth)(bandPtr_r, 1, fr, 1); |
||
102 | return clip; |
||
103 | } |
||
104 | |||
105 | static const struct synth_s synth_base = |
||
106 | { |
||
1905 | serge | 107 | { /* plain */ |
108 | OUT_SYNTHS(synth_1to1, synth_1to1_8bit, synth_1to1_real, synth_1to1_s32) |
||
109 | # ifndef NO_DOWNSAMPLE |
||
110 | ,OUT_SYNTHS(synth_2to1, synth_2to1_8bit, synth_2to1_real, synth_2to1_s32) |
||
111 | ,OUT_SYNTHS(synth_4to1, synth_4to1_8bit, synth_4to1_real, synth_4to1_s32) |
||
112 | # endif |
||
113 | # ifndef NO_NTOM |
||
114 | ,OUT_SYNTHS(synth_ntom, synth_ntom_8bit, synth_ntom_real, synth_ntom_s32) |
||
115 | # endif |
||
116 | }, |
||
117 | { /* stereo, by default only wrappers over plain synth */ |
||
118 | OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap) |
||
119 | # ifndef NO_DOWNSAMPLE |
||
120 | ,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap) |
||
121 | ,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap) |
||
122 | # endif |
||
123 | # ifndef NO_NTOM |
||
124 | ,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap) |
||
125 | # endif |
||
126 | }, |
||
127 | { /* mono2stereo */ |
||
3960 | Serge | 128 | OUT_SYNTHS(synth_1to1_m2s, synth_1to1_8bit_m2s, synth_1to1_real_m2s, synth_1to1_s32_m2s) |
1905 | serge | 129 | # ifndef NO_DOWNSAMPLE |
3960 | Serge | 130 | ,OUT_SYNTHS(synth_2to1_m2s, synth_2to1_8bit_m2s, synth_2to1_real_m2s, synth_2to1_s32_m2s) |
131 | ,OUT_SYNTHS(synth_4to1_m2s, synth_4to1_8bit_m2s, synth_4to1_real_m2s, synth_4to1_s32_m2s) |
||
1905 | serge | 132 | # endif |
133 | # ifndef NO_NTOM |
||
3960 | Serge | 134 | ,OUT_SYNTHS(synth_ntom_m2s, synth_ntom_8bit_m2s, synth_ntom_real_m2s, synth_ntom_s32_m2s) |
1905 | serge | 135 | # endif |
136 | }, |
||
137 | { /* mono*/ |
||
138 | OUT_SYNTHS(synth_1to1_mono, synth_1to1_8bit_mono, synth_1to1_real_mono, synth_1to1_s32_mono) |
||
139 | # ifndef NO_DOWNSAMPLE |
||
140 | ,OUT_SYNTHS(synth_2to1_mono, synth_2to1_8bit_mono, synth_2to1_real_mono, synth_2to1_s32_mono) |
||
141 | ,OUT_SYNTHS(synth_4to1_mono, synth_4to1_8bit_mono, synth_4to1_real_mono, synth_4to1_s32_mono) |
||
142 | # endif |
||
143 | # ifndef NO_NTOM |
||
144 | ,OUT_SYNTHS(synth_ntom_mono, synth_ntom_8bit_mono, synth_ntom_real_mono, synth_ntom_s32_mono) |
||
145 | #endif |
||
146 | } |
||
147 | }; |
||
148 | |||
#ifdef OPT_X86
/* Plain synth table for the i386 decoder. It has the same row/column layout
   as synth_base.plain. The ntom row has no i386 variant and reuses the
   generic ntom synths. */
const func_synth plain_i386[r_limit][f_limit] =
{
	 OUT_SYNTHS(synth_1to1_i386, synth_1to1_8bit_i386, synth_1to1_real_i386, synth_1to1_s32_i386)
#ifndef NO_DOWNSAMPLE
	,OUT_SYNTHS(synth_2to1_i386, synth_2to1_8bit_i386, synth_2to1_real_i386, synth_2to1_s32_i386)
	,OUT_SYNTHS(synth_4to1_i386, synth_4to1_8bit_i386, synth_4to1_real_i386, synth_4to1_s32_i386)
#endif
#ifndef NO_NTOM
	,OUT_SYNTHS(synth_ntom, synth_ntom_8bit, synth_ntom_real, synth_ntom_s32)
#endif
};
#endif
||
163 | |||
164 | |||
165 | enum optdec defdec(void){ return defopt; } |
||
166 | |||
167 | enum optcla decclass(const enum optdec type) |
||
168 | { |
||
3960 | Serge | 169 | return (type == mmx || type == sse || type == dreidnowext || type == x86_64 || type == neon) ? mmxsse : normal; |
1905 | serge | 170 | } |
171 | |||
172 | |||
173 | static int find_synth(func_synth synth, const func_synth synths[r_limit][f_limit]) |
||
174 | { |
||
175 | enum synth_resample ri; |
||
176 | enum synth_format fi; |
||
177 | for(ri=0; ri |
||
178 | for(fi=0; fi |
||
179 | if(synth == synths[ri][fi]) |
||
180 | return TRUE; |
||
181 | |||
182 | return FALSE; |
||
183 | } |
||
184 | |||
185 | /* Determine what kind of decoder is actually active |
||
186 | This depends on runtime choices which may cause fallback to i386 or generic code. */ |
||
187 | static int find_dectype(mpg123_handle *fr) |
||
188 | { |
||
189 | enum optdec type = nodec; |
||
190 | /* Direct and indirect usage, 1to1 stereo decoding. |
||
191 | Concentrating on the plain stereo synth should be fine, mono stuff is derived. */ |
||
192 | func_synth basic_synth = fr->synth; |
||
193 | #ifndef NO_8BIT |
||
194 | #ifndef NO_16BIT |
||
195 | if(basic_synth == synth_1to1_8bit_wrap) |
||
196 | basic_synth = fr->synths.plain[r_1to1][f_16]; /* That is what's really below the surface. */ |
||
197 | #endif |
||
198 | #endif |
||
199 | |||
200 | if(FALSE) ; /* Just to initialize the else if ladder. */ |
||
201 | #ifndef NO_16BIT |
||
202 | #ifdef OPT_3DNOWEXT |
||
203 | else if(basic_synth == synth_1to1_3dnowext) type = dreidnowext; |
||
204 | #endif |
||
205 | #ifdef OPT_SSE |
||
206 | else if(basic_synth == synth_1to1_sse) type = sse; |
||
207 | #endif |
||
208 | #ifdef OPT_3DNOW |
||
209 | else if(basic_synth == synth_1to1_3dnow) type = dreidnow; |
||
210 | #endif |
||
211 | #ifdef OPT_MMX |
||
212 | else if(basic_synth == synth_1to1_mmx) type = mmx; |
||
213 | #endif |
||
214 | #ifdef OPT_I586_DITHER |
||
215 | else if(basic_synth == synth_1to1_i586_dither) type = ifuenf_dither; |
||
216 | #endif |
||
217 | #ifdef OPT_I586 |
||
218 | else if(basic_synth == synth_1to1_i586) type = ifuenf; |
||
219 | #endif |
||
220 | #ifdef OPT_ALTIVEC |
||
221 | else if(basic_synth == synth_1to1_altivec) type = altivec; |
||
222 | #endif |
||
223 | #ifdef OPT_X86_64 |
||
224 | else if(basic_synth == synth_1to1_x86_64) type = x86_64; |
||
225 | #endif |
||
226 | #ifdef OPT_ARM |
||
227 | else if(basic_synth == synth_1to1_arm) type = arm; |
||
228 | #endif |
||
3960 | Serge | 229 | #ifdef OPT_NEON |
230 | else if(basic_synth == synth_1to1_neon) type = neon; |
||
231 | #endif |
||
1905 | serge | 232 | #ifdef OPT_GENERIC_DITHER |
233 | else if(basic_synth == synth_1to1_dither) type = generic_dither; |
||
234 | #endif |
||
235 | #ifdef OPT_DITHER /* either i586 or generic! */ |
||
236 | #ifndef NO_DOWNSAMPLE |
||
237 | else if |
||
238 | ( |
||
239 | basic_synth == synth_2to1_dither |
||
240 | || basic_synth == synth_4to1_dither |
||
241 | ) type = generic_dither; |
||
242 | #endif |
||
243 | #endif |
||
244 | #endif /* 16bit */ |
||
245 | |||
246 | #ifndef NO_REAL |
||
247 | #ifdef OPT_SSE |
||
248 | else if(basic_synth == synth_1to1_real_sse) type = sse; |
||
249 | #endif |
||
250 | #ifdef OPT_X86_64 |
||
251 | else if(basic_synth == synth_1to1_real_x86_64) type = x86_64; |
||
252 | #endif |
||
253 | #ifdef OPT_ALTIVEC |
||
254 | else if(basic_synth == synth_1to1_real_altivec) type = altivec; |
||
255 | #endif |
||
3960 | Serge | 256 | #ifdef OPT_NEON |
257 | else if(basic_synth == synth_1to1_real_neon) type = neon; |
||
258 | #endif |
||
1905 | serge | 259 | |
260 | #endif /* real */ |
||
261 | |||
262 | #ifndef NO_32BIT |
||
263 | #ifdef OPT_SSE |
||
264 | else if(basic_synth == synth_1to1_s32_sse) type = sse; |
||
265 | #endif |
||
266 | #ifdef OPT_X86_64 |
||
267 | else if(basic_synth == synth_1to1_s32_x86_64) type = x86_64; |
||
268 | #endif |
||
269 | #ifdef OPT_ALTIVEC |
||
270 | else if(basic_synth == synth_1to1_s32_altivec) type = altivec; |
||
271 | #endif |
||
3960 | Serge | 272 | #ifdef OPT_NEON |
273 | else if(basic_synth == synth_1to1_s32_neon) type = neon; |
||
274 | #endif |
||
1905 | serge | 275 | #endif /* 32bit */ |
276 | |||
277 | #ifdef OPT_X86 |
||
278 | else if(find_synth(basic_synth, plain_i386)) |
||
279 | type = idrei; |
||
280 | #endif |
||
281 | |||
282 | else if(find_synth(basic_synth, synth_base.plain)) |
||
283 | type = generic; |
||
284 | |||
285 | |||
286 | |||
287 | #ifdef OPT_I486 |
||
288 | /* i486 is special ... the specific code is in use for 16bit 1to1 stereo |
||
289 | otherwise we have i386 active... but still, the distinction doesn't matter*/ |
||
290 | type = ivier; |
||
291 | #endif |
||
292 | |||
293 | if(type != nodec) |
||
294 | { |
||
295 | fr->cpu_opts.type = type; |
||
296 | fr->cpu_opts.class = decclass(type); |
||
297 | |||
298 | debug3("determined active decoder type %i (%s) of class %i", type, decname[type], fr->cpu_opts.class); |
||
299 | return MPG123_OK; |
||
300 | } |
||
301 | else |
||
302 | { |
||
303 | if(NOQUIET) error("Unable to determine active decoder type -- this is SERIOUS b0rkage!"); |
||
304 | |||
305 | fr->err = MPG123_BAD_DECODER_SETUP; |
||
306 | return MPG123_ERR; |
||
307 | } |
||
308 | } |
||
309 | |||
310 | /* set synth functions for current frame, optimizations handled by opt_* macros */ |
||
311 | int set_synth_functions(mpg123_handle *fr) |
||
312 | { |
||
313 | enum synth_resample resample = r_none; |
||
314 | enum synth_format basic_format = f_none; /* Default is always 16bit, or whatever. */ |
||
315 | |||
316 | /* Select the basic output format, different from 16bit: 8bit, real. */ |
||
317 | if(FALSE){} |
||
318 | #ifndef NO_16BIT |
||
319 | else if(fr->af.encoding & MPG123_ENC_16) |
||
320 | basic_format = f_16; |
||
321 | #endif |
||
322 | #ifndef NO_8BIT |
||
323 | else if(fr->af.encoding & MPG123_ENC_8) |
||
324 | basic_format = f_8; |
||
325 | #endif |
||
326 | #ifndef NO_REAL |
||
327 | else if(fr->af.encoding & MPG123_ENC_FLOAT) |
||
328 | basic_format = f_real; |
||
329 | #endif |
||
330 | #ifndef NO_32BIT |
||
3960 | Serge | 331 | /* 24 bit integer means decoding to 32 bit first. */ |
332 | else if(fr->af.encoding & MPG123_ENC_32 || fr->af.encoding & MPG123_ENC_24) |
||
1905 | serge | 333 | basic_format = f_32; |
334 | #endif |
||
335 | |||
336 | /* Make sure the chosen format is compiled into this lib. */ |
||
337 | if(basic_format == f_none) |
||
338 | { |
||
339 | if(NOQUIET) error("set_synth_functions: This output format is disabled in this build!"); |
||
340 | |||
341 | return -1; |
||
342 | } |
||
343 | |||
344 | /* Be explicit about downsampling variant. */ |
||
345 | switch(fr->down_sample) |
||
346 | { |
||
347 | case 0: resample = r_1to1; break; |
||
348 | #ifndef NO_DOWNSAMPLE |
||
349 | case 1: resample = r_2to1; break; |
||
350 | case 2: resample = r_4to1; break; |
||
351 | #endif |
||
352 | #ifndef NO_NTOM |
||
353 | case 3: resample = r_ntom; break; |
||
354 | #endif |
||
355 | } |
||
356 | |||
357 | if(resample == r_none) |
||
358 | { |
||
359 | if(NOQUIET) error("set_synth_functions: This resampling mode is not supported in this build!"); |
||
360 | |||
361 | return -1; |
||
362 | } |
||
363 | |||
364 | debug2("selecting synth: resample=%i format=%i", resample, basic_format); |
||
365 | /* Finally selecting the synth functions for stereo / mono. */ |
||
366 | fr->synth = fr->synths.plain[resample][basic_format]; |
||
367 | fr->synth_stereo = fr->synths.stereo[resample][basic_format]; |
||
368 | fr->synth_mono = fr->af.channels==2 |
||
369 | ? fr->synths.mono2stereo[resample][basic_format] /* Mono MPEG file decoded to stereo. */ |
||
370 | : fr->synths.mono[resample][basic_format]; /* Mono MPEG file decoded to mono. */ |
||
371 | |||
372 | if(find_dectype(fr) != MPG123_OK) /* Actually determine the currently active decoder breed. */ |
||
373 | { |
||
374 | fr->err = MPG123_BAD_DECODER_SETUP; |
||
375 | return MPG123_ERR; |
||
376 | } |
||
377 | |||
378 | if(frame_buffers(fr) != 0) |
||
379 | { |
||
380 | fr->err = MPG123_NO_BUFFERS; |
||
381 | if(NOQUIET) error("Failed to set up decoder buffers!"); |
||
382 | |||
383 | return MPG123_ERR; |
||
384 | } |
||
385 | |||
386 | #ifndef NO_8BIT |
||
387 | if(basic_format == f_8) |
||
388 | { |
||
389 | if(make_conv16to8_table(fr) != 0) |
||
390 | { |
||
391 | if(NOQUIET) error("Failed to set up conv16to8 table!"); |
||
392 | /* it's a bit more work to get proper error propagation up */ |
||
393 | return -1; |
||
394 | } |
||
395 | } |
||
396 | #endif |
||
397 | |||
398 | #ifdef OPT_MMXORSSE |
||
399 | /* Special treatment for MMX, SSE and 3DNowExt stuff. |
||
400 | The real-decoding SSE for x86-64 uses normal tables! */ |
||
401 | if(fr->cpu_opts.class == mmxsse |
||
402 | # ifndef NO_REAL |
||
403 | && basic_format != f_real |
||
404 | # endif |
||
405 | # ifndef NO_32BIT |
||
406 | && basic_format != f_32 |
||
407 | # endif |
||
408 | # ifdef ACCURATE_ROUNDING |
||
409 | && fr->cpu_opts.type != sse |
||
410 | && fr->cpu_opts.type != x86_64 |
||
3960 | Serge | 411 | && fr->cpu_opts.type != neon |
1905 | serge | 412 | # endif |
413 | ) |
||
414 | { |
||
415 | #ifndef NO_LAYER3 |
||
416 | init_layer3_stuff(fr, init_layer3_gainpow2_mmx); |
||
417 | #endif |
||
418 | #ifndef NO_LAYER12 |
||
419 | init_layer12_stuff(fr, init_layer12_table_mmx); |
||
420 | #endif |
||
421 | fr->make_decode_tables = make_decode_tables_mmx; |
||
422 | } |
||
423 | else |
||
424 | #endif |
||
425 | { |
||
426 | #ifndef NO_LAYER3 |
||
427 | init_layer3_stuff(fr, init_layer3_gainpow2); |
||
428 | #endif |
||
429 | #ifndef NO_LAYER12 |
||
430 | init_layer12_stuff(fr, init_layer12_table); |
||
431 | #endif |
||
432 | fr->make_decode_tables = make_decode_tables; |
||
433 | } |
||
434 | |||
435 | /* We allocated the table buffers just now, so (re)create the tables. */ |
||
436 | fr->make_decode_tables(fr); |
||
437 | |||
438 | return 0; |
||
439 | } |
||
440 | |||
441 | int frame_cpu_opt(mpg123_handle *fr, const char* cpu) |
||
442 | { |
||
443 | const char* chosen = ""; /* the chosen decoder opt as string */ |
||
444 | enum optdec want_dec = nodec; |
||
445 | int done = 0; |
||
446 | int auto_choose = 0; |
||
447 | #ifdef OPT_DITHER |
||
448 | int dithered = FALSE; /* If some dithered decoder is chosen. */ |
||
449 | #endif |
||
450 | |||
451 | want_dec = dectype(cpu); |
||
452 | auto_choose = want_dec == autodec; |
||
453 | /* Fill whole array of synth functions with generic code first. */ |
||
454 | fr->synths = synth_base; |
||
455 | |||
456 | #ifndef OPT_MULTI |
||
457 | { |
||
458 | if(!auto_choose && want_dec != defopt) |
||
459 | { |
||
460 | if(NOQUIET) error2("you wanted decoder type %i, I only have %i", want_dec, defopt); |
||
461 | } |
||
462 | auto_choose = TRUE; /* There will be only one choice anyway. */ |
||
463 | } |
||
464 | #endif |
||
465 | |||
466 | fr->cpu_opts.type = nodec; |
||
467 | /* covers any i386+ cpu; they actually differ only in the synth_1to1 function, mostly... */ |
||
468 | #ifdef OPT_X86 |
||
469 | |||
3960 | Serge | 470 | #ifdef OPT_MULTI |
1905 | serge | 471 | #ifndef NO_LAYER3 |
472 | #if (defined OPT_3DNOW || defined OPT_3DNOWEXT) |
||
3960 | Serge | 473 | fr->cpu_opts.the_dct36 = dct36; |
1905 | serge | 474 | #endif |
475 | #endif |
||
3960 | Serge | 476 | #endif |
1905 | serge | 477 | |
478 | if(cpu_i586(cpu_flags)) |
||
479 | { |
||
480 | # ifdef OPT_MULTI |
||
481 | debug2("standard flags: 0x%08x\textended flags: 0x%08x", cpu_flags.std, cpu_flags.ext); |
||
482 | # endif |
||
483 | #ifdef OPT_SSE |
||
484 | if( !done && (auto_choose || want_dec == sse) |
||
485 | && cpu_sse(cpu_flags) && cpu_mmx(cpu_flags) ) |
||
486 | { |
||
487 | chosen = "SSE"; |
||
488 | fr->cpu_opts.type = sse; |
||
489 | # ifndef NO_16BIT |
||
490 | fr->synths.plain[r_1to1][f_16] = synth_1to1_sse; |
||
491 | # ifdef ACCURATE_ROUNDING |
||
492 | fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_sse; |
||
493 | # endif |
||
494 | # endif |
||
495 | # ifndef NO_REAL |
||
496 | fr->synths.plain[r_1to1][f_real] = synth_1to1_real_sse; |
||
497 | fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_sse; |
||
498 | # endif |
||
499 | # ifndef NO_32BIT |
||
500 | fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_sse; |
||
501 | fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_sse; |
||
502 | # endif |
||
503 | done = 1; |
||
504 | } |
||
505 | #endif |
||
506 | # ifdef OPT_3DNOWEXT |
||
507 | if( !done && (auto_choose || want_dec == dreidnowext ) |
||
508 | && cpu_3dnow(cpu_flags) |
||
509 | && cpu_3dnowext(cpu_flags) |
||
510 | && cpu_mmx(cpu_flags) ) |
||
511 | { |
||
512 | chosen = "3DNowExt"; |
||
513 | fr->cpu_opts.type = dreidnowext; |
||
3960 | Serge | 514 | #ifdef OPT_MULTI |
1905 | serge | 515 | # ifndef NO_LAYER3 |
3960 | Serge | 516 | /* The DCT36 is _bad_, at least compared to gcc 4.4-built C code. */ |
517 | /* fr->cpu_opts.the_dct36 = dct36_3dnowext; */ |
||
1905 | serge | 518 | # endif |
3960 | Serge | 519 | #endif |
1905 | serge | 520 | # ifndef NO_16BIT |
521 | fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnowext; |
||
522 | # endif |
||
523 | done = 1; |
||
524 | } |
||
525 | #endif |
||
526 | #ifdef OPT_3DNOW |
||
527 | if( !done && (auto_choose || want_dec == dreidnow) |
||
528 | && cpu_3dnow(cpu_flags) && cpu_mmx(cpu_flags) ) |
||
529 | { |
||
530 | chosen = "3DNow"; |
||
531 | fr->cpu_opts.type = dreidnow; |
||
3960 | Serge | 532 | #ifdef OPT_MULTI |
1905 | serge | 533 | # ifndef NO_LAYER3 |
3960 | Serge | 534 | /* The DCT36 is _bad_, at least compared to gcc 4.4-built C code. */ |
535 | /* fr->cpu_opts.the_dct36 = dct36_3dnow; */ |
||
1905 | serge | 536 | # endif |
3960 | Serge | 537 | #endif |
1905 | serge | 538 | # ifndef NO_16BIT |
539 | fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnow; |
||
540 | # endif |
||
541 | done = 1; |
||
542 | } |
||
543 | #endif |
||
544 | #ifdef OPT_MMX |
||
545 | if( !done && (auto_choose || want_dec == mmx) |
||
546 | && cpu_mmx(cpu_flags) ) |
||
547 | { |
||
548 | chosen = "MMX"; |
||
549 | fr->cpu_opts.type = mmx; |
||
550 | # ifndef NO_16BIT |
||
551 | fr->synths.plain[r_1to1][f_16] = synth_1to1_mmx; |
||
552 | # endif |
||
553 | done = 1; |
||
554 | } |
||
555 | #endif |
||
556 | #ifdef OPT_I586 |
||
557 | if(!done && (auto_choose || want_dec == ifuenf)) |
||
558 | { |
||
559 | chosen = "i586/pentium"; |
||
560 | fr->cpu_opts.type = ifuenf; |
||
561 | # ifndef NO_16BIT |
||
562 | fr->synths.plain[r_1to1][f_16] = synth_1to1_i586; |
||
563 | # endif |
||
564 | done = 1; |
||
565 | } |
||
566 | #endif |
||
567 | #ifdef OPT_I586_DITHER |
||
568 | if(!done && (auto_choose || want_dec == ifuenf_dither)) |
||
569 | { |
||
570 | chosen = "dithered i586/pentium"; |
||
571 | fr->cpu_opts.type = ifuenf_dither; |
||
572 | dithered = TRUE; |
||
573 | # ifndef NO_16BIT |
||
574 | fr->synths.plain[r_1to1][f_16] = synth_1to1_i586_dither; |
||
575 | # ifndef NO_DOWNSAMPLE |
||
576 | fr->synths.plain[r_2to1][f_16] = synth_2to1_dither; |
||
577 | fr->synths.plain[r_4to1][f_16] = synth_4to1_dither; |
||
578 | # endif |
||
579 | # endif |
||
580 | done = 1; |
||
581 | } |
||
582 | #endif |
||
583 | } |
||
584 | #ifdef OPT_I486 |
||
585 | /* That won't cooperate in multi opt mode - forcing i486 in layer3.c |
||
586 | But still... here it is... maybe for real use in future. */ |
||
587 | if(!done && (auto_choose || want_dec == ivier)) |
||
588 | { |
||
589 | chosen = "i486"; |
||
590 | fr->cpu_opts.type = ivier; |
||
591 | done = 1; |
||
592 | } |
||
593 | #endif |
||
594 | #ifdef OPT_I386 |
||
595 | if(!done && (auto_choose || want_dec == idrei)) |
||
596 | { |
||
597 | chosen = "i386"; |
||
598 | fr->cpu_opts.type = idrei; |
||
599 | done = 1; |
||
600 | } |
||
601 | #endif |
||
602 | |||
603 | if(done) |
||
604 | { |
||
605 | /* |
||
606 | We have chosen some x86 decoder... fillup some i386 stuff. |
||
607 | There is an open question about using dithered synth_1to1 for 8bit wrappers. |
||
608 | For quality it won't make sense, but wrapped i586_dither wrapped may still be faster... |
||
609 | */ |
||
610 | enum synth_resample ri; |
||
611 | enum synth_format fi; |
||
612 | # ifndef NO_8BIT |
||
613 | # ifndef NO_16BIT /* possibility to use a 16->8 wrapper... */ |
||
614 | if(fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16]) |
||
615 | { |
||
616 | fr->synths.plain[r_1to1][f_8] = synth_1to1_8bit_wrap; |
||
617 | fr->synths.mono[r_1to1][f_8] = synth_1to1_8bit_wrap_mono; |
||
3960 | Serge | 618 | fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_m2s; |
1905 | serge | 619 | } |
620 | # endif |
||
621 | # endif |
||
622 | for(ri=0; ri |
||
623 | for(fi=0; fi |
||
624 | { |
||
625 | if(fr->synths.plain[ri][fi] == synth_base.plain[ri][fi]) |
||
626 | fr->synths.plain[ri][fi] = plain_i386[ri][fi]; |
||
627 | } |
||
628 | } |
||
629 | |||
630 | #endif /* OPT_X86 */ |
||
631 | |||
632 | #ifdef OPT_X86_64 |
||
633 | if(!done && (auto_choose || want_dec == x86_64)) |
||
634 | { |
||
635 | chosen = "x86-64 (SSE)"; |
||
636 | fr->cpu_opts.type = x86_64; |
||
637 | # ifndef NO_16BIT |
||
638 | fr->synths.plain[r_1to1][f_16] = synth_1to1_x86_64; |
||
639 | fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_x86_64; |
||
640 | # endif |
||
641 | # ifndef NO_REAL |
||
642 | fr->synths.plain[r_1to1][f_real] = synth_1to1_real_x86_64; |
||
643 | fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_x86_64; |
||
644 | # endif |
||
645 | # ifndef NO_32BIT |
||
646 | fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_x86_64; |
||
647 | fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_x86_64; |
||
648 | # endif |
||
649 | done = 1; |
||
650 | } |
||
651 | #endif |
||
652 | |||
653 | #ifdef OPT_GENERIC_DITHER |
||
654 | if(!done && (auto_choose || want_dec == generic_dither)) |
||
655 | { |
||
656 | chosen = "dithered generic"; |
||
657 | fr->cpu_opts.type = generic_dither; |
||
658 | dithered = TRUE; |
||
659 | # ifndef NO_16BIT |
||
660 | fr->synths.plain[r_1to1][f_16] = synth_1to1_dither; |
||
661 | # ifndef NO_DOWNSAMPLE |
||
662 | fr->synths.plain[r_2to1][f_16] = synth_2to1_dither; |
||
663 | fr->synths.plain[r_4to1][f_16] = synth_4to1_dither; |
||
664 | # endif |
||
665 | # endif |
||
666 | done = 1; |
||
667 | } |
||
668 | #endif |
||
669 | |||
670 | # ifdef OPT_ALTIVEC |
||
671 | if(!done && (auto_choose || want_dec == altivec)) |
||
672 | { |
||
673 | chosen = "AltiVec"; |
||
674 | fr->cpu_opts.type = altivec; |
||
675 | # ifndef NO_16BIT |
||
676 | fr->synths.plain[r_1to1][f_16] = synth_1to1_altivec; |
||
677 | fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_altivec; |
||
678 | # endif |
||
679 | # ifndef NO_REAL |
||
680 | fr->synths.plain[r_1to1][f_real] = synth_1to1_real_altivec; |
||
681 | fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_altivec; |
||
682 | # endif |
||
683 | # ifndef NO_32BIT |
||
684 | fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_altivec; |
||
685 | fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_altivec; |
||
686 | # endif |
||
687 | done = 1; |
||
688 | } |
||
689 | # endif |
||
690 | |||
3960 | Serge | 691 | # ifdef OPT_NEON |
692 | if(!done && (auto_choose || want_dec == neon)) |
||
693 | { |
||
694 | chosen = "NEON"; |
||
695 | fr->cpu_opts.type = neon; |
||
696 | # ifndef NO_16BIT |
||
697 | fr->synths.plain[r_1to1][f_16] = synth_1to1_neon; |
||
698 | fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_neon; |
||
699 | # endif |
||
700 | # ifndef NO_REAL |
||
701 | fr->synths.plain[r_1to1][f_real] = synth_1to1_real_neon; |
||
702 | fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_neon; |
||
703 | # endif |
||
704 | # ifndef NO_32BIT |
||
705 | fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_neon; |
||
706 | fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_neon; |
||
707 | # endif |
||
708 | done = 1; |
||
709 | } |
||
710 | # endif |
||
711 | |||
1905 | serge | 712 | # ifdef OPT_ARM |
713 | if(!done && (auto_choose || want_dec == arm)) |
||
714 | { |
||
715 | chosen = "ARM"; |
||
716 | fr->cpu_opts.type = arm; |
||
717 | # ifndef NO_16BIT |
||
718 | fr->synths.plain[r_1to1][f_16] = synth_1to1_arm; |
||
719 | # endif |
||
720 | done = 1; |
||
721 | } |
||
722 | # endif |
||
723 | |||
724 | # ifdef OPT_GENERIC |
||
725 | if(!done && (auto_choose || want_dec == generic)) |
||
726 | { |
||
727 | chosen = "generic"; |
||
728 | fr->cpu_opts.type = generic; |
||
729 | done = 1; |
||
730 | } |
||
731 | # endif |
||
732 | |||
733 | fr->cpu_opts.class = decclass(fr->cpu_opts.type); |
||
734 | |||
735 | # ifndef NO_8BIT |
||
736 | # ifndef NO_16BIT /* possibility to use a 16->8 wrapper... */ |
||
737 | /* Last chance to use some optimized routine via generic wrappers (for 8bit). */ |
||
738 | if( fr->cpu_opts.type != ifuenf_dither |
||
739 | && fr->cpu_opts.type != generic_dither |
||
740 | && fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16] ) |
||
741 | { |
||
742 | fr->synths.plain[r_1to1][f_8] = synth_1to1_8bit_wrap; |
||
743 | fr->synths.mono[r_1to1][f_8] = synth_1to1_8bit_wrap_mono; |
||
3960 | Serge | 744 | fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_m2s; |
1905 | serge | 745 | } |
746 | # endif |
||
747 | # endif |
||
748 | |||
749 | #ifdef OPT_DITHER |
||
750 | if(done && dithered) |
||
751 | { |
||
752 | /* run-time dither noise table generation */ |
||
753 | if(!frame_dither_init(fr)) |
||
754 | { |
||
755 | if(NOQUIET) error("Dither noise setup failed!"); |
||
756 | return 0; |
||
757 | } |
||
758 | } |
||
759 | #endif |
||
760 | |||
761 | if(done) |
||
762 | { |
||
763 | if(VERBOSE) fprintf(stderr, "Decoder: %s\n", chosen); |
||
764 | return 1; |
||
765 | } |
||
766 | else |
||
767 | { |
||
768 | if(NOQUIET) error("Could not set optimization!"); |
||
769 | return 0; |
||
770 | } |
||
771 | } |
||
772 | |||
773 | enum optdec dectype(const char* decoder) |
||
774 | { |
||
775 | enum optdec dt; |
||
776 | if( (decoder == NULL) |
||
777 | || (decoder[0] == 0) ) |
||
778 | return autodec; |
||
779 | |||
780 | for(dt=autodec; dt |
||
781 | if(!strcasecmp(decoder, decname[dt])) return dt; |
||
782 | |||
783 | return nodec; /* If we found nothing... */ |
||
784 | } |
||
785 | |||
#ifdef OPT_MULTI

/* List of the decoders usable on the running CPU, for multi-decoder builds.
   It starts out all NULL: one slot per compiled-in decoder plus the
   terminating NULL. check_decoders() writes the names into it from the
   front. */
static const char *mpg123_supported_decoder_list[] =
{
#ifdef OPT_SSE
	NULL,
#endif
#ifdef OPT_3DNOWEXT
	NULL,
#endif
#ifdef OPT_3DNOW
	NULL,
#endif
#ifdef OPT_MMX
	NULL,
#endif
#ifdef OPT_I586
	NULL,
#endif
#ifdef OPT_I586_DITHER
	NULL,
#endif
#ifdef OPT_I486
	NULL,
#endif
#ifdef OPT_I386
	NULL,
#endif
#ifdef OPT_ALTIVEC
	NULL,
#endif
#ifdef OPT_X86_64
	NULL,
#endif
#ifdef OPT_ARM
	NULL,
#endif
#ifdef OPT_NEON
	NULL,
#endif
#ifdef OPT_GENERIC_FLOAT
	NULL,
#endif
#ifdef OPT_GENERIC
	NULL,
#endif
#ifdef OPT_GENERIC_DITHER
	NULL,
#endif
	NULL /* terminator */
};
#endif
||
839 | |||
/* NULL-terminated list with the names of all decoders compiled into this
   build, regardless of what the running CPU supports. */
static const char *mpg123_decoder_list[] =
{
#ifdef OPT_SSE
	dn_SSE,
#endif
#ifdef OPT_3DNOWEXT
	dn_3DNowExt,
#endif
#ifdef OPT_3DNOW
	dn_3DNow,
#endif
#ifdef OPT_MMX
	dn_MMX,
#endif
#ifdef OPT_I586
	dn_i586,
#endif
#ifdef OPT_I586_DITHER
	dn_i586_dither,
#endif
#ifdef OPT_I486
	dn_i486,
#endif
#ifdef OPT_I386
	dn_i386,
#endif
#ifdef OPT_ALTIVEC
	dn_AltiVec,
#endif
#ifdef OPT_X86_64
	dn_x86_64,
#endif
#ifdef OPT_ARM
	dn_ARM,
#endif
#ifdef OPT_NEON
	dn_NEON,
#endif
#ifdef OPT_GENERIC
	dn_generic,
#endif
#ifdef OPT_GENERIC_DITHER
	dn_generic_dither,
#endif
	NULL /* terminator */
};
||
886 | |||
/* Fill mpg123_supported_decoder_list with the names of the decoders that
   can be used at runtime. On x86 the CPU flags are read first and decide
   about the SSE/3DNow/MMX entries; all other compiled-in decoders are
   added unconditionally. Without OPT_MULTI this function does nothing,
   since only the full list (one entry) is used then. */
void check_decoders(void )
{
#ifdef OPT_MULTI
	const char **slot = mpg123_supported_decoder_list; /* next free entry */
#ifdef OPT_X86
	getcpuflags(&cpu_flags);
	if(cpu_i586(cpu_flags))
	{
		/* not yet: if(cpu_sse2(cpu_flags)) printf(" SSE2");
		if(cpu_sse3(cpu_flags)) printf(" SSE3"); */
#ifdef OPT_SSE
		if(cpu_sse(cpu_flags)) *slot++ = decname[sse];
#endif
#ifdef OPT_3DNOWEXT
		if(cpu_3dnowext(cpu_flags)) *slot++ = decname[dreidnowext];
#endif
#ifdef OPT_3DNOW
		if(cpu_3dnow(cpu_flags)) *slot++ = decname[dreidnow];
#endif
#ifdef OPT_MMX
		if(cpu_mmx(cpu_flags)) *slot++ = decname[mmx];
#endif
#ifdef OPT_I586
		*slot++ = decname[ifuenf];
#endif
#ifdef OPT_I586_DITHER
		*slot++ = decname[ifuenf_dither];
#endif
	}
#endif
	/* just assume that the i486 built is run on a i486 cpu... */
#ifdef OPT_I486
	*slot++ = decname[ivier];
#endif
#ifdef OPT_ALTIVEC
	*slot++ = decname[altivec];
#endif
	/* every supported x86 can do i386, any cpu can do generic */
#ifdef OPT_I386
	*slot++ = decname[idrei];
#endif
#ifdef OPT_X86_64
	*slot++ = decname[x86_64];
#endif
#ifdef OPT_ARM
	*slot++ = decname[arm];
#endif
#ifdef OPT_NEON
	*slot++ = decname[neon];
#endif
#ifdef OPT_GENERIC
	*slot++ = decname[generic];
#endif
#ifdef OPT_GENERIC_DITHER
	*slot++ = decname[generic_dither];
#endif
#endif /* OPT_MULTI */
}
||
948 | |||
949 | const char* attribute_align_arg mpg123_current_decoder(mpg123_handle *mh) |
||
950 | { |
||
951 | if(mh == NULL) return NULL; |
||
952 | |||
953 | return decname[mh->cpu_opts.type]; |
||
954 | } |
||
955 | |||
3960 | Serge | 956 | const char attribute_align_arg **mpg123_decoders(void){ return mpg123_decoder_list; } |
957 | const char attribute_align_arg **mpg123_supported_decoders(void) |
||
1905 | serge | 958 | { |
959 | #ifdef OPT_MULTI |
||
960 | return mpg123_supported_decoder_list; |
||
961 | #else |
||
962 | return mpg123_decoder_list; |
||
963 | #endif |
||
964 | } |