Subversion Repositories Kolibri OS

Rev

Rev 1905 | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

  1. /*
  2.         optimize: get a grip on the different optimizations
  3.  
  4.         copyright 2006-9 by the mpg123 project - free software under the terms of the LGPL 2.1
  5.         see COPYING and AUTHORS files in distribution or http://mpg123.org
  6.         initially written by Thomas Orgis, inspired by 3DNow stuff in mpg123.[hc]
  7.  
  8.         Currently, this file contains the struct and function to choose an optimization variant and works only when OPT_MULTI is in effect.
  9. */
  10.  
  11. #include "mpg123lib_intern.h" /* includes optimize.h */
  12. #include "debug.h"
  13.  
  14. /* Must match the enum dectype! */
  15.  
  16. /*
  17.         It SUCKS having to define these names that way, but compile-time initialization of string arrays is a bitch.
  18.         GCC doesn't see constant stuff when it's wiggling in front of it!
  19.         Anyhow: Have a script for that:
  20. names="generic generic_dither i386 i486 i586 i586_dither MMX 3DNow 3DNowExt AltiVec SSE x86-64 ARM NEON"
  21. for i in $names; do echo "##define dn_${i/-/_} \"$i\""; done
  22. echo -n "static const char* decname[] =
  23. {
  24.         \"auto\"
  25.         "
  26. for i in $names; do echo -n ", dn_${i/-/_}"; done
  27. echo "
  28.         , \"nodec\"
  29. };"
  30. */
  31. #define dn_generic "generic"
  32. #define dn_generic_dither "generic_dither"
  33. #define dn_i386 "i386"
  34. #define dn_i486 "i486"
  35. #define dn_i586 "i586"
  36. #define dn_i586_dither "i586_dither"
  37. #define dn_MMX "MMX"
  38. #define dn_3DNow "3DNow"
  39. #define dn_3DNowExt "3DNowExt"
  40. #define dn_AltiVec "AltiVec"
  41. #define dn_SSE "SSE"
  42. #define dn_x86_64 "x86-64"
  43. #define dn_ARM "ARM"
  44. #define dn_NEON "NEON"
  45. static const char* decname[] =
  46. {
  47.         "auto"
  48.         , dn_generic, dn_generic_dither, dn_i386, dn_i486, dn_i586, dn_i586_dither, dn_MMX, dn_3DNow, dn_3DNowExt, dn_AltiVec, dn_SSE, dn_x86_64, dn_ARM, dn_NEON
  49.         , "nodec"
  50. };
  51.  
      /* CPU feature source for the decoder choice in frame_cpu_opt().
         Only builds with both OPT_X86 and OPT_MULTI get a cpuflags structure to query. */
  52. #if (defined OPT_X86) && (defined OPT_MULTI)
  53. #include "getcpuflags.h"
  54. static struct cpuflags cpu_flags;
  55. #else
  56. /* Faking stuff for non-multi builds. The same code for synth function choice is used.
  57.    Just no runtime dependency of result...
         Every cpu_*() test below ignores its argument and expands to 1, so each feature
         check passes at compile time and the placeholder name "nothing" is never evaluated. */
  58. #define cpu_flags nothing
  59. #define cpu_i586(s)     1
  60. #define cpu_fpu(s)      1
  61. #define cpu_mmx(s)      1
  62. #define cpu_3dnow(s)    1
  63. #define cpu_3dnowext(s) 1
  64. #define cpu_sse(s)      1
  65. #define cpu_sse2(s)     1
  66. #define cpu_sse3(s)     1
  67. #endif
  68.  
  69. /* Ugly macros to build conditional synth function array values.
         Each IFxx(synth) macro expands to its argument when the matching output format
         is compiled in, and to nothing when the NO_xx switch disables that format. */
  70.  
      /* 8 bit entry, followed by a comma. */
  71. #ifndef NO_8BIT
  72. #define IF8(synth) synth,
  73. #else
  74. #define IF8(synth)
  75. #endif
  76.  
      /* Floating point ("real") entry, followed by a comma. */
  77. #ifndef NO_REAL
  78. #define IFREAL(synth) synth,
  79. #else
  80. #define IFREAL(synth)
  81. #endif
  82.  
      /* 32 bit entry; it is the last one in a row and therefore carries no trailing comma. */
  83. #ifndef NO_32BIT
  84. #define IF32(synth) synth
  85. #else
  86. #define IF32(synth)
  87. #endif
  88.  
      /* One brace-enclosed table row in the order 16 bit, 8 bit, real, 32 bit.
         With NO_16BIT the 16 bit entry is left out of the row as well. */
  89. #ifndef NO_16BIT
  90. #       define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) { synth_16, IF8(synth_8) IFREAL(synth_real) IF32(synth_32) }
  91. #else
  92. #       define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) { IF8(synth_8) IFREAL(synth_real) IF32(synth_32) }
  93. #endif
  94.  
  95. /* The call of left and right plain synth, wrapped.
  96.    This may be replaced by a direct stereo optimized synth. */
  97. static int synth_stereo_wrap(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr)
  98. {
  99.         int clip;
  100.         clip  = (fr->synth)(bandPtr_l, 0, fr, 0);
  101.         clip += (fr->synth)(bandPtr_r, 1, fr, 1);
  102.         return clip;
  103. }
  104.  
       /* Generic synth function set; frame_cpu_opt() copies it into fr->synths before
          overriding single entries with optimized variants.
          It holds four tables (plain, stereo, mono2stereo, mono). Each table has one row per
          resampling mode: 1to1 always, 2to1 and 4to1 unless NO_DOWNSAMPLE, ntom unless NO_NTOM.
          The columns of a row are the output formats as arranged by OUT_SYNTHS. */
  105. static const struct synth_s synth_base =
  106. {
  107.         { /* plain */
  108.                  OUT_SYNTHS(synth_1to1, synth_1to1_8bit, synth_1to1_real, synth_1to1_s32)
  109. #               ifndef NO_DOWNSAMPLE
  110.                 ,OUT_SYNTHS(synth_2to1, synth_2to1_8bit, synth_2to1_real, synth_2to1_s32)
  111.                 ,OUT_SYNTHS(synth_4to1, synth_4to1_8bit, synth_4to1_real, synth_4to1_s32)
  112. #               endif
  113. #               ifndef NO_NTOM
  114.                 ,OUT_SYNTHS(synth_ntom, synth_ntom_8bit, synth_ntom_real, synth_ntom_s32)
  115. #               endif
  116.         },
  117.         { /* stereo, by default only wrappers over plain synth */
  118.                  OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
  119. #               ifndef NO_DOWNSAMPLE
  120.                 ,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
  121.                 ,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
  122. #               endif
  123. #               ifndef NO_NTOM
  124.                 ,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
  125. #               endif
  126.         },
  127.         { /* mono2stereo */
  128.                  OUT_SYNTHS(synth_1to1_m2s, synth_1to1_8bit_m2s, synth_1to1_real_m2s, synth_1to1_s32_m2s)
  129. #               ifndef NO_DOWNSAMPLE
  130.                 ,OUT_SYNTHS(synth_2to1_m2s, synth_2to1_8bit_m2s, synth_2to1_real_m2s, synth_2to1_s32_m2s)
  131.                 ,OUT_SYNTHS(synth_4to1_m2s, synth_4to1_8bit_m2s, synth_4to1_real_m2s, synth_4to1_s32_m2s)
  132. #               endif
  133. #               ifndef NO_NTOM
  134.                 ,OUT_SYNTHS(synth_ntom_m2s, synth_ntom_8bit_m2s, synth_ntom_real_m2s, synth_ntom_s32_m2s)
  135. #               endif
  136.         },
  137.         { /* mono */
  138.                  OUT_SYNTHS(synth_1to1_mono, synth_1to1_8bit_mono, synth_1to1_real_mono, synth_1to1_s32_mono)
  139. #               ifndef NO_DOWNSAMPLE
  140.                 ,OUT_SYNTHS(synth_2to1_mono, synth_2to1_8bit_mono, synth_2to1_real_mono, synth_2to1_s32_mono)
  141.                 ,OUT_SYNTHS(synth_4to1_mono, synth_4to1_8bit_mono, synth_4to1_real_mono, synth_4to1_s32_mono)
  142. #               endif
  143. #               ifndef NO_NTOM
  144.                 ,OUT_SYNTHS(synth_ntom_mono, synth_ntom_8bit_mono, synth_ntom_real_mono, synth_ntom_s32_mono)
  145. #endif
  146.         }
  147. };
  148.  
  149. #ifdef OPT_X86
  150. /* More plain synths for i386.
          Same row/column layout as synth_base.plain. frame_cpu_opt() uses this table to replace
          plain entries that are still generic once an x86 decoder was chosen, and find_dectype()
          searches it to recognize the i386 decoder.
          The ntom row holds the generic ntom synths; no i386 variants are listed for it. */
  151. const func_synth plain_i386[r_limit][f_limit] =
  152. { /* plain */
  153.          OUT_SYNTHS(synth_1to1_i386, synth_1to1_8bit_i386, synth_1to1_real_i386, synth_1to1_s32_i386)
  154. #       ifndef NO_DOWNSAMPLE
  155.         ,OUT_SYNTHS(synth_2to1_i386, synth_2to1_8bit_i386, synth_2to1_real_i386, synth_2to1_s32_i386)
  156.         ,OUT_SYNTHS(synth_4to1_i386, synth_4to1_8bit_i386, synth_4to1_real_i386, synth_4to1_s32_i386)
  157. #       endif
  158. #       ifndef NO_NTOM
  159.         ,OUT_SYNTHS(synth_ntom, synth_ntom_8bit, synth_ntom_real, synth_ntom_s32)
  160. #       endif
  161. };
  162. #endif
  163.  
  164.  
  165. enum optdec defdec(void){ return defopt; }
  166.  
  167. enum optcla decclass(const enum optdec type)
  168. {
  169.         return (type == mmx || type == sse || type == dreidnowext || type == x86_64  || type == neon) ? mmxsse : normal;
  170. }
  171.  
  172.  
  173. static int find_synth(func_synth synth,  const func_synth synths[r_limit][f_limit])
  174. {
  175.         enum synth_resample ri;
  176.         enum synth_format   fi;
  177.         for(ri=0; ri<r_limit; ++ri)
  178.         for(fi=0; fi<f_limit; ++fi)
  179.         if(synth == synths[ri][fi])
  180.         return TRUE;
  181.  
  182.         return FALSE;
  183. }
  184.  
  185. /* Determine what kind of decoder is actually active.
  186.    This depends on runtime choices which may cause fallback to i386 or generic code.
          The plain synth currently set in the handle (fr->synth) is compared against the known
          optimized 1to1 synth functions first, then searched in plain_i386 (OPT_X86 only) and in
          synth_base.plain.
          On success the type and its class are stored in fr->cpu_opts and MPG123_OK is returned.
          If the type is still nodec at the end, fr->err is set to MPG123_BAD_DECODER_SETUP and
          MPG123_ERR is returned. */
  187. static int find_dectype(mpg123_handle *fr)
  188. {
  189.         enum optdec type = nodec;
  190.         /* Direct and indirect usage, 1to1 stereo decoding.
  191.            Concentrating on the plain stereo synth should be fine, mono stuff is derived. */
  192.         func_synth basic_synth = fr->synth;
  193. #ifndef NO_8BIT
  194. #ifndef NO_16BIT
               /* The 8bit wrapper hides the 16bit synth it calls; look at that one instead. */
  195.         if(basic_synth == synth_1to1_8bit_wrap)
  196.         basic_synth = fr->synths.plain[r_1to1][f_16]; /* That is what's really below the surface. */
  197. #endif
  198. #endif
  199.  
  200.         if(FALSE) ; /* Just to initialize the else if ladder. */
  201. #ifndef NO_16BIT
  202. #ifdef OPT_3DNOWEXT
  203.         else if(basic_synth == synth_1to1_3dnowext) type = dreidnowext;
  204. #endif
  205. #ifdef OPT_SSE
  206.         else if(basic_synth == synth_1to1_sse) type = sse;
  207. #endif
  208. #ifdef OPT_3DNOW
  209.         else if(basic_synth == synth_1to1_3dnow) type = dreidnow;
  210. #endif
  211. #ifdef OPT_MMX
  212.         else if(basic_synth == synth_1to1_mmx) type = mmx;
  213. #endif
  214. #ifdef OPT_I586_DITHER
  215.         else if(basic_synth == synth_1to1_i586_dither) type = ifuenf_dither;
  216. #endif
  217. #ifdef OPT_I586
  218.         else if(basic_synth == synth_1to1_i586) type = ifuenf;
  219. #endif
  220. #ifdef OPT_ALTIVEC
  221.         else if(basic_synth == synth_1to1_altivec) type = altivec;
  222. #endif
  223. #ifdef OPT_X86_64
  224.         else if(basic_synth == synth_1to1_x86_64) type = x86_64;
  225. #endif
  226. #ifdef OPT_ARM
  227.         else if(basic_synth == synth_1to1_arm) type = arm;
  228. #endif
  229. #ifdef OPT_NEON
  230.         else if(basic_synth == synth_1to1_neon) type = neon;
  231. #endif
  232. #ifdef OPT_GENERIC_DITHER
  233.         else if(basic_synth == synth_1to1_dither) type = generic_dither;
  234. #endif
  235. #ifdef OPT_DITHER /* either i586 or generic! */
  236. #ifndef NO_DOWNSAMPLE
               /* The downsampling dither synths are reported as generic_dither in either case. */
  237.         else if
  238.         (
  239.                    basic_synth == synth_2to1_dither
  240.                 || basic_synth == synth_4to1_dither
  241.         ) type = generic_dither;
  242. #endif
  243. #endif
  244. #endif /* 16bit */
  245.  
  246. #ifndef NO_REAL
  247. #ifdef OPT_SSE
  248.         else if(basic_synth == synth_1to1_real_sse) type = sse;
  249. #endif
  250. #ifdef OPT_X86_64
  251.         else if(basic_synth == synth_1to1_real_x86_64) type = x86_64;
  252. #endif
  253. #ifdef OPT_ALTIVEC
  254.         else if(basic_synth == synth_1to1_real_altivec) type = altivec;
  255. #endif
  256. #ifdef OPT_NEON
  257.         else if(basic_synth == synth_1to1_real_neon) type = neon;
  258. #endif
  259.  
  260. #endif /* real */
  261.  
  262. #ifndef NO_32BIT
  263. #ifdef OPT_SSE
  264.         else if(basic_synth == synth_1to1_s32_sse) type = sse;
  265. #endif
  266. #ifdef OPT_X86_64
  267.         else if(basic_synth == synth_1to1_s32_x86_64) type = x86_64;
  268. #endif
  269. #ifdef OPT_ALTIVEC
  270.         else if(basic_synth == synth_1to1_s32_altivec) type = altivec;
  271. #endif
  272. #ifdef OPT_NEON
  273.         else if(basic_synth == synth_1to1_s32_neon) type = neon;
  274. #endif
  275. #endif /* 32bit */
  276.  
               /* Fallbacks: any entry of the i386 table means i386, any entry of the base table means generic. */
  277. #ifdef OPT_X86
  278.         else if(find_synth(basic_synth, plain_i386))
  279.         type = idrei;
  280. #endif
  281.  
  282.         else if(find_synth(basic_synth, synth_base.plain))
  283.         type = generic;
  284.  
  285.  
  286.  
  287. #ifdef OPT_I486
  288.         /* i486 is special ... the specific code is in use for 16bit 1to1 stereo
  289.            otherwise we have i386 active... but still, the distinction doesn't matter.
                  NOTE: this assignment is not part of the ladder above; in an OPT_I486 build it
                  always replaces the result found so far, including nodec. */
  290.         type = ivier;
  291. #endif
  292.  
  293.         if(type != nodec)
  294.         {
  295.                 fr->cpu_opts.type = type;
  296.                 fr->cpu_opts.class = decclass(type);
  297.  
  298.                 debug3("determined active decoder type %i (%s) of class %i", type, decname[type], fr->cpu_opts.class);
  299.                 return MPG123_OK;
  300.         }
  301.         else
  302.         {
  303.                 if(NOQUIET) error("Unable to determine active decoder type -- this is SERIOUS b0rkage!");
  304.  
  305.                 fr->err = MPG123_BAD_DECODER_SETUP;
  306.                 return MPG123_ERR;
  307.         }
  308. }
  309.  
  310. /* set synth functions for current frame, optimizations handled by opt_* macros
          Steps: pick the output format column from fr->af.encoding and the resampling row from
          fr->down_sample, copy the matching entries of fr->synths into fr->synth, fr->synth_stereo
          and fr->synth_mono, determine the active decoder type, set up the frame buffers, the
          16to8 conversion table (8bit output only) and finally the layer and decode tables.
          Returns 0 on success. Failures return -1 or MPG123_ERR; the paths for a disabled output
          format, an unsupported resampling mode and a failed conv16to8 table do not set fr->err here. */
  311. int set_synth_functions(mpg123_handle *fr)
  312. {
  313.         enum synth_resample resample = r_none;
  314.         enum synth_format basic_format = f_none; /* Stays f_none if no compiled-in format matches the encoding. */
  315.  
  316.         /* Select the basic output format: 16bit, 8bit, real or 32bit, first match wins. */
  317.         if(FALSE){}
  318. #ifndef NO_16BIT
  319.         else if(fr->af.encoding & MPG123_ENC_16)
  320.         basic_format = f_16;
  321. #endif
  322. #ifndef NO_8BIT
  323.         else if(fr->af.encoding & MPG123_ENC_8)
  324.         basic_format = f_8;
  325. #endif
  326. #ifndef NO_REAL
  327.         else if(fr->af.encoding & MPG123_ENC_FLOAT)
  328.         basic_format = f_real;
  329. #endif
  330. #ifndef NO_32BIT
  331.         /* 24 bit integer means decoding to 32 bit first. */
  332.         else if(fr->af.encoding & MPG123_ENC_32 || fr->af.encoding & MPG123_ENC_24)
  333.         basic_format = f_32;
  334. #endif
  335.  
  336.         /* Make sure the chosen format is compiled into this lib. */
  337.         if(basic_format == f_none)
  338.         {
  339.                 if(NOQUIET) error("set_synth_functions: This output format is disabled in this build!");
  340.  
  341.                 return -1;
  342.         }
  343.  
  344.         /* Be explicit about downsampling variant.
                  Values without a compiled-in case leave resample at r_none, which is rejected below. */
  345.         switch(fr->down_sample)
  346.         {
  347.                 case 0: resample = r_1to1; break;
  348. #ifndef NO_DOWNSAMPLE
  349.                 case 1: resample = r_2to1; break;
  350.                 case 2: resample = r_4to1; break;
  351. #endif
  352. #ifndef NO_NTOM
  353.                 case 3: resample = r_ntom; break;
  354. #endif
  355.         }
  356.  
  357.         if(resample == r_none)
  358.         {
  359.                 if(NOQUIET) error("set_synth_functions: This resampling mode is not supported in this build!");
  360.  
  361.                 return -1;
  362.         }
  363.  
  364.         debug2("selecting synth: resample=%i format=%i", resample, basic_format);
  365.         /* Finally selecting the synth functions for stereo / mono. */
  366.         fr->synth = fr->synths.plain[resample][basic_format];
  367.         fr->synth_stereo = fr->synths.stereo[resample][basic_format];
  368.         fr->synth_mono = fr->af.channels==2
  369.                 ? fr->synths.mono2stereo[resample][basic_format] /* Mono MPEG file decoded to stereo. */
  370.                 : fr->synths.mono[resample][basic_format];       /* Mono MPEG file decoded to mono. */
  371.  
  372.         if(find_dectype(fr) != MPG123_OK) /* Actually determine the currently active decoder breed. */
  373.         {
  374.                 fr->err = MPG123_BAD_DECODER_SETUP;
  375.                 return MPG123_ERR;
  376.         }
  377.  
  378.         if(frame_buffers(fr) != 0)
  379.         {
  380.                 fr->err = MPG123_NO_BUFFERS;
  381.                 if(NOQUIET) error("Failed to set up decoder buffers!");
  382.  
  383.                 return MPG123_ERR;
  384.         }
  385.  
  386. #ifndef NO_8BIT
  387.         if(basic_format == f_8)
  388.         {
  389.                 if(make_conv16to8_table(fr) != 0)
  390.                 {
  391.                         if(NOQUIET) error("Failed to set up conv16to8 table!");
  392.                         /* it's a bit more work to get proper error propagation up */
  393.                         return -1;
  394.                 }
  395.         }
  396. #endif
  397.  
  398. #ifdef OPT_MMXORSSE
  399.         /* Special treatment for MMX, SSE and 3DNowExt stuff.
  400.            The real-decoding SSE for x86-64 uses normal tables!
                  The MMX style tables are only set up for the mmxsse class, never for real or 32bit
                  output, and with ACCURATE_ROUNDING not for the sse, x86_64 and neon types. */
  401.         if(fr->cpu_opts.class == mmxsse
  402. #       ifndef NO_REAL
  403.            && basic_format != f_real
  404. #       endif
  405. #       ifndef NO_32BIT
  406.            && basic_format != f_32
  407. #       endif
  408. #       ifdef ACCURATE_ROUNDING
  409.            && fr->cpu_opts.type != sse
  410.            && fr->cpu_opts.type != x86_64
  411.            && fr->cpu_opts.type != neon
  412. #       endif
  413.           )
  414.         {
  415. #ifndef NO_LAYER3
  416.                 init_layer3_stuff(fr, init_layer3_gainpow2_mmx);
  417. #endif
  418. #ifndef NO_LAYER12
  419.                 init_layer12_stuff(fr, init_layer12_table_mmx);
  420. #endif
  421.                 fr->make_decode_tables = make_decode_tables_mmx;
  422.         }
  423.         else
  424. #endif
               /* Without OPT_MMXORSSE only this block remains and always runs. */
  425.         {
  426. #ifndef NO_LAYER3
  427.                 init_layer3_stuff(fr, init_layer3_gainpow2);
  428. #endif
  429. #ifndef NO_LAYER12
  430.                 init_layer12_stuff(fr, init_layer12_table);
  431. #endif
  432.                 fr->make_decode_tables = make_decode_tables;
  433.         }
  434.  
  435.         /* We allocated the table buffers just now, so (re)create the tables. */
  436.         fr->make_decode_tables(fr);
  437.  
  438.         return 0;
  439. }
  440.  
       /* Choose the decoder optimization for a handle and fill its synth function tables.
          fr:  handle whose fr->synths and fr->cpu_opts get set
          cpu: decoder name as understood by dectype(); NULL or an empty string means automatic choice
          fr->synths starts out as a copy of synth_base. The candidate decoders are then tried in
          the order written below; the first one that is compiled in, wanted (or auto-chosen) and
          passes its CPU feature tests overrides single table entries and ends the search.
          Returns 1 if a decoder was set, 0 if none could be set or the dither setup failed. */
  441. int frame_cpu_opt(mpg123_handle *fr, const char* cpu)
  442. {
  443.         const char* chosen = ""; /* the chosen decoder opt as string */
  444.         enum optdec want_dec = nodec;
  445.         int done = 0;
  446.         int auto_choose = 0;
  447. #ifdef OPT_DITHER
  448.         int dithered = FALSE; /* If some dithered decoder is chosen. */
  449. #endif
  450.  
  451.         want_dec = dectype(cpu);
  452.         auto_choose = want_dec == autodec;
  453.         /* Fill whole array of synth functions with generic code first. */
  454.         fr->synths = synth_base;
  455.  
  456. #ifndef OPT_MULTI
               /* Single-decoder build: a differing request is only reported, then the choice is automatic. */
  457.         {
  458.                 if(!auto_choose && want_dec != defopt)
  459.                 {
  460.                         if(NOQUIET) error2("you wanted decoder type %i, I only have %i", want_dec, defopt);
  461.                 }
  462.                 auto_choose = TRUE; /* There will be only one choice anyway. */
  463.         }
  464. #endif
  465.  
  466.         fr->cpu_opts.type = nodec;
  467.         /* covers any i386+ cpu; they actually differ only in the synth_1to1 function, mostly... */
  468. #ifdef OPT_X86
  469.  
  470. #ifdef OPT_MULTI
  471. #ifndef NO_LAYER3
  472. #if (defined OPT_3DNOW || defined OPT_3DNOWEXT)
  473.         fr->cpu_opts.the_dct36 = dct36;
  474. #endif
  475. #endif
  476. #endif
  477.  
  478.         if(cpu_i586(cpu_flags))
  479.         {
  480. #               ifdef OPT_MULTI
  481.                 debug2("standard flags: 0x%08x\textended flags: 0x%08x", cpu_flags.std, cpu_flags.ext);
  482. #               endif
  483.                 #ifdef OPT_SSE
  484.                 if(   !done && (auto_choose || want_dec == sse)
  485.                    && cpu_sse(cpu_flags) && cpu_mmx(cpu_flags) )
  486.                 {
  487.                         chosen = "SSE";
  488.                         fr->cpu_opts.type = sse;
  489. #                       ifndef NO_16BIT
  490.                         fr->synths.plain[r_1to1][f_16] = synth_1to1_sse;
  491. #                       ifdef ACCURATE_ROUNDING
  492.                         fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_sse;
  493. #                       endif
  494. #                       endif
  495. #                       ifndef NO_REAL
  496.                         fr->synths.plain[r_1to1][f_real] = synth_1to1_real_sse;
  497.                         fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_sse;
  498. #                       endif
  499. #                       ifndef NO_32BIT
  500.                         fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_sse;
  501.                         fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_sse;
  502. #                       endif
  503.                         done = 1;
  504.                 }
  505.                 #endif
  506. #               ifdef OPT_3DNOWEXT
  507.                 if(   !done && (auto_choose || want_dec == dreidnowext )
  508.                    && cpu_3dnow(cpu_flags)
  509.                    && cpu_3dnowext(cpu_flags)
  510.                    && cpu_mmx(cpu_flags) )
  511.                 {
  512.                         chosen = "3DNowExt";
  513.                         fr->cpu_opts.type = dreidnowext;
  514. #ifdef OPT_MULTI
  515. #                       ifndef NO_LAYER3
  516. /* The DCT36 is _bad_, at least compared to gcc 4.4-built C code. */
  517. /*                      fr->cpu_opts.the_dct36 = dct36_3dnowext; */
  518. #                       endif
  519. #endif
  520. #                       ifndef NO_16BIT
  521.                         fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnowext;
  522. #                       endif
  523.                         done = 1;
  524.                 }
  525.                 #endif
  526.                 #ifdef OPT_3DNOW
  527.                 if(    !done && (auto_choose || want_dec == dreidnow)
  528.                     && cpu_3dnow(cpu_flags) && cpu_mmx(cpu_flags) )
  529.                 {
  530.                         chosen = "3DNow";
  531.                         fr->cpu_opts.type = dreidnow;
  532. #ifdef OPT_MULTI
  533. #                       ifndef NO_LAYER3
  534. /* The DCT36 is _bad_, at least compared to gcc 4.4-built C code. */
  535. /*                      fr->cpu_opts.the_dct36 = dct36_3dnow; */
  536. #                       endif
  537. #endif
  538. #                       ifndef NO_16BIT
  539.                         fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnow;
  540. #                       endif
  541.                         done = 1;
  542.                 }
  543.                 #endif
  544.                 #ifdef OPT_MMX
  545.                 if(   !done && (auto_choose || want_dec == mmx)
  546.                    && cpu_mmx(cpu_flags) )
  547.                 {
  548.                         chosen = "MMX";
  549.                         fr->cpu_opts.type = mmx;
  550. #                       ifndef NO_16BIT
  551.                         fr->synths.plain[r_1to1][f_16] = synth_1to1_mmx;
  552. #                       endif
  553.                         done = 1;
  554.                 }
  555.                 #endif
  556.                 #ifdef OPT_I586
  557.                 if(!done && (auto_choose || want_dec == ifuenf))
  558.                 {
  559.                         chosen = "i586/pentium";
  560.                         fr->cpu_opts.type = ifuenf;
  561. #                       ifndef NO_16BIT
  562.                         fr->synths.plain[r_1to1][f_16] = synth_1to1_i586;
  563. #                       endif
  564.                         done = 1;
  565.                 }
  566.                 #endif
  567.                 #ifdef OPT_I586_DITHER
  568.                 if(!done && (auto_choose || want_dec == ifuenf_dither))
  569.                 {
  570.                         chosen = "dithered i586/pentium";
  571.                         fr->cpu_opts.type = ifuenf_dither;
  572.                         dithered = TRUE;
  573. #                       ifndef NO_16BIT
  574.                         fr->synths.plain[r_1to1][f_16] = synth_1to1_i586_dither;
  575. #                       ifndef NO_DOWNSAMPLE
  576.                         fr->synths.plain[r_2to1][f_16] = synth_2to1_dither;
  577.                         fr->synths.plain[r_4to1][f_16] = synth_4to1_dither;
  578. #                       endif
  579. #                       endif
  580.                         done = 1;
  581.                 }
  582.                 #endif
  583.         }
  584.         #ifdef OPT_I486
  585.         /* That won't cooperate in multi opt mode - forcing i486 in layer3.c
  586.            But still... here it is... maybe for real use in future. */
  587.         if(!done && (auto_choose || want_dec == ivier))
  588.         {
  589.                 chosen = "i486";
  590.                 fr->cpu_opts.type = ivier;
  591.                 done = 1;
  592.         }
  593.         #endif
  594.         #ifdef OPT_I386
  595.         if(!done && (auto_choose || want_dec == idrei))
  596.         {
  597.                 chosen = "i386";
  598.                 fr->cpu_opts.type = idrei;
  599.                 done = 1;
  600.         }
  601.         #endif
  602.  
  603.         if(done)
  604.         {
  605.                 /*
  606.                         We have chosen some x86 decoder... fillup some i386 stuff.
  607.                         There is an open question about using dithered synth_1to1 for 8bit wrappers.
  608.                         For quality it won't make sense, but wrapped i586_dither wrapped may still be faster...
  609.                 */
  610.                 enum synth_resample ri;
  611.                 enum synth_format   fi;
  612. #               ifndef NO_8BIT
  613. #               ifndef NO_16BIT /* possibility to use a 16->8 wrapper... */
                       /* A replaced 16bit 1to1 synth gets reused for 8bit output through the wrappers. */
  614.                 if(fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16])
  615.                 {
  616.                         fr->synths.plain[r_1to1][f_8] = synth_1to1_8bit_wrap;
  617.                         fr->synths.mono[r_1to1][f_8] = synth_1to1_8bit_wrap_mono;
  618.                         fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_m2s;
  619.                 }
  620. #               endif
  621. #               endif
                       /* Every plain entry that is still the generic one gets the i386 variant. */
  622.                 for(ri=0; ri<r_limit; ++ri)
  623.                 for(fi=0; fi<f_limit; ++fi)
  624.                 {
  625.                         if(fr->synths.plain[ri][fi] == synth_base.plain[ri][fi])
  626.                         fr->synths.plain[ri][fi] = plain_i386[ri][fi];
  627.                 }
  628.         }
  629.  
  630. #endif /* OPT_X86 */
  631.  
  632. #ifdef OPT_X86_64
  633.         if(!done && (auto_choose || want_dec == x86_64))
  634.         {
  635.                 chosen = "x86-64 (SSE)";
  636.                 fr->cpu_opts.type = x86_64;
  637. #               ifndef NO_16BIT
  638.                 fr->synths.plain[r_1to1][f_16] = synth_1to1_x86_64;
  639.                 fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_x86_64;
  640. #               endif
  641. #               ifndef NO_REAL
  642.                 fr->synths.plain[r_1to1][f_real] = synth_1to1_real_x86_64;
  643.                 fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_x86_64;
  644. #               endif
  645. #               ifndef NO_32BIT
  646.                 fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_x86_64;
  647.                 fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_x86_64;
  648. #               endif
  649.                 done = 1;
  650.         }
  651. #endif
  652.  
  653. #ifdef OPT_GENERIC_DITHER
  654.         if(!done && (auto_choose || want_dec == generic_dither))
  655.         {
  656.                 chosen = "dithered generic";
  657.                 fr->cpu_opts.type = generic_dither;
  658.                 dithered = TRUE;
  659. #               ifndef NO_16BIT
  660.                 fr->synths.plain[r_1to1][f_16] = synth_1to1_dither;
  661. #               ifndef NO_DOWNSAMPLE
  662.                 fr->synths.plain[r_2to1][f_16] = synth_2to1_dither;
  663.                 fr->synths.plain[r_4to1][f_16] = synth_4to1_dither;
  664. #               endif
  665. #               endif
  666.                 done = 1;
  667.         }
  668. #endif
  669.  
  670. #       ifdef OPT_ALTIVEC
  671.         if(!done && (auto_choose || want_dec == altivec))
  672.         {
  673.                 chosen = "AltiVec";
  674.                 fr->cpu_opts.type = altivec;
  675. #               ifndef NO_16BIT
  676.                 fr->synths.plain[r_1to1][f_16] = synth_1to1_altivec;
  677.                 fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_altivec;
  678. #               endif
  679. #               ifndef NO_REAL
  680.                 fr->synths.plain[r_1to1][f_real] = synth_1to1_real_altivec;
  681.                 fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_altivec;
  682. #               endif
  683. #               ifndef NO_32BIT
  684.                 fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_altivec;
  685.                 fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_altivec;
  686. #               endif
  687.                 done = 1;
  688.         }
  689. #       endif
  690.  
  691. #       ifdef OPT_NEON
  692.         if(!done && (auto_choose || want_dec == neon))
  693.         {
  694.                 chosen = "NEON";
  695.                 fr->cpu_opts.type = neon;
  696. #               ifndef NO_16BIT
  697.                 fr->synths.plain[r_1to1][f_16] = synth_1to1_neon;
  698.                 fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_neon;
  699. #               endif
  700. #               ifndef NO_REAL
  701.                 fr->synths.plain[r_1to1][f_real] = synth_1to1_real_neon;
  702.                 fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_neon;
  703. #               endif
  704. #               ifndef NO_32BIT
  705.                 fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_neon;
  706.                 fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_neon;
  707. #               endif
  708.                 done = 1;
  709.         }
  710. #       endif
  711.  
  712. #       ifdef OPT_ARM
  713.         if(!done && (auto_choose || want_dec == arm))
  714.         {
  715.                 chosen = "ARM";
  716.                 fr->cpu_opts.type = arm;
  717. #               ifndef NO_16BIT
  718.                 fr->synths.plain[r_1to1][f_16] = synth_1to1_arm;
  719. #               endif
  720.                 done = 1;
  721.         }
  722. #       endif
  723.  
  724. #       ifdef OPT_GENERIC
               /* Plain generic decoder: the table copied from synth_base is used unchanged. */
  725.         if(!done && (auto_choose || want_dec == generic))
  726.         {
  727.                 chosen = "generic";
  728.                 fr->cpu_opts.type = generic;
  729.                 done = 1;
  730.         }
  731. #       endif
  732.  
  733.         fr->cpu_opts.class = decclass(fr->cpu_opts.type);
  734.  
  735. #       ifndef NO_8BIT
  736. #       ifndef NO_16BIT /* possibility to use a 16->8 wrapper... */
  737.         /* Last chance to use some optimized routine via generic wrappers (for 8bit).
                  The dithered decoder types are excluded by this check. */
  738.         if(     fr->cpu_opts.type != ifuenf_dither
  739.              && fr->cpu_opts.type != generic_dither
  740.              && fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16] )
  741.         {
  742.                 fr->synths.plain[r_1to1][f_8] = synth_1to1_8bit_wrap;
  743.                 fr->synths.mono[r_1to1][f_8] = synth_1to1_8bit_wrap_mono;
  744.                 fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_m2s;
  745.         }
  746. #       endif
  747. #       endif
  748.  
  749. #ifdef OPT_DITHER
  750.         if(done && dithered)
  751.         {
  752.                 /* run-time dither noise table generation */
  753.                 if(!frame_dither_init(fr))
  754.                 {
  755.                         if(NOQUIET) error("Dither noise setup failed!");
  756.                         return 0;
  757.                 }
  758.         }
  759. #endif
  760.  
  761.         if(done)
  762.         {
  763.                 if(VERBOSE) fprintf(stderr, "Decoder: %s\n", chosen);
  764.                 return 1;
  765.         }
  766.         else
  767.         {
  768.                 if(NOQUIET) error("Could not set optimization!");
  769.                 return 0;
  770.         }
  771. }
  772.  
  773. enum optdec dectype(const char* decoder)
  774. {
  775.         enum optdec dt;
  776.         if(   (decoder == NULL)
  777.            || (decoder[0] == 0) )
  778.         return autodec;
  779.  
  780.         for(dt=autodec; dt<nodec; ++dt)
  781.         if(!strcasecmp(decoder, decname[dt])) return dt;
  782.  
  783.         return nodec; /* If we found nothing... */
  784. }
  785.  
  #if defined(OPT_MULTI)

  /*
      Storage for the list of decoders usable at run time.
      It starts out entirely NULL: one slot is reserved for each decoder
      option compiled in, followed by one unconditional NULL.
      check_decoders() stores names beginning at the first slot; slots it does
      not write keep their NULL value.
      Note: OPT_GENERIC_FLOAT reserves a slot here, too.
  */
  static const char *mpg123_supported_decoder_list[] =
  {
  #if defined(OPT_SSE)
      NULL,
  #endif
  #if defined(OPT_3DNOWEXT)
      NULL,
  #endif
  #if defined(OPT_3DNOW)
      NULL,
  #endif
  #if defined(OPT_MMX)
      NULL,
  #endif
  #if defined(OPT_I586)
      NULL,
  #endif
  #if defined(OPT_I586_DITHER)
      NULL,
  #endif
  #if defined(OPT_I486)
      NULL,
  #endif
  #if defined(OPT_I386)
      NULL,
  #endif
  #if defined(OPT_ALTIVEC)
      NULL,
  #endif
  #if defined(OPT_X86_64)
      NULL,
  #endif
  #if defined(OPT_ARM)
      NULL,
  #endif
  #if defined(OPT_NEON)
      NULL,
  #endif
  #if defined(OPT_GENERIC_FLOAT)
      NULL,
  #endif
  #if defined(OPT_GENERIC)
      NULL,
  #endif
  #if defined(OPT_GENERIC_DITHER)
      NULL,
  #endif
      NULL
  };

  #endif /* OPT_MULTI */
  839.  
  /*
      Names of all decoders compiled into this build, one entry per defined
      OPT_* macro, closed by a NULL entry.
      This is the table handed out by mpg123_decoders().
  */
  static const char *mpg123_decoder_list[] =
  {
  #if defined(OPT_SSE)
      dn_SSE,
  #endif
  #if defined(OPT_3DNOWEXT)
      dn_3DNowExt,
  #endif
  #if defined(OPT_3DNOW)
      dn_3DNow,
  #endif
  #if defined(OPT_MMX)
      dn_MMX,
  #endif
  #if defined(OPT_I586)
      dn_i586,
  #endif
  #if defined(OPT_I586_DITHER)
      dn_i586_dither,
  #endif
  #if defined(OPT_I486)
      dn_i486,
  #endif
  #if defined(OPT_I386)
      dn_i386,
  #endif
  #if defined(OPT_ALTIVEC)
      dn_AltiVec,
  #endif
  #if defined(OPT_X86_64)
      dn_x86_64,
  #endif
  #if defined(OPT_ARM)
      dn_ARM,
  #endif
  #if defined(OPT_NEON)
      dn_NEON,
  #endif
  #if defined(OPT_GENERIC)
      dn_generic,
  #endif
  #if defined(OPT_GENERIC_DITHER)
      dn_generic_dither,
  #endif
      NULL
  };
  886.  
  /*
      Fill mpg123_supported_decoder_list with the names of the decoders that
      can be used on the machine the code is running on.

      Without OPT_MULTI this is a no-op. With OPT_MULTI, names are stored in a
      fixed order starting at the first slot of the list. On OPT_X86 builds the
      CPU flags are queried first, and the entries from SSE down to i586_dither
      are only considered when cpu_i586(cpu_flags) holds.
  */
  void check_decoders(void )
  {
  #ifdef OPT_MULTI
      const char **slot = mpg123_supported_decoder_list;

  #if defined(OPT_X86)
      getcpuflags(&cpu_flags);
      if(cpu_i586(cpu_flags))
      {
          /* Not yet reported: SSE2 and SSE3. */
  #if defined(OPT_SSE)
          if(cpu_sse(cpu_flags))
          {
              *slot++ = decname[sse];
          }
  #endif
  #if defined(OPT_3DNOWEXT)
          if(cpu_3dnowext(cpu_flags))
          {
              *slot++ = decname[dreidnowext];
          }
  #endif
  #if defined(OPT_3DNOW)
          if(cpu_3dnow(cpu_flags))
          {
              *slot++ = decname[dreidnow];
          }
  #endif
  #if defined(OPT_MMX)
          if(cpu_mmx(cpu_flags))
          {
              *slot++ = decname[mmx];
          }
  #endif
          /* The plain i586 variants need no further feature test. */
  #if defined(OPT_I586)
          *slot++ = decname[ifuenf];
  #endif
  #if defined(OPT_I586_DITHER)
          *slot++ = decname[ifuenf_dither];
  #endif
      }
  #endif /* OPT_X86 */

      /* Simply assume that an i486 build is run on an i486 CPU. */
  #if defined(OPT_I486)
      *slot++ = decname[ivier];
  #endif
  #if defined(OPT_ALTIVEC)
      *slot++ = decname[altivec];
  #endif
      /* Every supported x86 can do i386, any CPU can do generic. */
  #if defined(OPT_I386)
      *slot++ = decname[idrei];
  #endif
  #if defined(OPT_X86_64)
      *slot++ = decname[x86_64];
  #endif
  #if defined(OPT_ARM)
      *slot++ = decname[arm];
  #endif
  #if defined(OPT_NEON)
      *slot++ = decname[neon];
  #endif
  #if defined(OPT_GENERIC)
      *slot++ = decname[generic];
  #endif
  #if defined(OPT_GENERIC_DITHER)
      *slot++ = decname[generic_dither];
  #endif

  #else /* !OPT_MULTI */
      /* In non-multi mode, only the full list (one entry) is used. */
      return;
  #endif /* OPT_MULTI */
  }
  948.  
  949. const char* attribute_align_arg mpg123_current_decoder(mpg123_handle *mh)
  950. {
  951.         if(mh == NULL) return NULL;
  952.  
  953.         return decname[mh->cpu_opts.type];
  954. }
  955.  
  956. const char attribute_align_arg **mpg123_decoders(void){ return mpg123_decoder_list; }
  957. const char attribute_align_arg **mpg123_supported_decoders(void)
  958. {
  959. #ifdef OPT_MULTI
  960.         return mpg123_supported_decoder_list;
  961. #else
  962.         return mpg123_decoder_list;
  963. #endif
  964. }
  965.