Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

  1. /*
  2.         optimize: get a grip on the different optimizations
  3.  
  4.         copyright 2006-9 by the mpg123 project - free software under the terms of the LGPL 2.1
  5.         see COPYING and AUTHORS files in distribution or http://mpg123.org
  6.         initially written by Thomas Orgis, inspired by 3DNow stuff in mpg123.[hc]
  7.  
  8.         Currently, this file contains the struct and function to choose an optimization variant and works only when OPT_MULTI is in effect.
  9. */
  10.  
  11. #include "mpg123lib_intern.h" /* includes optimize.h */
  12. #include "debug.h"
  13.  
  14. /* Must match the enum dectype! */
  15.  
  16. /*
  17.         It SUCKS having to define these names that way, but compile-time initialization of string arrays is a bitch.
  18.         GCC doesn't see constant stuff when it's wiggling in front of it!
  19.         Anyhow: Have a script for that:
  20. names="generic generic_dither i386 i486 i586 i586_dither MMX 3DNow 3DNowExt AltiVec SSE x86-64"
  21. for i in $names; do echo "##define dn_${i/-/_} \"$i\""; done
  22. echo -n "static const char* decname[] =
  23. {
  24.         \"auto\"
  25.         "
  26. for i in $names; do echo -n ", dn_${i/-/_}"; done
  27. echo "
  28.         , \"nodec\"
  29. };"
  30. */
  31. #define dn_generic "generic"
  32. #define dn_generic_dither "generic_dither"
  33. #define dn_i386 "i386"
  34. #define dn_i486 "i486"
  35. #define dn_i586 "i586"
  36. #define dn_i586_dither "i586_dither"
  37. #define dn_MMX "MMX"
  38. #define dn_3DNow "3DNow"
  39. #define dn_3DNowExt "3DNowExt"
  40. #define dn_AltiVec "AltiVec"
  41. #define dn_SSE "SSE"
  42. #define dn_x86_64 "x86-64"
  43. #define dn_ARM "ARM"
  44. static const char* decname[] =
  45. {
  46.         "auto"
  47.         , dn_generic, dn_generic_dither, dn_i386, dn_i486, dn_i586, dn_i586_dither, dn_MMX, dn_3DNow, dn_3DNowExt, dn_AltiVec, dn_SSE, dn_x86_64, dn_ARM
  48.         , "nodec"
  49. };
  50.  
  51. #if (defined OPT_X86) && (defined OPT_MULTI)
  52. #include "getcpuflags.h"
  53. struct cpuflags cpu_flags;
  54. #else
  55. /* Faking stuff for non-multi builds. The same code for synth function choice is used.
  56.    Just no runtime dependency of result... */
  57. char cpu_flags;
  58. #define cpu_i586(s)     1
  59. #define cpu_fpu(s)      1
  60. #define cpu_mmx(s)      1
  61. #define cpu_3dnow(s)    1
  62. #define cpu_3dnowext(s) 1
  63. #define cpu_sse(s)      1
  64. #define cpu_sse2(s)     1
  65. #define cpu_sse3(s)     1
  66. #endif
  67.  
  68. /* Ugly macros to build conditional synth function array values. */
  69.  
  70. #ifndef NO_8BIT
  71. #define IF8(synth) synth,
  72. #else
  73. #define IF8(synth)
  74. #endif
  75.  
  76. #ifndef NO_REAL
  77. #define IFREAL(synth) synth,
  78. #else
  79. #define IFREAL(synth)
  80. #endif
  81.  
  82. #ifndef NO_32BIT
  83. #define IF32(synth) synth
  84. #else
  85. #define IF32(synth)
  86. #endif
  87.  
  88. #ifndef NO_16BIT
  89. #       define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) { synth_16, IF8(synth_8) IFREAL(synth_real) IF32(synth_32) }
  90. #else
  91. #       define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) { IF8(synth_8) IFREAL(synth_real) IF32(synth_32) }
  92. #endif
  93.  
/*
        Baseline synth function table, built from the generic synth functions.
        frame_cpu_opt() copies it into a handle before it substitutes any
        optimized variants.
        Each of the four groups below holds one row per resampling mode:
        1to1 first, then 2to1 and 4to1 unless NO_DOWNSAMPLE is defined, then
        ntom unless NO_NTOM is defined. OUT_SYNTHS lays out the columns of a
        row as 16bit, 8bit, real, 32bit, leaving out disabled formats.
*/
const struct synth_s synth_base =
{
        { /* plain */
                 OUT_SYNTHS(synth_1to1, synth_1to1_8bit, synth_1to1_real, synth_1to1_s32)
#               ifndef NO_DOWNSAMPLE
                ,OUT_SYNTHS(synth_2to1, synth_2to1_8bit, synth_2to1_real, synth_2to1_s32)
                ,OUT_SYNTHS(synth_4to1, synth_4to1_8bit, synth_4to1_real, synth_4to1_s32)
#               endif
#               ifndef NO_NTOM
                ,OUT_SYNTHS(synth_ntom, synth_ntom_8bit, synth_ntom_real, synth_ntom_s32)
#               endif
        },
        { /* stereo, by default only wrappers over plain synth; the same wrapper serves every mode and format */
                 OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
#               ifndef NO_DOWNSAMPLE
                ,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
                ,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
#               endif
#               ifndef NO_NTOM
                ,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
#               endif
        },
        { /* mono2stereo: used by set_synth_functions() for mono input when the output has 2 channels */
                 OUT_SYNTHS(synth_1to1_mono2stereo, synth_1to1_8bit_mono2stereo, synth_1to1_real_mono2stereo, synth_1to1_s32_mono2stereo)
#               ifndef NO_DOWNSAMPLE
                ,OUT_SYNTHS(synth_2to1_mono2stereo, synth_2to1_8bit_mono2stereo, synth_2to1_real_mono2stereo, synth_2to1_s32_mono2stereo)
                ,OUT_SYNTHS(synth_4to1_mono2stereo, synth_4to1_8bit_mono2stereo, synth_4to1_real_mono2stereo, synth_4to1_s32_mono2stereo)
#               endif
#               ifndef NO_NTOM
                ,OUT_SYNTHS(synth_ntom_mono2stereo, synth_ntom_8bit_mono2stereo, synth_ntom_real_mono2stereo, synth_ntom_s32_mono2stereo)
#               endif
        },
        { /* mono: used by set_synth_functions() for mono input when the output is not 2 channels */
                 OUT_SYNTHS(synth_1to1_mono, synth_1to1_8bit_mono, synth_1to1_real_mono, synth_1to1_s32_mono)
#               ifndef NO_DOWNSAMPLE
                ,OUT_SYNTHS(synth_2to1_mono, synth_2to1_8bit_mono, synth_2to1_real_mono, synth_2to1_s32_mono)
                ,OUT_SYNTHS(synth_4to1_mono, synth_4to1_8bit_mono, synth_4to1_real_mono, synth_4to1_s32_mono)
#               endif
#               ifndef NO_NTOM
                ,OUT_SYNTHS(synth_ntom_mono, synth_ntom_8bit_mono, synth_ntom_real_mono, synth_ntom_s32_mono)
#endif
        }
};
  137.  
#ifdef OPT_X86
/*
        More plain synths for i386.
        Same row/column layout as the plain group of synth_base. Once some x86
        decoder has been chosen, frame_cpu_opt() puts these in place of every
        plain entry that still holds the generic function.
*/
const func_synth plain_i386[r_limit][f_limit] =
{ /* plain */
         OUT_SYNTHS(synth_1to1_i386, synth_1to1_8bit_i386, synth_1to1_real_i386, synth_1to1_s32_i386)
#       ifndef NO_DOWNSAMPLE
        ,OUT_SYNTHS(synth_2to1_i386, synth_2to1_8bit_i386, synth_2to1_real_i386, synth_2to1_s32_i386)
        ,OUT_SYNTHS(synth_4to1_i386, synth_4to1_8bit_i386, synth_4to1_real_i386, synth_4to1_s32_i386)
#       endif
#       ifndef NO_NTOM
        /* The ntom row lists the generic functions; there are no _i386 names here. */
        ,OUT_SYNTHS(synth_ntom, synth_ntom_8bit, synth_ntom_real, synth_ntom_s32)
#       endif
};
#endif
  152.  
  153.  
  154. enum optdec defdec(void){ return defopt; }
  155.  
  156. enum optcla decclass(const enum optdec type)
  157. {
  158.         return (type == mmx || type == sse || type == dreidnowext || type == x86_64 ) ? mmxsse : normal;
  159. }
  160.  
  161.  
  162. static int find_synth(func_synth synth,  const func_synth synths[r_limit][f_limit])
  163. {
  164.         enum synth_resample ri;
  165.         enum synth_format   fi;
  166.         for(ri=0; ri<r_limit; ++ri)
  167.         for(fi=0; fi<f_limit; ++fi)
  168.         if(synth == synths[ri][fi])
  169.         return TRUE;
  170.  
  171.         return FALSE;
  172. }
  173.  
/* Determine what kind of decoder is actually active
   This depends on runtime choices which may cause fallback to i386 or generic code.

   The currently selected plain synth (fr->synth) is compared against the
   known synth functions; the first match in the ladder below decides the type.
   On success fr->cpu_opts.type and fr->cpu_opts.class are set and MPG123_OK is
   returned. If nothing matches, fr->err is set to MPG123_BAD_DECODER_SETUP and
   MPG123_ERR is returned. */
static int find_dectype(mpg123_handle *fr)
{
        enum optdec type = nodec;
        /* Direct and indirect usage, 1to1 stereo decoding.
           Concentrating on the plain stereo synth should be fine, mono stuff is derived. */
        func_synth basic_synth = fr->synth;
#ifndef NO_8BIT
#ifndef NO_16BIT
        /* The 8bit wrapper sits on top of the 16bit synth, so classify that one instead. */
        if(basic_synth == synth_1to1_8bit_wrap)
        basic_synth = fr->synths.plain[r_1to1][f_16]; /* That is what's really below the surface. */
#endif
#endif

        if(FALSE) ; /* Just to initialize the else if ladder. */
#ifndef NO_16BIT
        /* 16bit synths of the optimized decoders that are compiled in. */
#ifdef OPT_3DNOWEXT
        else if(basic_synth == synth_1to1_3dnowext) type = dreidnowext;
#endif
#ifdef OPT_SSE
        else if(basic_synth == synth_1to1_sse) type = sse;
#endif
#ifdef OPT_3DNOW
        else if(basic_synth == synth_1to1_3dnow) type = dreidnow;
#endif
#ifdef OPT_MMX
        else if(basic_synth == synth_1to1_mmx) type = mmx;
#endif
#ifdef OPT_I586_DITHER
        else if(basic_synth == synth_1to1_i586_dither) type = ifuenf_dither;
#endif
#ifdef OPT_I586
        else if(basic_synth == synth_1to1_i586) type = ifuenf;
#endif
#ifdef OPT_ALTIVEC
        else if(basic_synth == synth_1to1_altivec) type = altivec;
#endif
#ifdef OPT_X86_64
        else if(basic_synth == synth_1to1_x86_64) type = x86_64;
#endif
#ifdef OPT_ARM
        else if(basic_synth == synth_1to1_arm) type = arm;
#endif
#ifdef OPT_GENERIC_DITHER
        else if(basic_synth == synth_1to1_dither) type = generic_dither;
#endif
#ifdef OPT_DITHER /* either i586 or generic! */
#ifndef NO_DOWNSAMPLE
        /* The downsampling dither synths are reported as generic_dither in either case. */
        else if
        (
                   basic_synth == synth_2to1_dither
                || basic_synth == synth_4to1_dither
        ) type = generic_dither;
#endif
#endif
#endif /* 16bit */

#ifndef NO_REAL
        /* Floating point output synths. */
#ifdef OPT_SSE
        else if(basic_synth == synth_1to1_real_sse) type = sse;
#endif
#ifdef OPT_X86_64
        else if(basic_synth == synth_1to1_real_x86_64) type = x86_64;
#endif
#ifdef OPT_ALTIVEC
        else if(basic_synth == synth_1to1_real_altivec) type = altivec;
#endif

#endif /* real */

#ifndef NO_32BIT
        /* 32bit integer output synths. */
#ifdef OPT_SSE
        else if(basic_synth == synth_1to1_s32_sse) type = sse;
#endif
#ifdef OPT_X86_64
        else if(basic_synth == synth_1to1_s32_x86_64) type = x86_64;
#endif
#ifdef OPT_ALTIVEC
        else if(basic_synth == synth_1to1_s32_altivec) type = altivec;
#endif
#endif /* 32bit */

#ifdef OPT_X86
        /* Anything out of the i386 table counts as i386 decoder. */
        else if(find_synth(basic_synth, plain_i386))
        type = idrei;
#endif

        /* Last resort: one of the generic plain synths. */
        else if(find_synth(basic_synth, synth_base.plain))
        type = generic;



#ifdef OPT_I486
        /* i486 is special ... the specific code is in use for 16bit 1to1 stereo
           otherwise we have i386 active... but still, the distinction doesn't matter*/
        /* NOTE(review): this assignment is unconditional, so in an OPT_I486 build
           the result of the ladder above is overridden and the nodec branch below
           cannot be taken. */
        type = ivier;
#endif

        if(type != nodec)
        {
                fr->cpu_opts.type = type;
                fr->cpu_opts.class = decclass(type);

                debug3("determined active decoder type %i (%s) of class %i", type, decname[type], fr->cpu_opts.class);
                return MPG123_OK;
        }
        else
        {
                if(NOQUIET) error("Unable to determine active decoder type -- this is SERIOUS b0rkage!");

                fr->err = MPG123_BAD_DECODER_SETUP;
                return MPG123_ERR;
        }
}
  289.  
  290. /* set synth functions for current frame, optimizations handled by opt_* macros */
  291. int set_synth_functions(mpg123_handle *fr)
  292. {
  293.         enum synth_resample resample = r_none;
  294.         enum synth_format basic_format = f_none; /* Default is always 16bit, or whatever. */
  295.  
  296.         /* Select the basic output format, different from 16bit: 8bit, real. */
  297.         if(FALSE){}
  298. #ifndef NO_16BIT
  299.         else if(fr->af.encoding & MPG123_ENC_16)
  300.         basic_format = f_16;
  301. #endif
  302. #ifndef NO_8BIT
  303.         else if(fr->af.encoding & MPG123_ENC_8)
  304.         basic_format = f_8;
  305. #endif
  306. #ifndef NO_REAL
  307.         else if(fr->af.encoding & MPG123_ENC_FLOAT)
  308.         basic_format = f_real;
  309. #endif
  310. #ifndef NO_32BIT
  311.         else if(fr->af.encoding & MPG123_ENC_32)
  312.         basic_format = f_32;
  313. #endif
  314.  
  315.         /* Make sure the chosen format is compiled into this lib. */
  316.         if(basic_format == f_none)
  317.         {
  318.                 if(NOQUIET) error("set_synth_functions: This output format is disabled in this build!");
  319.  
  320.                 return -1;
  321.         }
  322.  
  323.         /* Be explicit about downsampling variant. */
  324.         switch(fr->down_sample)
  325.         {
  326.                 case 0: resample = r_1to1; break;
  327. #ifndef NO_DOWNSAMPLE
  328.                 case 1: resample = r_2to1; break;
  329.                 case 2: resample = r_4to1; break;
  330. #endif
  331. #ifndef NO_NTOM
  332.                 case 3: resample = r_ntom; break;
  333. #endif
  334.         }
  335.  
  336.         if(resample == r_none)
  337.         {
  338.                 if(NOQUIET) error("set_synth_functions: This resampling mode is not supported in this build!");
  339.  
  340.                 return -1;
  341.         }
  342.  
  343.         debug2("selecting synth: resample=%i format=%i", resample, basic_format);
  344.         /* Finally selecting the synth functions for stereo / mono. */
  345.         fr->synth = fr->synths.plain[resample][basic_format];
  346.         fr->synth_stereo = fr->synths.stereo[resample][basic_format];
  347.         fr->synth_mono = fr->af.channels==2
  348.                 ? fr->synths.mono2stereo[resample][basic_format] /* Mono MPEG file decoded to stereo. */
  349.                 : fr->synths.mono[resample][basic_format];       /* Mono MPEG file decoded to mono. */
  350.  
  351.         if(find_dectype(fr) != MPG123_OK) /* Actually determine the currently active decoder breed. */
  352.         {
  353.                 fr->err = MPG123_BAD_DECODER_SETUP;
  354.                 return MPG123_ERR;
  355.         }
  356.  
  357.         if(frame_buffers(fr) != 0)
  358.         {
  359.                 fr->err = MPG123_NO_BUFFERS;
  360.                 if(NOQUIET) error("Failed to set up decoder buffers!");
  361.  
  362.                 return MPG123_ERR;
  363.         }
  364.  
  365. #ifndef NO_8BIT
  366.         if(basic_format == f_8)
  367.         {
  368.                 if(make_conv16to8_table(fr) != 0)
  369.                 {
  370.                         if(NOQUIET) error("Failed to set up conv16to8 table!");
  371.                         /* it's a bit more work to get proper error propagation up */
  372.                         return -1;
  373.                 }
  374.         }
  375. #endif
  376.  
  377. #ifdef OPT_MMXORSSE
  378.         /* Special treatment for MMX, SSE and 3DNowExt stuff.
  379.            The real-decoding SSE for x86-64 uses normal tables! */
  380.         if(fr->cpu_opts.class == mmxsse
  381. #       ifndef NO_REAL
  382.            && basic_format != f_real
  383. #       endif
  384. #       ifndef NO_32BIT
  385.            && basic_format != f_32
  386. #       endif
  387. #       ifdef ACCURATE_ROUNDING
  388.            && fr->cpu_opts.type != sse
  389.            && fr->cpu_opts.type != x86_64
  390. #       endif
  391.           )
  392.         {
  393. #ifndef NO_LAYER3
  394.                 init_layer3_stuff(fr, init_layer3_gainpow2_mmx);
  395. #endif
  396. #ifndef NO_LAYER12
  397.                 init_layer12_stuff(fr, init_layer12_table_mmx);
  398. #endif
  399.                 fr->make_decode_tables = make_decode_tables_mmx;
  400.         }
  401.         else
  402. #endif
  403.         {
  404. #ifndef NO_LAYER3
  405.                 init_layer3_stuff(fr, init_layer3_gainpow2);
  406. #endif
  407. #ifndef NO_LAYER12
  408.                 init_layer12_stuff(fr, init_layer12_table);
  409. #endif
  410.                 fr->make_decode_tables = make_decode_tables;
  411.         }
  412.  
  413.         /* We allocated the table buffers just now, so (re)create the tables. */
  414.         fr->make_decode_tables(fr);
  415.  
  416.         return 0;
  417. }
  418.  
/*
        Choose the decoder (optimization) for a handle and fill fr->synths accordingly.
        cpu: decoder name as understood by dectype(); NULL or an empty string
             asks for automatic choice.
        fr->synths starts out as a copy of synth_base; the first candidate that is
        compiled in, wanted (or auto-chosen) and passes its CPU flag checks then
        replaces individual entries with its own synth functions.
        Returns 1 if a decoder has been set, 0 if none could be set or the dither
        noise setup failed.
*/
int frame_cpu_opt(mpg123_handle *fr, const char* cpu)
{
        const char* chosen = ""; /* the chosen decoder opt as string */
        enum optdec want_dec = nodec;
        int done = 0; /* Set once a decoder has been picked; later candidates are skipped then. */
        int auto_choose = 0;
#ifdef OPT_DITHER
        int dithered = FALSE; /* If some dithered decoder is chosen. */
#endif

        want_dec = dectype(cpu);
        auto_choose = want_dec == autodec;
        /* Fill whole array of synth functions with generic code first. */
        fr->synths = synth_base;

#ifndef OPT_MULTI
        {
                /* A differing wish is only reported, the build's single decoder is used regardless. */
                if(!auto_choose && want_dec != defopt)
                {
                        if(NOQUIET) error2("you wanted decoder type %i, I only have %i", want_dec, defopt);
                }
                auto_choose = TRUE; /* There will be only one choice anyway. */
        }
#endif

        fr->cpu_opts.type = nodec;
        /* covers any i386+ cpu; they actually differ only in the synth_1to1 function, mostly... */
#ifdef OPT_X86

#ifndef NO_LAYER3
#if (defined OPT_3DNOW || defined OPT_3DNOWEXT)
        /* Default dct36; the 3DNow and 3DNowExt choices below replace it. */
        fr->cpu_opts.dct36 = dct36;
#endif
#endif

        /* The candidates in this block are only considered for i586 and up. */
        if(cpu_i586(cpu_flags))
        {
#               ifdef OPT_MULTI
                debug2("standard flags: 0x%08x\textended flags: 0x%08x", cpu_flags.std, cpu_flags.ext);
#               endif
                #ifdef OPT_SSE
                if(   !done && (auto_choose || want_dec == sse)
                   && cpu_sse(cpu_flags) && cpu_mmx(cpu_flags) )
                {
                        chosen = "SSE";
                        fr->cpu_opts.type = sse;
#                       ifndef NO_16BIT
                        fr->synths.plain[r_1to1][f_16] = synth_1to1_sse;
#                       ifdef ACCURATE_ROUNDING
                        fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_sse;
#                       endif
#                       endif
#                       ifndef NO_REAL
                        fr->synths.plain[r_1to1][f_real] = synth_1to1_real_sse;
                        fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_sse;
#                       endif
#                       ifndef NO_32BIT
                        fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_sse;
                        fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_sse;
#                       endif
                        done = 1;
                }
                #endif
#               ifdef OPT_3DNOWEXT
                if(   !done && (auto_choose || want_dec == dreidnowext )
                   && cpu_3dnow(cpu_flags)
                   && cpu_3dnowext(cpu_flags)
                   && cpu_mmx(cpu_flags) )
                {
                        chosen = "3DNowExt";
                        fr->cpu_opts.type = dreidnowext;
#                       ifndef NO_LAYER3
                        fr->cpu_opts.dct36 = dct36_3dnowext;
#                       endif
#                       ifndef NO_16BIT
                        fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnowext;
#                       endif
                        done = 1;
                }
                #endif
                #ifdef OPT_3DNOW
                if(    !done && (auto_choose || want_dec == dreidnow)
                    && cpu_3dnow(cpu_flags) && cpu_mmx(cpu_flags) )
                {
                        chosen = "3DNow";
                        fr->cpu_opts.type = dreidnow;
#                       ifndef NO_LAYER3
                        fr->cpu_opts.dct36 = dct36_3dnow;
#                       endif
#                       ifndef NO_16BIT
                        fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnow;
#                       endif
                        done = 1;
                }
                #endif
                #ifdef OPT_MMX
                if(   !done && (auto_choose || want_dec == mmx)
                   && cpu_mmx(cpu_flags) )
                {
                        chosen = "MMX";
                        fr->cpu_opts.type = mmx;
#                       ifndef NO_16BIT
                        fr->synths.plain[r_1to1][f_16] = synth_1to1_mmx;
#                       endif
                        done = 1;
                }
                #endif
                #ifdef OPT_I586
                if(!done && (auto_choose || want_dec == ifuenf))
                {
                        chosen = "i586/pentium";
                        fr->cpu_opts.type = ifuenf;
#                       ifndef NO_16BIT
                        fr->synths.plain[r_1to1][f_16] = synth_1to1_i586;
#                       endif
                        done = 1;
                }
                #endif
                #ifdef OPT_I586_DITHER
                if(!done && (auto_choose || want_dec == ifuenf_dither))
                {
                        chosen = "dithered i586/pentium";
                        fr->cpu_opts.type = ifuenf_dither;
                        dithered = TRUE;
#                       ifndef NO_16BIT
                        fr->synths.plain[r_1to1][f_16] = synth_1to1_i586_dither;
#                       ifndef NO_DOWNSAMPLE
                        fr->synths.plain[r_2to1][f_16] = synth_2to1_dither;
                        fr->synths.plain[r_4to1][f_16] = synth_4to1_dither;
#                       endif
#                       endif
                        done = 1;
                }
                #endif
        }
        #ifdef OPT_I486
        /* That won't cooperate in multi opt mode - forcing i486 in layer3.c
           But still... here it is... maybe for real use in future. */
        if(!done && (auto_choose || want_dec == ivier))
        {
                chosen = "i486";
                fr->cpu_opts.type = ivier;
                done = 1;
        }
        #endif
        #ifdef OPT_I386
        if(!done && (auto_choose || want_dec == idrei))
        {
                chosen = "i386";
                fr->cpu_opts.type = idrei;
                done = 1;
        }
        #endif

        if(done)
        {
                /*
                        We have chosen some x86 decoder... fillup some i386 stuff.
                        There is an open question about using dithered synth_1to1 for 8bit wrappers.
                        For quality it won't make sense, but wrapped i586_dither wrapped may still be faster...
                */
                enum synth_resample ri;
                enum synth_format   fi;
#               ifndef NO_8BIT
#               ifndef NO_16BIT /* possibility to use a 16->8 wrapper... */
                /* Unlike the later wrapper check near the end of this function,
                   this one does not exclude the dithered decoder types. */
                if(fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16])
                {
                        fr->synths.plain[r_1to1][f_8] = synth_1to1_8bit_wrap;
                        fr->synths.mono[r_1to1][f_8] = synth_1to1_8bit_wrap_mono;
                        fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_mono2stereo;
                }
#               endif
#               endif
                /* Every plain entry that is still generic gets the i386 variant. */
                for(ri=0; ri<r_limit; ++ri)
                for(fi=0; fi<f_limit; ++fi)
                {
                        if(fr->synths.plain[ri][fi] == synth_base.plain[ri][fi])
                        fr->synths.plain[ri][fi] = plain_i386[ri][fi];
                }
        }

#endif /* OPT_X86 */

#ifdef OPT_X86_64
        if(!done && (auto_choose || want_dec == x86_64))
        {
                chosen = "x86-64 (SSE)";
                fr->cpu_opts.type = x86_64;
#               ifndef NO_16BIT
                fr->synths.plain[r_1to1][f_16] = synth_1to1_x86_64;
                fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_x86_64;
#               endif
#               ifndef NO_REAL
                fr->synths.plain[r_1to1][f_real] = synth_1to1_real_x86_64;
                fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_x86_64;
#               endif
#               ifndef NO_32BIT
                fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_x86_64;
                fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_x86_64;
#               endif
                done = 1;
        }
#endif

#ifdef OPT_GENERIC_DITHER
        if(!done && (auto_choose || want_dec == generic_dither))
        {
                chosen = "dithered generic";
                fr->cpu_opts.type = generic_dither;
                dithered = TRUE;
#               ifndef NO_16BIT
                fr->synths.plain[r_1to1][f_16] = synth_1to1_dither;
#               ifndef NO_DOWNSAMPLE
                fr->synths.plain[r_2to1][f_16] = synth_2to1_dither;
                fr->synths.plain[r_4to1][f_16] = synth_4to1_dither;
#               endif
#               endif
                done = 1;
        }
#endif

#       ifdef OPT_ALTIVEC
        if(!done && (auto_choose || want_dec == altivec))
        {
                chosen = "AltiVec";
                fr->cpu_opts.type = altivec;
#               ifndef NO_16BIT
                fr->synths.plain[r_1to1][f_16] = synth_1to1_altivec;
                fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_altivec;
#               endif
#               ifndef NO_REAL
                fr->synths.plain[r_1to1][f_real] = synth_1to1_real_altivec;
                fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_altivec;
#               endif
#               ifndef NO_32BIT
                fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_altivec;
                fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_altivec;
#               endif
                done = 1;
        }
#       endif

#       ifdef OPT_ARM
        if(!done && (auto_choose || want_dec == arm))
        {
                chosen = "ARM";
                fr->cpu_opts.type = arm;
#               ifndef NO_16BIT
                fr->synths.plain[r_1to1][f_16] = synth_1to1_arm;
#               endif
                done = 1;
        }
#       endif

#       ifdef OPT_GENERIC
        /* Nothing to replace here: the table already holds the generic functions. */
        if(!done && (auto_choose || want_dec == generic))
        {
                chosen = "generic";
                fr->cpu_opts.type = generic;
                done = 1;
        }
#       endif

        fr->cpu_opts.class = decclass(fr->cpu_opts.type);

#       ifndef NO_8BIT
#       ifndef NO_16BIT /* possibility to use a 16->8 wrapper... */
        /* Last chance to use some optimized routine via generic wrappers (for 8bit).
           Skipped for the dithered decoder types. */
        if(     fr->cpu_opts.type != ifuenf_dither
             && fr->cpu_opts.type != generic_dither
             && fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16] )
        {
                fr->synths.plain[r_1to1][f_8] = synth_1to1_8bit_wrap;
                fr->synths.mono[r_1to1][f_8] = synth_1to1_8bit_wrap_mono;
                fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_mono2stereo;
        }
#       endif
#       endif

#ifdef OPT_DITHER
        if(done && dithered)
        {
                /* run-time dither noise table generation */
                if(!frame_dither_init(fr))
                {
                        if(NOQUIET) error("Dither noise setup failed!");
                        return 0;
                }
        }
#endif

        if(done)
        {
                if(VERBOSE) fprintf(stderr, "Decoder: %s\n", chosen);
                return 1;
        }
        else
        {
                if(NOQUIET) error("Could not set optimization!");
                return 0;
        }
}
  721.  
  722. enum optdec dectype(const char* decoder)
  723. {
  724.         enum optdec dt;
  725.         if(   (decoder == NULL)
  726.            || (decoder[0] == 0) )
  727.         return autodec;
  728.  
  729.         for(dt=autodec; dt<nodec; ++dt)
  730.         if(!strcasecmp(decoder, decname[dt])) return dt;
  731.  
  732.         return nodec; /* If we found nothing... */
  733. }
  734.  
#ifdef OPT_MULTI

/* same number of entries as full list, but empty at beginning */
/* One placeholder slot per decoder that may be compiled in, plus the final
   NULL that terminates the list. check_decoders() fills the slots from the
   front with the names of the decoders that are usable; the slots it does
   not reach stay NULL. */
static const char *mpg123_supported_decoder_list[] =
{
        #ifdef OPT_SSE
        NULL,
        #endif
        #ifdef OPT_3DNOWEXT
        NULL,
        #endif
        #ifdef OPT_3DNOW
        NULL,
        #endif
        #ifdef OPT_MMX
        NULL,
        #endif
        #ifdef OPT_I586
        NULL,
        #endif
        #ifdef OPT_I586_DITHER
        NULL,
        #endif
        #ifdef OPT_I486
        NULL,
        #endif
        #ifdef OPT_I386
        NULL,
        #endif
        #ifdef OPT_ALTIVEC
        NULL,
        #endif
        #ifdef OPT_X86_64
        NULL,
        #endif
        #ifdef OPT_ARM
        NULL,
        #endif
        /* NOTE(review): no name is ever stored for OPT_GENERIC_FLOAT in this file; this slot only adds spare room. */
        #ifdef OPT_GENERIC_FLOAT
        NULL,
        #endif
#       ifdef OPT_GENERIC
        NULL,
#       endif
#       ifdef OPT_GENERIC_DITHER
        NULL,
#       endif
        NULL
};
#endif
  785.  
  786. static const char *mpg123_decoder_list[] =
  787. {
  788.         #ifdef OPT_SSE
  789.         dn_SSE,
  790.         #endif
  791.         #ifdef OPT_3DNOWEXT
  792.         dn_3DNowExt,
  793.         #endif
  794.         #ifdef OPT_3DNOW
  795.         dn_3DNow,
  796.         #endif
  797.         #ifdef OPT_MMX
  798.         dn_MMX,
  799.         #endif
  800.         #ifdef OPT_I586
  801.         dn_i586,
  802.         #endif
  803.         #ifdef OPT_I586_DITHER
  804.         dn_i586_dither,
  805.         #endif
  806.         #ifdef OPT_I486
  807.         dn_i486,
  808.         #endif
  809.         #ifdef OPT_I386
  810.         dn_i386,
  811.         #endif
  812.         #ifdef OPT_ALTIVEC
  813.         dn_AltiVec,
  814.         #endif
  815.         #ifdef OPT_X86_64
  816.         dn_x86_64,
  817.         #endif
  818.         #ifdef OPT_ARM
  819.         dn_ARM,
  820.         #endif
  821.         #ifdef OPT_GENERIC
  822.         dn_generic,
  823.         #endif
  824.         #ifdef OPT_GENERIC_DITHER
  825.         dn_generic_dither,
  826.         #endif
  827.         NULL
  828. };
  829.  
  830. void check_decoders(void )
  831. {
  832. #ifndef OPT_MULTI
  833.         /* In non-multi mode, only the full list (one entry) is used. */
  834.         return;
  835. #else
  836.         const char **d = mpg123_supported_decoder_list;
  837. #ifdef OPT_X86
  838.         getcpuflags(&cpu_flags);
  839.         if(cpu_i586(cpu_flags))
  840.         {
  841.                 /* not yet: if(cpu_sse2(cpu_flags)) printf(" SSE2");
  842.                 if(cpu_sse3(cpu_flags)) printf(" SSE3"); */
  843. #ifdef OPT_SSE
  844.                 if(cpu_sse(cpu_flags)) *(d++) = decname[sse];
  845. #endif
  846. #ifdef OPT_3DNOWEXT
  847.                 if(cpu_3dnowext(cpu_flags)) *(d++) = decname[dreidnowext];
  848. #endif
  849. #ifdef OPT_3DNOW
  850.                 if(cpu_3dnow(cpu_flags)) *(d++) = decname[dreidnow];
  851. #endif
  852. #ifdef OPT_MMX
  853.                 if(cpu_mmx(cpu_flags)) *(d++) = decname[mmx];
  854. #endif
  855. #ifdef OPT_I586
  856.                 *(d++) = decname[ifuenf];
  857. #endif
  858. #ifdef OPT_I586_DITHER
  859.                 *(d++) = decname[ifuenf_dither];
  860. #endif
  861.         }
  862. #endif
  863. /* just assume that the i486 built is run on a i486 cpu... */
  864. #ifdef OPT_I486
  865.         *(d++) = decname[ivier];
  866. #endif
  867. #ifdef OPT_ALTIVEC
  868.         *(d++) = decname[altivec];
  869. #endif
  870. /* every supported x86 can do i386, any cpu can do generic */
  871. #ifdef OPT_I386
  872.         *(d++) = decname[idrei];
  873. #endif
  874. #ifdef OPT_X86_64
  875.         *(d++) = decname[x86_64];
  876. #endif
  877. #ifdef OPT_ARM
  878.         *(d++) = decname[arm];
  879. #endif
  880. #ifdef OPT_GENERIC
  881.         *(d++) = decname[generic];
  882. #endif
  883. #ifdef OPT_GENERIC_DITHER
  884.         *(d++) = decname[generic_dither];
  885. #endif
  886. #endif /* ndef OPT_MULTI */
  887. }
  888.  
  889. const char* attribute_align_arg mpg123_current_decoder(mpg123_handle *mh)
  890. {
  891.         if(mh == NULL) return NULL;
  892.  
  893.         return decname[mh->cpu_opts.type];
  894. }
  895.  
  896. const char attribute_align_arg **mpg123_decoders(){ return mpg123_decoder_list; }
  897. const char attribute_align_arg **mpg123_supported_decoders()
  898. {
  899. #ifdef OPT_MULTI
  900.         return mpg123_supported_decoder_list;
  901. #else
  902.         return mpg123_decoder_list;
  903. #endif
  904. }
  905.