Subversion Repositories Kolibri OS

Rev

Rev 1905 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
1905 serge 1
/*
2
	optimize: get a grip on the different optimizations
3
 
4
	copyright 2006-9 by the mpg123 project - free software under the terms of the LGPL 2.1
5
	see COPYING and AUTHORS files in distribution or http://mpg123.org
6
	initially written by Thomas Orgis, inspired by 3DNow stuff in mpg123.[hc]
7
 
8
	Currently, this file contains the struct and function to choose an optimization variant and works only when OPT_MULTI is in effect.
9
*/
10
 
11
#include "mpg123lib_intern.h" /* includes optimize.h */
12
#include "debug.h"
13
 
14
/* Must match the enum dectype! */
15
 
16
/*
17
	It SUCKS having to define these names that way, but compile-time initialization of string arrays is a bitch.
18
	GCC doesn't see constant stuff when it's wiggling in front of it!
19
	Anyhow: Have a script for that:
20
names="generic generic_dither i386 i486 i586 i586_dither MMX 3DNow 3DNowExt AltiVec SSE x86-64"
21
for i in $names; do echo "##define dn_${i/-/_} \"$i\""; done
22
echo -n "static const char* decname[] =
23
{
24
	\"auto\"
25
	"
26
for i in $names; do echo -n ", dn_${i/-/_}"; done
27
echo "
28
	, \"nodec\"
29
};"
30
*/
31
#define dn_generic "generic"
32
#define dn_generic_dither "generic_dither"
33
#define dn_i386 "i386"
34
#define dn_i486 "i486"
35
#define dn_i586 "i586"
36
#define dn_i586_dither "i586_dither"
37
#define dn_MMX "MMX"
38
#define dn_3DNow "3DNow"
39
#define dn_3DNowExt "3DNowExt"
40
#define dn_AltiVec "AltiVec"
41
#define dn_SSE "SSE"
42
#define dn_x86_64 "x86-64"
43
#define dn_ARM "ARM"
3960 Serge 44
#define dn_NEON "NEON"
1905 serge 45
static const char* decname[] =
46
{
47
	"auto"
3960 Serge 48
	, dn_generic, dn_generic_dither, dn_i386, dn_i486, dn_i586, dn_i586_dither, dn_MMX, dn_3DNow, dn_3DNowExt, dn_AltiVec, dn_SSE, dn_x86_64, dn_ARM, dn_NEON
1905 serge 49
	, "nodec"
50
};
51
 
52
#if defined(OPT_X86) && defined(OPT_MULTI)
/* Multi-decoder x86 build: real CPU feature flags, filled by getcpuflags()
   in check_decoders(). */
#include "getcpuflags.h"
static struct cpuflags cpu_flags;
#else
/* Any other build: no runtime CPU probing. The feature tests are stubbed to
   constant 1 so the very same decoder selection code compiles, it just does
   not depend on anything measured at runtime. */
#define cpu_flags nothing
#define cpu_i586(s)     1
#define cpu_fpu(s)      1
#define cpu_mmx(s)      1
#define cpu_3dnow(s)    1
#define cpu_3dnowext(s) 1
#define cpu_sse(s)      1
#define cpu_sse2(s)     1
#define cpu_sse3(s)     1
#endif
68
 
69
/* Helpers for building one row of a synth table.
   Each IFxx(synth) yields its argument when the matching output format is
   compiled in and nothing when it is disabled (NO_8BIT, NO_REAL, NO_32BIT).
   IF8 and IFREAL append the separating comma themselves; IF32 is the last
   element of a row and therefore carries none. */

#ifdef NO_8BIT
#define IF8(synth)
#else
#define IF8(synth) synth,
#endif

#ifdef NO_REAL
#define IFREAL(synth)
#else
#define IFREAL(synth) synth,
#endif

#ifdef NO_32BIT
#define IF32(synth)
#else
#define IF32(synth) synth
#endif

/* One brace-enclosed row: 16 bit (unless NO_16BIT), then 8 bit, real, 32 bit. */
#ifdef NO_16BIT
#	define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) { IF8(synth_8) IFREAL(synth_real) IF32(synth_32) }
#else
#	define OUT_SYNTHS(synth_16, synth_8, synth_real, synth_32) { synth_16, IF8(synth_8) IFREAL(synth_real) IF32(synth_32) }
#endif
94
 
3960 Serge 95
/* The call of left and right plain synth, wrapped.
96
   This may be replaced by a direct stereo optimized synth. */
97
static int synth_stereo_wrap(real *bandPtr_l, real *bandPtr_r, mpg123_handle *fr)
1905 serge 98
{
3960 Serge 99
	int clip;
100
	clip  = (fr->synth)(bandPtr_l, 0, fr, 0);
101
	clip += (fr->synth)(bandPtr_r, 1, fr, 1);
102
	return clip;
103
}
104
 
105
static const struct synth_s synth_base =
106
{
1905 serge 107
	{ /* plain */
108
		 OUT_SYNTHS(synth_1to1, synth_1to1_8bit, synth_1to1_real, synth_1to1_s32)
109
#		ifndef NO_DOWNSAMPLE
110
		,OUT_SYNTHS(synth_2to1, synth_2to1_8bit, synth_2to1_real, synth_2to1_s32)
111
		,OUT_SYNTHS(synth_4to1, synth_4to1_8bit, synth_4to1_real, synth_4to1_s32)
112
#		endif
113
#		ifndef NO_NTOM
114
		,OUT_SYNTHS(synth_ntom, synth_ntom_8bit, synth_ntom_real, synth_ntom_s32)
115
#		endif
116
	},
117
	{ /* stereo, by default only wrappers over plain synth */
118
		 OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
119
#		ifndef NO_DOWNSAMPLE
120
		,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
121
		,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
122
#		endif
123
#		ifndef NO_NTOM
124
		,OUT_SYNTHS(synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap, synth_stereo_wrap)
125
#		endif
126
	},
127
	{ /* mono2stereo */
3960 Serge 128
		 OUT_SYNTHS(synth_1to1_m2s, synth_1to1_8bit_m2s, synth_1to1_real_m2s, synth_1to1_s32_m2s)
1905 serge 129
#		ifndef NO_DOWNSAMPLE
3960 Serge 130
		,OUT_SYNTHS(synth_2to1_m2s, synth_2to1_8bit_m2s, synth_2to1_real_m2s, synth_2to1_s32_m2s)
131
		,OUT_SYNTHS(synth_4to1_m2s, synth_4to1_8bit_m2s, synth_4to1_real_m2s, synth_4to1_s32_m2s)
1905 serge 132
#		endif
133
#		ifndef NO_NTOM
3960 Serge 134
		,OUT_SYNTHS(synth_ntom_m2s, synth_ntom_8bit_m2s, synth_ntom_real_m2s, synth_ntom_s32_m2s)
1905 serge 135
#		endif
136
	},
137
	{ /* mono*/
138
		 OUT_SYNTHS(synth_1to1_mono, synth_1to1_8bit_mono, synth_1to1_real_mono, synth_1to1_s32_mono)
139
#		ifndef NO_DOWNSAMPLE
140
		,OUT_SYNTHS(synth_2to1_mono, synth_2to1_8bit_mono, synth_2to1_real_mono, synth_2to1_s32_mono)
141
		,OUT_SYNTHS(synth_4to1_mono, synth_4to1_8bit_mono, synth_4to1_real_mono, synth_4to1_s32_mono)
142
#		endif
143
#		ifndef NO_NTOM
144
		,OUT_SYNTHS(synth_ntom_mono, synth_ntom_8bit_mono, synth_ntom_real_mono, synth_ntom_s32_mono)
145
#endif
146
	}
147
};
148
 
149
#ifdef OPT_X86
/* Plain synths of the i386 flavour. frame_cpu_opt() substitutes them for every
   plain slot an x86 decoder left at its generic default. The ntom row has no
   i386 variant and lists the generic ntom routines. */
const func_synth plain_i386[r_limit][f_limit] =
{
	  OUT_SYNTHS(synth_1to1_i386, synth_1to1_8bit_i386, synth_1to1_real_i386, synth_1to1_s32_i386)
#ifndef NO_DOWNSAMPLE
	, OUT_SYNTHS(synth_2to1_i386, synth_2to1_8bit_i386, synth_2to1_real_i386, synth_2to1_s32_i386)
	, OUT_SYNTHS(synth_4to1_i386, synth_4to1_8bit_i386, synth_4to1_real_i386, synth_4to1_s32_i386)
#endif
#ifndef NO_NTOM
	, OUT_SYNTHS(synth_ntom, synth_ntom_8bit, synth_ntom_real, synth_ntom_s32)
#endif
};
#endif
163
 
164
 
165
/* Hand out the default decoder type of this build (defopt). */
enum optdec defdec(void)
{
	return defopt;
}
166
 
167
enum optcla decclass(const enum optdec type)
168
{
3960 Serge 169
	return (type == mmx || type == sse || type == dreidnowext || type == x86_64  || type == neon) ? mmxsse : normal;
1905 serge 170
}
171
 
172
 
173
/* Check whether a synth function is present anywhere in a synth table.
   synth:  the function pointer to search for
   synths: table of r_limit resampling rows with f_limit format columns each
   Returns TRUE as soon as one entry equals synth, FALSE if none does. */
static int find_synth(func_synth synth,  const func_synth synths[r_limit][f_limit])
{
	enum synth_resample ri;
	enum synth_format   fi;

	/* Walk the complete table; the bounds are the array dimensions from the parameter type. */
	for(ri=0; ri<r_limit; ++ri)
	{
		for(fi=0; fi<f_limit; ++fi)
		{
			if(synth == synths[ri][fi])
			return TRUE;
		}
	}

	return FALSE;
}
184
 
185
/* Determine what kind of decoder is actually active
186
   This depends on runtime choices which may cause fallback to i386 or generic code. */
187
static int find_dectype(mpg123_handle *fr)
188
{
189
	enum optdec type = nodec;
190
	/* Direct and indirect usage, 1to1 stereo decoding.
191
	   Concentrating on the plain stereo synth should be fine, mono stuff is derived. */
192
	func_synth basic_synth = fr->synth;
193
#ifndef NO_8BIT
194
#ifndef NO_16BIT
195
	if(basic_synth == synth_1to1_8bit_wrap)
196
	basic_synth = fr->synths.plain[r_1to1][f_16]; /* That is what's really below the surface. */
197
#endif
198
#endif
199
 
200
	if(FALSE) ; /* Just to initialize the else if ladder. */
201
#ifndef NO_16BIT
202
#ifdef OPT_3DNOWEXT
203
	else if(basic_synth == synth_1to1_3dnowext) type = dreidnowext;
204
#endif
205
#ifdef OPT_SSE
206
	else if(basic_synth == synth_1to1_sse) type = sse;
207
#endif
208
#ifdef OPT_3DNOW
209
	else if(basic_synth == synth_1to1_3dnow) type = dreidnow;
210
#endif
211
#ifdef OPT_MMX
212
	else if(basic_synth == synth_1to1_mmx) type = mmx;
213
#endif
214
#ifdef OPT_I586_DITHER
215
	else if(basic_synth == synth_1to1_i586_dither) type = ifuenf_dither;
216
#endif
217
#ifdef OPT_I586
218
	else if(basic_synth == synth_1to1_i586) type = ifuenf;
219
#endif
220
#ifdef OPT_ALTIVEC
221
	else if(basic_synth == synth_1to1_altivec) type = altivec;
222
#endif
223
#ifdef OPT_X86_64
224
	else if(basic_synth == synth_1to1_x86_64) type = x86_64;
225
#endif
226
#ifdef OPT_ARM
227
	else if(basic_synth == synth_1to1_arm) type = arm;
228
#endif
3960 Serge 229
#ifdef OPT_NEON
230
	else if(basic_synth == synth_1to1_neon) type = neon;
231
#endif
1905 serge 232
#ifdef OPT_GENERIC_DITHER
233
	else if(basic_synth == synth_1to1_dither) type = generic_dither;
234
#endif
235
#ifdef OPT_DITHER /* either i586 or generic! */
236
#ifndef NO_DOWNSAMPLE
237
	else if
238
	(
239
		   basic_synth == synth_2to1_dither
240
		|| basic_synth == synth_4to1_dither
241
	) type = generic_dither;
242
#endif
243
#endif
244
#endif /* 16bit */
245
 
246
#ifndef NO_REAL
247
#ifdef OPT_SSE
248
	else if(basic_synth == synth_1to1_real_sse) type = sse;
249
#endif
250
#ifdef OPT_X86_64
251
	else if(basic_synth == synth_1to1_real_x86_64) type = x86_64;
252
#endif
253
#ifdef OPT_ALTIVEC
254
	else if(basic_synth == synth_1to1_real_altivec) type = altivec;
255
#endif
3960 Serge 256
#ifdef OPT_NEON
257
	else if(basic_synth == synth_1to1_real_neon) type = neon;
258
#endif
1905 serge 259
 
260
#endif /* real */
261
 
262
#ifndef NO_32BIT
263
#ifdef OPT_SSE
264
	else if(basic_synth == synth_1to1_s32_sse) type = sse;
265
#endif
266
#ifdef OPT_X86_64
267
	else if(basic_synth == synth_1to1_s32_x86_64) type = x86_64;
268
#endif
269
#ifdef OPT_ALTIVEC
270
	else if(basic_synth == synth_1to1_s32_altivec) type = altivec;
271
#endif
3960 Serge 272
#ifdef OPT_NEON
273
	else if(basic_synth == synth_1to1_s32_neon) type = neon;
274
#endif
1905 serge 275
#endif /* 32bit */
276
 
277
#ifdef OPT_X86
278
	else if(find_synth(basic_synth, plain_i386))
279
	type = idrei;
280
#endif
281
 
282
	else if(find_synth(basic_synth, synth_base.plain))
283
	type = generic;
284
 
285
 
286
 
287
#ifdef OPT_I486
288
	/* i486 is special ... the specific code is in use for 16bit 1to1 stereo
289
	   otherwise we have i386 active... but still, the distinction doesn't matter*/
290
	type = ivier;
291
#endif
292
 
293
	if(type != nodec)
294
	{
295
		fr->cpu_opts.type = type;
296
		fr->cpu_opts.class = decclass(type);
297
 
298
		debug3("determined active decoder type %i (%s) of class %i", type, decname[type], fr->cpu_opts.class);
299
		return MPG123_OK;
300
	}
301
	else
302
	{
303
		if(NOQUIET) error("Unable to determine active decoder type -- this is SERIOUS b0rkage!");
304
 
305
		fr->err = MPG123_BAD_DECODER_SETUP;
306
		return MPG123_ERR;
307
	}
308
}
309
 
310
/* set synth functions for current frame, optimizations handled by opt_* macros */
311
int set_synth_functions(mpg123_handle *fr)
312
{
313
	enum synth_resample resample = r_none;
314
	enum synth_format basic_format = f_none; /* Default is always 16bit, or whatever. */
315
 
316
	/* Select the basic output format, different from 16bit: 8bit, real. */
317
	if(FALSE){}
318
#ifndef NO_16BIT
319
	else if(fr->af.encoding & MPG123_ENC_16)
320
	basic_format = f_16;
321
#endif
322
#ifndef NO_8BIT
323
	else if(fr->af.encoding & MPG123_ENC_8)
324
	basic_format = f_8;
325
#endif
326
#ifndef NO_REAL
327
	else if(fr->af.encoding & MPG123_ENC_FLOAT)
328
	basic_format = f_real;
329
#endif
330
#ifndef NO_32BIT
3960 Serge 331
	/* 24 bit integer means decoding to 32 bit first. */
332
	else if(fr->af.encoding & MPG123_ENC_32 || fr->af.encoding & MPG123_ENC_24)
1905 serge 333
	basic_format = f_32;
334
#endif
335
 
336
	/* Make sure the chosen format is compiled into this lib. */
337
	if(basic_format == f_none)
338
	{
339
		if(NOQUIET) error("set_synth_functions: This output format is disabled in this build!");
340
 
341
		return -1;
342
	}
343
 
344
	/* Be explicit about downsampling variant. */
345
	switch(fr->down_sample)
346
	{
347
		case 0: resample = r_1to1; break;
348
#ifndef NO_DOWNSAMPLE
349
		case 1: resample = r_2to1; break;
350
		case 2: resample = r_4to1; break;
351
#endif
352
#ifndef NO_NTOM
353
		case 3: resample = r_ntom; break;
354
#endif
355
	}
356
 
357
	if(resample == r_none)
358
	{
359
		if(NOQUIET) error("set_synth_functions: This resampling mode is not supported in this build!");
360
 
361
		return -1;
362
	}
363
 
364
	debug2("selecting synth: resample=%i format=%i", resample, basic_format);
365
	/* Finally selecting the synth functions for stereo / mono. */
366
	fr->synth = fr->synths.plain[resample][basic_format];
367
	fr->synth_stereo = fr->synths.stereo[resample][basic_format];
368
	fr->synth_mono = fr->af.channels==2
369
		? fr->synths.mono2stereo[resample][basic_format] /* Mono MPEG file decoded to stereo. */
370
		: fr->synths.mono[resample][basic_format];       /* Mono MPEG file decoded to mono. */
371
 
372
	if(find_dectype(fr) != MPG123_OK) /* Actually determine the currently active decoder breed. */
373
	{
374
		fr->err = MPG123_BAD_DECODER_SETUP;
375
		return MPG123_ERR;
376
	}
377
 
378
	if(frame_buffers(fr) != 0)
379
	{
380
		fr->err = MPG123_NO_BUFFERS;
381
		if(NOQUIET) error("Failed to set up decoder buffers!");
382
 
383
		return MPG123_ERR;
384
	}
385
 
386
#ifndef NO_8BIT
387
	if(basic_format == f_8)
388
	{
389
		if(make_conv16to8_table(fr) != 0)
390
		{
391
			if(NOQUIET) error("Failed to set up conv16to8 table!");
392
			/* it's a bit more work to get proper error propagation up */
393
			return -1;
394
		}
395
	}
396
#endif
397
 
398
#ifdef OPT_MMXORSSE
399
	/* Special treatment for MMX, SSE and 3DNowExt stuff.
400
	   The real-decoding SSE for x86-64 uses normal tables! */
401
	if(fr->cpu_opts.class == mmxsse
402
#	ifndef NO_REAL
403
	   && basic_format != f_real
404
#	endif
405
#	ifndef NO_32BIT
406
	   && basic_format != f_32
407
#	endif
408
#	ifdef ACCURATE_ROUNDING
409
	   && fr->cpu_opts.type != sse
410
	   && fr->cpu_opts.type != x86_64
3960 Serge 411
	   && fr->cpu_opts.type != neon
1905 serge 412
#	endif
413
	  )
414
	{
415
#ifndef NO_LAYER3
416
		init_layer3_stuff(fr, init_layer3_gainpow2_mmx);
417
#endif
418
#ifndef NO_LAYER12
419
		init_layer12_stuff(fr, init_layer12_table_mmx);
420
#endif
421
		fr->make_decode_tables = make_decode_tables_mmx;
422
	}
423
	else
424
#endif
425
	{
426
#ifndef NO_LAYER3
427
		init_layer3_stuff(fr, init_layer3_gainpow2);
428
#endif
429
#ifndef NO_LAYER12
430
		init_layer12_stuff(fr, init_layer12_table);
431
#endif
432
		fr->make_decode_tables = make_decode_tables;
433
	}
434
 
435
	/* We allocated the table buffers just now, so (re)create the tables. */
436
	fr->make_decode_tables(fr);
437
 
438
	return 0;
439
}
440
 
441
/* Choose the decoder optimization for a handle and fill in its synth table.
   fr:  handle whose synths and cpu_opts members get set up
   cpu: name of the wanted decoder; NULL or an empty string asks for the
        automatic choice (see dectype())
   The table starts out as a copy of synth_base; the first decoder that is
   compiled in, wanted (or auto-chosen) and, on x86, supported by the CPU
   flags then overrides individual entries.
   Returns 1 when a decoder has been selected, 0 when none matched or the
   dither noise setup failed. */
int frame_cpu_opt(mpg123_handle *fr, const char* cpu)
{
	const char* chosen = ""; /* the chosen decoder opt as string */
	enum optdec want_dec = nodec;
	int done = 0;
	int auto_choose = 0;
#ifdef OPT_DITHER
	int dithered = FALSE; /* If some dithered decoder is chosen. */
#endif

	want_dec = dectype(cpu);
	auto_choose = want_dec == autodec;
	/* Fill whole array of synth functions with generic code first. */
	fr->synths = synth_base;

#ifndef OPT_MULTI
	{
		/* Single-decoder build: complain about a differing wish, then take what is there. */
		if(!auto_choose && want_dec != defopt)
		{
			if(NOQUIET) error2("you wanted decoder type %i, I only have %i", want_dec, defopt);
		}
		auto_choose = TRUE; /* There will be only one choice anyway. */
	}
#endif

	fr->cpu_opts.type = nodec;
	/* covers any i386+ cpu; they actually differ only in the synth_1to1 function, mostly... */
#ifdef OPT_X86

#ifdef OPT_MULTI
#ifndef NO_LAYER3
#if (defined OPT_3DNOW || defined OPT_3DNOWEXT)
	/* Plain dct36 as the default; the 3DNow variants below do not replace it. */
	fr->cpu_opts.the_dct36 = dct36;
#endif
#endif
#endif

	if(cpu_i586(cpu_flags))
	{
#ifdef OPT_MULTI
		debug2("standard flags: 0x%08x\textended flags: 0x%08x", cpu_flags.std, cpu_flags.ext);
#endif
#ifdef OPT_SSE
		if(   !done && (auto_choose || want_dec == sse)
		   && cpu_sse(cpu_flags) && cpu_mmx(cpu_flags) )
		{
			chosen = "SSE";
			fr->cpu_opts.type = sse;
#ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = synth_1to1_sse;
#ifdef ACCURATE_ROUNDING
			fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_sse;
#endif
#endif
#ifndef NO_REAL
			fr->synths.plain[r_1to1][f_real] = synth_1to1_real_sse;
			fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_sse;
#endif
#ifndef NO_32BIT
			fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_sse;
			fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_sse;
#endif
			done = 1;
		}
#endif
#ifdef OPT_3DNOWEXT
		if(   !done && (auto_choose || want_dec == dreidnowext )
		   && cpu_3dnow(cpu_flags)
		   && cpu_3dnowext(cpu_flags)
		   && cpu_mmx(cpu_flags) )
		{
			chosen = "3DNowExt";
			fr->cpu_opts.type = dreidnowext;
			/* dct36_3dnowext is deliberately not installed as the_dct36:
			   The DCT36 is _bad_, at least compared to gcc 4.4-built C code. */
#ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnowext;
#endif
			done = 1;
		}
#endif
#ifdef OPT_3DNOW
		if(    !done && (auto_choose || want_dec == dreidnow)
		    && cpu_3dnow(cpu_flags) && cpu_mmx(cpu_flags) )
		{
			chosen = "3DNow";
			fr->cpu_opts.type = dreidnow;
			/* dct36_3dnow is deliberately not installed as the_dct36:
			   The DCT36 is _bad_, at least compared to gcc 4.4-built C code. */
#ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = synth_1to1_3dnow;
#endif
			done = 1;
		}
#endif
#ifdef OPT_MMX
		if(   !done && (auto_choose || want_dec == mmx)
		   && cpu_mmx(cpu_flags) )
		{
			chosen = "MMX";
			fr->cpu_opts.type = mmx;
#ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = synth_1to1_mmx;
#endif
			done = 1;
		}
#endif
#ifdef OPT_I586
		if(!done && (auto_choose || want_dec == ifuenf))
		{
			chosen = "i586/pentium";
			fr->cpu_opts.type = ifuenf;
#ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = synth_1to1_i586;
#endif
			done = 1;
		}
#endif
#ifdef OPT_I586_DITHER
		if(!done && (auto_choose || want_dec == ifuenf_dither))
		{
			chosen = "dithered i586/pentium";
			fr->cpu_opts.type = ifuenf_dither;
			dithered = TRUE;
#ifndef NO_16BIT
			fr->synths.plain[r_1to1][f_16] = synth_1to1_i586_dither;
#ifndef NO_DOWNSAMPLE
			fr->synths.plain[r_2to1][f_16] = synth_2to1_dither;
			fr->synths.plain[r_4to1][f_16] = synth_4to1_dither;
#endif
#endif
			done = 1;
		}
#endif
	}
#ifdef OPT_I486
	/* That won't cooperate in multi opt mode - forcing i486 in layer3.c
	   But still... here it is... maybe for real use in future. */
	if(!done && (auto_choose || want_dec == ivier))
	{
		chosen = "i486";
		fr->cpu_opts.type = ivier;
		done = 1;
	}
#endif
#ifdef OPT_I386
	if(!done && (auto_choose || want_dec == idrei))
	{
		chosen = "i386";
		fr->cpu_opts.type = idrei;
		done = 1;
	}
#endif

	if(done)
	{
		/*
			We have chosen some x86 decoder... fillup some i386 stuff.
			There is an open question about using dithered synth_1to1 for 8bit wrappers.
			For quality it won't make sense, but wrapped i586_dither wrapped may still be faster...
		*/
		enum synth_resample ri;
		enum synth_format   fi;
#ifndef NO_8BIT
#ifndef NO_16BIT /* possibility to use a 16->8 wrapper... */
		if(fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16])
		{
			fr->synths.plain[r_1to1][f_8] = synth_1to1_8bit_wrap;
			fr->synths.mono[r_1to1][f_8] = synth_1to1_8bit_wrap_mono;
			fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_m2s;
		}
#endif
#endif
		/* Every plain slot still holding the generic routine gets the i386 one. */
		for(ri=0; ri<r_limit; ++ri)
		for(fi=0; fi<f_limit; ++fi)
		{
			if(fr->synths.plain[ri][fi] == synth_base.plain[ri][fi])
			fr->synths.plain[ri][fi] = plain_i386[ri][fi];
		}
	}

#endif /* OPT_X86 */

#ifdef OPT_X86_64
	if(!done && (auto_choose || want_dec == x86_64))
	{
		chosen = "x86-64 (SSE)";
		fr->cpu_opts.type = x86_64;
#ifndef NO_16BIT
		fr->synths.plain[r_1to1][f_16] = synth_1to1_x86_64;
		fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_x86_64;
#endif
#ifndef NO_REAL
		fr->synths.plain[r_1to1][f_real] = synth_1to1_real_x86_64;
		fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_x86_64;
#endif
#ifndef NO_32BIT
		fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_x86_64;
		fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_x86_64;
#endif
		done = 1;
	}
#endif

#ifdef OPT_GENERIC_DITHER
	if(!done && (auto_choose || want_dec == generic_dither))
	{
		chosen = "dithered generic";
		fr->cpu_opts.type = generic_dither;
		dithered = TRUE;
#ifndef NO_16BIT
		fr->synths.plain[r_1to1][f_16] = synth_1to1_dither;
#ifndef NO_DOWNSAMPLE
		fr->synths.plain[r_2to1][f_16] = synth_2to1_dither;
		fr->synths.plain[r_4to1][f_16] = synth_4to1_dither;
#endif
#endif
		done = 1;
	}
#endif

#ifdef OPT_ALTIVEC
	if(!done && (auto_choose || want_dec == altivec))
	{
		chosen = "AltiVec";
		fr->cpu_opts.type = altivec;
#ifndef NO_16BIT
		fr->synths.plain[r_1to1][f_16] = synth_1to1_altivec;
		fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_altivec;
#endif
#ifndef NO_REAL
		fr->synths.plain[r_1to1][f_real] = synth_1to1_real_altivec;
		fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_altivec;
#endif
#ifndef NO_32BIT
		fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_altivec;
		fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_altivec;
#endif
		done = 1;
	}
#endif

#ifdef OPT_NEON
	if(!done && (auto_choose || want_dec == neon))
	{
		chosen = "NEON";
		fr->cpu_opts.type = neon;
#ifndef NO_16BIT
		fr->synths.plain[r_1to1][f_16] = synth_1to1_neon;
		fr->synths.stereo[r_1to1][f_16] = synth_1to1_stereo_neon;
#endif
#ifndef NO_REAL
		fr->synths.plain[r_1to1][f_real] = synth_1to1_real_neon;
		fr->synths.stereo[r_1to1][f_real] = synth_1to1_real_stereo_neon;
#endif
#ifndef NO_32BIT
		fr->synths.plain[r_1to1][f_32] = synth_1to1_s32_neon;
		fr->synths.stereo[r_1to1][f_32] = synth_1to1_s32_stereo_neon;
#endif
		done = 1;
	}
#endif

#ifdef OPT_ARM
	if(!done && (auto_choose || want_dec == arm))
	{
		chosen = "ARM";
		fr->cpu_opts.type = arm;
#ifndef NO_16BIT
		fr->synths.plain[r_1to1][f_16] = synth_1to1_arm;
#endif
		done = 1;
	}
#endif

#ifdef OPT_GENERIC
	if(!done && (auto_choose || want_dec == generic))
	{
		chosen = "generic";
		fr->cpu_opts.type = generic;
		done = 1;
	}
#endif

	fr->cpu_opts.class = decclass(fr->cpu_opts.type);

#ifndef NO_8BIT
#ifndef NO_16BIT /* possibility to use a 16->8 wrapper... */
	/* Last chance to use some optimized routine via generic wrappers (for 8bit). */
	if(     fr->cpu_opts.type != ifuenf_dither
	     && fr->cpu_opts.type != generic_dither
	     && fr->synths.plain[r_1to1][f_16] != synth_base.plain[r_1to1][f_16] )
	{
		fr->synths.plain[r_1to1][f_8] = synth_1to1_8bit_wrap;
		fr->synths.mono[r_1to1][f_8] = synth_1to1_8bit_wrap_mono;
		fr->synths.mono2stereo[r_1to1][f_8] = synth_1to1_8bit_wrap_m2s;
	}
#endif
#endif

#ifdef OPT_DITHER
	if(done && dithered)
	{
		/* run-time dither noise table generation */
		if(!frame_dither_init(fr))
		{
			if(NOQUIET) error("Dither noise setup failed!");
			return 0;
		}
	}
#endif

	if(done)
	{
		if(VERBOSE) fprintf(stderr, "Decoder: %s\n", chosen);
		return 1;
	}
	else
	{
		if(NOQUIET) error("Could not set optimization!");
		return 0;
	}
}
772
 
773
enum optdec dectype(const char* decoder)
774
{
775
	enum optdec dt;
776
	if(   (decoder == NULL)
777
	   || (decoder[0] == 0) )
778
	return autodec;
779
 
780
	for(dt=autodec; dt
781
	if(!strcasecmp(decoder, decname[dt])) return dt;
782
 
783
	return nodec; /* If we found nothing... */
784
}
785
 
786
#ifdef OPT_MULTI

/* List of decoders usable on the running machine; check_decoders() fills it.
   It starts out all NULL with one slot per decoder compiled in (same number
   of entries as the full list) plus the terminating NULL. */
static const char *mpg123_supported_decoder_list[] =
{
#ifdef OPT_SSE
	NULL,
#endif
#ifdef OPT_3DNOWEXT
	NULL,
#endif
#ifdef OPT_3DNOW
	NULL,
#endif
#ifdef OPT_MMX
	NULL,
#endif
#ifdef OPT_I586
	NULL,
#endif
#ifdef OPT_I586_DITHER
	NULL,
#endif
#ifdef OPT_I486
	NULL,
#endif
#ifdef OPT_I386
	NULL,
#endif
#ifdef OPT_ALTIVEC
	NULL,
#endif
#ifdef OPT_X86_64
	NULL,
#endif
#ifdef OPT_ARM
	NULL,
#endif
#ifdef OPT_NEON
	NULL,
#endif
#ifdef OPT_GENERIC_FLOAT
	NULL,
#endif
#ifdef OPT_GENERIC
	NULL,
#endif
#ifdef OPT_GENERIC_DITHER
	NULL,
#endif
	NULL /* terminator */
};
#endif
839
 
840
/* NULL-terminated list naming every decoder compiled into this build. */
static const char *mpg123_decoder_list[] =
{
#ifdef OPT_SSE
	dn_SSE,
#endif
#ifdef OPT_3DNOWEXT
	dn_3DNowExt,
#endif
#ifdef OPT_3DNOW
	dn_3DNow,
#endif
#ifdef OPT_MMX
	dn_MMX,
#endif
#ifdef OPT_I586
	dn_i586,
#endif
#ifdef OPT_I586_DITHER
	dn_i586_dither,
#endif
#ifdef OPT_I486
	dn_i486,
#endif
#ifdef OPT_I386
	dn_i386,
#endif
#ifdef OPT_ALTIVEC
	dn_AltiVec,
#endif
#ifdef OPT_X86_64
	dn_x86_64,
#endif
#ifdef OPT_ARM
	dn_ARM,
#endif
#ifdef OPT_NEON
	dn_NEON,
#endif
#ifdef OPT_GENERIC
	dn_generic,
#endif
#ifdef OPT_GENERIC_DITHER
	dn_generic_dither,
#endif
	NULL /* terminator */
};
886
 
887
/* Fill mpg123_supported_decoder_list with the names of the decoders that can
   be used on this machine. On x86 the CPU flags are read first and gate the
   SSE, 3DNowExt, 3DNow and MMX entries. In non-multi mode, only the full list
   (one entry) is used and this function does nothing. */
void check_decoders(void )
{
#ifdef OPT_MULTI
	const char **d = mpg123_supported_decoder_list; /* next free slot */

#ifdef OPT_X86
	getcpuflags(&cpu_flags);
	if(cpu_i586(cpu_flags))
	{
		/* not yet: if(cpu_sse2(cpu_flags)) printf(" SSE2");
		if(cpu_sse3(cpu_flags)) printf(" SSE3"); */
#ifdef OPT_SSE
		if(cpu_sse(cpu_flags)) { *d++ = decname[sse]; }
#endif
#ifdef OPT_3DNOWEXT
		if(cpu_3dnowext(cpu_flags)) { *d++ = decname[dreidnowext]; }
#endif
#ifdef OPT_3DNOW
		if(cpu_3dnow(cpu_flags)) { *d++ = decname[dreidnow]; }
#endif
#ifdef OPT_MMX
		if(cpu_mmx(cpu_flags)) { *d++ = decname[mmx]; }
#endif
#ifdef OPT_I586
		*d++ = decname[ifuenf];
#endif
#ifdef OPT_I586_DITHER
		*d++ = decname[ifuenf_dither];
#endif
	}
#endif /* OPT_X86 */

	/* just assume that the i486 built is run on a i486 cpu... */
#ifdef OPT_I486
	*d++ = decname[ivier];
#endif
#ifdef OPT_ALTIVEC
	*d++ = decname[altivec];
#endif
	/* every supported x86 can do i386, any cpu can do generic */
#ifdef OPT_I386
	*d++ = decname[idrei];
#endif
#ifdef OPT_X86_64
	*d++ = decname[x86_64];
#endif
#ifdef OPT_ARM
	*d++ = decname[arm];
#endif
#ifdef OPT_NEON
	*d++ = decname[neon];
#endif
#ifdef OPT_GENERIC
	*d++ = decname[generic];
#endif
#ifdef OPT_GENERIC_DITHER
	*d++ = decname[generic_dither];
#endif
#endif /* OPT_MULTI */
}
948
 
949
/* Name of the decoder currently recorded in the handle's cpu_opts.type.
   Returns NULL when no handle is given. */
const char* attribute_align_arg mpg123_current_decoder(mpg123_handle *mh)
{
	return (mh != NULL) ? decname[mh->cpu_opts.type] : NULL;
}
955
 
3960 Serge 956
/* NULL-terminated list of all decoders compiled into this build. */
const char attribute_align_arg **mpg123_decoders(void)
{
	return mpg123_decoder_list;
}
957
/* List of decoders usable on this machine. Builds with OPT_MULTI return the
   list filled by check_decoders(); all other builds return the compiled-in
   list instead. */
const char attribute_align_arg **mpg123_supported_decoders(void)
{
#ifndef OPT_MULTI
	return mpg123_decoder_list;
#else
	return mpg123_supported_decoder_list;
#endif
}