WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/ffmpeg/ffmpeg-2.1/libavcodec/x86/ac3dsp_init.c

Rev	Author	Line No.	Line
6148	serge	1	/*
		2	* x86-optimized AC-3 DSP utils
		3	* Copyright (c) 2011 Justin Ruggles
		4	*
		5	* This file is part of FFmpeg.
		6	*
		7	* FFmpeg is free software; you can redistribute it and/or
		8	* modify it under the terms of the GNU Lesser General Public
		9	* License as published by the Free Software Foundation; either
		10	* version 2.1 of the License, or (at your option) any later version.
		11	*
		12	* FFmpeg is distributed in the hope that it will be useful,
		13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
		14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
		15	* Lesser General Public License for more details.
		16	*
		17	* You should have received a copy of the GNU Lesser General Public
		18	* License along with FFmpeg; if not, write to the Free Software
		19	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		20	*/
		21
		22	#include "libavutil/mem.h"
		23	#include "libavutil/x86/asm.h"
		24	#include "libavutil/x86/cpu.h"
		25	#include "dsputil_x86.h"
		26	#include "libavcodec/ac3.h"
		27	#include "libavcodec/ac3dsp.h"
		28
		/* Prototypes of the CPU-specific routines that ff_ac3dsp_init_x86() in this
		 * file installs into AC3DSPContext. Only the declarations appear here; the
		 * definitions are not in this file (presumably external x86 assembly, given
		 * the EXTERNAL_* checks that gate their use -- confirm). */
		29	void ff_ac3_exponent_min_mmx (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
		30	void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
		31	void ff_ac3_exponent_min_sse2 (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
		32
		33	int ff_ac3_max_msb_abs_int16_mmx (const int16_t *src, int len);
		34	int ff_ac3_max_msb_abs_int16_mmxext(const int16_t *src, int len);
		35	int ff_ac3_max_msb_abs_int16_sse2 (const int16_t *src, int len);
		36	int ff_ac3_max_msb_abs_int16_ssse3(const int16_t *src, int len);
		37
		/* In-place shifts over `len` elements of `src`. */
		38	void ff_ac3_lshift_int16_mmx (int16_t *src, unsigned int len, unsigned int shift);
		39	void ff_ac3_lshift_int16_sse2(int16_t *src, unsigned int len, unsigned int shift);
		40
		41	void ff_ac3_rshift_int32_mmx (int32_t *src, unsigned int len, unsigned int shift);
		42	void ff_ac3_rshift_int32_sse2(int32_t *src, unsigned int len, unsigned int shift);
		43
		/* Prototypes of further CPU-specific routines installed by
		 * ff_ac3dsp_init_x86(). The buffer arguments are pointers (dst/src with a
		 * length, exp/coef with a coefficient count); the pointer declarators had
		 * been lost from this listing and are restored here so that the prototypes
		 * describe buffers rather than single by-value scalars. */
		44	void ff_float_to_fixed24_3dnow(int32_t *dst, const float *src, unsigned int len);
		45	void ff_float_to_fixed24_sse (int32_t *dst, const float *src, unsigned int len);
		46	void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned int len);
		47
		48	int ff_ac3_compute_mantissa_size_sse2(uint16_t mant_cnt[6][16]);
		49
		50	void ff_ac3_extract_exponents_3dnow(uint8_t *exp, int32_t *coef, int nb_coefs);
		51	void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs);
		52	void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs);
		53
		/* On 32-bit x86 builds with the Intel compiler, force HAVE_7REGS to 0. The
		 * inline-asm downmix further down is guarded by HAVE_SSE_INLINE && HAVE_7REGS,
		 * so this compiles it out for that configuration (presumably because that
		 * compiler cannot satisfy the asm's register operands -- confirm). */
		54	#if ARCH_X86_32 && defined(__INTEL_COMPILER)
		55	# undef HAVE_7REGS
		56	# define HAVE_7REGS 0
		57	#endif
		58
		59	#if HAVE_SSE_INLINE && HAVE_7REGS
		60
		/* Token selectors: IF1(x) expands to x, IF0(x) expands to nothing. They are
		 * passed as the mono/stereo arguments of MIX5 and MIX_MISC to keep or drop
		 * individual instruction strings in the asm templates. */
		61	#define IF1(x) x
		62	#define IF0(x)
		63
		/*
		 * MIX5(mono, stereo): SSE inline-asm downmix of five input channels, done in
		 * place. `mono` and `stereo` are each IF1 or IF0 and select which of the
		 * optional instructions are emitted.
		 *
		 * Uses `i`, `matrix`, `samples` and `len` from the enclosing scope. `i` is a
		 * negative byte offset advanced by 16 (four floats) per iteration; the loop
		 * condition is tested after the body and continues while the offset is still
		 * negative. The channel pointers are biased by `len`, so (ptr, i) walks each
		 * buffer from its start.
		 *
		 * Three coefficients are read at byte offsets 0, 8 and 24 from `matrix` (with
		 * two floats per row: matrix[0][0], matrix[1][0], matrix[3][0]) and broadcast
		 * into xmm5, xmm6 and xmm7. Channels 0 and 2 are scaled by xmm5, channel 1 by
		 * xmm6, channels 3 and 4 by xmm7. With chN denoting the scaled channel:
		 *   stereo: samples[0] = ch0 + ch1 + ch3, samples[1] = ch2 + ch1 + ch4
		 *   mono:   samples[0] = ch0 + ch1 + ch2 + ch3 + ch4
		 *
		 * All loads/stores use movaps, so the buffers are expected to be 16-byte
		 * aligned and len a multiple of 4 -- NOTE(review): confirm callers guarantee
		 * this.
		 */
		64	#define MIX5(mono, stereo) \
		65	__asm__ volatile ( \
		66	"movss 0(%1), %%xmm5 \n" \
		67	"movss 8(%1), %%xmm6 \n" \
		68	"movss 24(%1), %%xmm7 \n" \
		69	"shufps $0, %%xmm5, %%xmm5 \n" \
		70	"shufps $0, %%xmm6, %%xmm6 \n" \
		71	"shufps $0, %%xmm7, %%xmm7 \n" \
		72	"1: \n" \
		73	"movaps (%0, %2), %%xmm0 \n" \
		74	"movaps (%0, %3), %%xmm1 \n" \
		75	"movaps (%0, %4), %%xmm2 \n" \
		76	"movaps (%0, %5), %%xmm3 \n" \
		77	"movaps (%0, %6), %%xmm4 \n" \
		78	"mulps %%xmm5, %%xmm0 \n" \
		79	"mulps %%xmm6, %%xmm1 \n" \
		80	"mulps %%xmm5, %%xmm2 \n" \
		81	"mulps %%xmm7, %%xmm3 \n" \
		82	"mulps %%xmm7, %%xmm4 \n" \
		83	stereo("addps %%xmm1, %%xmm0 \n") \
		84	"addps %%xmm1, %%xmm2 \n" \
		85	"addps %%xmm3, %%xmm0 \n" \
		86	"addps %%xmm4, %%xmm2 \n" \
		87	mono("addps %%xmm2, %%xmm0 \n") \
		88	"movaps %%xmm0, (%0, %2) \n" \
		89	stereo("movaps %%xmm2, (%0, %3) \n") \
		90	"add $16, %0 \n" \
		91	"jl 1b \n" \
		92	: "+&r"(i) \
		93	: "r"(matrix), \
		94	"r"(samples[0] + len), \
		95	"r"(samples[1] + len), \
		96	"r"(samples[2] + len), \
		97	"r"(samples[3] + len), \
		98	"r"(samples[4] + len) \
		99	: XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", \
		100	"%xmm4", "%xmm5", "%xmm6", "%xmm7",) \
		101	"memory" \
		102	);
		103
		/*
		 * MIX_MISC(stereo): generic SSE inline-asm downmix for an arbitrary number of
		 * input channels, done in place. `stereo` is IF1 for two output channels and
		 * IF0 for one.
		 *
		 * Uses `i`, `j`, `k`, `m`, `matrix_simd`, `samp` and `in_ch` from the
		 * enclosing scope. Operands: %0 = i (negative byte offset, +16 per outer
		 * iteration), %1 = j and %3 = m (scratch channel pointers), %2 = k (inner
		 * channel counter), %4 = matrix_simd + in_ch, %5 = -4 * (in_ch - 1),
		 * %6 = samp + in_ch, %7 = sizeof(float *), %8 = sizeof(float *) / 4.
		 *
		 * k runs from -4 * (in_ch - 1) towards 0 in steps of 4, so k * %c8 is a
		 * pointer-sized stride into samp[] and k * 8 is a 32-byte stride into
		 * matrix_simd[] (one [2][4] float entry per channel). Because samp[] holds
		 * `float *` elements, %7 and %8 must be derived from sizeof(float *), not
		 * sizeof(float); the two only coincide where pointers are 4 bytes wide.
		 *
		 * Outer loop "1": loads four floats of channel 0 and scales them by xmm4
		 * (and by xmm5 for the second output). xmm4/xmm5 are not loaded here; they
		 * are read as left behind by the asm statement that precedes the macro in
		 * ac3_downmix_sse().
		 * Inner loop "2": for the remaining channels, multiplies four floats by the
		 * channel's broadcast coefficients and accumulates into xmm0 (and xmm1).
		 * The sums are then stored to samp[0] (and samp[1] for stereo).
		 * NOTE(review): the inner body runs at least once, so in_ch >= 2 appears to
		 * be assumed -- confirm against callers.
		 */
		104	#define MIX_MISC(stereo) \
		105	__asm__ volatile ( \
		106	"mov %5, %2 \n" \
		107	"1: \n" \
		108	"mov -%c7(%6, %2, %c8), %3 \n" \
		109	"movaps (%3, %0), %%xmm0 \n" \
		110	stereo("movaps %%xmm0, %%xmm1 \n") \
		111	"mulps %%xmm4, %%xmm0 \n" \
		112	stereo("mulps %%xmm5, %%xmm1 \n") \
		113	"2: \n" \
		114	"mov (%6, %2, %c8), %1 \n" \
		115	"movaps (%1, %0), %%xmm2 \n" \
		116	stereo("movaps %%xmm2, %%xmm3 \n") \
		117	"mulps (%4, %2, 8), %%xmm2 \n" \
		118	stereo("mulps 16(%4, %2, 8), %%xmm3 \n") \
		119	"addps %%xmm2, %%xmm0 \n" \
		120	stereo("addps %%xmm3, %%xmm1 \n") \
		121	"add $4, %2 \n" \
		122	"jl 2b \n" \
		123	"mov %5, %2 \n" \
		124	stereo("mov (%6, %2, %c8), %1 \n") \
		125	"movaps %%xmm0, (%3, %0) \n" \
		126	stereo("movaps %%xmm1, (%1, %0) \n") \
		127	"add $16, %0 \n" \
		128	"jl 1b \n" \
		129	: "+&r"(i), "=&r"(j), "=&r"(k), "=&r"(m) \
		130	: "r"(matrix_simd + in_ch), \
		131	"g"((intptr_t) - 4 * (in_ch - 1)), \
		132	"r"(samp + in_ch), \
		133	"i"(sizeof(float *)), "i"(sizeof(float *)/4) \
		134	: "memory" \
		135	);
		136
		/**
		 * In-place AC-3 downmix using SSE inline assembly.
		 *
		 * @param samples array of in_ch channel buffers; the mixed result is written
		 *                back to samples[0] (and samples[1] when out_ch == 2)
		 * @param matrix  per-input-channel coefficient pairs, indexed
		 *                matrix[input channel][output channel]
		 * @param out_ch  number of output channels; 2 selects the stereo paths,
		 *                any other value the single-output paths
		 * @param in_ch   number of input channels
		 * @param len     number of samples per channel
		 *
		 * Two specialised 5-channel kernels (MIX5) are used when the matrix has the
		 * symmetric/sparse shape tested below; every other case goes through the
		 * generic MIX_MISC kernel.
		 */
		137	static void ac3_downmix_sse(float **samples, float (*matrix)[2],
		138	                            int out_ch, int in_ch, int len)
		139	{
		    /* Integer view of the coefficients so they can be compared bit-for-bit. */
		140	    int (*matrix_cmp)[2] = (int(*)[2])matrix;
		141	    intptr_t i, j, k, m;
		142
		    /* Negative byte offset; the asm loops count it upwards in steps of 16. */
		143	    i = -len * sizeof(float);
		    /* 5 -> 2 fast path: the four listed coefficients have all-zero bits, and
		     * the pairs [1][0]/[1][1] and [0][0]/[2][1] are bit-identical. */
		144	    if (in_ch == 5 && out_ch == 2 &&
		145	        !(matrix_cmp[0][1] | matrix_cmp[2][0] |
		146	          matrix_cmp[3][1] | matrix_cmp[4][0] |
		147	          (matrix_cmp[1][0] ^ matrix_cmp[1][1]) |
		148	          (matrix_cmp[0][0] ^ matrix_cmp[2][1]))) {
		149	        MIX5(IF0, IF1);
		    /* 5 -> 1 fast path: channels 0/2 and channels 3/4 share a coefficient. */
		150	    } else if (in_ch == 5 && out_ch == 1 &&
		151	               matrix_cmp[0][0] == matrix_cmp[2][0] &&
		152	               matrix_cmp[3][0] == matrix_cmp[4][0]) {
		153	        MIX5(IF1, IF0);
		154	    } else {
		155	        DECLARE_ALIGNED(16, float, matrix_simd)[AC3_MAX_CHANNELS][2][4];
		156	        float *samp[AC3_MAX_CHANNELS];
		157
		        /* Bias each channel pointer by len to match the negative offset i. */
		158	        for (j = 0; j < in_ch; j++)
		159	            samp[j] = samples[j] + len;
		160
		        /* Walk the matrix from its last row down to row 0 (8 bytes per row),
		         * broadcasting each coefficient into four lanes of matrix_simd. The
		         * final iteration leaves row 0 in xmm4/xmm5, which MIX_MISC reads
		         * without reloading. */
		161	        j = 2 * in_ch * sizeof(float);
		162	        __asm__ volatile (
		163	            "1: \n"
		164	            "sub $8, %0 \n"
		165	            "movss (%2, %0), %%xmm4 \n"
		166	            "movss 4(%2, %0), %%xmm5 \n"
		167	            "shufps $0, %%xmm4, %%xmm4 \n"
		168	            "shufps $0, %%xmm5, %%xmm5 \n"
		169	            "movaps %%xmm4, (%1, %0, 4) \n"
		170	            "movaps %%xmm5, 16(%1, %0, 4) \n"
		171	            "jg 1b \n"
		172	            : "+&r"(j)
		173	            : "r"(matrix_simd), "r"(matrix)
		174	            : "memory"
		175	        );
		176	        if (out_ch == 2) {
		177	            MIX_MISC(IF1);
		178	        } else {
		179	            MIX_MISC(IF0);
		180	        }
		181	    }
		182	}
		183
		184	#endif /* HAVE_SSE_INLINE && HAVE_7REGS */
		185
		/**
		 * Install x86-specific implementations into an AC3DSPContext.
		 *
		 * The CPU feature flags are queried once and the checks run from the oldest
		 * instruction set to the newest; each matching block overwrites the pointers
		 * set by earlier ones, so the last matching implementation wins.
		 *
		 * @param c         context whose function pointers are filled in
		 * @param bit_exact when non-zero, the 3DNow! float_to_fixed24 routine is not
		 *                  installed
		 */
		186	av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
		187	{
		188	int cpu_flags = av_get_cpu_flags();
		189
		190	if (EXTERNAL_MMX(cpu_flags)) {
		191	c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
		192	c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
		193	c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
		194	c->ac3_rshift_int32 = ff_ac3_rshift_int32_mmx;
		195	}
		196	if (EXTERNAL_AMD3DNOW(cpu_flags)) {
		197	if (!bit_exact) {
		198	c->float_to_fixed24 = ff_float_to_fixed24_3dnow;
		199	}
		200	}
		201	if (EXTERNAL_MMXEXT(cpu_flags)) {
		202	c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
		203	c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
		204	}
		205	if (EXTERNAL_SSE(cpu_flags)) {
		206	c->float_to_fixed24 = ff_float_to_fixed24_sse;
		207	}
		208	if (EXTERNAL_SSE2(cpu_flags)) {
		209	c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
		210	c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
		211	c->float_to_fixed24 = ff_float_to_fixed24_sse2;
		212	c->compute_mantissa_size = ff_ac3_compute_mantissa_size_sse2;
		213	c->extract_exponents = ff_ac3_extract_exponents_sse2;
		/* The SSE2 shift routines are skipped when the CPU is flagged SSE2SLOW,
		 * leaving whatever an earlier block installed. */
		214	if (!(cpu_flags & AV_CPU_FLAG_SSE2SLOW)) {
		215	c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
		216	c->ac3_rshift_int32 = ff_ac3_rshift_int32_sse2;
		217	}
		218	}
		219	if (EXTERNAL_SSSE3(cpu_flags)) {
		220	c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
		/* On CPUs flagged ATOM the SSSE3 extract_exponents is not installed, so
		 * the SSE2 one set above stays in place. */
		221	if (!(cpu_flags & AV_CPU_FLAG_ATOM)) {
		222	c->extract_exponents = ff_ac3_extract_exponents_ssse3;
		223	}
		224	}
		225
		/* The inline-asm downmix exists only when the build provides SSE inline asm
		 * and enough registers (same guard as around ac3_downmix_sse). */
		226	#if HAVE_SSE_INLINE && HAVE_7REGS
		227	if (INLINE_SSE(cpu_flags)) {
		228	c->downmix = ac3_downmix_sse;
		229	}
		230	#endif
		231	}

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/ffmpeg/ffmpeg-2.1/libavcodec/x86/ac3dsp_init.c – Rev 6148