WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/ffmpeg/ffmpeg-2.1/libavutil/x86/float_dsp_init.c

Rev	Author	Line No.	Line
6148	serge	1	/*
		2	* This file is part of FFmpeg.
		3	*
		4	* FFmpeg is free software; you can redistribute it and/or
		5	* modify it under the terms of the GNU Lesser General Public
		6	* License as published by the Free Software Foundation; either
		7	* version 2.1 of the License, or (at your option) any later version.
		8	*
		9	* FFmpeg is distributed in the hope that it will be useful,
		10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
		11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
		12	* Lesser General Public License for more details.
		13	*
		14	* You should have received a copy of the GNU Lesser General Public
		15	* License along with FFmpeg; if not, write to the Free Software
		16	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		17	*/
		18
		19	#include "config.h"
		20
		21	#include "libavutil/attributes.h"
		22	#include "libavutil/cpu.h"
		23	#include "libavutil/float_dsp.h"
		24	#include "cpu.h"
		25	#include "asm.h"
		26
		27	void ff_vector_fmul_sse(float dst, const float src0, const float *src1,
		28	int len);
		29	void ff_vector_fmul_avx(float dst, const float src0, const float *src1,
		30	int len);
		31
		32	void ff_vector_fmac_scalar_sse(float dst, const float src, float mul,
		33	int len);
		34	void ff_vector_fmac_scalar_avx(float dst, const float src, float mul,
		35	int len);
		36
		37	void ff_vector_fmul_scalar_sse(float dst, const float src, float mul,
		38	int len);
		39
		40	void ff_vector_dmul_scalar_sse2(double dst, const double src,
		41	double mul, int len);
		42	void ff_vector_dmul_scalar_avx(double dst, const double src,
		43	double mul, int len);
		44
		45	void ff_vector_fmul_add_sse(float dst, const float src0, const float *src1,
		46	const float *src2, int len);
		47	void ff_vector_fmul_add_avx(float dst, const float src0, const float *src1,
		48	const float *src2, int len);
		49
		50	void ff_vector_fmul_reverse_sse(float dst, const float src0,
		51	const float *src1, int len);
		52	void ff_vector_fmul_reverse_avx(float dst, const float src0,
		53	const float *src1, int len);
		54
		55	float ff_scalarproduct_float_sse(const float v1, const float v2, int order);
		56
		57	void ff_butterflies_float_sse(float src0, float src1, int len);
		58
		59	#if HAVE_6REGS && HAVE_INLINE_ASM
		60	static void vector_fmul_window_3dnowext(float dst, const float src0,
		61	const float src1, const float win,
		62	int len)
		63	{
		64	x86_reg i = -len * 4;
		65	x86_reg j = len * 4 - 8;
		66	__asm__ volatile (
		67	"1: \n"
		68	"pswapd (%5, %1), %%mm1 \n"
		69	"movq (%5, %0), %%mm0 \n"
		70	"pswapd (%4, %1), %%mm5 \n"
		71	"movq (%3, %0), %%mm4 \n"
		72	"movq %%mm0, %%mm2 \n"
		73	"movq %%mm1, %%mm3 \n"
		74	"pfmul %%mm4, %%mm2 \n" // src0[len + i] * win[len + i]
		75	"pfmul %%mm5, %%mm3 \n" // src1[j] * win[len + j]
		76	"pfmul %%mm4, %%mm1 \n" // src0[len + i] * win[len + j]
		77	"pfmul %%mm5, %%mm0 \n" // src1[j] * win[len + i]
		78	"pfadd %%mm3, %%mm2 \n"
		79	"pfsub %%mm0, %%mm1 \n"
		80	"pswapd %%mm2, %%mm2 \n"
		81	"movq %%mm1, (%2, %0) \n"
		82	"movq %%mm2, (%2, %1) \n"
		83	"sub $8, %1 \n"
		84	"add $8, %0 \n"
		85	"jl 1b \n"
		86	"femms \n"
		87	: "+r"(i), "+r"(j)
		88	: "r"(dst + len), "r"(src0 + len), "r"(src1), "r"(win + len)
		89	);
		90	}
		91
		92	static void vector_fmul_window_sse(float dst, const float src0,
		93	const float src1, const float win, int len)
		94	{
		95	x86_reg i = -len * 4;
		96	x86_reg j = len * 4 - 16;
		97	__asm__ volatile (
		98	"1: \n"
		99	"movaps (%5, %1), %%xmm1 \n"
		100	"movaps (%5, %0), %%xmm0 \n"
		101	"movaps (%4, %1), %%xmm5 \n"
		102	"movaps (%3, %0), %%xmm4 \n"
		103	"shufps $0x1b, %%xmm1, %%xmm1 \n"
		104	"shufps $0x1b, %%xmm5, %%xmm5 \n"
		105	"movaps %%xmm0, %%xmm2 \n"
		106	"movaps %%xmm1, %%xmm3 \n"
		107	"mulps %%xmm4, %%xmm2 \n" // src0[len + i] * win[len + i]
		108	"mulps %%xmm5, %%xmm3 \n" // src1[j] * win[len + j]
		109	"mulps %%xmm4, %%xmm1 \n" // src0[len + i] * win[len + j]
		110	"mulps %%xmm5, %%xmm0 \n" // src1[j] * win[len + i]
		111	"addps %%xmm3, %%xmm2 \n"
		112	"subps %%xmm0, %%xmm1 \n"
		113	"shufps $0x1b, %%xmm2, %%xmm2 \n"
		114	"movaps %%xmm1, (%2, %0) \n"
		115	"movaps %%xmm2, (%2, %1) \n"
		116	"sub $16, %1 \n"
		117	"add $16, %0 \n"
		118	"jl 1b \n"
		119	: "+r"(i), "+r"(j)
		120	: "r"(dst + len), "r"(src0 + len), "r"(src1), "r"(win + len)
		121	);
		122	}
		123	#endif /* HAVE_6REGS && HAVE_INLINE_ASM */
		124
		125	av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
		126	{
		127	int cpu_flags = av_get_cpu_flags();
		128
		129	#if HAVE_6REGS && HAVE_INLINE_ASM
		130	if (INLINE_AMD3DNOWEXT(cpu_flags)) {
		131	fdsp->vector_fmul_window = vector_fmul_window_3dnowext;
		132	}
		133	if (INLINE_SSE(cpu_flags)) {
		134	fdsp->vector_fmul_window = vector_fmul_window_sse;
		135	}
		136	#endif
		137	if (EXTERNAL_SSE(cpu_flags)) {
		138	fdsp->vector_fmul = ff_vector_fmul_sse;
		139	fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_sse;
		140	fdsp->vector_fmul_scalar = ff_vector_fmul_scalar_sse;
		141	fdsp->vector_fmul_add = ff_vector_fmul_add_sse;
		142	fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
		143	fdsp->scalarproduct_float = ff_scalarproduct_float_sse;
		144	fdsp->butterflies_float = ff_butterflies_float_sse;
		145	}
		146	if (EXTERNAL_SSE2(cpu_flags)) {
		147	fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
		148	}
		149	if (EXTERNAL_AVX(cpu_flags)) {
		150	fdsp->vector_fmul = ff_vector_fmul_avx;
		151	fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx;
		152	fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx;
		153	fdsp->vector_fmul_add = ff_vector_fmul_add_avx;
		154	fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
		155	}
		156	}

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/ffmpeg/ffmpeg-2.1/libavutil/x86/float_dsp_init.c – Rev 6148