WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/ffmpeg/libavcodec/x86/dsputil_qns_template.c

Rev	Author	Line No.	Line
4349	Serge	1	/*
		2	* DSP utils : QNS functions are compiled 3 times for mmx/3dnow/ssse3
		3	* Copyright (c) 2004 Michael Niedermayer
		4	*
		5	* MMX optimization by Michael Niedermayer
		6	* 3DNow! and SSSE3 optimization by Zuxy Meng
		7	*
		8	* This file is part of FFmpeg.
		9	*
		10	* FFmpeg is free software; you can redistribute it and/or
		11	* modify it under the terms of the GNU Lesser General Public
		12	* License as published by the Free Software Foundation; either
		13	* version 2.1 of the License, or (at your option) any later version.
		14	*
		15	* FFmpeg is distributed in the hope that it will be useful,
		16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
		17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
		18	* Lesser General Public License for more details.
		19	*
		20	* You should have received a copy of the GNU Lesser General Public
		21	* License along with FFmpeg; if not, write to the Free Software
		22	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		23	*/
		24
			/* Exclusive bound on |scale| for the asm paths in this file: 512, reduced by
			 * a right shift of SCALE_OFFSET when SCALE_OFFSET is positive. SCALE_OFFSET
			 * is supplied by the file that includes this template. */
		25	#define MAX_ABS (512 >> (SCALE_OFFSET>0 ? SCALE_OFFSET : 0))
		26
		27	static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale)
		28	{
		29	x86_reg i=0;
		30
		31	av_assert2(FFABS(scale) < MAX_ABS);
		32	scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
		33
		34	SET_RND(mm6);
		35	__asm__ volatile(
		36	"pxor %%mm7, %%mm7 \n\t"
		37	"movd %4, %%mm5 \n\t"
		38	"punpcklwd %%mm5, %%mm5 \n\t"
		39	"punpcklwd %%mm5, %%mm5 \n\t"
		40	".p2align 4 \n\t"
		41	"1: \n\t"
		42	"movq (%1, %0), %%mm0 \n\t"
		43	"movq 8(%1, %0), %%mm1 \n\t"
		44	PMULHRW(%%mm0, %%mm1, %%mm5, %%mm6)
		45	"paddw (%2, %0), %%mm0 \n\t"
		46	"paddw 8(%2, %0), %%mm1 \n\t"
		47	"psraw $6, %%mm0 \n\t"
		48	"psraw $6, %%mm1 \n\t"
		49	"pmullw (%3, %0), %%mm0 \n\t"
		50	"pmullw 8(%3, %0), %%mm1 \n\t"
		51	"pmaddwd %%mm0, %%mm0 \n\t"
		52	"pmaddwd %%mm1, %%mm1 \n\t"
		53	"paddd %%mm1, %%mm0 \n\t"
		54	"psrld $4, %%mm0 \n\t"
		55	"paddd %%mm0, %%mm7 \n\t"
		56	"add $16, %0 \n\t"
		57	"cmp $128, %0 \n\t" //FIXME optimize & bench
		58	" jb 1b \n\t"
		59	PHADDD(%%mm7, %%mm6)
		60	"psrld $2, %%mm7 \n\t"
		61	"movd %%mm7, %0 \n\t"
		62
		63	: "+r" (i)
		64	: "r"(basis), "r"(rem), "r"(weight), "g"(scale)
		65	);
		66	return i;
		67	}
		68
		69	static void DEF(add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale)
		70	{
		71	x86_reg i=0;
		72
		73	if(FFABS(scale) < MAX_ABS){
		74	scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
		75	SET_RND(mm6);
		76	__asm__ volatile(
		77	"movd %3, %%mm5 \n\t"
		78	"punpcklwd %%mm5, %%mm5 \n\t"
		79	"punpcklwd %%mm5, %%mm5 \n\t"
		80	".p2align 4 \n\t"
		81	"1: \n\t"
		82	"movq (%1, %0), %%mm0 \n\t"
		83	"movq 8(%1, %0), %%mm1 \n\t"
		84	PMULHRW(%%mm0, %%mm1, %%mm5, %%mm6)
		85	"paddw (%2, %0), %%mm0 \n\t"
		86	"paddw 8(%2, %0), %%mm1 \n\t"
		87	"movq %%mm0, (%2, %0) \n\t"
		88	"movq %%mm1, 8(%2, %0) \n\t"
		89	"add $16, %0 \n\t"
		90	"cmp $128, %0 \n\t" // FIXME optimize & bench
		91	" jb 1b \n\t"
		92
		93	: "+r" (i)
		94	: "r"(basis), "r"(rem), "g"(scale)
		95	);
		96	}else{
		97	for(i=0; i<8*8; i++){
		98	rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
		99	}
		100	}
		101	}

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/ffmpeg/libavcodec/x86/dsputil_qns_template.c – Rev 4349