Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6148 | serge | 1 | /* |
2 | * Optimized for ia32 CPUs by Nick Kurshev |
||
3 | * h263, mpeg1, mpeg2 dequantizer & draw_edges by Michael Niedermayer |
||
4 | * |
||
5 | * This file is part of FFmpeg. |
||
6 | * |
||
7 | * FFmpeg is free software; you can redistribute it and/or |
||
8 | * modify it under the terms of the GNU Lesser General Public |
||
9 | * License as published by the Free Software Foundation; either |
||
10 | * version 2.1 of the License, or (at your option) any later version. |
||
11 | * |
||
12 | * FFmpeg is distributed in the hope that it will be useful, |
||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
15 | * Lesser General Public License for more details. |
||
16 | * |
||
17 | * You should have received a copy of the GNU Lesser General Public |
||
18 | * License along with FFmpeg; if not, write to the Free Software |
||
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
20 | */ |
||
21 | |||
22 | #include "libavutil/attributes.h" |
||
23 | #include "libavutil/cpu.h" |
||
24 | #include "libavutil/x86/asm.h" |
||
25 | #include "libavutil/x86/cpu.h" |
||
26 | #include "libavcodec/avcodec.h" |
||
27 | #include "libavcodec/mpegvideo.h" |
||
28 | #include "dsputil_x86.h" |
||
29 | |||
30 | #if HAVE_MMX_INLINE |
||
31 | |||
/**
 * Dequantize an H.263 intra block using MMX inline assembly.
 *
 * For every nonzero coefficient the asm loop computes
 *     sign(c) * (|c| * qmul + qadd)
 * while zero coefficients stay zero.  The DC coefficient (block[0]) is
 * computed separately in C — scaled by the luma/chroma DC scale unless
 * AIC (advanced intra coding) is active — and written back after the loop.
 *
 * @param s      codec context (reads h263_aic, ac_pred, scale tables)
 * @param block  coefficient block, modified in place
 * @param n      block index (0-3 luma, 4+ chroma)
 * @param qscale quantizer scale
 */
static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
                                          int16_t *block, int n, int qscale)
{
    x86_reg level, qmul, qadd, nCoeffs;

    qmul = qscale << 1;

    av_assert2(s->block_last_index[n] >= 0 || s->h263_aic);

    if (!s->h263_aic) {
        /* blocks 0-3 are luma, the rest chroma: pick the matching DC scale */
        if (n < 4)
            level = block[0] * s->y_dc_scale;
        else
            level = block[0] * s->c_dc_scale;
        qadd = (qscale - 1) | 1; /* force qadd odd */
    } else {
        qadd  = 0;
        level = block[0];
    }
    if (s->ac_pred)
        nCoeffs = 63; /* with AC prediction any of the 64 coeffs may be set */
    else
        nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];

    /* Loop runs from -2*nCoeffs up to 0 in 16-byte steps, i.e. over
     * block[0..nCoeffs] (8 coefficients per iteration). */
    __asm__ volatile(
        "movd %1, %%mm6                 \n\t" //qmul
        "packssdw %%mm6, %%mm6          \n\t"
        "packssdw %%mm6, %%mm6          \n\t"
        "movd %2, %%mm5                 \n\t" //qadd
        "pxor %%mm7, %%mm7              \n\t"
        "packssdw %%mm5, %%mm5          \n\t"
        "packssdw %%mm5, %%mm5          \n\t"
        "psubw %%mm5, %%mm7             \n\t" /* mm7 = -qadd in every word */
        "pxor %%mm4, %%mm4              \n\t"
        ".p2align 4                     \n\t"
        "1:                             \n\t"
        "movq (%0, %3), %%mm0           \n\t"
        "movq 8(%0, %3), %%mm1          \n\t"

        "pmullw %%mm6, %%mm0            \n\t"
        "pmullw %%mm6, %%mm1            \n\t"

        "movq (%0, %3), %%mm2           \n\t"
        "movq 8(%0, %3), %%mm3          \n\t"

        "pcmpgtw %%mm4, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
        "pcmpgtw %%mm4, %%mm3           \n\t" // block[i] < 0 ? -1 : 0

        "pxor %%mm2, %%mm0              \n\t"
        "pxor %%mm3, %%mm1              \n\t"

        "paddw %%mm7, %%mm0             \n\t"
        "paddw %%mm7, %%mm1             \n\t"

        "pxor %%mm0, %%mm2              \n\t"
        "pxor %%mm1, %%mm3              \n\t"

        "pcmpeqw %%mm7, %%mm0           \n\t" // block[i] == 0 ? -1 : 0
        "pcmpeqw %%mm7, %%mm1           \n\t" // block[i] == 0 ? -1 : 0

        "pandn %%mm2, %%mm0             \n\t"
        "pandn %%mm3, %%mm1             \n\t"

        "movq %%mm0, (%0, %3)           \n\t"
        "movq %%mm1, 8(%0, %3)          \n\t"

        "add $16, %3                    \n\t"
        "jng 1b                         \n\t"
        ::"r" (block+nCoeffs), "rm"(qmul), "rm" (qadd), "r" (2*(-nCoeffs))
        : "memory"
    );
    block[0] = level; /* restore the separately-computed DC coefficient */
}
||
105 | |||
106 | |||
/**
 * Dequantize an H.263 inter block using MMX inline assembly.
 *
 * Same kernel as the intra version — every nonzero coefficient becomes
 *     sign(c) * (|c| * qmul + qadd)
 * and zeros stay zero — but with no special DC handling: block[0] is
 * dequantized like any other coefficient.
 *
 * @param s      codec context (reads block_last_index, inter_scantable)
 * @param block  coefficient block, modified in place
 * @param n      block index
 * @param qscale quantizer scale
 */
static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
                                          int16_t *block, int n, int qscale)
{
    x86_reg qmul, qadd, nCoeffs;

    qmul = qscale << 1;
    qadd = (qscale - 1) | 1; /* force qadd odd */

    av_assert2(s->block_last_index[n] >= 0 || s->h263_aic);

    nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];

    /* Loop runs from -2*nCoeffs up to 0 in 16-byte steps, i.e. over
     * block[0..nCoeffs] (8 coefficients per iteration). */
    __asm__ volatile(
        "movd %1, %%mm6                 \n\t" //qmul
        "packssdw %%mm6, %%mm6          \n\t"
        "packssdw %%mm6, %%mm6          \n\t"
        "movd %2, %%mm5                 \n\t" //qadd
        "pxor %%mm7, %%mm7              \n\t"
        "packssdw %%mm5, %%mm5          \n\t"
        "packssdw %%mm5, %%mm5          \n\t"
        "psubw %%mm5, %%mm7             \n\t" /* mm7 = -qadd in every word */
        "pxor %%mm4, %%mm4              \n\t"
        ".p2align 4                     \n\t"
        "1:                             \n\t"
        "movq (%0, %3), %%mm0           \n\t"
        "movq 8(%0, %3), %%mm1          \n\t"

        "pmullw %%mm6, %%mm0            \n\t"
        "pmullw %%mm6, %%mm1            \n\t"

        "movq (%0, %3), %%mm2           \n\t"
        "movq 8(%0, %3), %%mm3          \n\t"

        "pcmpgtw %%mm4, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
        "pcmpgtw %%mm4, %%mm3           \n\t" // block[i] < 0 ? -1 : 0

        "pxor %%mm2, %%mm0              \n\t"
        "pxor %%mm3, %%mm1              \n\t"

        "paddw %%mm7, %%mm0             \n\t"
        "paddw %%mm7, %%mm1             \n\t"

        "pxor %%mm0, %%mm2              \n\t"
        "pxor %%mm1, %%mm3              \n\t"

        "pcmpeqw %%mm7, %%mm0           \n\t" // block[i] == 0 ? -1 : 0
        "pcmpeqw %%mm7, %%mm1           \n\t" // block[i] == 0 ? -1 : 0

        "pandn %%mm2, %%mm0             \n\t"
        "pandn %%mm3, %%mm1             \n\t"

        "movq %%mm0, (%0, %3)           \n\t"
        "movq %%mm1, 8(%0, %3)          \n\t"

        "add $16, %3                    \n\t"
        "jng 1b                         \n\t"
        ::"r" (block+nCoeffs), "rm"(qmul), "rm" (qadd), "r" (2*(-nCoeffs))
        : "memory"
    );
}
||
167 | |||
/**
 * Dequantize an MPEG-1 intra block using MMX inline assembly.
 *
 * Each nonzero AC coefficient becomes
 *     sign(c) * odd((|c| * qscale * quant_matrix[i]) >> 3)
 * where the psubw/por pair below forces the magnitude's least significant
 * bit set (MPEG-1 "round toward odd" behaviour).  Zero coefficients stay
 * zero.  The DC coefficient is computed separately in C (scaled by the
 * luma/chroma DC scale) and restored after the loop.
 *
 * @param s      codec context (reads intra_scantable, intra_matrix, DC scales)
 * @param block  coefficient block, modified in place
 * @param n      block index (0-3 luma, 4+ chroma)
 * @param qscale quantizer scale
 */
static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
                                           int16_t *block, int n, int qscale)
{
    x86_reg nCoeffs;
    const uint16_t *quant_matrix;
    int block0;

    av_assert2(s->block_last_index[n] >= 0);

    nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;

    if (n < 4)
        block0 = block[0] * s->y_dc_scale;
    else
        block0 = block[0] * s->c_dc_scale;
    /* XXX: only mpeg1 */
    quant_matrix = s->intra_matrix;
    __asm__ volatile(
        "pcmpeqw %%mm7, %%mm7           \n\t"
        "psrlw $15, %%mm7               \n\t" /* mm7 = 1 in every word */
        "movd %2, %%mm6                 \n\t"
        "packssdw %%mm6, %%mm6          \n\t"
        "packssdw %%mm6, %%mm6          \n\t" /* mm6 = qscale in every word */
        "mov %3, %%"REG_a"              \n\t"
        ".p2align 4                     \n\t"
        "1:                             \n\t"
        "movq (%0, %%"REG_a"), %%mm0    \n\t"
        "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
        "movq (%1, %%"REG_a"), %%mm4    \n\t"
        "movq 8(%1, %%"REG_a"), %%mm5   \n\t"
        "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
        "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
        "pxor %%mm2, %%mm2              \n\t"
        "pxor %%mm3, %%mm3              \n\t"
        "pcmpgtw %%mm0, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
        "pcmpgtw %%mm1, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
        "pxor %%mm2, %%mm0              \n\t"
        "pxor %%mm3, %%mm1              \n\t"
        "psubw %%mm2, %%mm0             \n\t" // abs(block[i])
        "psubw %%mm3, %%mm1             \n\t" // abs(block[i])
        "pmullw %%mm4, %%mm0            \n\t" // abs(block[i])*q
        "pmullw %%mm5, %%mm1            \n\t" // abs(block[i])*q
        "pxor %%mm4, %%mm4              \n\t"
        "pxor %%mm5, %%mm5              \n\t" // FIXME slow
        "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
        "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
        "psraw $3, %%mm0                \n\t"
        "psraw $3, %%mm1                \n\t"
        "psubw %%mm7, %%mm0             \n\t" /* subtract 1 ... */
        "psubw %%mm7, %%mm1             \n\t"
        "por %%mm7, %%mm0               \n\t" /* ... then set bit 0: odd result */
        "por %%mm7, %%mm1               \n\t"
        "pxor %%mm2, %%mm0              \n\t" /* re-apply the sign */
        "pxor %%mm3, %%mm1              \n\t"
        "psubw %%mm2, %%mm0             \n\t"
        "psubw %%mm3, %%mm1             \n\t"
        "pandn %%mm0, %%mm4             \n\t" /* keep zeros zero */
        "pandn %%mm1, %%mm5             \n\t"
        "movq %%mm4, (%0, %%"REG_a")    \n\t"
        "movq %%mm5, 8(%0, %%"REG_a")   \n\t"

        "add $16, %%"REG_a"             \n\t"
        "js 1b                          \n\t"
        ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
        : "%"REG_a, "memory"
    );
    block[0] = block0; /* restore the separately-computed DC coefficient */
}
||
236 | |||
/**
 * Dequantize an MPEG-1 inter block using MMX inline assembly.
 *
 * Each nonzero coefficient becomes
 *     sign(c) * odd(((2*|c| + 1) * qscale * quant_matrix[i]) >> 4)
 * with the psubw/por pair forcing the magnitude odd (MPEG-1
 * "round toward odd").  Zero coefficients stay zero; there is no special
 * DC handling for inter blocks.
 *
 * @param s      codec context (reads intra_scantable for raster_end,
 *               inter_matrix)
 * @param block  coefficient block, modified in place
 * @param n      block index
 * @param qscale quantizer scale
 */
static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
                                           int16_t *block, int n, int qscale)
{
    x86_reg nCoeffs;
    const uint16_t *quant_matrix;

    av_assert2(s->block_last_index[n] >= 0);

    nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;

    quant_matrix = s->inter_matrix;
    __asm__ volatile(
        "pcmpeqw %%mm7, %%mm7           \n\t"
        "psrlw $15, %%mm7               \n\t" /* mm7 = 1 in every word */
        "movd %2, %%mm6                 \n\t"
        "packssdw %%mm6, %%mm6          \n\t"
        "packssdw %%mm6, %%mm6          \n\t" /* mm6 = qscale in every word */
        "mov %3, %%"REG_a"              \n\t"
        ".p2align 4                     \n\t"
        "1:                             \n\t"
        "movq (%0, %%"REG_a"), %%mm0    \n\t"
        "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
        "movq (%1, %%"REG_a"), %%mm4    \n\t"
        "movq 8(%1, %%"REG_a"), %%mm5   \n\t"
        "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
        "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
        "pxor %%mm2, %%mm2              \n\t"
        "pxor %%mm3, %%mm3              \n\t"
        "pcmpgtw %%mm0, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
        "pcmpgtw %%mm1, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
        "pxor %%mm2, %%mm0              \n\t"
        "pxor %%mm3, %%mm1              \n\t"
        "psubw %%mm2, %%mm0             \n\t" // abs(block[i])
        "psubw %%mm3, %%mm1             \n\t" // abs(block[i])
        "paddw %%mm0, %%mm0             \n\t" // abs(block[i])*2
        "paddw %%mm1, %%mm1             \n\t" // abs(block[i])*2
        "paddw %%mm7, %%mm0             \n\t" // abs(block[i])*2 + 1
        "paddw %%mm7, %%mm1             \n\t" // abs(block[i])*2 + 1
        "pmullw %%mm4, %%mm0            \n\t" // (abs(block[i])*2 + 1)*q
        "pmullw %%mm5, %%mm1            \n\t" // (abs(block[i])*2 + 1)*q
        "pxor %%mm4, %%mm4              \n\t"
        "pxor %%mm5, %%mm5              \n\t" // FIXME slow
        "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
        "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
        "psraw $4, %%mm0                \n\t"
        "psraw $4, %%mm1                \n\t"
        "psubw %%mm7, %%mm0             \n\t" /* subtract 1 ... */
        "psubw %%mm7, %%mm1             \n\t"
        "por %%mm7, %%mm0               \n\t" /* ... then set bit 0: odd result */
        "por %%mm7, %%mm1               \n\t"
        "pxor %%mm2, %%mm0              \n\t" /* re-apply the sign */
        "pxor %%mm3, %%mm1              \n\t"
        "psubw %%mm2, %%mm0             \n\t"
        "psubw %%mm3, %%mm1             \n\t"
        "pandn %%mm0, %%mm4             \n\t" /* keep zeros zero */
        "pandn %%mm1, %%mm5             \n\t"
        "movq %%mm4, (%0, %%"REG_a")    \n\t"
        "movq %%mm5, 8(%0, %%"REG_a")   \n\t"

        "add $16, %%"REG_a"             \n\t"
        "js 1b                          \n\t"
        ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
        : "%"REG_a, "memory"
    );
}
||
302 | |||
/**
 * Dequantize an MPEG-2 intra block using MMX inline assembly.
 *
 * Each nonzero AC coefficient becomes
 *     sign(c) * ((|c| * qscale * quant_matrix[i]) >> 3)
 * (no odd-forcing, unlike the MPEG-1 kernel).  Zero coefficients stay
 * zero.  The DC coefficient is computed separately in C and restored
 * after the loop.  Mismatch control is intentionally omitted here — see
 * the note at the end.
 *
 * @param s      codec context (reads alternate_scan, intra_scantable,
 *               intra_matrix, DC scales)
 * @param block  coefficient block, modified in place
 * @param n      block index (0-3 luma, 4+ chroma)
 * @param qscale quantizer scale
 */
static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
                                           int16_t *block, int n, int qscale)
{
    x86_reg nCoeffs;
    const uint16_t *quant_matrix;
    int block0;

    av_assert2(s->block_last_index[n] >= 0);

    if (s->alternate_scan) nCoeffs = 63; //FIXME
    else nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]];

    if (n < 4)
        block0 = block[0] * s->y_dc_scale;
    else
        block0 = block[0] * s->c_dc_scale;
    quant_matrix = s->intra_matrix;
    __asm__ volatile(
        "pcmpeqw %%mm7, %%mm7           \n\t"
        "psrlw $15, %%mm7               \n\t" /* mm7 = 1 in every word */
        "movd %2, %%mm6                 \n\t"
        "packssdw %%mm6, %%mm6          \n\t"
        "packssdw %%mm6, %%mm6          \n\t" /* mm6 = qscale in every word */
        "mov %3, %%"REG_a"              \n\t"
        ".p2align 4                     \n\t"
        "1:                             \n\t"
        "movq (%0, %%"REG_a"), %%mm0    \n\t"
        "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
        "movq (%1, %%"REG_a"), %%mm4    \n\t"
        "movq 8(%1, %%"REG_a"), %%mm5   \n\t"
        "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
        "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
        "pxor %%mm2, %%mm2              \n\t"
        "pxor %%mm3, %%mm3              \n\t"
        "pcmpgtw %%mm0, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
        "pcmpgtw %%mm1, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
        "pxor %%mm2, %%mm0              \n\t"
        "pxor %%mm3, %%mm1              \n\t"
        "psubw %%mm2, %%mm0             \n\t" // abs(block[i])
        "psubw %%mm3, %%mm1             \n\t" // abs(block[i])
        "pmullw %%mm4, %%mm0            \n\t" // abs(block[i])*q
        "pmullw %%mm5, %%mm1            \n\t" // abs(block[i])*q
        "pxor %%mm4, %%mm4              \n\t"
        "pxor %%mm5, %%mm5              \n\t" // FIXME slow
        "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
        "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
        "psraw $3, %%mm0                \n\t"
        "psraw $3, %%mm1                \n\t"
        "pxor %%mm2, %%mm0              \n\t" /* re-apply the sign */
        "pxor %%mm3, %%mm1              \n\t"
        "psubw %%mm2, %%mm0             \n\t"
        "psubw %%mm3, %%mm1             \n\t"
        "pandn %%mm0, %%mm4             \n\t" /* keep zeros zero */
        "pandn %%mm1, %%mm5             \n\t"
        "movq %%mm4, (%0, %%"REG_a")    \n\t"
        "movq %%mm5, 8(%0, %%"REG_a")   \n\t"

        "add $16, %%"REG_a"             \n\t"
        "jng 1b                         \n\t"
        ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
        : "%"REG_a, "memory"
    );
    block[0] = block0; /* restore the separately-computed DC coefficient */
    //Note, we do not do mismatch control for intra as errors cannot accumulate
}
||
368 | |||
/**
 * Dequantize an MPEG-2 inter block using MMX inline assembly.
 *
 * Each nonzero coefficient becomes
 *     sign(c) * (((2*|c| + 1) * qscale * quant_matrix[i]) >> 4)
 * with zeros kept zero.  While writing results, mm7 accumulates the XOR
 * of all output words; the tail after the loop folds that down to a
 * single parity bit and XORs it into the LSB of the last coefficient
 * (the movd at byte offset 124 covers words 62/63) — MPEG-2 mismatch
 * control.
 *
 * @param s      codec context (reads alternate_scan, intra_scantable for
 *               raster_end, inter_matrix)
 * @param block  coefficient block, modified in place
 * @param n      block index
 * @param qscale quantizer scale
 */
static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
                                           int16_t *block, int n, int qscale)
{
    x86_reg nCoeffs;
    const uint16_t *quant_matrix;

    av_assert2(s->block_last_index[n] >= 0);

    if (s->alternate_scan) nCoeffs = 63; //FIXME
    else nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]];

    quant_matrix = s->inter_matrix;
    __asm__ volatile(
        "pcmpeqw %%mm7, %%mm7           \n\t"
        "psrlq $48, %%mm7               \n\t" /* mm7 = 0x000000000000FFFF */
        "movd %2, %%mm6                 \n\t"
        "packssdw %%mm6, %%mm6          \n\t"
        "packssdw %%mm6, %%mm6          \n\t" /* mm6 = qscale in every word */
        "mov %3, %%"REG_a"              \n\t"
        ".p2align 4                     \n\t"
        "1:                             \n\t"
        "movq (%0, %%"REG_a"), %%mm0    \n\t"
        "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
        "movq (%1, %%"REG_a"), %%mm4    \n\t"
        "movq 8(%1, %%"REG_a"), %%mm5   \n\t"
        "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
        "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
        "pxor %%mm2, %%mm2              \n\t"
        "pxor %%mm3, %%mm3              \n\t"
        "pcmpgtw %%mm0, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
        "pcmpgtw %%mm1, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
        "pxor %%mm2, %%mm0              \n\t"
        "pxor %%mm3, %%mm1              \n\t"
        "psubw %%mm2, %%mm0             \n\t" // abs(block[i])
        "psubw %%mm3, %%mm1             \n\t" // abs(block[i])
        "paddw %%mm0, %%mm0             \n\t" // abs(block[i])*2
        "paddw %%mm1, %%mm1             \n\t" // abs(block[i])*2
        "pmullw %%mm4, %%mm0            \n\t" // abs(block[i])*2*q
        "pmullw %%mm5, %%mm1            \n\t" // abs(block[i])*2*q
        "paddw %%mm4, %%mm0             \n\t" // (abs(block[i])*2 + 1)*q
        "paddw %%mm5, %%mm1             \n\t" // (abs(block[i])*2 + 1)*q
        "pxor %%mm4, %%mm4              \n\t"
        "pxor %%mm5, %%mm5              \n\t" // FIXME slow
        "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
        "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
        "psrlw $4, %%mm0                \n\t"
        "psrlw $4, %%mm1                \n\t"
        "pxor %%mm2, %%mm0              \n\t" /* re-apply the sign */
        "pxor %%mm3, %%mm1              \n\t"
        "psubw %%mm2, %%mm0             \n\t"
        "psubw %%mm3, %%mm1             \n\t"
        "pandn %%mm0, %%mm4             \n\t" /* keep zeros zero */
        "pandn %%mm1, %%mm5             \n\t"
        "pxor %%mm4, %%mm7              \n\t" /* fold outputs into parity acc */
        "pxor %%mm5, %%mm7              \n\t"
        "movq %%mm4, (%0, %%"REG_a")    \n\t"
        "movq %%mm5, 8(%0, %%"REG_a")   \n\t"

        "add $16, %%"REG_a"             \n\t"
        "jng 1b                         \n\t"
        /* mismatch control: reduce mm7 to one parity bit and XOR it into
         * the LSB of the last coefficient (words 62/63 at byte 124) */
        "movd 124(%0, %3), %%mm0        \n\t"
        "movq %%mm7, %%mm6              \n\t"
        "psrlq $32, %%mm7               \n\t"
        "pxor %%mm6, %%mm7              \n\t"
        "movq %%mm7, %%mm6              \n\t"
        "psrlq $16, %%mm7               \n\t"
        "pxor %%mm6, %%mm7              \n\t"
        "pslld $31, %%mm7               \n\t"
        "psrlq $15, %%mm7               \n\t"
        "pxor %%mm7, %%mm0              \n\t"
        "movd %%mm0, 124(%0, %3)        \n\t"

        ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "r" (-2*nCoeffs)
        : "%"REG_a, "memory"
    );
}
||
445 | |||
/**
 * MMX DCT-coefficient denoiser (encoder side).
 *
 * For each of the 64 coefficients: reduce |block[i]| by offset[i] with
 * unsigned saturation (so small magnitudes collapse to 0), restore the
 * sign, and store back; meanwhile the pre-threshold magnitude |block[i]|
 * is added to sum[i] (dct_error_sum), which feeds the adaptive offsets.
 * Separate statistics are kept for intra and inter blocks.
 *
 * @param s     codec context (reads mb_intra, updates dct_error_sum/dct_count)
 * @param block coefficient block, modified in place
 */
static void denoise_dct_mmx(MpegEncContext *s, int16_t *block){
    const int intra = s->mb_intra;
    int *sum = s->dct_error_sum[intra];
    uint16_t *offset = s->dct_offset[intra];

    s->dct_count[intra]++;

    /* NOTE(review): this asm writes through block and sum but declares no
     * "memory" clobber (unlike the dequantizers above); presumably it relies
     * on all three pointers being "+r" in/outs — confirm this is intended. */
    __asm__ volatile(
        "pxor %%mm7, %%mm7                      \n\t"
        "1:                                     \n\t"
        "pxor %%mm0, %%mm0                      \n\t"
        "pxor %%mm1, %%mm1                      \n\t"
        "movq (%0), %%mm2                       \n\t"
        "movq 8(%0), %%mm3                      \n\t"
        "pcmpgtw %%mm2, %%mm0                   \n\t" /* sign masks */
        "pcmpgtw %%mm3, %%mm1                   \n\t"
        "pxor %%mm0, %%mm2                      \n\t"
        "pxor %%mm1, %%mm3                      \n\t"
        "psubw %%mm0, %%mm2                     \n\t" /* abs(block[i]) */
        "psubw %%mm1, %%mm3                     \n\t"
        "movq %%mm2, %%mm4                      \n\t" /* keep abs for the sums */
        "movq %%mm3, %%mm5                      \n\t"
        "psubusw (%2), %%mm2                    \n\t" /* max(abs - offset, 0) */
        "psubusw 8(%2), %%mm3                   \n\t"
        "pxor %%mm0, %%mm2                      \n\t" /* re-apply the sign */
        "pxor %%mm1, %%mm3                      \n\t"
        "psubw %%mm0, %%mm2                     \n\t"
        "psubw %%mm1, %%mm3                     \n\t"
        "movq %%mm2, (%0)                       \n\t"
        "movq %%mm3, 8(%0)                      \n\t"
        "movq %%mm4, %%mm2                      \n\t"
        "movq %%mm5, %%mm3                      \n\t"
        "punpcklwd %%mm7, %%mm4                 \n\t" /* widen abs to 32 bit */
        "punpckhwd %%mm7, %%mm2                 \n\t"
        "punpcklwd %%mm7, %%mm5                 \n\t"
        "punpckhwd %%mm7, %%mm3                 \n\t"
        "paddd (%1), %%mm4                      \n\t" /* sum[i] += abs */
        "paddd 8(%1), %%mm2                     \n\t"
        "paddd 16(%1), %%mm5                    \n\t"
        "paddd 24(%1), %%mm3                    \n\t"
        "movq %%mm4, (%1)                       \n\t"
        "movq %%mm2, 8(%1)                      \n\t"
        "movq %%mm5, 16(%1)                     \n\t"
        "movq %%mm3, 24(%1)                     \n\t"
        "add $16, %0                            \n\t" /* 8 int16 coeffs */
        "add $32, %1                            \n\t" /* 8 int32 sums */
        "add $16, %2                            \n\t" /* 8 uint16 offsets */
        "cmp %3, %0                             \n\t"
        " jb 1b                                 \n\t"
        : "+r" (block), "+r" (sum), "+r" (offset)
        : "r"(block+64)
    );
}
||
499 | |||
/**
 * SSE2 DCT-coefficient denoiser — same algorithm as denoise_dct_mmx but
 * processing 16 coefficients per iteration in XMM registers.
 *
 * For each coefficient: reduce |block[i]| by offset[i] with unsigned
 * saturation, restore the sign, store back, and add the pre-threshold
 * magnitude to sum[i] (dct_error_sum).  Assumes block/sum/offset are
 * 16-byte aligned (movdqa) — TODO confirm against the allocators.
 *
 * @param s     codec context (reads mb_intra, updates dct_error_sum/dct_count)
 * @param block coefficient block, modified in place
 */
static void denoise_dct_sse2(MpegEncContext *s, int16_t *block){
    const int intra = s->mb_intra;
    int *sum = s->dct_error_sum[intra];
    uint16_t *offset = s->dct_offset[intra];

    s->dct_count[intra]++;

    __asm__ volatile(
        "pxor %%xmm7, %%xmm7                    \n\t"
        "1:                                     \n\t"
        "pxor %%xmm0, %%xmm0                    \n\t"
        "pxor %%xmm1, %%xmm1                    \n\t"
        "movdqa (%0), %%xmm2                    \n\t"
        "movdqa 16(%0), %%xmm3                  \n\t"
        "pcmpgtw %%xmm2, %%xmm0                 \n\t" /* sign masks */
        "pcmpgtw %%xmm3, %%xmm1                 \n\t"
        "pxor %%xmm0, %%xmm2                    \n\t"
        "pxor %%xmm1, %%xmm3                    \n\t"
        "psubw %%xmm0, %%xmm2                   \n\t" /* abs(block[i]) */
        "psubw %%xmm1, %%xmm3                   \n\t"
        "movdqa %%xmm2, %%xmm4                  \n\t" /* keep abs for the sums */
        "movdqa %%xmm3, %%xmm5                  \n\t"
        "psubusw (%2), %%xmm2                   \n\t" /* max(abs - offset, 0) */
        "psubusw 16(%2), %%xmm3                 \n\t"
        "pxor %%xmm0, %%xmm2                    \n\t" /* re-apply the sign */
        "pxor %%xmm1, %%xmm3                    \n\t"
        "psubw %%xmm0, %%xmm2                   \n\t"
        "psubw %%xmm1, %%xmm3                   \n\t"
        "movdqa %%xmm2, (%0)                    \n\t"
        "movdqa %%xmm3, 16(%0)                  \n\t"
        "movdqa %%xmm4, %%xmm6                  \n\t"
        "movdqa %%xmm5, %%xmm0                  \n\t"
        "punpcklwd %%xmm7, %%xmm4               \n\t" /* widen abs to 32 bit */
        "punpckhwd %%xmm7, %%xmm6               \n\t"
        "punpcklwd %%xmm7, %%xmm5               \n\t"
        "punpckhwd %%xmm7, %%xmm0               \n\t"
        "paddd (%1), %%xmm4                     \n\t" /* sum[i] += abs */
        "paddd 16(%1), %%xmm6                   \n\t"
        "paddd 32(%1), %%xmm5                   \n\t"
        "paddd 48(%1), %%xmm0                   \n\t"
        "movdqa %%xmm4, (%1)                    \n\t"
        "movdqa %%xmm6, 16(%1)                  \n\t"
        "movdqa %%xmm5, 32(%1)                  \n\t"
        "movdqa %%xmm0, 48(%1)                  \n\t"
        "add $32, %0                            \n\t" /* 16 int16 coeffs */
        "add $64, %1                            \n\t" /* 16 int32 sums */
        "add $32, %2                            \n\t" /* 16 uint16 offsets */
        "cmp %3, %0                             \n\t"
        " jb 1b                                 \n\t"
        : "+r" (block), "+r" (sum), "+r" (offset)
        : "r"(block+64)
          XMM_CLOBBERS_ONLY("%xmm0", "%xmm1", "%xmm2", "%xmm3",
                            "%xmm4", "%xmm5", "%xmm6", "%xmm7")
    );
}
||
555 | |||
556 | #endif /* HAVE_MMX_INLINE */ |
||
557 | |||
558 | av_cold void ff_MPV_common_init_x86(MpegEncContext *s) |
||
559 | { |
||
560 | #if HAVE_MMX_INLINE |
||
561 | int cpu_flags = av_get_cpu_flags(); |
||
562 | |||
563 | if (INLINE_MMX(cpu_flags)) { |
||
564 | s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx; |
||
565 | s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx; |
||
566 | s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx; |
||
567 | s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx; |
||
568 | if(!(s->flags & CODEC_FLAG_BITEXACT)) |
||
569 | s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx; |
||
570 | s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx; |
||
571 | s->denoise_dct = denoise_dct_mmx; |
||
572 | } |
||
573 | if (INLINE_SSE2(cpu_flags)) { |
||
574 | s->denoise_dct = denoise_dct_sse2; |
||
575 | } |
||
576 | #endif /* HAVE_MMX_INLINE */ |
||
577 | }>>>>>>>>>>>>><>>>>><> |