WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/ffmpeg/libpostproc/postprocess.c

Rev	Author	Line No.	Line
4349	Serge	1	/*
		2	* Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
		3	*
		4	* AltiVec optimizations (C) 2004 Romain Dolbeau
		5	*
		6	* This file is part of FFmpeg.
		7	*
		8	* FFmpeg is free software; you can redistribute it and/or modify
		9	* it under the terms of the GNU General Public License as published by
		10	* the Free Software Foundation; either version 2 of the License, or
		11	* (at your option) any later version.
		12	*
		13	* FFmpeg is distributed in the hope that it will be useful,
		14	* but WITHOUT ANY WARRANTY; without even the implied warranty of
		15	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
		16	* GNU General Public License for more details.
		17	*
		18	* You should have received a copy of the GNU General Public License
		19	* along with FFmpeg; if not, write to the Free Software
		20	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		21	*/
		22
		23	/**
		24	* @file
		25	* postprocessing.
		26	*/
		27
		28	/*
                        C       MMX     MMX2    3DNow   AltiVec
isVertDC                Ec      Ec                      Ec
isVertMinMaxOk          Ec      Ec                      Ec
doVertLowPass           E               e       e       Ec
doVertDefFilter         Ec      Ec      e       e       Ec
isHorizDC               Ec      Ec                      Ec
isHorizMinMaxOk         a       E                       Ec
doHorizLowPass          E               e       e       Ec
doHorizDefFilter        Ec      Ec      e       e       Ec
do_a_deblock            Ec      E       Ec      E
deRing                  E               e       e*      Ecp
Vertical RKAlgo1        E               a       a
Horizontal RKAlgo1                      a       a
Vertical X1#            a               E       E
Horizontal X1#          a               E       E
LinIpolDeinterlace      e               E       E*
CubicIpolDeinterlace    a               e       e*
LinBlendDeinterlace     e               E       E*
MedianDeinterlace#      E       Ec      Ec
TempDeNoiser#           E               e       e       Ec
		49
		50	* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
		51	# more or less selfinvented filters so the exactness is not too meaningful
		52	E = Exact implementation
		53	e = almost exact implementation (slightly different rounding,...)
		54	a = alternative / approximate impl
		55	c = checked against the other implementations (-vo md5)
		56	p = partially optimized, still some work to do
		57	*/
		58
		59	/*
		60	TODO:
		61	reduce the time wasted on the mem transfer
		62	unroll stuff if instructions depend too much on the prior one
		63	move YScale thing to the end instead of fixing QP
		64	write a faster and higher quality deblocking filter :)
		65	make the mainloop more flexible (variable number of blocks at once
		66	(the if/else stuff per block is slowing things down)
		67	compare the quality & speed of all filters
		68	split this huge file
		69	optimize c versions
		70	try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
		71	...
		72	*/
		73
		74	//Changelog: use git log
		75
		76	#include "config.h"
		77	#include "libavutil/avutil.h"
		78	#include "libavutil/avassert.h"
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
		83	//#undef HAVE_MMXEXT_INLINE
		84	//#define HAVE_AMD3DNOW_INLINE
		85	//#undef HAVE_MMX_INLINE
		86	//#undef ARCH_X86
		87	//#define DEBUG_BRIGHTNESS
		88	#include "postprocess.h"
		89	#include "postprocess_internal.h"
		90	#include "libavutil/avstring.h"
		91
		92	unsigned postproc_version(void)
		93	{
		94	av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
		95	return LIBPOSTPROC_VERSION_INT;
		96	}
		97
		98	const char *postproc_configuration(void)
		99	{
		100	return FFMPEG_CONFIGURATION;
		101	}
		102
		103	const char *postproc_license(void)
		104	{
		105	#define LICENSE_PREFIX "libpostproc license: "
		106	return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
		107	}
		108
		109	#if HAVE_ALTIVEC_H
		110	#include
		111	#endif
		112
		113	#define GET_MODE_BUFFER_SIZE 500
		114	#define OPTIONS_ARRAY_SIZE 10
		115	#define BLOCK_SIZE 8
		116	#define TEMP_STRIDE 8
		117	//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
		118
#if ARCH_X86 && HAVE_INLINE_ASM
/*
 * 64-bit packed constants, only declared for x86 builds with inline asm.
 * wNN: four 16-bit lanes, each holding 0x00NN.
 * bNN: eight bytes, each holding 0xNN.
 */
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
#endif

/* Declared for every architecture (it sits outside the #if above).
 * NOTE(review): presumably consumed by the dering filter in the included
 * template -- confirm against postprocess_template.c. */
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
		131
		132
/*
 * Table of the filters that can be named in a postprocessing mode string.
 * pp_get_mode_by_name_and_quality() matches a filter token against both
 * names and uses chromDefault, minLumQuality, minChromQuality and mask.
 * Column order below is assumed to be: shortName, longName, chromDefault,
 * minLumQuality, minChromQuality, mask -- TODO confirm against the
 * struct PPFilter declaration.
 * The table is terminated by an entry whose shortName is NULL.
 */
static const struct PPFilter filters[]=
{
    {"hb", "hdeblock",       1, 1, 3, H_DEBLOCK},
    {"vb", "vdeblock",       1, 2, 4, V_DEBLOCK},
/*  {"hr", "rkhdeblock",     1, 1, 3, H_RK1_FILTER},
    {"vr", "rkvdeblock",     1, 2, 4, V_RK1_FILTER},*/
    {"h1", "x1hdeblock",     1, 1, 3, H_X1_FILTER},
    {"v1", "x1vdeblock",     1, 2, 4, V_X1_FILTER},
    {"ha", "ahdeblock",      1, 1, 3, H_A_DEBLOCK},
    {"va", "avdeblock",      1, 2, 4, V_A_DEBLOCK},
    {"dr", "dering",         1, 5, 6, DERING},
    {"al", "autolevels",     0, 1, 2, LEVEL_FIX},
    {"lb", "linblenddeint",  1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
    {"li", "linipoldeint",   1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
    {"ci", "cubicipoldeint", 1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
    {"md", "mediandeint",    1, 1, 4, MEDIAN_DEINT_FILTER},
    {"fd", "ffmpegdeint",    1, 1, 4, FFMPEG_DEINT_FILTER},
    {"l5", "lowpass5",       1, 1, 4, LOWPASS5_DEINT_FILTER},
    {"tn", "tmpnoise",       1, 7, 8, TEMP_NOISE_FILTER},
    {"fq", "forcequant",     1, 0, 0, FORCE_QUANT},
    {"be", "bitexact",       1, 0, 0, BITEXACT},
    {NULL, NULL,0,0,0,0} //End Marker
};
		156
/*
 * Alias table used by pp_get_mode_by_name_and_quality(): a flat list of
 * (alias, expansion) string pairs. Entry 2*i is the alias that is compared
 * with the filter name, entry 2*i+1 is the filter string spliced into the
 * parse buffer in its place. The list ends with a single NULL.
 */
static const char *replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
		166
		167
		168	#if ARCH_X86 && HAVE_INLINE_ASM
		169	static inline void prefetchnta(void *p)
		170	{
		171	__asm__ volatile( "prefetchnta (%0)\n\t"
		172	: : "r" (p)
		173	);
		174	}
		175
		176	static inline void prefetcht0(void *p)
		177	{
		178	__asm__ volatile( "prefetcht0 (%0)\n\t"
		179	: : "r" (p)
		180	);
		181	}
		182
		183	static inline void prefetcht1(void *p)
		184	{
		185	__asm__ volatile( "prefetcht1 (%0)\n\t"
		186	: : "r" (p)
		187	);
		188	}
		189
		190	static inline void prefetcht2(void *p)
		191	{
		192	__asm__ volatile( "prefetcht2 (%0)\n\t"
		193	: : "r" (p)
		194	);
		195	}
		196	#endif
		197
		198	/* The horizontal functions exist only in C because the MMX
		199	* code is faster with vertical filters and transposing. */
		200
		201	/**
		202	* Check if the given 8x8 Block is mostly "flat"
		203	*/
		204	static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
		205	{
		206	int numEq= 0;
		207	int y;
		208	const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
		209	const int dcThreshold= dcOffset*2 + 1;
		210
		211	for(y=0; y
		212	if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
		213	if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
		214	if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
		215	if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
		216	if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
		217	if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
		218	if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
		219	src+= stride;
		220	}
		221	return numEq > c->ppMode.flatnessThreshold;
		222	}
		223
		224	/**
		225	* Check if the middle 8x8 Block in the given 8x16 block is flat
		226	*/
		227	static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
		228	{
		229	int numEq= 0;
		230	int y;
		231	const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
		232	const int dcThreshold= dcOffset*2 + 1;
		233
		234	src+= stride*4; // src points to begin of the 8x8 Block
		235	for(y=0; y
		236	if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
		237	if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
		238	if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
		239	if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
		240	if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
		241	if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
		242	if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
		243	if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
		244	src+= stride;
		245	}
		246	return numEq > c->ppMode.flatnessThreshold;
		247	}
		248
		249	static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
		250	{
		251	int i;
		252	for(i=0; i<2; i++){
		253	if((unsigned)(src[0] - src[5] + 2QP) > 4QP) return 0;
		254	src += stride;
		255	if((unsigned)(src[2] - src[7] + 2QP) > 4QP) return 0;
		256	src += stride;
		257	if((unsigned)(src[4] - src[1] + 2QP) > 4QP) return 0;
		258	src += stride;
		259	if((unsigned)(src[6] - src[3] + 2QP) > 4QP) return 0;
		260	src += stride;
		261	}
		262	return 1;
		263	}
		264
		265	static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
		266	{
		267	int x;
		268	src+= stride*4;
		269	for(x=0; x
		270	if((unsigned)(src[ x + 0stride] - src[ x + 5stride] + 2QP) > 4QP) return 0;
		271	if((unsigned)(src[1+x + 2stride] - src[1+x + 7stride] + 2QP) > 4QP) return 0;
		272	if((unsigned)(src[2+x + 4stride] - src[2+x + 1stride] + 2QP) > 4QP) return 0;
		273	if((unsigned)(src[3+x + 6stride] - src[3+x + 3stride] + 2QP) > 4QP) return 0;
		274	}
		275	return 1;
		276	}
		277
		278	static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
		279	{
		280	if( isHorizDC_C(src, stride, c) ){
		281	if( isHorizMinMaxOk_C(src, stride, c->QP) )
		282	return 1;
		283	else
		284	return 0;
		285	}else{
		286	return 2;
		287	}
		288	}
		289
		290	static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
		291	{
		292	if( isVertDC_C(src, stride, c) ){
		293	if( isVertMinMaxOk_C(src, stride, c->QP) )
		294	return 1;
		295	else
		296	return 0;
		297	}else{
		298	return 2;
		299	}
		300	}
		301
		302	static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
		303	{
		304	int y;
		305	for(y=0; y
		306	const int middleEnergy= 5(dst[4] - dst[3]) + 2(dst[2] - dst[5]);
		307
		308	if(FFABS(middleEnergy) < 8*c->QP){
		309	const int q=(dst[3] - dst[4])/2;
		310	const int leftEnergy= 5(dst[2] - dst[1]) + 2(dst[0] - dst[3]);
		311	const int rightEnergy= 5(dst[6] - dst[5]) + 2(dst[4] - dst[7]);
		312
		313	int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
		314	d= FFMAX(d, 0);
		315
		316	d= (5*d + 32) >> 6;
		317	d*= FFSIGN(-middleEnergy);
		318
		319	if(q>0)
		320	{
		321	d= d<0 ? 0 : d;
		322	d= d>q ? q : d;
		323	}
		324	else
		325	{
		326	d= d>0 ? 0 : d;
		327	d= d
		328	}
		329
		330	dst[3]-= d;
		331	dst[4]+= d;
		332	}
		333	dst+= stride;
		334	}
		335	}
		336
		337	/**
		338	* Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
		339	* using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
		340	*/
		341	static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
		342	{
		343	int y;
		344	for(y=0; y
		345	const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
		346	const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
		347
		348	int sums[10];
		349	sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
		350	sums[1] = sums[0] - first + dst[3];
		351	sums[2] = sums[1] - first + dst[4];
		352	sums[3] = sums[2] - first + dst[5];
		353	sums[4] = sums[3] - first + dst[6];
		354	sums[5] = sums[4] - dst[0] + dst[7];
		355	sums[6] = sums[5] - dst[1] + last;
		356	sums[7] = sums[6] - dst[2] + last;
		357	sums[8] = sums[7] - dst[3] + last;
		358	sums[9] = sums[8] - dst[4] + last;
		359
		360	dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
		361	dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
		362	dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
		363	dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
		364	dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
		365	dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
		366	dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
		367	dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
		368
		369	dst+= stride;
		370	}
		371	}
		372
		373	/**
		374	* Experimental Filter 1 (Horizontal)
		375	* will not damage linear gradients
		376	* Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
		377	* can only smooth blocks at the expected locations (it cannot smooth them if they did move)
		378	* MMX2 version does correct clipping C version does not
		379	* not identical with the vertical one
		380	*/
		381	static inline void horizX1Filter(uint8_t *src, int stride, int QP)
		382	{
		383	int y;
		384	static uint64_t lut[256];
		385	if(!lut[255])
		386	{
		387	int i;
		388	for(i=0; i<256; i++)
		389	{
		390	int v= i < 128 ? 2i : 2(i-256);
		391	/*
		392	//Simulate 112242211 9-Tap filter
		393	uint64_t a= (v/16) & 0xFF;
		394	uint64_t b= (v/8) & 0xFF;
		395	uint64_t c= (v/4) & 0xFF;
		396	uint64_t d= (3*v/8) & 0xFF;
		397	*/
		398	//Simulate piecewise linear interpolation
		399	uint64_t a= (v/16) & 0xFF;
		400	uint64_t b= (v*3/16) & 0xFF;
		401	uint64_t c= (v*5/16) & 0xFF;
		402	uint64_t d= (7*v/16) & 0xFF;
		403	uint64_t A= (0x100 - a)&0xFF;
		404	uint64_t B= (0x100 - b)&0xFF;
		405	uint64_t C= (0x100 - c)&0xFF;
		406	uint64_t D= (0x100 - c)&0xFF;
		407
		408	lut[i] = (a<<56) \| (b<<48) \| (c<<40) \| (d<<32) \|
		409	(D<<24) \| (C<<16) \| (B<<8) \| (A);
		410	//lut[i] = (v<<32) \| (v<<24);
		411	}
		412	}
		413
		414	for(y=0; y
		415	int a= src[1] - src[2];
		416	int b= src[3] - src[4];
		417	int c= src[5] - src[6];
		418
		419	int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
		420
		421	if(d < QP){
		422	int v = d * FFSIGN(-b);
		423
		424	src[1] +=v/8;
		425	src[2] +=v/4;
		426	src[3] +=3*v/8;
		427	src[4] -=3*v/8;
		428	src[5] -=v/4;
		429	src[6] -=v/8;
		430	}
		431	src+=stride;
		432	}
		433	}
		434
		435	/**
		436	* accurate deblock filter
		437	*/
		438	static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
		439	int stride, const PPContext *c)
		440	{
		441	int y;
		442	const int QP= c->QP;
		443	const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
		444	const int dcThreshold= dcOffset*2 + 1;
		445	//START_TIMER
		446	src+= step*4; // src points to begin of the 8x8 Block
		447	for(y=0; y<8; y++){
		448	int numEq= 0;
		449
		450	if(((unsigned)(src[-1step] - src[0step] + dcOffset)) < dcThreshold) numEq++;
		451	if(((unsigned)(src[ 0step] - src[1step] + dcOffset)) < dcThreshold) numEq++;
		452	if(((unsigned)(src[ 1step] - src[2step] + dcOffset)) < dcThreshold) numEq++;
		453	if(((unsigned)(src[ 2step] - src[3step] + dcOffset)) < dcThreshold) numEq++;
		454	if(((unsigned)(src[ 3step] - src[4step] + dcOffset)) < dcThreshold) numEq++;
		455	if(((unsigned)(src[ 4step] - src[5step] + dcOffset)) < dcThreshold) numEq++;
		456	if(((unsigned)(src[ 5step] - src[6step] + dcOffset)) < dcThreshold) numEq++;
		457	if(((unsigned)(src[ 6step] - src[7step] + dcOffset)) < dcThreshold) numEq++;
		458	if(((unsigned)(src[ 7step] - src[8step] + dcOffset)) < dcThreshold) numEq++;
		459	if(numEq > c->ppMode.flatnessThreshold){
		460	int min, max, x;
		461
		462	if(src[0] > src[step]){
		463	max= src[0];
		464	min= src[step];
		465	}else{
		466	max= src[step];
		467	min= src[0];
		468	}
		469	for(x=2; x<8; x+=2){
		470	if(src[xstep] > src[(x+1)step]){
		471	if(src[x step] > max) max= src[ x step];
		472	if(src[(x+1)step] < min) min= src[(x+1)step];
		473	}else{
		474	if(src[(x+1)step] > max) max= src[(x+1)step];
		475	if(src[ x step] < min) min= src[ x step];
		476	}
		477	}
		478	if(max-min < 2*QP){
		479	const int first= FFABS(src[-1step] - src[0]) < QP ? src[-1step] : src[0];
		480	const int last= FFABS(src[8step] - src[7step]) < QP ? src[8step] : src[7step];
		481
		482	int sums[10];
		483	sums[0] = 4first + src[0step] + src[1step] + src[2step] + 4;
		484	sums[1] = sums[0] - first + src[3*step];
		485	sums[2] = sums[1] - first + src[4*step];
		486	sums[3] = sums[2] - first + src[5*step];
		487	sums[4] = sums[3] - first + src[6*step];
		488	sums[5] = sums[4] - src[0step] + src[7step];
		489	sums[6] = sums[5] - src[1*step] + last;
		490	sums[7] = sums[6] - src[2*step] + last;
		491	sums[8] = sums[7] - src[3*step] + last;
		492	sums[9] = sums[8] - src[4*step] + last;
		493
		494	src[0step]= (sums[0] + sums[2] + 2src[0*step])>>4;
		495	src[1step]= (sums[1] + sums[3] + 2src[1*step])>>4;
		496	src[2step]= (sums[2] + sums[4] + 2src[2*step])>>4;
		497	src[3step]= (sums[3] + sums[5] + 2src[3*step])>>4;
		498	src[4step]= (sums[4] + sums[6] + 2src[4*step])>>4;
		499	src[5step]= (sums[5] + sums[7] + 2src[5*step])>>4;
		500	src[6step]= (sums[6] + sums[8] + 2src[6*step])>>4;
		501	src[7step]= (sums[7] + sums[9] + 2src[7*step])>>4;
		502	}
		503	}else{
		504	const int middleEnergy= 5(src[4step] - src[3step]) + 2(src[2step] - src[5step]);
		505
		506	if(FFABS(middleEnergy) < 8*QP){
		507	const int q=(src[3step] - src[4step])/2;
		508	const int leftEnergy= 5(src[2step] - src[1step]) + 2(src[0step] - src[3step]);
		509	const int rightEnergy= 5(src[6step] - src[5step]) + 2(src[4step] - src[7step]);
		510
		511	int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
		512	d= FFMAX(d, 0);
		513
		514	d= (5*d + 32) >> 6;
		515	d*= FFSIGN(-middleEnergy);
		516
		517	if(q>0){
		518	d= d<0 ? 0 : d;
		519	d= d>q ? q : d;
		520	}else{
		521	d= d>0 ? 0 : d;
		522	d= d
		523	}
		524
		525	src[3*step]-= d;
		526	src[4*step]+= d;
		527	}
		528	}
		529
		530	src += stride;
		531	}
		532	/*if(step==16){
		533	STOP_TIMER("step16")
		534	}else{
		535	STOP_TIMER("stepX")
		536	}*/
		537	}
		538
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
//Plain C versions
//we always compile C for testing which needs bitexactness
/*
 * postprocess_template.c is included once per enabled variant, each time
 * after defining a different TEMPLATE_PP_* macro. NOTE(review): presumably
 * the template uses that macro to pick the name suffix and instruction set
 * of the functions it emits (postProcess_C, postProcess_MMX, ...) and
 * undefines it again -- confirm in postprocess_template.c.
 */
#define TEMPLATE_PP_C 1
#include "postprocess_template.c"

#if HAVE_ALTIVEC
#   define TEMPLATE_PP_ALTIVEC 1
#   include "postprocess_altivec_template.c"
#   include "postprocess_template.c"
#endif

#if ARCH_X86 && HAVE_INLINE_ASM
#    if CONFIG_RUNTIME_CPUDETECT
/* runtime detection: build every x86 variant, postProcess() picks one */
#        define TEMPLATE_PP_MMX 1
#        include "postprocess_template.c"
#        define TEMPLATE_PP_MMXEXT 1
#        include "postprocess_template.c"
#        define TEMPLATE_PP_3DNOW 1
#        include "postprocess_template.c"
#        define TEMPLATE_PP_SSE2 1
#        include "postprocess_template.c"
#    else
/* no runtime detection: build only the first variant the configuration enables */
#        if HAVE_SSE2_INLINE
#            define TEMPLATE_PP_SSE2 1
#            include "postprocess_template.c"
#        elif HAVE_MMXEXT_INLINE
#            define TEMPLATE_PP_MMXEXT 1
#            include "postprocess_template.c"
#        elif HAVE_AMD3DNOW_INLINE
#            define TEMPLATE_PP_3DNOW 1
#            include "postprocess_template.c"
#        elif HAVE_MMX_INLINE
#            define TEMPLATE_PP_MMX 1
#            include "postprocess_template.c"
#        endif
#    endif
#endif
		577
		578	typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
		579	const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
		580
		581	static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
		582	const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode vm, pp_context vc)
		583	{
		584	pp_fn pp = postProcess_C;
		585	PPContext c= (PPContext )vc;
		586	PPMode ppMode= (PPMode )vm;
		587	c->ppMode= *ppMode; //FIXME
		588
		589	if (!(ppMode->lumMode & BITEXACT)) {
		590	#if CONFIG_RUNTIME_CPUDETECT
		591	#if ARCH_X86 && HAVE_INLINE_ASM
		592	// ordered per speed fastest first
		593	if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2;
		594	else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2;
		595	else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow;
		596	else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX;
		597	#elif HAVE_ALTIVEC
		598	if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = postProcess_altivec;
		599	#endif
		600	#else /* CONFIG_RUNTIME_CPUDETECT */
		601	#if HAVE_SSE2_INLINE
		602	pp = postProcess_SSE2;
		603	#elif HAVE_MMXEXT_INLINE
		604	pp = postProcess_MMX2;
		605	#elif HAVE_AMD3DNOW_INLINE
		606	pp = postProcess_3DNow;
		607	#elif HAVE_MMX_INLINE
		608	pp = postProcess_MMX;
		609	#elif HAVE_ALTIVEC
		610	pp = postProcess_altivec;
		611	#endif
		612	#endif /* !CONFIG_RUNTIME_CPUDETECT */
		613	}
		614
		615	pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
		616	}
		617
		618	/* -pp Command line Help
		619	*/
		620	const char pp_help[] =
		621	"Available postprocessing filters:\n"
		622	"Filters Options\n"
		623	"short long name short long option Description\n"
		624	"* * a autoq CPU power dependent enabler\n"
		625	" c chrom chrominance filtering enabled\n"
		626	" y nochrom chrominance filtering disabled\n"
		627	" n noluma luma filtering disabled\n"
		628	"hb hdeblock (2 threshold) horizontal deblocking filter\n"
		629	" 1. difference factor: default=32, higher -> more deblocking\n"
		630	" 2. flatness threshold: default=39, lower -> more deblocking\n"
		631	" the h & v deblocking filters share these\n"
		632	" so you can't set different thresholds for h / v\n"
		633	"vb vdeblock (2 threshold) vertical deblocking filter\n"
		634	"ha hadeblock (2 threshold) horizontal deblocking filter\n"
		635	"va vadeblock (2 threshold) vertical deblocking filter\n"
		636	"h1 x1hdeblock experimental h deblock filter 1\n"
		637	"v1 x1vdeblock experimental v deblock filter 1\n"
		638	"dr dering deringing filter\n"
		639	"al autolevels automatic brightness / contrast\n"
		640	" f fullyrange stretch luminance to (0..255)\n"
		641	"lb linblenddeint linear blend deinterlacer\n"
		642	"li linipoldeint linear interpolating deinterlace\n"
		643	"ci cubicipoldeint cubic interpolating deinterlacer\n"
		644	"md mediandeint median deinterlacer\n"
		645	"fd ffmpegdeint ffmpeg deinterlacer\n"
		646	"l5 lowpass5 FIR lowpass deinterlacer\n"
		647	"de default hb:a,vb:a,dr:a\n"
		648	"fa fast h1:a,v1:a,dr:a\n"
		649	"ac ha:a:128:7,va:a,dr:a\n"
		650	"tn tmpnoise (3 threshold) temporal noise reducer\n"
		651	" 1. <= 2. <= 3. larger -> stronger filtering\n"
		652	"fq forceQuant force quantizer\n"
		653	"Usage:\n"
		654	"[:
		655	"long form example:\n"
		656	"vdeblock:autoq/hdeblock:autoq/linblenddeint default,-vdeblock\n"
		657	"short form example:\n"
		658	"vb:a/hb:a/lb de,-vb\n"
		659	"more examples:\n"
		660	"tn:64:128:256\n"
		661	"\n"
		662	;
		663
		664	pp_mode pp_get_mode_by_name_and_quality(const char name, int quality)
		665	{
		666	char temp[GET_MODE_BUFFER_SIZE];
		667	char *p= temp;
		668	static const char filterDelimiters[] = ",/";
		669	static const char optionDelimiters[] = ":\|";
		670	struct PPMode *ppMode;
		671	char *filterToken;
		672
		673	if (!name) {
		674	av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
		675	return NULL;
		676	}
		677
		678	if (!strcmp(name, "help")) {
		679	const char *p;
		680	for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
		681	av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
		682	av_log(NULL, AV_LOG_INFO, "%s", temp);
		683	}
		684	return NULL;
		685	}
		686
		687	ppMode= av_malloc(sizeof(PPMode));
		688
		689	ppMode->lumMode= 0;
		690	ppMode->chromMode= 0;
		691	ppMode->maxTmpNoise[0]= 700;
		692	ppMode->maxTmpNoise[1]= 1500;
		693	ppMode->maxTmpNoise[2]= 3000;
		694	ppMode->maxAllowedY= 234;
		695	ppMode->minAllowedY= 16;
		696	ppMode->baseDcDiff= 256/8;
		697	ppMode->flatnessThreshold= 56-16-1;
		698	ppMode->maxClippedThreshold= 0.01;
		699	ppMode->error=0;
		700
		701	memset(temp, 0, GET_MODE_BUFFER_SIZE);
		702	av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
		703
		704	av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
		705
		706	for(;;){
		707	char *filterName;
		708	int q= 1000000; //PP_QUALITY_MAX;
		709	int chrom=-1;
		710	int luma=-1;
		711	char *option;
		712	char *options[OPTIONS_ARRAY_SIZE];
		713	int i;
		714	int filterNameOk=0;
		715	int numOfUnknownOptions=0;
		716	int enable=1; //does the user want us to enabled or disabled the filter
		717
		718	filterToken= strtok(p, filterDelimiters);
		719	if(filterToken == NULL) break;
		720	p+= strlen(filterToken) + 1; // p points to next filterToken
		721	filterName= strtok(filterToken, optionDelimiters);
		722	av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
		723
		724	if(*filterName == '-'){
		725	enable=0;
		726	filterName++;
		727	}
		728
		729	for(;;){ //for all options
		730	option= strtok(NULL, optionDelimiters);
		731	if(option == NULL) break;
		732
		733	av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
		734	if(!strcmp("autoq", option) \|\| !strcmp("a", option)) q= quality;
		735	else if(!strcmp("nochrom", option) \|\| !strcmp("y", option)) chrom=0;
		736	else if(!strcmp("chrom", option) \|\| !strcmp("c", option)) chrom=1;
		737	else if(!strcmp("noluma", option) \|\| !strcmp("n", option)) luma=0;
		738	else{
		739	options[numOfUnknownOptions] = option;
		740	numOfUnknownOptions++;
		741	}
		742	if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
		743	}
		744	options[numOfUnknownOptions] = NULL;
		745
		746	/* replace stuff from the replace Table */
		747	for(i=0; replaceTable[2*i]!=NULL; i++){
		748	if(!strcmp(replaceTable[2*i], filterName)){
		749	int newlen= strlen(replaceTable[2*i + 1]);
		750	int plen;
		751	int spaceLeft;
		752
		753	p--, *p=',';
		754
		755	plen= strlen(p);
		756	spaceLeft= p - temp + plen;
		757	if(spaceLeft + newlen >= GET_MODE_BUFFER_SIZE - 1){
		758	ppMode->error++;
		759	break;
		760	}
		761	memmove(p + newlen, p, plen+1);
		762	memcpy(p, replaceTable[2*i + 1], newlen);
		763	filterNameOk=1;
		764	}
		765	}
		766
		767	for(i=0; filters[i].shortName!=NULL; i++){
		768	if( !strcmp(filters[i].longName, filterName)
		769	\|\| !strcmp(filters[i].shortName, filterName)){
		770	ppMode->lumMode &= ~filters[i].mask;
		771	ppMode->chromMode &= ~filters[i].mask;
		772
		773	filterNameOk=1;
		774	if(!enable) break; // user wants to disable it
		775
		776	if(q >= filters[i].minLumQuality && luma)
		777	ppMode->lumMode\|= filters[i].mask;
		778	if(chrom==1 \|\| (chrom==-1 && filters[i].chromDefault))
		779	if(q >= filters[i].minChromQuality)
		780	ppMode->chromMode\|= filters[i].mask;
		781
		782	if(filters[i].mask == LEVEL_FIX){
		783	int o;
		784	ppMode->minAllowedY= 16;
		785	ppMode->maxAllowedY= 234;
		786	for(o=0; options[o]!=NULL; o++){
		787	if( !strcmp(options[o],"fullyrange")
		788	\|\|!strcmp(options[o],"f")){
		789	ppMode->minAllowedY= 0;
		790	ppMode->maxAllowedY= 255;
		791	numOfUnknownOptions--;
		792	}
		793	}
		794	}
		795	else if(filters[i].mask == TEMP_NOISE_FILTER)
		796	{
		797	int o;
		798	int numOfNoises=0;
		799
		800	for(o=0; options[o]!=NULL; o++){
		801	char *tail;
		802	ppMode->maxTmpNoise[numOfNoises]=
		803	strtol(options[o], &tail, 0);
		804	if(tail!=options[o]){
		805	numOfNoises++;
		806	numOfUnknownOptions--;
		807	if(numOfNoises >= 3) break;
		808	}
		809	}
		810	}
		811	else if(filters[i].mask == V_DEBLOCK \|\| filters[i].mask == H_DEBLOCK
		812	\|\| filters[i].mask == V_A_DEBLOCK \|\| filters[i].mask == H_A_DEBLOCK){
		813	int o;
		814
		815	for(o=0; options[o]!=NULL && o<2; o++){
		816	char *tail;
		817	int val= strtol(options[o], &tail, 0);
		818	if(tail==options[o]) break;
		819
		820	numOfUnknownOptions--;
		821	if(o==0) ppMode->baseDcDiff= val;
		822	else ppMode->flatnessThreshold= val;
		823	}
		824	}
		825	else if(filters[i].mask == FORCE_QUANT){
		826	int o;
		827	ppMode->forcedQuant= 15;
		828
		829	for(o=0; options[o]!=NULL && o<1; o++){
		830	char *tail;
		831	int val= strtol(options[o], &tail, 0);
		832	if(tail==options[o]) break;
		833
		834	numOfUnknownOptions--;
		835	ppMode->forcedQuant= val;
		836	}
		837	}
		838	}
		839	}
		840	if(!filterNameOk) ppMode->error++;
		841	ppMode->error += numOfUnknownOptions;
		842	}
		843
		844	av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
		845	if(ppMode->error){
		846	av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
		847	av_free(ppMode);
		848	return NULL;
		849	}
		850	return ppMode;
		851	}
		852
		853	void pp_free_mode(pp_mode *mode){
		854	av_free(mode);
		855	}
		856
		857	static void reallocAlign(void **p, int alignment, int size){
		858	av_free(*p);
		859	*p= av_mallocz(size);
		860	}
		861
		862	static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
		863	int mbWidth = (width+15)>>4;
		864	int mbHeight= (height+15)>>4;
		865	int i;
		866
		867	c->stride= stride;
		868	c->qpStride= qpStride;
		869
		870	reallocAlign((void *)&c->tempDst, 8, stride24+32);
		871	reallocAlign((void *)&c->tempSrc, 8, stride24);
		872	reallocAlign((void *)&c->tempBlocks, 8, 216*8);
		873	reallocAlign((void *)&c->yHistogram, 8, 256sizeof(uint64_t));
		874	for(i=0; i<256; i++)
		875	c->yHistogram[i]= widthheight/6415/256;
		876
		877	for(i=0; i<3; i++){
		878	//Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
		879	reallocAlign((void *)&c->tempBlurred[i], 8, stridembHeight16 + 171024);
		880	reallocAlign((void *)&c->tempBlurredPast[i], 8, 256((height+7)&(~7))/2 + 17*1024);//FIXME size
		881	}
		882
		883	reallocAlign((void *)&c->deintTemp, 8, 2width+32);
		884	reallocAlign((void *)&c->nonBQPTable, 8, qpStridembHeight*sizeof(QP_STORE_T));
		885	reallocAlign((void *)&c->stdQPTable, 8, qpStridembHeight*sizeof(QP_STORE_T));
		886	reallocAlign((void *)&c->forcedQPTable, 8, mbWidthsizeof(QP_STORE_T));
		887	}
		888
		889	static const char * context_to_name(void * ptr) {
		890	return "postproc";
		891	}
		892
/* Class descriptor stored in every PPContext (see pp_get_context()), which
 * is in turn passed as the first argument to av_log(). Positional
 * initialiser: a name string, the name callback, and NULL for the third
 * member. */
static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
		894
		895	pp_context *pp_get_context(int width, int height, int cpuCaps){
		896	PPContext *c= av_malloc(sizeof(PPContext));
		897	int stride= FFALIGN(width, 16); //assumed / will realloc if needed
		898	int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
		899
		900	memset(c, 0, sizeof(PPContext));
		901	c->av_class = &av_codec_context_class;
		902	if(cpuCaps&PP_FORMAT){
		903	c->hChromaSubSample= cpuCaps&0x3;
		904	c->vChromaSubSample= (cpuCaps>>4)&0x3;
		905	}else{
		906	c->hChromaSubSample= 1;
		907	c->vChromaSubSample= 1;
		908	}
		909	if (cpuCaps & PP_CPU_CAPS_AUTO) {
		910	c->cpuCaps = av_get_cpu_flags();
		911	} else {
		912	c->cpuCaps = 0;
		913	if (cpuCaps & PP_CPU_CAPS_MMX) c->cpuCaps \|= AV_CPU_FLAG_MMX;
		914	if (cpuCaps & PP_CPU_CAPS_MMX2) c->cpuCaps \|= AV_CPU_FLAG_MMXEXT;
		915	if (cpuCaps & PP_CPU_CAPS_3DNOW) c->cpuCaps \|= AV_CPU_FLAG_3DNOW;
		916	if (cpuCaps & PP_CPU_CAPS_ALTIVEC) c->cpuCaps \|= AV_CPU_FLAG_ALTIVEC;
		917	}
		918
		919	reallocBuffers(c, width, height, stride, qpStride);
		920
		921	c->frameNum=-1;
		922
		923	return c;
		924	}
		925
		926	void pp_free_context(void *vc){
		927	PPContext c = (PPContext)vc;
		928	int i;
		929
		930	for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
		931	for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
		932
		933	av_free(c->tempBlocks);
		934	av_free(c->yHistogram);
		935	av_free(c->tempDst);
		936	av_free(c->tempSrc);
		937	av_free(c->deintTemp);
		938	av_free(c->stdQPTable);
		939	av_free(c->nonBQPTable);
		940	av_free(c->forcedQPTable);
		941
		942	memset(c, 0, sizeof(PPContext));
		943
		944	av_free(c);
		945	}
		946
		947	void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
		948	uint8_t * dst[3], const int dstStride[3],
		949	int width, int height,
		950	const QP_STORE_T *QP_store, int QPStride,
		951	pp_mode vm, void vc, int pict_type)
		952	{
		953	int mbWidth = (width+15)>>4;
		954	int mbHeight= (height+15)>>4;
		955	PPMode mode = (PPMode)vm;
		956	PPContext c = (PPContext)vc;
		957	int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
		958	int absQPStride = FFABS(QPStride);
		959
		960	// c->stride and c->QPStride are always positive
		961	if(c->stride < minStride \|\| c->qpStride < absQPStride)
		962	reallocBuffers(c, width, height,
		963	FFMAX(minStride, c->stride),
		964	FFMAX(c->qpStride, absQPStride));
		965
		966	if(QP_store==NULL \|\| (mode->lumMode & FORCE_QUANT)){
		967	int i;
		968	QP_store= c->forcedQPTable;
		969	absQPStride = QPStride = 0;
		970	if(mode->lumMode & FORCE_QUANT)
		971	for(i=0; iforcedQPTable[i]= mode->forcedQuant;
		972	else
		973	for(i=0; iforcedQPTable[i]= 1;
		974	}
		975
		976	if(pict_type & PP_PICT_TYPE_QP2){
		977	int i;
		978	const int count= mbHeight * absQPStride;
		979	for(i=0; i<(count>>2); i++){
		980	((uint32_t)c->stdQPTable)[i] = (((const uint32_t)QP_store)[i]>>1) & 0x7F7F7F7F;
		981	}
		982	for(i<<=2; i
		983	c->stdQPTable[i] = QP_store[i]>>1;
		984	}
		985	QP_store= c->stdQPTable;
		986	QPStride= absQPStride;
		987	}
		988
		989	if(0){
		990	int x,y;
		991	for(y=0; y
		992	for(x=0; x
		993	av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
		994	}
		995	av_log(c, AV_LOG_INFO, "\n");
		996	}
		997	av_log(c, AV_LOG_INFO, "\n");
		998	}
		999
		1000	if((pict_type&7)!=3){
		1001	if (QPStride >= 0){
		1002	int i;
		1003	const int count= mbHeight * QPStride;
		1004	for(i=0; i<(count>>2); i++){
		1005	((uint32_t)c->nonBQPTable)[i] = ((const uint32_t)QP_store)[i] & 0x3F3F3F3F;
		1006	}
		1007	for(i<<=2; i
		1008	c->nonBQPTable[i] = QP_store[i] & 0x3F;
		1009	}
		1010	} else {
		1011	int i,j;
		1012	for(i=0; i
		1013	for(j=0; j
		1014	c->nonBQPTable[iabsQPStride+j] = QP_store[iQPStride+j] & 0x3F;
		1015	}
		1016	}
		1017	}
		1018	}
		1019
		1020	av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
		1021	mode->lumMode, mode->chromMode);
		1022
		1023	postProcess(src[0], srcStride[0], dst[0], dstStride[0],
		1024	width, height, QP_store, QPStride, 0, mode, c);
		1025
		1026	width = (width )>>c->hChromaSubSample;
		1027	height = (height)>>c->vChromaSubSample;
		1028
		1029	if(mode->chromMode){
		1030	postProcess(src[1], srcStride[1], dst[1], dstStride[1],
		1031	width, height, QP_store, QPStride, 1, mode, c);
		1032	postProcess(src[2], srcStride[2], dst[2], dstStride[2],
		1033	width, height, QP_store, QPStride, 2, mode, c);
		1034	}
		1035	else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
		1036	linecpy(dst[1], src[1], height, srcStride[1]);
		1037	linecpy(dst[2], src[2], height, srcStride[2]);
		1038	}else{
		1039	int y;
		1040	for(y=0; y
		1041	memcpy(&(dst[1][ydstStride[1]]), &(src[1][ysrcStride[1]]), width);
		1042	memcpy(&(dst[2][ydstStride[2]]), &(src[2][ysrcStride[2]]), width);
		1043	}
		1044	}
		1045	}

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/ffmpeg/libpostproc/postprocess.c – Rev 4349