WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/ffmpeg/libavcodec/faandct.c

Rev	Author	Line No.	Line
4349	Serge	1	/*
		2	* Floating point AAN DCT
		3	* this implementation is based upon the IJG integer AAN DCT (see jfdctfst.c)
		4	*
		5	* Copyright (c) 2003 Michael Niedermayer
		6	* Copyright (c) 2003 Roman Shaposhnik
		7	*
		8	* Permission to use, copy, modify, and/or distribute this software for any
		9	* purpose with or without fee is hereby granted, provided that the above
		10	* copyright notice and this permission notice appear in all copies.
		11	*
		12	* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
		13	* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
		14	* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
		15	* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
		16	* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
		17	* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
		18	* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
		19	*/
		20
		21	/**
		22	* @file
		23	* @brief
		24	* Floating point AAN DCT
		25	* @author Michael Niedermayer
		26	*/
		27
		28	#include "faandct.h"
		29	#include "libavutil/internal.h"
		30	#include "libavutil/libm.h"
		31
		32	#define FLOAT float
		33
		34	//numbers generated by simple c code (not as accurate as they could be)
		35	/*
		36	for(i=0; i<8; i++){
		37	printf("#define B%d %1.20llf\n", i, (long double)1.0/(cosl(iacosl(-1.0)/(long double)16.0)sqrtl(2)));
		38	}
		39	*/
		40	#define B0 1.00000000000000000000
		41	#define B1 0.72095982200694791383 // (cos(pi*1/16)sqrt(2))^-1
		42	#define B2 0.76536686473017954350 // (cos(pi*2/16)sqrt(2))^-1
		43	#define B3 0.85043009476725644878 // (cos(pi*3/16)sqrt(2))^-1
		44	#define B4 1.00000000000000000000 // (cos(pi*4/16)sqrt(2))^-1
		45	#define B5 1.27275858057283393842 // (cos(pi*5/16)sqrt(2))^-1
		46	#define B6 1.84775906502257351242 // (cos(pi*6/16)sqrt(2))^-1
		47	#define B7 3.62450978541155137218 // (cos(pi*7/16)sqrt(2))^-1
		48
		49
		50	#define A1 0.70710678118654752438 // cos(pi*4/16)
		51	#define A2 0.54119610014619698435 // cos(pi*6/16)sqrt(2)
		52	#define A5 0.38268343236508977170 // cos(pi*6/16)
		53	#define A4 1.30656296487637652774 // cos(pi*2/16)sqrt(2)
		54
		55	static const FLOAT postscale[64]={
		56	B0B0, B0B1, B0B2, B0B3, B0B4, B0B5, B0B6, B0B7,
		57	B1B0, B1B1, B1B2, B1B3, B1B4, B1B5, B1B6, B1B7,
		58	B2B0, B2B1, B2B2, B2B3, B2B4, B2B5, B2B6, B2B7,
		59	B3B0, B3B1, B3B2, B3B3, B3B4, B3B5, B3B6, B3B7,
		60	B4B0, B4B1, B4B2, B4B3, B4B4, B4B5, B4B6, B4B7,
		61	B5B0, B5B1, B5B2, B5B3, B5B4, B5B5, B5B6, B5B7,
		62	B6B0, B6B1, B6B2, B6B3, B6B4, B6B5, B6B6, B6B7,
		63	B7B0, B7B1, B7B2, B7B3, B7B4, B7B5, B7B6, B7B7,
		64	};
		65
		66	static av_always_inline void row_fdct(FLOAT temp[64], int16_t *data)
		67	{
		68	FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
		69	FLOAT tmp10, tmp11, tmp12, tmp13;
		70	FLOAT z2, z4, z11, z13;
		71	FLOAT av_unused z5;
		72	int i;
		73
		74	for (i=0; i<8*8; i+=8) {
		75	tmp0= data[0 + i] + data[7 + i];
		76	tmp7= data[0 + i] - data[7 + i];
		77	tmp1= data[1 + i] + data[6 + i];
		78	tmp6= data[1 + i] - data[6 + i];
		79	tmp2= data[2 + i] + data[5 + i];
		80	tmp5= data[2 + i] - data[5 + i];
		81	tmp3= data[3 + i] + data[4 + i];
		82	tmp4= data[3 + i] - data[4 + i];
		83
		84	tmp10= tmp0 + tmp3;
		85	tmp13= tmp0 - tmp3;
		86	tmp11= tmp1 + tmp2;
		87	tmp12= tmp1 - tmp2;
		88
		89	temp[0 + i]= tmp10 + tmp11;
		90	temp[4 + i]= tmp10 - tmp11;
		91
		92	tmp12 += tmp13;
		93	tmp12 *= A1;
		94	temp[2 + i]= tmp13 + tmp12;
		95	temp[6 + i]= tmp13 - tmp12;
		96
		97	tmp4 += tmp5;
		98	tmp5 += tmp6;
		99	tmp6 += tmp7;
		100
		101	#if 0
		102	z5= (tmp4 - tmp6) * A5;
		103	z2= tmp4*A2 + z5;
		104	z4= tmp6*A4 + z5;
		105	#else
		106	z2= tmp4(A2+A5) - tmp6A5;
		107	z4= tmp6(A4-A5) + tmp4A5;
		108	#endif
		109	tmp5*=A1;
		110
		111	z11= tmp7 + tmp5;
		112	z13= tmp7 - tmp5;
		113
		114	temp[5 + i]= z13 + z2;
		115	temp[3 + i]= z13 - z2;
		116	temp[1 + i]= z11 + z4;
		117	temp[7 + i]= z11 - z4;
		118	}
		119	}
		120
		121	void ff_faandct(int16_t *data)
		122	{
		123	FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
		124	FLOAT tmp10, tmp11, tmp12, tmp13;
		125	FLOAT z2, z4, z11, z13;
		126	FLOAT av_unused z5;
		127	FLOAT temp[64];
		128	int i;
		129
		130	emms_c();
		131
		132	row_fdct(temp, data);
		133
		134	for (i=0; i<8; i++) {
		135	tmp0= temp[80 + i] + temp[87 + i];
		136	tmp7= temp[80 + i] - temp[87 + i];
		137	tmp1= temp[81 + i] + temp[86 + i];
		138	tmp6= temp[81 + i] - temp[86 + i];
		139	tmp2= temp[82 + i] + temp[85 + i];
		140	tmp5= temp[82 + i] - temp[85 + i];
		141	tmp3= temp[83 + i] + temp[84 + i];
		142	tmp4= temp[83 + i] - temp[84 + i];
		143
		144	tmp10= tmp0 + tmp3;
		145	tmp13= tmp0 - tmp3;
		146	tmp11= tmp1 + tmp2;
		147	tmp12= tmp1 - tmp2;
		148
		149	data[80 + i]= lrintf(postscale[80 + i] * (tmp10 + tmp11));
		150	data[84 + i]= lrintf(postscale[84 + i] * (tmp10 - tmp11));
		151
		152	tmp12 += tmp13;
		153	tmp12 *= A1;
		154	data[82 + i]= lrintf(postscale[82 + i] * (tmp13 + tmp12));
		155	data[86 + i]= lrintf(postscale[86 + i] * (tmp13 - tmp12));
		156
		157	tmp4 += tmp5;
		158	tmp5 += tmp6;
		159	tmp6 += tmp7;
		160
		161	#if 0
		162	z5= (tmp4 - tmp6) * A5;
		163	z2= tmp4*A2 + z5;
		164	z4= tmp6*A4 + z5;
		165	#else
		166	z2= tmp4(A2+A5) - tmp6A5;
		167	z4= tmp6(A4-A5) + tmp4A5;
		168	#endif
		169	tmp5*=A1;
		170
		171	z11= tmp7 + tmp5;
		172	z13= tmp7 - tmp5;
		173
		174	data[85 + i]= lrintf(postscale[85 + i] * (z13 + z2));
		175	data[83 + i]= lrintf(postscale[83 + i] * (z13 - z2));
		176	data[81 + i]= lrintf(postscale[81 + i] * (z11 + z4));
		177	data[87 + i]= lrintf(postscale[87 + i] * (z11 - z4));
		178	}
		179	}
		180
		181	void ff_faandct248(int16_t *data)
		182	{
		183	FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
		184	FLOAT tmp10, tmp11, tmp12, tmp13;
		185	FLOAT temp[64];
		186	int i;
		187
		188	emms_c();
		189
		190	row_fdct(temp, data);
		191
		192	for (i=0; i<8; i++) {
		193	tmp0 = temp[80 + i] + temp[81 + i];
		194	tmp1 = temp[82 + i] + temp[83 + i];
		195	tmp2 = temp[84 + i] + temp[85 + i];
		196	tmp3 = temp[86 + i] + temp[87 + i];
		197	tmp4 = temp[80 + i] - temp[81 + i];
		198	tmp5 = temp[82 + i] - temp[83 + i];
		199	tmp6 = temp[84 + i] - temp[85 + i];
		200	tmp7 = temp[86 + i] - temp[87 + i];
		201
		202	tmp10 = tmp0 + tmp3;
		203	tmp11 = tmp1 + tmp2;
		204	tmp12 = tmp1 - tmp2;
		205	tmp13 = tmp0 - tmp3;
		206
		207	data[80 + i] = lrintf(postscale[80 + i] * (tmp10 + tmp11));
		208	data[84 + i] = lrintf(postscale[84 + i] * (tmp10 - tmp11));
		209
		210	tmp12 += tmp13;
		211	tmp12 *= A1;
		212	data[82 + i] = lrintf(postscale[82 + i] * (tmp13 + tmp12));
		213	data[86 + i] = lrintf(postscale[86 + i] * (tmp13 - tmp12));
		214
		215	tmp10 = tmp4 + tmp7;
		216	tmp11 = tmp5 + tmp6;
		217	tmp12 = tmp5 - tmp6;
		218	tmp13 = tmp4 - tmp7;
		219
		220	data[81 + i] = lrintf(postscale[80 + i] * (tmp10 + tmp11));
		221	data[85 + i] = lrintf(postscale[84 + i] * (tmp10 - tmp11));
		222
		223	tmp12 += tmp13;
		224	tmp12 *= A1;
		225	data[83 + i] = lrintf(postscale[82 + i] * (tmp13 + tmp12));
		226	data[87 + i] = lrintf(postscale[86 + i] * (tmp13 - tmp12));
		227	}
		228	}

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/ffmpeg/libavcodec/faandct.c @ 4525 – Rev 4349