WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/ffmpeg/ffmpeg-2.1/libavcodec/simple_idct_template.c

Rev	Author	Line No.	Line
6148	serge	1	/*
		2	* Simple IDCT
		3	*
		4	* Copyright (c) 2001 Michael Niedermayer
		5	*
		6	* This file is part of FFmpeg.
		7	*
		8	* FFmpeg is free software; you can redistribute it and/or
		9	* modify it under the terms of the GNU Lesser General Public
		10	* License as published by the Free Software Foundation; either
		11	* version 2.1 of the License, or (at your option) any later version.
		12	*
		13	* FFmpeg is distributed in the hope that it will be useful,
		14	* but WITHOUT ANY WARRANTY; without even the implied warranty of
		15	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
		16	* Lesser General Public License for more details.
		17	*
		18	* You should have received a copy of the GNU Lesser General Public
		19	* License along with FFmpeg; if not, write to the Free Software
		20	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		21	*/
		22
		23	/**
		24	* @file
		25	* simpleidct in C.
		26	*/
		27
		28	/*
		29	based upon some commented-out C code from mpeg2dec (idct_mmx.c,
		30	written by Aaron Holtzman)
		31	*/
		32
		33	#include "bit_depth_template.c"
		34
		35	#undef W1
		36	#undef W2
		37	#undef W3
		38	#undef W4
		39	#undef W5
		40	#undef W6
		41	#undef W7
		42	#undef ROW_SHIFT
		43	#undef COL_SHIFT
		44	#undef DC_SHIFT
		45	#undef MUL
		46	#undef MAC
		47
		48	#if BIT_DEPTH == 8
		49
		50	#define W1 22725 //cos(iM_PI/16)sqrt(2)*(1<<14) + 0.5
		51	#define W2 21407 //cos(iM_PI/16)sqrt(2)*(1<<14) + 0.5
		52	#define W3 19266 //cos(iM_PI/16)sqrt(2)*(1<<14) + 0.5
		53	#define W4 16383 //cos(iM_PI/16)sqrt(2)*(1<<14) + 0.5
		54	#define W5 12873 //cos(iM_PI/16)sqrt(2)*(1<<14) + 0.5
		55	#define W6 8867 //cos(iM_PI/16)sqrt(2)*(1<<14) + 0.5
		56	#define W7 4520 //cos(iM_PI/16)sqrt(2)*(1<<14) + 0.5
		57
		58	#define ROW_SHIFT 11
		59	#define COL_SHIFT 20
		60	#define DC_SHIFT 3
		61
		62	#define MUL(a, b) MUL16(a, b)
		63	#define MAC(a, b, c) MAC16(a, b, c)
		64
		65	#elif BIT_DEPTH == 10 \|\| BIT_DEPTH == 12
		66
		67	#if BIT_DEPTH == 10
		68	#define W1 90901
		69	#define W2 85627
		70	#define W3 77062
		71	#define W4 65535
		72	#define W5 51491
		73	#define W6 35468
		74	#define W7 18081
		75
		76	#define ROW_SHIFT 15
		77	#define COL_SHIFT 20
		78	#define DC_SHIFT 1
		79	#else
		80	#define W1 45451
		81	#define W2 42813
		82	#define W3 38531
		83	#define W4 32767
		84	#define W5 25746
		85	#define W6 17734
		86	#define W7 9041
		87
		88	#define ROW_SHIFT 16
		89	#define COL_SHIFT 17
		90	#define DC_SHIFT -1
		91	#endif
		92
		93	#define MUL(a, b) ((a) * (b))
		94	#define MAC(a, b, c) ((a) += (b) * (c))
		95
		96	#else
		97
		98	#error "Unsupported bitdepth"
		99
		100	#endif
		101
		102	static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
		103	{
		104	int a0, a1, a2, a3, b0, b1, b2, b3;
		105
		106	#if HAVE_FAST_64BIT
		107	#define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN)
		108	if (((((uint64_t )row)[0] & ~ROW0_MASK) \| ((uint64_t )row)[1]) == 0) {
		109	uint64_t temp;
		110	if (DC_SHIFT - extra_shift > 0) {
		111	temp = (row[0] << (DC_SHIFT - extra_shift)) & 0xffff;
		112	} else {
		113	temp = (row[0] >> (extra_shift - DC_SHIFT)) & 0xffff;
		114	}
		115	temp += temp << 16;
		116	temp += temp << 32;
		117	((uint64_t *)row)[0] = temp;
		118	((uint64_t *)row)[1] = temp;
		119	return;
		120	}
		121	#else
		122	if (!(((uint32_t*)row)[1] \|
		123	((uint32_t*)row)[2] \|
		124	((uint32_t*)row)[3] \|
		125	row[1])) {
		126	uint32_t temp;
		127	if (DC_SHIFT - extra_shift > 0) {
		128	temp = (row[0] << (DC_SHIFT - extra_shift)) & 0xffff;
		129	} else {
		130	temp = (row[0] >> (extra_shift - DC_SHIFT)) & 0xffff;
		131	}
		132	temp += temp << 16;
		133	((uint32_t)row)[0]=((uint32_t)row)[1] =
		134	((uint32_t)row)[2]=((uint32_t)row)[3] = temp;
		135	return;
		136	}
		137	#endif
		138
		139	a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
		140	a1 = a0;
		141	a2 = a0;
		142	a3 = a0;
		143
		144	a0 += W2 * row[2];
		145	a1 += W6 * row[2];
		146	a2 -= W6 * row[2];
		147	a3 -= W2 * row[2];
		148
		149	b0 = MUL(W1, row[1]);
		150	MAC(b0, W3, row[3]);
		151	b1 = MUL(W3, row[1]);
		152	MAC(b1, -W7, row[3]);
		153	b2 = MUL(W5, row[1]);
		154	MAC(b2, -W1, row[3]);
		155	b3 = MUL(W7, row[1]);
		156	MAC(b3, -W5, row[3]);
		157
		158	if (AV_RN64A(row + 4)) {
		159	a0 += W4row[4] + W6row[6];
		160	a1 += - W4row[4] - W2row[6];
		161	a2 += - W4row[4] + W2row[6];
		162	a3 += W4row[4] - W6row[6];
		163
		164	MAC(b0, W5, row[5]);
		165	MAC(b0, W7, row[7]);
		166
		167	MAC(b1, -W1, row[5]);
		168	MAC(b1, -W5, row[7]);
		169
		170	MAC(b2, W7, row[5]);
		171	MAC(b2, W3, row[7]);
		172
		173	MAC(b3, W3, row[5]);
		174	MAC(b3, -W1, row[7]);
		175	}
		176
		177	row[0] = (a0 + b0) >> (ROW_SHIFT + extra_shift);
		178	row[7] = (a0 - b0) >> (ROW_SHIFT + extra_shift);
		179	row[1] = (a1 + b1) >> (ROW_SHIFT + extra_shift);
		180	row[6] = (a1 - b1) >> (ROW_SHIFT + extra_shift);
		181	row[2] = (a2 + b2) >> (ROW_SHIFT + extra_shift);
		182	row[5] = (a2 - b2) >> (ROW_SHIFT + extra_shift);
		183	row[3] = (a3 + b3) >> (ROW_SHIFT + extra_shift);
		184	row[4] = (a3 - b3) >> (ROW_SHIFT + extra_shift);
		185	}
		186
		/*
		 * Column pass shared by the idctSparseCol* functions.
		 *
		 * Expands in a scope that provides `col` (pointer to the first element of
		 * a column; successive rows are 8 elements apart) and the int variables
		 * a0..a3 and b0..b3. On exit a0..a3 hold the even-part sums and b0..b3 the
		 * odd-part sums; the users combine them as (aK +/- bK) >> COL_SHIFT.
		 *
		 * (1 << (COL_SHIFT-1)) / W4 is added to the first coefficient before the
		 * multiplication by W4 (presumably the rounding bias for that final shift).
		 * The terms for rows 4..7 are skipped when the coefficient is zero.
		 */
		#define IDCT_COLS do {                                  \
		        a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); \
		        a1 = a0;                                        \
		        a2 = a0;                                        \
		        a3 = a0;                                        \
		                                                        \
		        a0 +=  W2*col[8*2];                             \
		        a1 +=  W6*col[8*2];                             \
		        a2 += -W6*col[8*2];                             \
		        a3 += -W2*col[8*2];                             \
		                                                        \
		        b0 = MUL(W1, col[8*1]);                         \
		        b1 = MUL(W3, col[8*1]);                         \
		        b2 = MUL(W5, col[8*1]);                         \
		        b3 = MUL(W7, col[8*1]);                         \
		                                                        \
		        MAC(b0,  W3, col[8*3]);                         \
		        MAC(b1, -W7, col[8*3]);                         \
		        MAC(b2, -W1, col[8*3]);                         \
		        MAC(b3, -W5, col[8*3]);                         \
		                                                        \
		        if (col[8*4]) {                                 \
		            a0 +=  W4*col[8*4];                         \
		            a1 += -W4*col[8*4];                         \
		            a2 += -W4*col[8*4];                         \
		            a3 +=  W4*col[8*4];                         \
		        }                                               \
		                                                        \
		        if (col[8*5]) {                                 \
		            MAC(b0,  W5, col[8*5]);                     \
		            MAC(b1, -W1, col[8*5]);                     \
		            MAC(b2,  W7, col[8*5]);                     \
		            MAC(b3,  W3, col[8*5]);                     \
		        }                                               \
		                                                        \
		        if (col[8*6]) {                                 \
		            a0 +=  W6*col[8*6];                         \
		            a1 += -W2*col[8*6];                         \
		            a2 +=  W2*col[8*6];                         \
		            a3 += -W6*col[8*6];                         \
		        }                                               \
		                                                        \
		        if (col[8*7]) {                                 \
		            MAC(b0,  W7, col[8*7]);                     \
		            MAC(b1, -W5, col[8*7]);                     \
		            MAC(b2,  W3, col[8*7]);                     \
		            MAC(b3, -W1, col[8*7]);                     \
		        }                                               \
		    } while (0)
		236
		237	static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size,
		238	int16_t *col)
		239	{
		240	int a0, a1, a2, a3, b0, b1, b2, b3;
		241
		242	IDCT_COLS;
		243
		244	dest[0] = av_clip_pixel((a0 + b0) >> COL_SHIFT);
		245	dest += line_size;
		246	dest[0] = av_clip_pixel((a1 + b1) >> COL_SHIFT);
		247	dest += line_size;
		248	dest[0] = av_clip_pixel((a2 + b2) >> COL_SHIFT);
		249	dest += line_size;
		250	dest[0] = av_clip_pixel((a3 + b3) >> COL_SHIFT);
		251	dest += line_size;
		252	dest[0] = av_clip_pixel((a3 - b3) >> COL_SHIFT);
		253	dest += line_size;
		254	dest[0] = av_clip_pixel((a2 - b2) >> COL_SHIFT);
		255	dest += line_size;
		256	dest[0] = av_clip_pixel((a1 - b1) >> COL_SHIFT);
		257	dest += line_size;
		258	dest[0] = av_clip_pixel((a0 - b0) >> COL_SHIFT);
		259	}
		260
		261	static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size,
		262	int16_t *col)
		263	{
		264	int a0, a1, a2, a3, b0, b1, b2, b3;
		265
		266	IDCT_COLS;
		267
		268	dest[0] = av_clip_pixel(dest[0] + ((a0 + b0) >> COL_SHIFT));
		269	dest += line_size;
		270	dest[0] = av_clip_pixel(dest[0] + ((a1 + b1) >> COL_SHIFT));
		271	dest += line_size;
		272	dest[0] = av_clip_pixel(dest[0] + ((a2 + b2) >> COL_SHIFT));
		273	dest += line_size;
		274	dest[0] = av_clip_pixel(dest[0] + ((a3 + b3) >> COL_SHIFT));
		275	dest += line_size;
		276	dest[0] = av_clip_pixel(dest[0] + ((a3 - b3) >> COL_SHIFT));
		277	dest += line_size;
		278	dest[0] = av_clip_pixel(dest[0] + ((a2 - b2) >> COL_SHIFT));
		279	dest += line_size;
		280	dest[0] = av_clip_pixel(dest[0] + ((a1 - b1) >> COL_SHIFT));
		281	dest += line_size;
		282	dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT));
		283	}
		284
		285	static inline void FUNC(idctSparseCol)(int16_t *col)
		286	{
		287	int a0, a1, a2, a3, b0, b1, b2, b3;
		288
		289	IDCT_COLS;
		290
		291	col[0 ] = ((a0 + b0) >> COL_SHIFT);
		292	col[8 ] = ((a1 + b1) >> COL_SHIFT);
		293	col[16] = ((a2 + b2) >> COL_SHIFT);
		294	col[24] = ((a3 + b3) >> COL_SHIFT);
		295	col[32] = ((a3 - b3) >> COL_SHIFT);
		296	col[40] = ((a2 - b2) >> COL_SHIFT);
		297	col[48] = ((a1 - b1) >> COL_SHIFT);
		298	col[56] = ((a0 - b0) >> COL_SHIFT);
		299	}
		300
		301	void FUNC(ff_simple_idct_put)(uint8_t dest_, int line_size, int16_t block)
		302	{
		303	pixel dest = (pixel )dest_;
		304	int i;
		305
		306	line_size /= sizeof(pixel);
		307
		308	for (i = 0; i < 8; i++)
		309	FUNC(idctRowCondDC)(block + i*8, 0);
		310
		311	for (i = 0; i < 8; i++)
		312	FUNC(idctSparseColPut)(dest + i, line_size, block + i);
		313	}
		314
		315	void FUNC(ff_simple_idct_add)(uint8_t dest_, int line_size, int16_t block)
		316	{
		317	pixel dest = (pixel )dest_;
		318	int i;
		319
		320	line_size /= sizeof(pixel);
		321
		322	for (i = 0; i < 8; i++)
		323	FUNC(idctRowCondDC)(block + i*8, 0);
		324
		325	for (i = 0; i < 8; i++)
		326	FUNC(idctSparseColAdd)(dest + i, line_size, block + i);
		327	}
		328
		329	void FUNC(ff_simple_idct)(int16_t *block)
		330	{
		331	int i;
		332
		333	for (i = 0; i < 8; i++)
		334	FUNC(idctRowCondDC)(block + i*8, 0);
		335
		336	for (i = 0; i < 8; i++)
		337	FUNC(idctSparseCol)(block + i);
		338	}

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/ffmpeg/ffmpeg-2.1/libavcodec/simple_idct_template.c – Rev 6148