WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/ffmpeg/libavcodec/jfdctfst.c

Rev	Author	Line No.	Line
4349	Serge	1	/*
		2	* This file is part of the Independent JPEG Group's software.
		3	*
		4	* The authors make NO WARRANTY or representation, either express or implied,
		5	* with respect to this software, its quality, accuracy, merchantability, or
		6	* fitness for a particular purpose. This software is provided "AS IS", and
		7	* you, its user, assume the entire risk as to its quality and accuracy.
		8	*
		9	* This software is copyright (C) 1994-1996, Thomas G. Lane.
		10	* All Rights Reserved except as specified below.
		11	*
		12	* Permission is hereby granted to use, copy, modify, and distribute this
		13	* software (or portions thereof) for any purpose, without fee, subject to
		14	* these conditions:
		15	* (1) If any part of the source code for this software is distributed, then
		16	* this README file must be included, with this copyright and no-warranty
		17	* notice unaltered; and any additions, deletions, or changes to the original
		18	* files must be clearly indicated in accompanying documentation.
		19	* (2) If only executable code is distributed, then the accompanying
		20	* documentation must state that "this software is based in part on the work
		21	* of the Independent JPEG Group".
		22	* (3) Permission for use of this software is granted only if the user accepts
		23	* full responsibility for any undesirable consequences; the authors accept
		24	* NO LIABILITY for damages of any kind.
		25	*
		26	* These conditions apply to any software derived from or based on the IJG
		27	* code, not just to the unmodified library. If you use our work, you ought
		28	* to acknowledge us.
		29	*
		30	* Permission is NOT granted for the use of any IJG author's name or company
		31	* name in advertising or publicity relating to this software or products
		32	* derived from it. This software may be referred to only as "the Independent
		33	* JPEG Group's software".
		34	*
		35	* We specifically permit and encourage the use of this software as the basis
		36	* of commercial products, provided that all warranty or liability claims are
		37	* assumed by the product vendor.
		38	*
		39	* This file contains a fast, not so accurate integer implementation of the
		40	* forward DCT (Discrete Cosine Transform).
		41	*
		42	* A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
		43	* on each column. Direct algorithms are also available, but they are
		44	* much more complex and seem not to be any faster when reduced to code.
		45	*
		46	* This implementation is based on Arai, Agui, and Nakajima's algorithm for
		47	* scaled DCT. Their original paper (Trans. IEICE E-71(11):1095) is in
		48	* Japanese, but the algorithm is described in the Pennebaker & Mitchell
		49	* JPEG textbook (see REFERENCES section in file README). The following code
		50	* is based directly on figure 4-8 in P&M.
		51	* While an 8-point DCT cannot be done in less than 11 multiplies, it is
		52	* possible to arrange the computation so that many of the multiplies are
		53	* simple scalings of the final outputs. These multiplies can then be
		54	* folded into the multiplications or divisions by the JPEG quantization
		55	* table entries. The AA&N method leaves only 5 multiplies and 29 adds
		56	* to be done in the DCT itself.
		57	* The primary disadvantage of this method is that with fixed-point math,
		58	* accuracy is lost due to imprecise representation of the scaled
		59	* quantization values. The smaller the quantization table entry, the less
		60	* precise the scaled value, so this implementation does worse with high-
		61	* quality-setting files than with low-quality ones.
		62	*/
		63
		64	/**
		65	* @file
		66	* Independent JPEG Group's fast AAN dct.
		67	*/
		68
		69	#include
		70	#include
		71	#include "libavutil/common.h"
		72	#include "dct.h"
		73
		74	#define DCTSIZE 8
		75	#define GLOBAL(x) x
		76	#define RIGHT_SHIFT(x, n) ((x) >> (n))
		77
		78	/*
		79	* This module is specialized to the case DCTSIZE = 8.
		80	*/
		81
		82	#if DCTSIZE != 8
		83	Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
		84	#endif
		85
		86
		87	/* Scaling decisions are generally the same as in the LL&M algorithm;
		88	* see jfdctint.c for more details. However, we choose to descale
		89	* (right shift) multiplication products as soon as they are formed,
		90	* rather than carrying additional fractional bits into subsequent additions.
		91	* This compromises accuracy slightly, but it lets us save a few shifts.
		92	* More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
		93	* everywhere except in the multiplications proper; this saves a good deal
		94	* of work on 16-bit-int machines.
		95	*
		96	* Again to save a few shifts, the intermediate results between pass 1 and
		97	* pass 2 are not upscaled, but are represented only to integral precision.
		98	*
		99	* A final compromise is to represent the multiplicative constants to only
		100	* 8 fractional bits, rather than 13. This saves some shifting work on some
		101	* machines, and may also reduce the cost of multiplication (since there
		102	* are fewer one-bits in the constants).
		103	*/
		104
		105	#define CONST_BITS 8
		106
		107
		108	/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
		109	* causing a lot of useless floating-point operations at run time.
		110	* To get around this we use the following pre-calculated constants.
		111	* If you change CONST_BITS you may want to add appropriate values.
		112	* (With a reasonable C compiler, you can just rely on the FIX() macro...)
		113	*/
		114
		115	#if CONST_BITS == 8
		116	#define FIX_0_382683433 ((int32_t) 98) /* FIX(0.382683433) */
		117	#define FIX_0_541196100 ((int32_t) 139) /* FIX(0.541196100) */
		118	#define FIX_0_707106781 ((int32_t) 181) /* FIX(0.707106781) */
		119	#define FIX_1_306562965 ((int32_t) 334) /* FIX(1.306562965) */
		120	#else
		121	#define FIX_0_382683433 FIX(0.382683433)
		122	#define FIX_0_541196100 FIX(0.541196100)
		123	#define FIX_0_707106781 FIX(0.707106781)
		124	#define FIX_1_306562965 FIX(1.306562965)
		125	#endif
		126
		127
		128	/* We can gain a little more speed, with a further compromise in accuracy,
		129	* by omitting the addition in a descaling shift. This yields an incorrectly
		130	* rounded result half the time...
		131	*/
		132
		133	#ifndef USE_ACCURATE_ROUNDING
		134	#undef DESCALE
		135	#define DESCALE(x,n) RIGHT_SHIFT(x, n)
		136	#endif
		137
		138
		139	/* Multiply a int16_t variable by an int32_t constant, and immediately
		140	* descale to yield a int16_t result.
		141	*/
		142
		143	#define MULTIPLY(var,const) ((int16_t) DESCALE((var) * (const), CONST_BITS))
		144
		145	static av_always_inline void row_fdct(int16_t * data){
		146	int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
		147	int tmp10, tmp11, tmp12, tmp13;
		148	int z1, z2, z3, z4, z5, z11, z13;
		149	int16_t *dataptr;
		150	int ctr;
		151
		152	/* Pass 1: process rows. */
		153
		154	dataptr = data;
		155	for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
		156	tmp0 = dataptr[0] + dataptr[7];
		157	tmp7 = dataptr[0] - dataptr[7];
		158	tmp1 = dataptr[1] + dataptr[6];
		159	tmp6 = dataptr[1] - dataptr[6];
		160	tmp2 = dataptr[2] + dataptr[5];
		161	tmp5 = dataptr[2] - dataptr[5];
		162	tmp3 = dataptr[3] + dataptr[4];
		163	tmp4 = dataptr[3] - dataptr[4];
		164
		165	/* Even part */
		166
		167	tmp10 = tmp0 + tmp3; /* phase 2 */
		168	tmp13 = tmp0 - tmp3;
		169	tmp11 = tmp1 + tmp2;
		170	tmp12 = tmp1 - tmp2;
		171
		172	dataptr[0] = tmp10 + tmp11; /* phase 3 */
		173	dataptr[4] = tmp10 - tmp11;
		174
		175	z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
		176	dataptr[2] = tmp13 + z1; /* phase 5 */
		177	dataptr[6] = tmp13 - z1;
		178
		179	/* Odd part */
		180
		181	tmp10 = tmp4 + tmp5; /* phase 2 */
		182	tmp11 = tmp5 + tmp6;
		183	tmp12 = tmp6 + tmp7;
		184
		185	/* The rotator is modified from fig 4-8 to avoid extra negations. */
		186	z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
		187	z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
		188	z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
		189	z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
		190
		191	z11 = tmp7 + z3; /* phase 5 */
		192	z13 = tmp7 - z3;
		193
		194	dataptr[5] = z13 + z2; /* phase 6 */
		195	dataptr[3] = z13 - z2;
		196	dataptr[1] = z11 + z4;
		197	dataptr[7] = z11 - z4;
		198
		199	dataptr += DCTSIZE; /* advance pointer to next row */
		200	}
		201	}
		202
		203	/*
		204	* Perform the forward DCT on one block of samples.
		205	*/
		206
		207	GLOBAL(void)
		208	ff_fdct_ifast (int16_t * data)
		209	{
		210	int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
		211	int tmp10, tmp11, tmp12, tmp13;
		212	int z1, z2, z3, z4, z5, z11, z13;
		213	int16_t *dataptr;
		214	int ctr;
		215
		216	row_fdct(data);
		217
		218	/* Pass 2: process columns. */
		219
		220	dataptr = data;
		221	for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
		222	tmp0 = dataptr[DCTSIZE0] + dataptr[DCTSIZE7];
		223	tmp7 = dataptr[DCTSIZE0] - dataptr[DCTSIZE7];
		224	tmp1 = dataptr[DCTSIZE1] + dataptr[DCTSIZE6];
		225	tmp6 = dataptr[DCTSIZE1] - dataptr[DCTSIZE6];
		226	tmp2 = dataptr[DCTSIZE2] + dataptr[DCTSIZE5];
		227	tmp5 = dataptr[DCTSIZE2] - dataptr[DCTSIZE5];
		228	tmp3 = dataptr[DCTSIZE3] + dataptr[DCTSIZE4];
		229	tmp4 = dataptr[DCTSIZE3] - dataptr[DCTSIZE4];
		230
		231	/* Even part */
		232
		233	tmp10 = tmp0 + tmp3; /* phase 2 */
		234	tmp13 = tmp0 - tmp3;
		235	tmp11 = tmp1 + tmp2;
		236	tmp12 = tmp1 - tmp2;
		237
		238	dataptr[DCTSIZE0] = tmp10 + tmp11; / phase 3 */
		239	dataptr[DCTSIZE*4] = tmp10 - tmp11;
		240
		241	z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
		242	dataptr[DCTSIZE2] = tmp13 + z1; / phase 5 */
		243	dataptr[DCTSIZE*6] = tmp13 - z1;
		244
		245	/* Odd part */
		246
		247	tmp10 = tmp4 + tmp5; /* phase 2 */
		248	tmp11 = tmp5 + tmp6;
		249	tmp12 = tmp6 + tmp7;
		250
		251	/* The rotator is modified from fig 4-8 to avoid extra negations. */
		252	z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
		253	z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
		254	z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
		255	z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
		256
		257	z11 = tmp7 + z3; /* phase 5 */
		258	z13 = tmp7 - z3;
		259
		260	dataptr[DCTSIZE5] = z13 + z2; / phase 6 */
		261	dataptr[DCTSIZE*3] = z13 - z2;
		262	dataptr[DCTSIZE*1] = z11 + z4;
		263	dataptr[DCTSIZE*7] = z11 - z4;
		264
		265	dataptr++; /* advance pointer to next column */
		266	}
		267	}
		268
		269	/*
		270	* Perform the forward 2-4-8 DCT on one block of samples.
		271	*/
		272
		273	GLOBAL(void)
		274	ff_fdct_ifast248 (int16_t * data)
		275	{
		276	int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
		277	int tmp10, tmp11, tmp12, tmp13;
		278	int z1;
		279	int16_t *dataptr;
		280	int ctr;
		281
		282	row_fdct(data);
		283
		284	/* Pass 2: process columns. */
		285
		286	dataptr = data;
		287	for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
		288	tmp0 = dataptr[DCTSIZE0] + dataptr[DCTSIZE1];
		289	tmp1 = dataptr[DCTSIZE2] + dataptr[DCTSIZE3];
		290	tmp2 = dataptr[DCTSIZE4] + dataptr[DCTSIZE5];
		291	tmp3 = dataptr[DCTSIZE6] + dataptr[DCTSIZE7];
		292	tmp4 = dataptr[DCTSIZE0] - dataptr[DCTSIZE1];
		293	tmp5 = dataptr[DCTSIZE2] - dataptr[DCTSIZE3];
		294	tmp6 = dataptr[DCTSIZE4] - dataptr[DCTSIZE5];
		295	tmp7 = dataptr[DCTSIZE6] - dataptr[DCTSIZE7];
		296
		297	/* Even part */
		298
		299	tmp10 = tmp0 + tmp3;
		300	tmp11 = tmp1 + tmp2;
		301	tmp12 = tmp1 - tmp2;
		302	tmp13 = tmp0 - tmp3;
		303
		304	dataptr[DCTSIZE*0] = tmp10 + tmp11;
		305	dataptr[DCTSIZE*4] = tmp10 - tmp11;
		306
		307	z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
		308	dataptr[DCTSIZE*2] = tmp13 + z1;
		309	dataptr[DCTSIZE*6] = tmp13 - z1;
		310
		311	tmp10 = tmp4 + tmp7;
		312	tmp11 = tmp5 + tmp6;
		313	tmp12 = tmp5 - tmp6;
		314	tmp13 = tmp4 - tmp7;
		315
		316	dataptr[DCTSIZE*1] = tmp10 + tmp11;
		317	dataptr[DCTSIZE*5] = tmp10 - tmp11;
		318
		319	z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
		320	dataptr[DCTSIZE*3] = tmp13 + z1;
		321	dataptr[DCTSIZE*7] = tmp13 - z1;
		322
		323	dataptr++; /* advance pointer to next column */
		324	}
		325	}
		326
		327
		328	#undef GLOBAL
		329	#undef CONST_BITS
		330	#undef DESCALE
		331	#undef FIX_0_541196100
		332	#undef FIX_1_306562965

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/ffmpeg/libavcodec/jfdctfst.c @ 4361 – Rev 4349