WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/ffmpeg/libavcodec/jrevdct.c

Rev	Author	Line No.	Line
4349	Serge	1	/*
		2	* This file is part of the Independent JPEG Group's software.
		3	*
		4	* The authors make NO WARRANTY or representation, either express or implied,
		5	* with respect to this software, its quality, accuracy, merchantability, or
		6	* fitness for a particular purpose. This software is provided "AS IS", and
		7	* you, its user, assume the entire risk as to its quality and accuracy.
		8	*
		9	* This software is copyright (C) 1991, 1992, Thomas G. Lane.
		10	* All Rights Reserved except as specified below.
		11	*
		12	* Permission is hereby granted to use, copy, modify, and distribute this
		13	* software (or portions thereof) for any purpose, without fee, subject to
		14	* these conditions:
		15	* (1) If any part of the source code for this software is distributed, then
		16	* this README file must be included, with this copyright and no-warranty
		17	* notice unaltered; and any additions, deletions, or changes to the original
		18	* files must be clearly indicated in accompanying documentation.
		19	* (2) If only executable code is distributed, then the accompanying
		20	* documentation must state that "this software is based in part on the work
		21	* of the Independent JPEG Group".
		22	* (3) Permission for use of this software is granted only if the user accepts
		23	* full responsibility for any undesirable consequences; the authors accept
		24	* NO LIABILITY for damages of any kind.
		25	*
		26	* These conditions apply to any software derived from or based on the IJG
		27	* code, not just to the unmodified library. If you use our work, you ought
		28	* to acknowledge us.
		29	*
		30	* Permission is NOT granted for the use of any IJG author's name or company
		31	* name in advertising or publicity relating to this software or products
		32	* derived from it. This software may be referred to only as "the Independent
		33	* JPEG Group's software".
		34	*
		35	* We specifically permit and encourage the use of this software as the basis
		36	* of commercial products, provided that all warranty or liability claims are
		37	* assumed by the product vendor.
		38	*
		39	* This file contains the basic inverse-DCT transformation subroutine.
		40	*
		41	* This implementation is based on an algorithm described in
		42	* C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
		43	* Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
		44	* Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
		45	* The primary algorithm described there uses 11 multiplies and 29 adds.
		46	* We use their alternate method with 12 multiplies and 32 adds.
		47	* The advantage of this method is that no data path contains more than one
		48	* multiplication; this allows a very simple and accurate implementation in
		49	* scaled fixed-point arithmetic, with a minimal number of shifts.
		50	*
		51	* I've made lots of modifications to attempt to take advantage of the
		52	* sparse nature of the DCT matrices we're getting. Although the logic
		53	* is cumbersome, it's straightforward and the resulting code is much
		54	* faster.
		55	*
		56	* A better way to do this would be to pass in the DCT block as a sparse
		57	* matrix, perhaps with the difference cases encoded.
		58	*/
		59
		60	/**
		61	* @file
		62	* Independent JPEG Group's LLM idct.
		63	*/
		64
		65	#include "libavutil/common.h"
		66	#include "dct.h"
		67
		68	#define EIGHT_BIT_SAMPLES
		69
		70	#define DCTSIZE 8
		71	#define DCTSIZE2 64
		72
		73	#define GLOBAL
		74
		75	#define RIGHT_SHIFT(x, n) ((x) >> (n))
		76
		77	typedef int16_t DCTBLOCK[DCTSIZE2];
		78
		79	#define CONST_BITS 13
		80
		81	/*
		82	* This routine is specialized to the case DCTSIZE = 8.
		83	*/
		84
		85	#if DCTSIZE != 8
		86	Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
		87	#endif
		88
		89
		90	/*
		91	* A 2-D IDCT can be done by 1-D IDCT on each row followed by 1-D IDCT
		92	* on each column. Direct algorithms are also available, but they are
		93	* much more complex and seem not to be any faster when reduced to code.
		94	*
		95	* The poop on this scaling stuff is as follows:
		96	*
		97	* Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
		98	* larger than the true IDCT outputs. The final outputs are therefore
		99	* a factor of N larger than desired; since N=8 this can be cured by
		100	* a simple right shift at the end of the algorithm. The advantage of
		101	* this arrangement is that we save two multiplications per 1-D IDCT,
		102	* because the y0 and y4 inputs need not be divided by sqrt(N).
		103	*
		104	* We have to do addition and subtraction of the integer inputs, which
		105	* is no problem, and multiplication by fractional constants, which is
		106	* a problem to do in integer arithmetic. We multiply all the constants
		107	* by CONST_SCALE and convert them to integer constants (thus retaining
		108	* CONST_BITS bits of precision in the constants). After doing a
		109	* multiplication we have to divide the product by CONST_SCALE, with proper
		110	* rounding, to produce the correct output. This division can be done
		111	* cheaply as a right shift of CONST_BITS bits. We postpone shifting
		112	* as long as possible so that partial sums can be added together with
		113	* full fractional precision.
		114	*
		115	* The outputs of the first pass are scaled up by PASS1_BITS bits so that
		116	* they are represented to better-than-integral precision. These outputs
		117	* require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
		118	* with the recommended scaling. (To scale up 12-bit sample data further, an
		119	* intermediate int32 array would be needed.)
		120	*
		121	* To avoid overflow of the 32-bit intermediate results in pass 2, we must
		122	* have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis
		123	* shows that the values given below are the most effective.
		124	*/
		125
		126	#ifdef EIGHT_BIT_SAMPLES
		127	#define PASS1_BITS 2
		128	#else
		129	#define PASS1_BITS 1 /* lose a little precision to avoid overflow */
		130	#endif
		131
		132	#define ONE ((int32_t) 1)
		133
		134	#define CONST_SCALE (ONE << CONST_BITS)
		135
		136	/* Convert a positive real constant to an integer scaled by CONST_SCALE.
		137	* IMPORTANT: if your compiler doesn't do this arithmetic at compile time,
		138	* you will pay a significant penalty in run time. In that case, figure
		139	* the correct integer constant values and insert them by hand.
		140	*/
		141
		142	/* Actually FIX is no longer used, we precomputed them all */
		143	#define FIX(x) ((int32_t) ((x) * CONST_SCALE + 0.5))
		144
		145	/* Descale and correctly round an int32_t value that's scaled by N bits.
		146	* We assume RIGHT_SHIFT rounds towards minus infinity, so adding
		147	* the fudge factor is correct for either sign of X.
		148	*/
		149
		150	#define DESCALE(x,n) RIGHT_SHIFT((x) + (ONE << ((n)-1)), n)
		151
		152	/* Multiply an int32_t variable by an int32_t constant to yield an int32_t result.
		153	* For 8-bit samples with the recommended scaling, all the variable
		154	* and constant values involved are no more than 16 bits wide, so a
		155	* 16x16->32 bit multiply can be used instead of a full 32x32 multiply;
		156	* this provides a useful speedup on many machines.
		157	* There is no way to specify a 16x16->32 multiply in portable C, but
		158	* some C compilers will do the right thing if you provide the correct
		159	* combination of casts.
		160	* NB: for 12-bit samples, a full 32-bit multiplication will be needed.
		161	*/
		162
		163	#ifdef EIGHT_BIT_SAMPLES
		164	#ifdef SHORTxSHORT_32 /* may work if 'int' is 32 bits */
		165	#define MULTIPLY(var,const) (((int16_t) (var)) * ((int16_t) (const)))
		166	#endif
		167	#ifdef SHORTxLCONST_32 /* known to work with Microsoft C 6.0 */
		168	#define MULTIPLY(var,const) (((int16_t) (var)) * ((int32_t) (const)))
		169	#endif
		170	#endif
		171
		172	#ifndef MULTIPLY /* default definition */
		173	#define MULTIPLY(var,const) ((var) * (const))
		174	#endif
		175
		176
		177	/*
		178	Unlike our decoder where we approximate the FIXes, we need to use exact
		179	ones here or successive P-frames will drift too much with Reference frame coding
		180	*/
		181	#define FIX_0_211164243 1730
		182	#define FIX_0_275899380 2260
		183	#define FIX_0_298631336 2446
		184	#define FIX_0_390180644 3196
		185	#define FIX_0_509795579 4176
		186	#define FIX_0_541196100 4433
		187	#define FIX_0_601344887 4926
		188	#define FIX_0_765366865 6270
		189	#define FIX_0_785694958 6436
		190	#define FIX_0_899976223 7373
		191	#define FIX_1_061594337 8697
		192	#define FIX_1_111140466 9102
		193	#define FIX_1_175875602 9633
		194	#define FIX_1_306562965 10703
		195	#define FIX_1_387039845 11363
		196	#define FIX_1_451774981 11893
		197	#define FIX_1_501321110 12299
		198	#define FIX_1_662939225 13623
		199	#define FIX_1_847759065 15137
		200	#define FIX_1_961570560 16069
		201	#define FIX_2_053119869 16819
		202	#define FIX_2_172734803 17799
		203	#define FIX_2_562915447 20995
		204	#define FIX_3_072711026 25172
		205
		206	/*
		207	* Perform the inverse DCT on one block of coefficients.
		208	*/
		209
		210	void ff_j_rev_dct(DCTBLOCK data)
		211	{
		212	int32_t tmp0, tmp1, tmp2, tmp3;
		213	int32_t tmp10, tmp11, tmp12, tmp13;
		214	int32_t z1, z2, z3, z4, z5;
		215	int32_t d0, d1, d2, d3, d4, d5, d6, d7;
		216	register int16_t *dataptr;
		217	int rowctr;
		218
		219	/* Pass 1: process rows. */
		220	/* Note results are scaled up by sqrt(8) compared to a true IDCT; */
		221	/* furthermore, we scale the results by 2*PASS1_BITS. /
		222
		223	dataptr = data;
		224
		225	for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
		226	/* Due to quantization, we will usually find that many of the input
		227	* coefficients are zero, especially the AC terms. We can exploit this
		228	* by short-circuiting the IDCT calculation for any row in which all
		229	* the AC terms are zero. In that case each output is equal to the
		230	* DC coefficient (with scale factor as needed).
		231	* With typical images and quantization tables, half or more of the
		232	* row DCT calculations can be simplified this way.
		233	*/
		234
		235	register int idataptr = (int)dataptr;
		236
		237	/* WARNING: we do the same permutation as MMX idct to simplify the
		238	video core */
		239	d0 = dataptr[0];
		240	d2 = dataptr[1];
		241	d4 = dataptr[2];
		242	d6 = dataptr[3];
		243	d1 = dataptr[4];
		244	d3 = dataptr[5];
		245	d5 = dataptr[6];
		246	d7 = dataptr[7];
		247
		248	if ((d1 \| d2 \| d3 \| d4 \| d5 \| d6 \| d7) == 0) {
		249	/* AC terms all zero */
		250	if (d0) {
		251	/* Compute a 32 bit value to assign. */
		252	int16_t dcval = (int16_t) (d0 << PASS1_BITS);
		253	register int v = (dcval & 0xffff) \| ((dcval << 16) & 0xffff0000);
		254
		255	idataptr[0] = v;
		256	idataptr[1] = v;
		257	idataptr[2] = v;
		258	idataptr[3] = v;
		259	}
		260
		261	dataptr += DCTSIZE; /* advance pointer to next row */
		262	continue;
		263	}
		264
		265	/* Even part: reverse the even part of the forward DCT. */
		266	/* The rotator is sqrt(2)c(-6). /
		267	{
		268	if (d6) {
		269	if (d2) {
		270	/* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
		271	z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
		272	tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
		273	tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
		274
		275	tmp0 = (d0 + d4) << CONST_BITS;
		276	tmp1 = (d0 - d4) << CONST_BITS;
		277
		278	tmp10 = tmp0 + tmp3;
		279	tmp13 = tmp0 - tmp3;
		280	tmp11 = tmp1 + tmp2;
		281	tmp12 = tmp1 - tmp2;
		282	} else {
		283	/* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
		284	tmp2 = MULTIPLY(-d6, FIX_1_306562965);
		285	tmp3 = MULTIPLY(d6, FIX_0_541196100);
		286
		287	tmp0 = (d0 + d4) << CONST_BITS;
		288	tmp1 = (d0 - d4) << CONST_BITS;
		289
		290	tmp10 = tmp0 + tmp3;
		291	tmp13 = tmp0 - tmp3;
		292	tmp11 = tmp1 + tmp2;
		293	tmp12 = tmp1 - tmp2;
		294	}
		295	} else {
		296	if (d2) {
		297	/* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
		298	tmp2 = MULTIPLY(d2, FIX_0_541196100);
		299	tmp3 = MULTIPLY(d2, FIX_1_306562965);
		300
		301	tmp0 = (d0 + d4) << CONST_BITS;
		302	tmp1 = (d0 - d4) << CONST_BITS;
		303
		304	tmp10 = tmp0 + tmp3;
		305	tmp13 = tmp0 - tmp3;
		306	tmp11 = tmp1 + tmp2;
		307	tmp12 = tmp1 - tmp2;
		308	} else {
		309	/* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
		310	tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
		311	tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
		312	}
		313	}
		314
		315	/* Odd part per figure 8; the matrix is unitary and hence its
		316	* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
		317	*/
		318
		319	if (d7) {
		320	if (d5) {
		321	if (d3) {
		322	if (d1) {
		323	/* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
		324	z1 = d7 + d1;
		325	z2 = d5 + d3;
		326	z3 = d7 + d3;
		327	z4 = d5 + d1;
		328	z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
		329
		330	tmp0 = MULTIPLY(d7, FIX_0_298631336);
		331	tmp1 = MULTIPLY(d5, FIX_2_053119869);
		332	tmp2 = MULTIPLY(d3, FIX_3_072711026);
		333	tmp3 = MULTIPLY(d1, FIX_1_501321110);
		334	z1 = MULTIPLY(-z1, FIX_0_899976223);
		335	z2 = MULTIPLY(-z2, FIX_2_562915447);
		336	z3 = MULTIPLY(-z3, FIX_1_961570560);
		337	z4 = MULTIPLY(-z4, FIX_0_390180644);
		338
		339	z3 += z5;
		340	z4 += z5;
		341
		342	tmp0 += z1 + z3;
		343	tmp1 += z2 + z4;
		344	tmp2 += z2 + z3;
		345	tmp3 += z1 + z4;
		346	} else {
		347	/* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
		348	z2 = d5 + d3;
		349	z3 = d7 + d3;
		350	z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
		351
		352	tmp0 = MULTIPLY(d7, FIX_0_298631336);
		353	tmp1 = MULTIPLY(d5, FIX_2_053119869);
		354	tmp2 = MULTIPLY(d3, FIX_3_072711026);
		355	z1 = MULTIPLY(-d7, FIX_0_899976223);
		356	z2 = MULTIPLY(-z2, FIX_2_562915447);
		357	z3 = MULTIPLY(-z3, FIX_1_961570560);
		358	z4 = MULTIPLY(-d5, FIX_0_390180644);
		359
		360	z3 += z5;
		361	z4 += z5;
		362
		363	tmp0 += z1 + z3;
		364	tmp1 += z2 + z4;
		365	tmp2 += z2 + z3;
		366	tmp3 = z1 + z4;
		367	}
		368	} else {
		369	if (d1) {
		370	/* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
		371	z1 = d7 + d1;
		372	z4 = d5 + d1;
		373	z5 = MULTIPLY(d7 + z4, FIX_1_175875602);
		374
		375	tmp0 = MULTIPLY(d7, FIX_0_298631336);
		376	tmp1 = MULTIPLY(d5, FIX_2_053119869);
		377	tmp3 = MULTIPLY(d1, FIX_1_501321110);
		378	z1 = MULTIPLY(-z1, FIX_0_899976223);
		379	z2 = MULTIPLY(-d5, FIX_2_562915447);
		380	z3 = MULTIPLY(-d7, FIX_1_961570560);
		381	z4 = MULTIPLY(-z4, FIX_0_390180644);
		382
		383	z3 += z5;
		384	z4 += z5;
		385
		386	tmp0 += z1 + z3;
		387	tmp1 += z2 + z4;
		388	tmp2 = z2 + z3;
		389	tmp3 += z1 + z4;
		390	} else {
		391	/* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
		392	tmp0 = MULTIPLY(-d7, FIX_0_601344887);
		393	z1 = MULTIPLY(-d7, FIX_0_899976223);
		394	z3 = MULTIPLY(-d7, FIX_1_961570560);
		395	tmp1 = MULTIPLY(-d5, FIX_0_509795579);
		396	z2 = MULTIPLY(-d5, FIX_2_562915447);
		397	z4 = MULTIPLY(-d5, FIX_0_390180644);
		398	z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
		399
		400	z3 += z5;
		401	z4 += z5;
		402
		403	tmp0 += z3;
		404	tmp1 += z4;
		405	tmp2 = z2 + z3;
		406	tmp3 = z1 + z4;
		407	}
		408	}
		409	} else {
		410	if (d3) {
		411	if (d1) {
		412	/* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
		413	z1 = d7 + d1;
		414	z3 = d7 + d3;
		415	z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
		416
		417	tmp0 = MULTIPLY(d7, FIX_0_298631336);
		418	tmp2 = MULTIPLY(d3, FIX_3_072711026);
		419	tmp3 = MULTIPLY(d1, FIX_1_501321110);
		420	z1 = MULTIPLY(-z1, FIX_0_899976223);
		421	z2 = MULTIPLY(-d3, FIX_2_562915447);
		422	z3 = MULTIPLY(-z3, FIX_1_961570560);
		423	z4 = MULTIPLY(-d1, FIX_0_390180644);
		424
		425	z3 += z5;
		426	z4 += z5;
		427
		428	tmp0 += z1 + z3;
		429	tmp1 = z2 + z4;
		430	tmp2 += z2 + z3;
		431	tmp3 += z1 + z4;
		432	} else {
		433	/* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
		434	z3 = d7 + d3;
		435
		436	tmp0 = MULTIPLY(-d7, FIX_0_601344887);
		437	z1 = MULTIPLY(-d7, FIX_0_899976223);
		438	tmp2 = MULTIPLY(d3, FIX_0_509795579);
		439	z2 = MULTIPLY(-d3, FIX_2_562915447);
		440	z5 = MULTIPLY(z3, FIX_1_175875602);
		441	z3 = MULTIPLY(-z3, FIX_0_785694958);
		442
		443	tmp0 += z3;
		444	tmp1 = z2 + z5;
		445	tmp2 += z3;
		446	tmp3 = z1 + z5;
		447	}
		448	} else {
		449	if (d1) {
		450	/* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
		451	z1 = d7 + d1;
		452	z5 = MULTIPLY(z1, FIX_1_175875602);
		453
		454	z1 = MULTIPLY(z1, FIX_0_275899380);
		455	z3 = MULTIPLY(-d7, FIX_1_961570560);
		456	tmp0 = MULTIPLY(-d7, FIX_1_662939225);
		457	z4 = MULTIPLY(-d1, FIX_0_390180644);
		458	tmp3 = MULTIPLY(d1, FIX_1_111140466);
		459
		460	tmp0 += z1;
		461	tmp1 = z4 + z5;
		462	tmp2 = z3 + z5;
		463	tmp3 += z1;
		464	} else {
		465	/* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
		466	tmp0 = MULTIPLY(-d7, FIX_1_387039845);
		467	tmp1 = MULTIPLY(d7, FIX_1_175875602);
		468	tmp2 = MULTIPLY(-d7, FIX_0_785694958);
		469	tmp3 = MULTIPLY(d7, FIX_0_275899380);
		470	}
		471	}
		472	}
		473	} else {
		474	if (d5) {
		475	if (d3) {
		476	if (d1) {
		477	/* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
		478	z2 = d5 + d3;
		479	z4 = d5 + d1;
		480	z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
		481
		482	tmp1 = MULTIPLY(d5, FIX_2_053119869);
		483	tmp2 = MULTIPLY(d3, FIX_3_072711026);
		484	tmp3 = MULTIPLY(d1, FIX_1_501321110);
		485	z1 = MULTIPLY(-d1, FIX_0_899976223);
		486	z2 = MULTIPLY(-z2, FIX_2_562915447);
		487	z3 = MULTIPLY(-d3, FIX_1_961570560);
		488	z4 = MULTIPLY(-z4, FIX_0_390180644);
		489
		490	z3 += z5;
		491	z4 += z5;
		492
		493	tmp0 = z1 + z3;
		494	tmp1 += z2 + z4;
		495	tmp2 += z2 + z3;
		496	tmp3 += z1 + z4;
		497	} else {
		498	/* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
		499	z2 = d5 + d3;
		500
		501	z5 = MULTIPLY(z2, FIX_1_175875602);
		502	tmp1 = MULTIPLY(d5, FIX_1_662939225);
		503	z4 = MULTIPLY(-d5, FIX_0_390180644);
		504	z2 = MULTIPLY(-z2, FIX_1_387039845);
		505	tmp2 = MULTIPLY(d3, FIX_1_111140466);
		506	z3 = MULTIPLY(-d3, FIX_1_961570560);
		507
		508	tmp0 = z3 + z5;
		509	tmp1 += z2;
		510	tmp2 += z2;
		511	tmp3 = z4 + z5;
		512	}
		513	} else {
		514	if (d1) {
		515	/* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
		516	z4 = d5 + d1;
		517
		518	z5 = MULTIPLY(z4, FIX_1_175875602);
		519	z1 = MULTIPLY(-d1, FIX_0_899976223);
		520	tmp3 = MULTIPLY(d1, FIX_0_601344887);
		521	tmp1 = MULTIPLY(-d5, FIX_0_509795579);
		522	z2 = MULTIPLY(-d5, FIX_2_562915447);
		523	z4 = MULTIPLY(z4, FIX_0_785694958);
		524
		525	tmp0 = z1 + z5;
		526	tmp1 += z4;
		527	tmp2 = z2 + z5;
		528	tmp3 += z4;
		529	} else {
		530	/* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
		531	tmp0 = MULTIPLY(d5, FIX_1_175875602);
		532	tmp1 = MULTIPLY(d5, FIX_0_275899380);
		533	tmp2 = MULTIPLY(-d5, FIX_1_387039845);
		534	tmp3 = MULTIPLY(d5, FIX_0_785694958);
		535	}
		536	}
		537	} else {
		538	if (d3) {
		539	if (d1) {
		540	/* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
		541	z5 = d1 + d3;
		542	tmp3 = MULTIPLY(d1, FIX_0_211164243);
		543	tmp2 = MULTIPLY(-d3, FIX_1_451774981);
		544	z1 = MULTIPLY(d1, FIX_1_061594337);
		545	z2 = MULTIPLY(-d3, FIX_2_172734803);
		546	z4 = MULTIPLY(z5, FIX_0_785694958);
		547	z5 = MULTIPLY(z5, FIX_1_175875602);
		548
		549	tmp0 = z1 - z4;
		550	tmp1 = z2 + z4;
		551	tmp2 += z5;
		552	tmp3 += z5;
		553	} else {
		554	/* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
		555	tmp0 = MULTIPLY(-d3, FIX_0_785694958);
		556	tmp1 = MULTIPLY(-d3, FIX_1_387039845);
		557	tmp2 = MULTIPLY(-d3, FIX_0_275899380);
		558	tmp3 = MULTIPLY(d3, FIX_1_175875602);
		559	}
		560	} else {
		561	if (d1) {
		562	/* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
		563	tmp0 = MULTIPLY(d1, FIX_0_275899380);
		564	tmp1 = MULTIPLY(d1, FIX_0_785694958);
		565	tmp2 = MULTIPLY(d1, FIX_1_175875602);
		566	tmp3 = MULTIPLY(d1, FIX_1_387039845);
		567	} else {
		568	/* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
		569	tmp0 = tmp1 = tmp2 = tmp3 = 0;
		570	}
		571	}
		572	}
		573	}
		574	}
		575	/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
		576
		577	dataptr[0] = (int16_t) DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
		578	dataptr[7] = (int16_t) DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
		579	dataptr[1] = (int16_t) DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
		580	dataptr[6] = (int16_t) DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
		581	dataptr[2] = (int16_t) DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
		582	dataptr[5] = (int16_t) DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
		583	dataptr[3] = (int16_t) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
		584	dataptr[4] = (int16_t) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
		585
		586	dataptr += DCTSIZE; /* advance pointer to next row */
		587	}
		588
		589	/* Pass 2: process columns. */
		590	/* Note that we must descale the results by a factor of 8 == 2*3, /
		591	/* and also undo the PASS1_BITS scaling. */
		592
		593	dataptr = data;
		594	for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
		595	/* Columns of zeroes can be exploited in the same way as we did with rows.
		596	* However, the row calculation has created many nonzero AC terms, so the
		597	* simplification applies less often (typically 5% to 10% of the time).
		598	* On machines with very fast multiplication, it's possible that the
		599	* test takes more time than it's worth. In that case this section
		600	* may be commented out.
		601	*/
		602
		603	d0 = dataptr[DCTSIZE*0];
		604	d1 = dataptr[DCTSIZE*1];
		605	d2 = dataptr[DCTSIZE*2];
		606	d3 = dataptr[DCTSIZE*3];
		607	d4 = dataptr[DCTSIZE*4];
		608	d5 = dataptr[DCTSIZE*5];
		609	d6 = dataptr[DCTSIZE*6];
		610	d7 = dataptr[DCTSIZE*7];
		611
		612	/* Even part: reverse the even part of the forward DCT. */
		613	/* The rotator is sqrt(2)c(-6). /
		614	if (d6) {
		615	if (d2) {
		616	/* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
		617	z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
		618	tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
		619	tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
		620
		621	tmp0 = (d0 + d4) << CONST_BITS;
		622	tmp1 = (d0 - d4) << CONST_BITS;
		623
		624	tmp10 = tmp0 + tmp3;
		625	tmp13 = tmp0 - tmp3;
		626	tmp11 = tmp1 + tmp2;
		627	tmp12 = tmp1 - tmp2;
		628	} else {
		629	/* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
		630	tmp2 = MULTIPLY(-d6, FIX_1_306562965);
		631	tmp3 = MULTIPLY(d6, FIX_0_541196100);
		632
		633	tmp0 = (d0 + d4) << CONST_BITS;
		634	tmp1 = (d0 - d4) << CONST_BITS;
		635
		636	tmp10 = tmp0 + tmp3;
		637	tmp13 = tmp0 - tmp3;
		638	tmp11 = tmp1 + tmp2;
		639	tmp12 = tmp1 - tmp2;
		640	}
		641	} else {
		642	if (d2) {
		643	/* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
		644	tmp2 = MULTIPLY(d2, FIX_0_541196100);
		645	tmp3 = MULTIPLY(d2, FIX_1_306562965);
		646
		647	tmp0 = (d0 + d4) << CONST_BITS;
		648	tmp1 = (d0 - d4) << CONST_BITS;
		649
		650	tmp10 = tmp0 + tmp3;
		651	tmp13 = tmp0 - tmp3;
		652	tmp11 = tmp1 + tmp2;
		653	tmp12 = tmp1 - tmp2;
		654	} else {
		655	/* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
		656	tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
		657	tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
		658	}
		659	}
		660
		661	/* Odd part per figure 8; the matrix is unitary and hence its
		662	* transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively.
		663	*/
		664	if (d7) {
		665	if (d5) {
		666	if (d3) {
		667	if (d1) {
		668	/* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
		669	z1 = d7 + d1;
		670	z2 = d5 + d3;
		671	z3 = d7 + d3;
		672	z4 = d5 + d1;
		673	z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
		674
		675	tmp0 = MULTIPLY(d7, FIX_0_298631336);
		676	tmp1 = MULTIPLY(d5, FIX_2_053119869);
		677	tmp2 = MULTIPLY(d3, FIX_3_072711026);
		678	tmp3 = MULTIPLY(d1, FIX_1_501321110);
		679	z1 = MULTIPLY(-z1, FIX_0_899976223);
		680	z2 = MULTIPLY(-z2, FIX_2_562915447);
		681	z3 = MULTIPLY(-z3, FIX_1_961570560);
		682	z4 = MULTIPLY(-z4, FIX_0_390180644);
		683
		684	z3 += z5;
		685	z4 += z5;
		686
		687	tmp0 += z1 + z3;
		688	tmp1 += z2 + z4;
		689	tmp2 += z2 + z3;
		690	tmp3 += z1 + z4;
		691	} else {
		692	/* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
		693	z2 = d5 + d3;
		694	z3 = d7 + d3;
		695	z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
		696
		697	tmp0 = MULTIPLY(d7, FIX_0_298631336);
		698	tmp1 = MULTIPLY(d5, FIX_2_053119869);
		699	tmp2 = MULTIPLY(d3, FIX_3_072711026);
		700	z1 = MULTIPLY(-d7, FIX_0_899976223);
		701	z2 = MULTIPLY(-z2, FIX_2_562915447);
		702	z3 = MULTIPLY(-z3, FIX_1_961570560);
		703	z4 = MULTIPLY(-d5, FIX_0_390180644);
		704
		705	z3 += z5;
		706	z4 += z5;
		707
		708	tmp0 += z1 + z3;
		709	tmp1 += z2 + z4;
		710	tmp2 += z2 + z3;
		711	tmp3 = z1 + z4;
		712	}
		713	} else {
		714	if (d1) {
		715	/* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
		716	z1 = d7 + d1;
		717	z3 = d7;
		718	z4 = d5 + d1;
		719	z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
		720
		721	tmp0 = MULTIPLY(d7, FIX_0_298631336);
		722	tmp1 = MULTIPLY(d5, FIX_2_053119869);
		723	tmp3 = MULTIPLY(d1, FIX_1_501321110);
		724	z1 = MULTIPLY(-z1, FIX_0_899976223);
		725	z2 = MULTIPLY(-d5, FIX_2_562915447);
		726	z3 = MULTIPLY(-d7, FIX_1_961570560);
		727	z4 = MULTIPLY(-z4, FIX_0_390180644);
		728
		729	z3 += z5;
		730	z4 += z5;
		731
		732	tmp0 += z1 + z3;
		733	tmp1 += z2 + z4;
		734	tmp2 = z2 + z3;
		735	tmp3 += z1 + z4;
		736	} else {
		737	/* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
		738	tmp0 = MULTIPLY(-d7, FIX_0_601344887);
		739	z1 = MULTIPLY(-d7, FIX_0_899976223);
		740	z3 = MULTIPLY(-d7, FIX_1_961570560);
		741	tmp1 = MULTIPLY(-d5, FIX_0_509795579);
		742	z2 = MULTIPLY(-d5, FIX_2_562915447);
		743	z4 = MULTIPLY(-d5, FIX_0_390180644);
		744	z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
		745
		746	z3 += z5;
		747	z4 += z5;
		748
		749	tmp0 += z3;
		750	tmp1 += z4;
		751	tmp2 = z2 + z3;
		752	tmp3 = z1 + z4;
		753	}
		754	}
		755	} else {
		756	if (d3) {
		757	if (d1) {
		758	/* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
		759	z1 = d7 + d1;
		760	z3 = d7 + d3;
		761	z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
		762
		763	tmp0 = MULTIPLY(d7, FIX_0_298631336);
		764	tmp2 = MULTIPLY(d3, FIX_3_072711026);
		765	tmp3 = MULTIPLY(d1, FIX_1_501321110);
		766	z1 = MULTIPLY(-z1, FIX_0_899976223);
		767	z2 = MULTIPLY(-d3, FIX_2_562915447);
		768	z3 = MULTIPLY(-z3, FIX_1_961570560);
		769	z4 = MULTIPLY(-d1, FIX_0_390180644);
		770
		771	z3 += z5;
		772	z4 += z5;
		773
		774	tmp0 += z1 + z3;
		775	tmp1 = z2 + z4;
		776	tmp2 += z2 + z3;
		777	tmp3 += z1 + z4;
		778	} else {
		779	/* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
		780	z3 = d7 + d3;
		781
		782	tmp0 = MULTIPLY(-d7, FIX_0_601344887);
		783	z1 = MULTIPLY(-d7, FIX_0_899976223);
		784	tmp2 = MULTIPLY(d3, FIX_0_509795579);
		785	z2 = MULTIPLY(-d3, FIX_2_562915447);
		786	z5 = MULTIPLY(z3, FIX_1_175875602);
		787	z3 = MULTIPLY(-z3, FIX_0_785694958);
		788
		789	tmp0 += z3;
		790	tmp1 = z2 + z5;
		791	tmp2 += z3;
		792	tmp3 = z1 + z5;
		793	}
		794	} else {
		795	if (d1) {
		796	/* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
		797	z1 = d7 + d1;
		798	z5 = MULTIPLY(z1, FIX_1_175875602);
		799
		800	z1 = MULTIPLY(z1, FIX_0_275899380);
		801	z3 = MULTIPLY(-d7, FIX_1_961570560);
		802	tmp0 = MULTIPLY(-d7, FIX_1_662939225);
		803	z4 = MULTIPLY(-d1, FIX_0_390180644);
		804	tmp3 = MULTIPLY(d1, FIX_1_111140466);
		805
		806	tmp0 += z1;
		807	tmp1 = z4 + z5;
		808	tmp2 = z3 + z5;
		809	tmp3 += z1;
		810	} else {
		811	/* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
		812	tmp0 = MULTIPLY(-d7, FIX_1_387039845);
		813	tmp1 = MULTIPLY(d7, FIX_1_175875602);
		814	tmp2 = MULTIPLY(-d7, FIX_0_785694958);
		815	tmp3 = MULTIPLY(d7, FIX_0_275899380);
		816	}
		817	}
		818	}
		819	} else {
		820	if (d5) {
		821	if (d3) {
		822	if (d1) {
		823	/* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
		824	z2 = d5 + d3;
		825	z4 = d5 + d1;
		826	z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
		827
		828	tmp1 = MULTIPLY(d5, FIX_2_053119869);
		829	tmp2 = MULTIPLY(d3, FIX_3_072711026);
		830	tmp3 = MULTIPLY(d1, FIX_1_501321110);
		831	z1 = MULTIPLY(-d1, FIX_0_899976223);
		832	z2 = MULTIPLY(-z2, FIX_2_562915447);
		833	z3 = MULTIPLY(-d3, FIX_1_961570560);
		834	z4 = MULTIPLY(-z4, FIX_0_390180644);
		835
		836	z3 += z5;
		837	z4 += z5;
		838
		839	tmp0 = z1 + z3;
		840	tmp1 += z2 + z4;
		841	tmp2 += z2 + z3;
		842	tmp3 += z1 + z4;
		843	} else {
		844	/* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
		845	z2 = d5 + d3;
		846
		847	z5 = MULTIPLY(z2, FIX_1_175875602);
		848	tmp1 = MULTIPLY(d5, FIX_1_662939225);
		849	z4 = MULTIPLY(-d5, FIX_0_390180644);
		850	z2 = MULTIPLY(-z2, FIX_1_387039845);
		851	tmp2 = MULTIPLY(d3, FIX_1_111140466);
		852	z3 = MULTIPLY(-d3, FIX_1_961570560);
		853
		854	tmp0 = z3 + z5;
		855	tmp1 += z2;
		856	tmp2 += z2;
		857	tmp3 = z4 + z5;
		858	}
		859	} else {
		860	if (d1) {
		861	/* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
		862	z4 = d5 + d1;
		863
		864	z5 = MULTIPLY(z4, FIX_1_175875602);
		865	z1 = MULTIPLY(-d1, FIX_0_899976223);
		866	tmp3 = MULTIPLY(d1, FIX_0_601344887);
		867	tmp1 = MULTIPLY(-d5, FIX_0_509795579);
		868	z2 = MULTIPLY(-d5, FIX_2_562915447);
		869	z4 = MULTIPLY(z4, FIX_0_785694958);
		870
		871	tmp0 = z1 + z5;
		872	tmp1 += z4;
		873	tmp2 = z2 + z5;
		874	tmp3 += z4;
		875	} else {
		876	/* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
		877	tmp0 = MULTIPLY(d5, FIX_1_175875602);
		878	tmp1 = MULTIPLY(d5, FIX_0_275899380);
		879	tmp2 = MULTIPLY(-d5, FIX_1_387039845);
		880	tmp3 = MULTIPLY(d5, FIX_0_785694958);
		881	}
		882	}
		883	} else {
		884	if (d3) {
		885	if (d1) {
		886	/* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
		887	z5 = d1 + d3;
		888	tmp3 = MULTIPLY(d1, FIX_0_211164243);
		889	tmp2 = MULTIPLY(-d3, FIX_1_451774981);
		890	z1 = MULTIPLY(d1, FIX_1_061594337);
		891	z2 = MULTIPLY(-d3, FIX_2_172734803);
		892	z4 = MULTIPLY(z5, FIX_0_785694958);
		893	z5 = MULTIPLY(z5, FIX_1_175875602);
		894
		895	tmp0 = z1 - z4;
		896	tmp1 = z2 + z4;
		897	tmp2 += z5;
		898	tmp3 += z5;
		899	} else {
		900	/* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
		901	tmp0 = MULTIPLY(-d3, FIX_0_785694958);
		902	tmp1 = MULTIPLY(-d3, FIX_1_387039845);
		903	tmp2 = MULTIPLY(-d3, FIX_0_275899380);
		904	tmp3 = MULTIPLY(d3, FIX_1_175875602);
		905	}
		906	} else {
		907	if (d1) {
		908	/* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
		909	tmp0 = MULTIPLY(d1, FIX_0_275899380);
		910	tmp1 = MULTIPLY(d1, FIX_0_785694958);
		911	tmp2 = MULTIPLY(d1, FIX_1_175875602);
		912	tmp3 = MULTIPLY(d1, FIX_1_387039845);
		913	} else {
		914	/* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
		915	tmp0 = tmp1 = tmp2 = tmp3 = 0;
		916	}
		917	}
		918	}
		919	}
		920
		921	/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
		922
		923	dataptr[DCTSIZE*0] = (int16_t) DESCALE(tmp10 + tmp3,
		924	CONST_BITS+PASS1_BITS+3);
		925	dataptr[DCTSIZE*7] = (int16_t) DESCALE(tmp10 - tmp3,
		926	CONST_BITS+PASS1_BITS+3);
		927	dataptr[DCTSIZE*1] = (int16_t) DESCALE(tmp11 + tmp2,
		928	CONST_BITS+PASS1_BITS+3);
		929	dataptr[DCTSIZE*6] = (int16_t) DESCALE(tmp11 - tmp2,
		930	CONST_BITS+PASS1_BITS+3);
		931	dataptr[DCTSIZE*2] = (int16_t) DESCALE(tmp12 + tmp1,
		932	CONST_BITS+PASS1_BITS+3);
		933	dataptr[DCTSIZE*5] = (int16_t) DESCALE(tmp12 - tmp1,
		934	CONST_BITS+PASS1_BITS+3);
		935	dataptr[DCTSIZE*3] = (int16_t) DESCALE(tmp13 + tmp0,
		936	CONST_BITS+PASS1_BITS+3);
		937	dataptr[DCTSIZE*4] = (int16_t) DESCALE(tmp13 - tmp0,
		938	CONST_BITS+PASS1_BITS+3);
		939
		940	dataptr++; /* advance pointer to next column */
		941	}
		942	}
		943
		944	#undef DCTSIZE
		945	#define DCTSIZE 4
		946	#define DCTSTRIDE 8
		947
		948	void ff_j_rev_dct4(DCTBLOCK data)
		949	{
		950	int32_t tmp0, tmp1, tmp2, tmp3;
		951	int32_t tmp10, tmp11, tmp12, tmp13;
		952	int32_t z1;
		953	int32_t d0, d2, d4, d6;
		954	register int16_t *dataptr;
		955	int rowctr;
		956
		957	/* Pass 1: process rows. */
		958	/* Note results are scaled up by sqrt(8) compared to a true IDCT; */
		959	/* furthermore, we scale the results by 2*PASS1_BITS. /
		960
		961	data[0] += 4;
		962
		963	dataptr = data;
		964
		965	for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
		966	/* Due to quantization, we will usually find that many of the input
		967	* coefficients are zero, especially the AC terms. We can exploit this
		968	* by short-circuiting the IDCT calculation for any row in which all
		969	* the AC terms are zero. In that case each output is equal to the
		970	* DC coefficient (with scale factor as needed).
		971	* With typical images and quantization tables, half or more of the
		972	* row DCT calculations can be simplified this way.
		973	*/
		974
		975	register int idataptr = (int)dataptr;
		976
		977	d0 = dataptr[0];
		978	d2 = dataptr[1];
		979	d4 = dataptr[2];
		980	d6 = dataptr[3];
		981
		982	if ((d2 \| d4 \| d6) == 0) {
		983	/* AC terms all zero */
		984	if (d0) {
		985	/* Compute a 32 bit value to assign. */
		986	int16_t dcval = (int16_t) (d0 << PASS1_BITS);
		987	register int v = (dcval & 0xffff) \| ((dcval << 16) & 0xffff0000);
		988
		989	idataptr[0] = v;
		990	idataptr[1] = v;
		991	}
		992
		993	dataptr += DCTSTRIDE; /* advance pointer to next row */
		994	continue;
		995	}
		996
		997	/* Even part: reverse the even part of the forward DCT. */
		998	/* The rotator is sqrt(2)c(-6). /
		999	if (d6) {
		1000	if (d2) {
		1001	/* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
		1002	z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
		1003	tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
		1004	tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
		1005
		1006	tmp0 = (d0 + d4) << CONST_BITS;
		1007	tmp1 = (d0 - d4) << CONST_BITS;
		1008
		1009	tmp10 = tmp0 + tmp3;
		1010	tmp13 = tmp0 - tmp3;
		1011	tmp11 = tmp1 + tmp2;
		1012	tmp12 = tmp1 - tmp2;
		1013	} else {
		1014	/* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
		1015	tmp2 = MULTIPLY(-d6, FIX_1_306562965);
		1016	tmp3 = MULTIPLY(d6, FIX_0_541196100);
		1017
		1018	tmp0 = (d0 + d4) << CONST_BITS;
		1019	tmp1 = (d0 - d4) << CONST_BITS;
		1020
		1021	tmp10 = tmp0 + tmp3;
		1022	tmp13 = tmp0 - tmp3;
		1023	tmp11 = tmp1 + tmp2;
		1024	tmp12 = tmp1 - tmp2;
		1025	}
		1026	} else {
		1027	if (d2) {
		1028	/* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
		1029	tmp2 = MULTIPLY(d2, FIX_0_541196100);
		1030	tmp3 = MULTIPLY(d2, FIX_1_306562965);
		1031
		1032	tmp0 = (d0 + d4) << CONST_BITS;
		1033	tmp1 = (d0 - d4) << CONST_BITS;
		1034
		1035	tmp10 = tmp0 + tmp3;
		1036	tmp13 = tmp0 - tmp3;
		1037	tmp11 = tmp1 + tmp2;
		1038	tmp12 = tmp1 - tmp2;
		1039	} else {
		1040	/* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
		1041	tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
		1042	tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
		1043	}
		1044	}
		1045
		1046	/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
		1047
		1048	dataptr[0] = (int16_t) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
		1049	dataptr[1] = (int16_t) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
		1050	dataptr[2] = (int16_t) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
		1051	dataptr[3] = (int16_t) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
		1052
		1053	dataptr += DCTSTRIDE; /* advance pointer to next row */
		1054	}
		1055
		1056	/* Pass 2: process columns. */
		1057	/* Note that we must descale the results by a factor of 8 == 2*3, /
		1058	/* and also undo the PASS1_BITS scaling. */
		1059
		1060	dataptr = data;
		1061	for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
		1062	/* Columns of zeroes can be exploited in the same way as we did with rows.
		1063	* However, the row calculation has created many nonzero AC terms, so the
		1064	* simplification applies less often (typically 5% to 10% of the time).
		1065	* On machines with very fast multiplication, it's possible that the
		1066	* test takes more time than it's worth. In that case this section
		1067	* may be commented out.
		1068	*/
		1069
		1070	d0 = dataptr[DCTSTRIDE*0];
		1071	d2 = dataptr[DCTSTRIDE*1];
		1072	d4 = dataptr[DCTSTRIDE*2];
		1073	d6 = dataptr[DCTSTRIDE*3];
		1074
		1075	/* Even part: reverse the even part of the forward DCT. */
		1076	/* The rotator is sqrt(2)c(-6). /
		1077	if (d6) {
		1078	if (d2) {
		1079	/* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
		1080	z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
		1081	tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
		1082	tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
		1083
		1084	tmp0 = (d0 + d4) << CONST_BITS;
		1085	tmp1 = (d0 - d4) << CONST_BITS;
		1086
		1087	tmp10 = tmp0 + tmp3;
		1088	tmp13 = tmp0 - tmp3;
		1089	tmp11 = tmp1 + tmp2;
		1090	tmp12 = tmp1 - tmp2;
		1091	} else {
		1092	/* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
		1093	tmp2 = MULTIPLY(-d6, FIX_1_306562965);
		1094	tmp3 = MULTIPLY(d6, FIX_0_541196100);
		1095
		1096	tmp0 = (d0 + d4) << CONST_BITS;
		1097	tmp1 = (d0 - d4) << CONST_BITS;
		1098
		1099	tmp10 = tmp0 + tmp3;
		1100	tmp13 = tmp0 - tmp3;
		1101	tmp11 = tmp1 + tmp2;
		1102	tmp12 = tmp1 - tmp2;
		1103	}
		1104	} else {
		1105	if (d2) {
		1106	/* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
		1107	tmp2 = MULTIPLY(d2, FIX_0_541196100);
		1108	tmp3 = MULTIPLY(d2, FIX_1_306562965);
		1109
		1110	tmp0 = (d0 + d4) << CONST_BITS;
		1111	tmp1 = (d0 - d4) << CONST_BITS;
		1112
		1113	tmp10 = tmp0 + tmp3;
		1114	tmp13 = tmp0 - tmp3;
		1115	tmp11 = tmp1 + tmp2;
		1116	tmp12 = tmp1 - tmp2;
		1117	} else {
		1118	/* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
		1119	tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
		1120	tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
		1121	}
		1122	}
		1123
		1124	/* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
		1125
		1126	dataptr[DCTSTRIDE*0] = tmp10 >> (CONST_BITS+PASS1_BITS+3);
		1127	dataptr[DCTSTRIDE*1] = tmp11 >> (CONST_BITS+PASS1_BITS+3);
		1128	dataptr[DCTSTRIDE*2] = tmp12 >> (CONST_BITS+PASS1_BITS+3);
		1129	dataptr[DCTSTRIDE*3] = tmp13 >> (CONST_BITS+PASS1_BITS+3);
		1130
		1131	dataptr++; /* advance pointer to next column */
		1132	}
		1133	}
		1134
		1135	void ff_j_rev_dct2(DCTBLOCK data){
		1136	int d00, d01, d10, d11;
		1137
		1138	data[0] += 4;
		1139	d00 = data[0+0DCTSTRIDE] + data[1+0DCTSTRIDE];
		1140	d01 = data[0+0DCTSTRIDE] - data[1+0DCTSTRIDE];
		1141	d10 = data[0+1DCTSTRIDE] + data[1+1DCTSTRIDE];
		1142	d11 = data[0+1DCTSTRIDE] - data[1+1DCTSTRIDE];
		1143
		1144	data[0+0*DCTSTRIDE]= (d00 + d10)>>3;
		1145	data[1+0*DCTSTRIDE]= (d01 + d11)>>3;
		1146	data[0+1*DCTSTRIDE]= (d00 - d10)>>3;
		1147	data[1+1*DCTSTRIDE]= (d01 - d11)>>3;
		1148	}
		1149
		1150	void ff_j_rev_dct1(DCTBLOCK data){
		1151	data[0] = (data[0] + 4)>>3;
		1152	}
		1153
		1154	#undef FIX
		1155	#undef CONST_BITS

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/ffmpeg/libavcodec/jrevdct.c – Rev 4349