Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
/*
2
 * DSP utils
3
 * Copyright (c) 2000, 2001 Fabrice Bellard
4
 * Copyright (c) 2002-2004 Michael Niedermayer 
5
 *
6
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer 
7
 *
8
 * This file is part of FFmpeg.
9
 *
10
 * FFmpeg is free software; you can redistribute it and/or
11
 * modify it under the terms of the GNU Lesser General Public
12
 * License as published by the Free Software Foundation; either
13
 * version 2.1 of the License, or (at your option) any later version.
14
 *
15
 * FFmpeg is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18
 * Lesser General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Lesser General Public
21
 * License along with FFmpeg; if not, write to the Free Software
22
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23
 */
24
 
25
/**
26
 * @file
27
 * DSP utils
28
 */
29
 
30
#include "libavutil/attributes.h"
31
#include "libavutil/imgutils.h"
32
#include "libavutil/internal.h"
33
#include "avcodec.h"
34
#include "copy_block.h"
35
#include "dct.h"
36
#include "dsputil.h"
37
#include "simple_idct.h"
38
#include "faandct.h"
39
#include "faanidct.h"
40
#include "imgconvert.h"
41
#include "mathops.h"
42
#include "mpegvideo.h"
43
#include "config.h"
44
#include "diracdsp.h"
45
 
46
/* Squared-value lookup table, indexed as ff_squareTbl[256 + v];
 * zero-initialized here and presumably filled during DSP init —
 * the initialization code is not visible in this chunk. */
uint32_t ff_squareTbl[512] = {0, };
47
 
48
#define BIT_DEPTH 16
49
#include "dsputil_template.c"
50
#undef BIT_DEPTH
51
 
52
#define BIT_DEPTH 8
53
#include "dsputil_template.c"
54
 
55
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
56
/* Per-byte constants replicated across the native word:
 * 0x7f7f7f7f (or 0x7f7f7f7f7f7f7f7f on 64-bit), etc. */
#define pb_7f (~0UL / 255 * 0x7f)
#define pb_80 (~0UL / 255 * 0x80)
58
 
59
/* Specific zigzag scan for 248 idct. NOTE that unlike the
60
   specification, we interleave the fields */
61
/* Specific zigzag scan for 2-4-8 IDCT. NOTE: unlike the specification,
 * the fields are interleaved here. (Table re-emitted to strip the
 * interleaved source-viewer line-number artifacts.) */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
71
 
72
/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
73
DECLARE_ALIGNED(16, uint16_t, ff_inv_zigzag_direct16)[64];
74
 
75
/* Alternate (horizontal) coefficient scan order.
 * (Re-emitted to strip interleaved source-viewer line-number artifacts.) */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
85
 
86
/* Alternate (vertical) coefficient scan order.
 * (Re-emitted to strip interleaved source-viewer line-number artifacts.) */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
96
 
97
/* Input permutation for the simple_idct_mmx */
98
/* Input permutation for simple_idct_mmx (FF_SIMPLE_IDCT_PERM).
 * (Re-emitted to strip interleaved source-viewer line-number artifacts.) */
static const uint8_t simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
108
 
109
/* Per-row column permutation used to build FF_SSE2_IDCT_PERM below. */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
110
 
111
/**
 * Initialize a ScanTable: apply the IDCT coefficient permutation to the
 * scan order and precompute, for each scan position, the highest
 * permutated index seen so far (raster_end).
 */
av_cold void ff_init_scantable(uint8_t *permutation, ScanTable *st,
                               const uint8_t *src_scantable)
{
    int i, max;

    st->scantable = src_scantable;

    /* Permute the scan order through the IDCT input permutation. */
    for (i = 0; i < 64; i++)
        st->permutated[i] = permutation[src_scantable[i]];

    /* raster_end[i] = max permutated index over positions 0..i. */
    max = -1;
    for (i = 0; i < 64; i++) {
        if (st->permutated[i] > max)
            max = st->permutated[i];
        st->raster_end[i] = max;
    }
}
133
 
134
/**
 * Fill idct_permutation[0..63] with the coefficient reordering that the
 * selected IDCT implementation expects; logs an error for unknown types.
 */
av_cold void ff_init_scantable_permutation(uint8_t *idct_permutation,
                                           int idct_permutation_type)
{
    int n;

    switch (idct_permutation_type) {
    case FF_NO_IDCT_PERM:           /* identity */
        for (n = 0; n < 64; n++)
            idct_permutation[n] = n;
        break;
    case FF_LIBMPEG2_IDCT_PERM:     /* swap bit layout of the column index */
        for (n = 0; n < 64; n++)
            idct_permutation[n] = (n & 0x38) | ((n & 6) >> 1) | ((n & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:       /* table-driven, see simple_mmx_permutation */
        for (n = 0; n < 64; n++)
            idct_permutation[n] = simple_mmx_permutation[n];
        break;
    case FF_TRANSPOSE_IDCT_PERM:    /* 8x8 transpose */
        for (n = 0; n < 64; n++)
            idct_permutation[n] = ((n & 7) << 3) | (n >> 3);
        break;
    case FF_PARTTRANS_IDCT_PERM:    /* partial transpose of low bit pairs */
        for (n = 0; n < 64; n++)
            idct_permutation[n] = (n & 0x24) | ((n & 3) << 3) | ((n >> 3) & 3);
        break;
    case FF_SSE2_IDCT_PERM:         /* row kept, columns via idct_sse2_row_perm */
        for (n = 0; n < 64; n++)
            idct_permutation[n] = (n & 0x38) | idct_sse2_row_perm[n & 7];
        break;
    default:
        av_log(NULL, AV_LOG_ERROR,
               "Internal error, IDCT permutation not set\n");
    }
}
168
 
169
/* Sum of all 256 pixel values of a 16x16 block at stride line_size. */
static int pix_sum_c(uint8_t *pix, int line_size)
{
    int row, col, total = 0;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
190
 
191
static int pix_norm1_c(uint8_t * pix, int line_size)
192
{
193
    int s, i, j;
194
    uint32_t *sq = ff_squareTbl + 256;
195
 
196
    s = 0;
197
    for (i = 0; i < 16; i++) {
198
        for (j = 0; j < 16; j += 8) {
199
#if 0
200
            s += sq[pix[0]];
201
            s += sq[pix[1]];
202
            s += sq[pix[2]];
203
            s += sq[pix[3]];
204
            s += sq[pix[4]];
205
            s += sq[pix[5]];
206
            s += sq[pix[6]];
207
            s += sq[pix[7]];
208
#else
209
#if HAVE_FAST_64BIT
210
            register uint64_t x=*(uint64_t*)pix;
211
            s += sq[x&0xff];
212
            s += sq[(x>>8)&0xff];
213
            s += sq[(x>>16)&0xff];
214
            s += sq[(x>>24)&0xff];
215
            s += sq[(x>>32)&0xff];
216
            s += sq[(x>>40)&0xff];
217
            s += sq[(x>>48)&0xff];
218
            s += sq[(x>>56)&0xff];
219
#else
220
            register uint32_t x=*(uint32_t*)pix;
221
            s += sq[x&0xff];
222
            s += sq[(x>>8)&0xff];
223
            s += sq[(x>>16)&0xff];
224
            s += sq[(x>>24)&0xff];
225
            x=*(uint32_t*)(pix+4);
226
            s += sq[x&0xff];
227
            s += sq[(x>>8)&0xff];
228
            s += sq[(x>>16)&0xff];
229
            s += sq[(x>>24)&0xff];
230
#endif
231
#endif
232
            pix += 8;
233
        }
234
        pix += line_size - 16;
235
    }
236
    return s;
237
}
238
 
239
/* Byte-swap w 32-bit words from src into dst (may be the same buffer).
 * Fix: the tail-loop header was garbled in this copy ("for(;i") —
 * restored to the upstream form "for (; i < w; i++)". */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w)
{
    int i;

    /* Main loop: eight words per iteration. */
    for (i = 0; i + 8 <= w; i += 8) {
        dst[i + 0] = av_bswap32(src[i + 0]);
        dst[i + 1] = av_bswap32(src[i + 1]);
        dst[i + 2] = av_bswap32(src[i + 2]);
        dst[i + 3] = av_bswap32(src[i + 3]);
        dst[i + 4] = av_bswap32(src[i + 4]);
        dst[i + 5] = av_bswap32(src[i + 5]);
        dst[i + 6] = av_bswap32(src[i + 6]);
        dst[i + 7] = av_bswap32(src[i + 7]);
    }
    /* Tail: remaining 0..7 words. */
    for (; i < w; i++)
        dst[i + 0] = av_bswap32(src[i + 0]);
}
256
 
257
/* Byte-swap len 16-bit values from src into dst. */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    const uint16_t *end = src + len;

    while (src < end)
        *dst++ = av_bswap16(*src++);
}
262
 
263
static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
264
{
265
    int s, i;
266
    uint32_t *sq = ff_squareTbl + 256;
267
 
268
    s = 0;
269
    for (i = 0; i < h; i++) {
270
        s += sq[pix1[0] - pix2[0]];
271
        s += sq[pix1[1] - pix2[1]];
272
        s += sq[pix1[2] - pix2[2]];
273
        s += sq[pix1[3] - pix2[3]];
274
        pix1 += line_size;
275
        pix2 += line_size;
276
    }
277
    return s;
278
}
279
 
280
static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
281
{
282
    int s, i;
283
    uint32_t *sq = ff_squareTbl + 256;
284
 
285
    s = 0;
286
    for (i = 0; i < h; i++) {
287
        s += sq[pix1[0] - pix2[0]];
288
        s += sq[pix1[1] - pix2[1]];
289
        s += sq[pix1[2] - pix2[2]];
290
        s += sq[pix1[3] - pix2[3]];
291
        s += sq[pix1[4] - pix2[4]];
292
        s += sq[pix1[5] - pix2[5]];
293
        s += sq[pix1[6] - pix2[6]];
294
        s += sq[pix1[7] - pix2[7]];
295
        pix1 += line_size;
296
        pix2 += line_size;
297
    }
298
    return s;
299
}
300
 
301
static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
302
{
303
    int s, i;
304
    uint32_t *sq = ff_squareTbl + 256;
305
 
306
    s = 0;
307
    for (i = 0; i < h; i++) {
308
        s += sq[pix1[ 0] - pix2[ 0]];
309
        s += sq[pix1[ 1] - pix2[ 1]];
310
        s += sq[pix1[ 2] - pix2[ 2]];
311
        s += sq[pix1[ 3] - pix2[ 3]];
312
        s += sq[pix1[ 4] - pix2[ 4]];
313
        s += sq[pix1[ 5] - pix2[ 5]];
314
        s += sq[pix1[ 6] - pix2[ 6]];
315
        s += sq[pix1[ 7] - pix2[ 7]];
316
        s += sq[pix1[ 8] - pix2[ 8]];
317
        s += sq[pix1[ 9] - pix2[ 9]];
318
        s += sq[pix1[10] - pix2[10]];
319
        s += sq[pix1[11] - pix2[11]];
320
        s += sq[pix1[12] - pix2[12]];
321
        s += sq[pix1[13] - pix2[13]];
322
        s += sq[pix1[14] - pix2[14]];
323
        s += sq[pix1[15] - pix2[15]];
324
 
325
        pix1 += line_size;
326
        pix2 += line_size;
327
    }
328
    return s;
329
}
330
 
331
/* Write the per-pixel difference of two 8x8 blocks (s1 - s2) into
 * a 64-element int16 block in raster order. */
static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            block[col] = s1[col] - s2[col];
        s1 += stride;
        s2 += stride;
        block += 8;
    }
}
350
 
351
/* Store an 8x8 int16 block into pixels, clamping each value to [0,255]. */
static void put_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
                                 int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = av_clip_uint8(block[col]);
        pixels += line_size;
        block += 8;
    }
}
371
 
372
/* Store the top-left 4x4 of an 8-wide int16 block, clamped to [0,255].
 * Note: block still advances by 8 per row (8-wide storage layout). */
static void put_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
                                 int line_size)
{
    int row, col;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 4; col++)
            pixels[col] = av_clip_uint8(block[col]);
        pixels += line_size;
        block += 8;
    }
}
388
 
389
/* Store the top-left 2x2 of an 8-wide int16 block, clamped to [0,255].
 * Note: block still advances by 8 per row (8-wide storage layout). */
static void put_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
                                 int line_size)
{
    int row, col;

    for (row = 0; row < 2; row++) {
        for (col = 0; col < 2; col++)
            pixels[col] = av_clip_uint8(block[col]);
        pixels += line_size;
        block += 8;
    }
}
403
 
404
static void put_signed_pixels_clamped_c(const int16_t *block,
405
                                        uint8_t *av_restrict pixels,
406
                                        int line_size)
407
{
408
    int i, j;
409
 
410
    for (i = 0; i < 8; i++) {
411
        for (j = 0; j < 8; j++) {
412
            if (*block < -128)
413
                *pixels = 0;
414
            else if (*block > 127)
415
                *pixels = 255;
416
            else
417
                *pixels = (uint8_t)(*block + 128);
418
            block++;
419
            pixels++;
420
        }
421
        pixels += (line_size - 8);
422
    }
423
}
424
 
425
/* Add an 8x8 int16 block onto pixels WITHOUT clamping (wraps modulo 256). */
static void add_pixels8_c(uint8_t *av_restrict pixels,
                          int16_t *block,
                          int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] += block[col];
        pixels += line_size;
        block += 8;
    }
}
444
 
445
/* Add an 8x8 int16 block onto pixels, clamping each sum to [0,255]. */
static void add_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
                                 int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = av_clip_uint8(pixels[col] + block[col]);
        pixels += line_size;
        block += 8;
    }
}
464
 
465
/* Add the top-left 4x4 of an 8-wide int16 block onto pixels, clamped.
 * Note: block still advances by 8 per row (8-wide storage layout). */
static void add_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
                          int line_size)
{
    int row, col;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 4; col++)
            pixels[col] = av_clip_uint8(pixels[col] + block[col]);
        pixels += line_size;
        block += 8;
    }
}
480
 
481
/* Add the top-left 2x2 of an 8-wide int16 block onto pixels, clamped.
 * Note: block still advances by 8 per row (8-wide storage layout). */
static void add_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
                          int line_size)
{
    int row, col;

    for (row = 0; row < 2; row++) {
        for (col = 0; col < 2; col++)
            pixels[col] = av_clip_uint8(pixels[col] + block[col]);
        pixels += line_size;
        block += 8;
    }
}
494
 
495
/* Sum of absolute values of all 64 DCT coefficients. */
static int sum_abs_dctelem_c(int16_t *block)
{
    int idx, total = 0;

    for (idx = 0; idx < 64; idx++)
        total += FFABS(block[idx]);
    return total;
}
502
 
503
/* Fill a 16-wide block of height h with a constant byte value. */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int y;

    for (y = 0; y < h; y++, row += line_size)
        memset(row, value, 16);
}
512
 
513
/* Fill an 8-wide block of height h with a constant byte value. */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int y;

    for (y = 0; y < h; y++, row += line_size)
        memset(row, value, 8);
}
522
 
523
/* Rounded averages of 2 / 4 values.
 * Fix: parenthesize macro arguments (hygiene) so operator-precedence of a
 * caller expression cannot change the result. */
#define avg2(a, b)       (((a) + (b) + 1) >> 1)
#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)
525
 
526
/* One-warp-point GMC: bilinear interpolation of an 8-pixel-wide strip with
 * 1/16-pel fractional offsets (x16, y16) and 8-bit rounding.
 * Fix: the loop header was garbled in this copy ("for(i=0; i") —
 * restored to the upstream form "for (i = 0; i < h; i++)". */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h,
                   int x16, int y16, int rounder)
{
    /* Bilinear weights; A+B+C+D == 256. */
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);
    int i;

    for (i = 0; i < h; i++) {
        dst[0] = (A * src[0] + B * src[1] + C * src[stride + 0] + D * src[stride + 1] + rounder) >> 8;
        dst[1] = (A * src[1] + B * src[2] + C * src[stride + 1] + D * src[stride + 2] + rounder) >> 8;
        dst[2] = (A * src[2] + B * src[3] + C * src[stride + 2] + D * src[stride + 3] + rounder) >> 8;
        dst[3] = (A * src[3] + B * src[4] + C * src[stride + 3] + D * src[stride + 4] + rounder) >> 8;
        dst[4] = (A * src[4] + B * src[5] + C * src[stride + 4] + D * src[stride + 5] + rounder) >> 8;
        dst[5] = (A * src[5] + B * src[6] + C * src[stride + 5] + D * src[stride + 6] + rounder) >> 8;
        dst[6] = (A * src[6] + B * src[7] + C * src[stride + 6] + D * src[stride + 7] + rounder) >> 8;
        dst[7] = (A * src[7] + B * src[8] + C * src[stride + 7] + D * src[stride + 8] + rounder) >> 8;
        dst += stride;
        src += stride;
    }
}
548
 
549
/* Global motion compensation with an affine vector field: for each output
 * pixel the source position is computed from (ox, oy) advanced by the
 * (dxx, dxy, dyx, dyy) derivatives, sampled bilinearly with edge clamping.
 * Fix: two lines were garbled in this copy ("const int s= 1<" and
 * "for(y=0; y") — restored to the upstream forms "1 << shift" and
 * "y < h". */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r,
              int width, int height)
{
    int y, vx, vy;
    const int s = 1 << shift;   /* sub-pel scale */

    width--;                    /* convert to max valid coordinate */
    height--;

    for (y = 0; y < h; y++) {
        int x;

        vx = ox;
        vy = oy;
        for (x = 0; x < 8; x++) { // XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x  = vx >> 16;
            src_y  = vy >> 16;
            frac_x = src_x & (s - 1);
            frac_y = src_y & (s - 1);
            src_x >>= shift;
            src_y >>= shift;

            if ((unsigned) src_x < width) {
                if ((unsigned) src_y < height) {
                    /* Fully inside: bilinear interpolation. */
                    index = src_x + src_y * stride;
                    dst[y * stride + x] = (  (  src[index         ] * (s - frac_x)
                                              + src[index       + 1] *      frac_x ) * (s - frac_y)
                                           + (  src[index + stride  ] * (s - frac_x)
                                              + src[index + stride + 1] *  frac_x ) *      frac_y
                                           + r) >> (shift * 2);
                } else {
                    /* Clamp vertically; interpolate horizontally only. */
                    index = src_x + av_clip(src_y, 0, height) * stride;
                    dst[y * stride + x] = ( (  src[index    ] * (s - frac_x)
                                             + src[index + 1] *      frac_x ) * s
                                           + r) >> (shift * 2);
                }
            } else {
                if ((unsigned) src_y < height) {
                    /* Clamp horizontally; interpolate vertically only. */
                    index = av_clip(src_x, 0, width) + src_y * stride;
                    dst[y * stride + x] = (  (  src[index         ] * (s - frac_y)
                                              + src[index + stride] *      frac_y ) * s
                                           + r) >> (shift * 2);
                } else {
                    /* Clamp both: nearest edge pixel. */
                    index = av_clip(src_x, 0, width) + av_clip(src_y, 0, height) * stride;
                    dst[y * stride + x] = src[index];
                }
            }

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
606
 
607
/* Full-pel copy: dispatch to the width-specific 8-bit copy routine;
 * unsupported widths are ignored, matching the original switch. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
}
615
 
616
/* Third-pel MC (1,0): horizontal 2:1 weighted average of src[x], src[x+1]. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
626
 
627
/* Third-pel MC (2,0): horizontal 1:2 weighted average of src[x], src[x+1]. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
637
 
638
/* Third-pel MC (0,1): vertical 2:1 weighted average of src[x], src[x+stride]. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
648
 
649
/* Third-pel MC (1,1): 2-D weighted average (weights 4/3/3/2). */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (4 * src[x] + 3 * src[x + 1] + 3 * src[x + stride] + 2 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
659
 
660
/* Third-pel MC (1,2): 2-D weighted average (weights 3/2/4/3). */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x] + 2 * src[x + 1] + 4 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
670
 
671
/* Third-pel MC (0,2): vertical 1:2 weighted average of src[x], src[x+stride]. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
681
 
682
/* Third-pel MC (2,1): 2-D weighted average (weights 3/4/2/3). */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x] + 4 * src[x + 1] + 2 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
692
 
693
/* Third-pel MC (2,2): 2-D weighted average (weights 2/3/3/4). */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (2 * src[x] + 3 * src[x + 1] + 3 * src[x + stride] + 4 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
703
 
704
/* Full-pel averaging copy: dispatch to the width-specific 8-bit averager;
 * unsupported widths are ignored, matching the original switch. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
}
712
 
713
/* Third-pel MC (1,0) with rounded averaging into the existing dst. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (2 * src[x] + src[x + 1] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
723
 
724
/* Third-pel MC (2,0) with rounded averaging into the existing dst. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (src[x] + 2 * src[x + 1] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
734
 
735
/* Third-pel MC (0,1) with rounded averaging into the existing dst. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (2 * src[x] + src[x + stride] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
745
 
746
/* Third-pel MC (1,1) with rounded averaging into the existing dst. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (4 * src[x] + 3 * src[x + 1] + 3 * src[x + stride] + 2 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
756
 
757
/* Third-pel MC (1,2) with rounded averaging into the existing dst. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (3 * src[x] + 2 * src[x + 1] + 4 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
767
 
768
/* Third-pel MC (0,2) with rounded averaging into the existing dst. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (src[x] + 2 * src[x + stride] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
778
 
779
/* Third-pel MC (2,1) with rounded averaging into the existing dst. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (3 * src[x] + 4 * src[x + 1] + 2 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
789
 
790
/* Third-pel MC (2,2) with rounded averaging into the existing dst. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (2 * src[x] + 3 * src[x + 1] + 3 * src[x + stride] + 4 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
800
 
801
#define QPEL_MC(r, OPNAME, RND, OP) \
802
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
803
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
804
    int i;\
805
    for(i=0; i
806
    {\
807
        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
808
        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
809
        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
810
        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
811
        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
812
        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
813
        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
814
        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
815
        dst+=dstStride;\
816
        src+=srcStride;\
817
    }\
818
}\
819
\
820
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
821
    const int w=8;\
822
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
823
    int i;\
824
    for(i=0; i
825
    {\
826
        const int src0= src[0*srcStride];\
827
        const int src1= src[1*srcStride];\
828
        const int src2= src[2*srcStride];\
829
        const int src3= src[3*srcStride];\
830
        const int src4= src[4*srcStride];\
831
        const int src5= src[5*srcStride];\
832
        const int src6= src[6*srcStride];\
833
        const int src7= src[7*srcStride];\
834
        const int src8= src[8*srcStride];\
835
        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
836
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
837
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
838
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
839
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
840
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
841
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
842
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
843
        dst++;\
844
        src++;\
845
    }\
846
}\
847
\
848
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
849
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
850
    int i;\
851
    \
852
    for(i=0; i
853
    {\
854
        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
855
        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
856
        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
857
        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
858
        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
859
        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
860
        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
861
        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
862
        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
863
        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
864
        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
865
        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
866
        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
867
        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
868
        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
869
        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
870
        dst+=dstStride;\
871
        src+=srcStride;\
872
    }\
873
}\
874
\
875
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
876
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
877
    int i;\
878
    const int w=16;\
879
    for(i=0; i
880
    {\
881
        const int src0= src[0*srcStride];\
882
        const int src1= src[1*srcStride];\
883
        const int src2= src[2*srcStride];\
884
        const int src3= src[3*srcStride];\
885
        const int src4= src[4*srcStride];\
886
        const int src5= src[5*srcStride];\
887
        const int src6= src[6*srcStride];\
888
        const int src7= src[7*srcStride];\
889
        const int src8= src[8*srcStride];\
890
        const int src9= src[9*srcStride];\
891
        const int src10= src[10*srcStride];\
892
        const int src11= src[11*srcStride];\
893
        const int src12= src[12*srcStride];\
894
        const int src13= src[13*srcStride];\
895
        const int src14= src[14*srcStride];\
896
        const int src15= src[15*srcStride];\
897
        const int src16= src[16*srcStride];\
898
        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
899
        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
900
        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
901
        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
902
        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
903
        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
904
        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
905
        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
906
        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
907
        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
908
        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
909
        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
910
        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
911
        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
912
        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
913
        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
914
        dst++;\
915
        src++;\
916
    }\
917
}\
918
\
919
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
920
{\
921
    uint8_t half[64];\
922
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
923
    OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
924
}\
925
\
926
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
927
{\
928
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
929
}\
930
\
931
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
932
{\
933
    uint8_t half[64];\
934
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
935
    OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
936
}\
937
\
938
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
939
{\
940
    uint8_t full[16*9];\
941
    uint8_t half[64];\
942
    copy_block9(full, src, 16, stride, 9);\
943
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
944
    OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
945
}\
946
\
947
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
948
{\
949
    uint8_t full[16*9];\
950
    copy_block9(full, src, 16, stride, 9);\
951
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
952
}\
953
\
954
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
955
{\
956
    uint8_t full[16*9];\
957
    uint8_t half[64];\
958
    copy_block9(full, src, 16, stride, 9);\
959
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
960
    OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
961
}\
962
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
963
{\
964
    uint8_t full[16*9];\
965
    uint8_t halfH[72];\
966
    uint8_t halfV[64];\
967
    uint8_t halfHV[64];\
968
    copy_block9(full, src, 16, stride, 9);\
969
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
970
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
971
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
972
    OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
973
}\
974
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
975
{\
976
    uint8_t full[16*9];\
977
    uint8_t halfH[72];\
978
    uint8_t halfHV[64];\
979
    copy_block9(full, src, 16, stride, 9);\
980
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
981
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
982
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
983
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
984
}\
985
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
986
{\
987
    uint8_t full[16*9];\
988
    uint8_t halfH[72];\
989
    uint8_t halfV[64];\
990
    uint8_t halfHV[64];\
991
    copy_block9(full, src, 16, stride, 9);\
992
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
993
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
994
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
995
    OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
996
}\
997
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
998
{\
999
    uint8_t full[16*9];\
1000
    uint8_t halfH[72];\
1001
    uint8_t halfHV[64];\
1002
    copy_block9(full, src, 16, stride, 9);\
1003
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1004
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1005
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1006
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1007
}\
1008
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1009
{\
1010
    uint8_t full[16*9];\
1011
    uint8_t halfH[72];\
1012
    uint8_t halfV[64];\
1013
    uint8_t halfHV[64];\
1014
    copy_block9(full, src, 16, stride, 9);\
1015
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1016
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1017
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1018
    OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1019
}\
1020
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1021
{\
1022
    uint8_t full[16*9];\
1023
    uint8_t halfH[72];\
1024
    uint8_t halfHV[64];\
1025
    copy_block9(full, src, 16, stride, 9);\
1026
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1027
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1028
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1029
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1030
}\
1031
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1032
{\
1033
    uint8_t full[16*9];\
1034
    uint8_t halfH[72];\
1035
    uint8_t halfV[64];\
1036
    uint8_t halfHV[64];\
1037
    copy_block9(full, src, 16, stride, 9);\
1038
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
1039
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1040
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1041
    OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1042
}\
1043
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1044
{\
1045
    uint8_t full[16*9];\
1046
    uint8_t halfH[72];\
1047
    uint8_t halfHV[64];\
1048
    copy_block9(full, src, 16, stride, 9);\
1049
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1050
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1051
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1052
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1053
}\
1054
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1055
{\
1056
    uint8_t halfH[72];\
1057
    uint8_t halfHV[64];\
1058
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1059
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1060
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1061
}\
1062
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1063
{\
1064
    uint8_t halfH[72];\
1065
    uint8_t halfHV[64];\
1066
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1067
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1068
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1069
}\
1070
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1071
{\
1072
    uint8_t full[16*9];\
1073
    uint8_t halfH[72];\
1074
    uint8_t halfV[64];\
1075
    uint8_t halfHV[64];\
1076
    copy_block9(full, src, 16, stride, 9);\
1077
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1078
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1079
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1080
    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1081
}\
1082
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1083
{\
1084
    uint8_t full[16*9];\
1085
    uint8_t halfH[72];\
1086
    copy_block9(full, src, 16, stride, 9);\
1087
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1088
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1089
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1090
}\
1091
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1092
{\
1093
    uint8_t full[16*9];\
1094
    uint8_t halfH[72];\
1095
    uint8_t halfV[64];\
1096
    uint8_t halfHV[64];\
1097
    copy_block9(full, src, 16, stride, 9);\
1098
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1099
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1100
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1101
    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1102
}\
1103
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1104
{\
1105
    uint8_t full[16*9];\
1106
    uint8_t halfH[72];\
1107
    copy_block9(full, src, 16, stride, 9);\
1108
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1109
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1110
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1111
}\
1112
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1113
{\
1114
    uint8_t halfH[72];\
1115
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1116
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1117
}\
1118
\
1119
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1120
{\
1121
    uint8_t half[256];\
1122
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1123
    OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1124
}\
1125
\
1126
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1127
{\
1128
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1129
}\
1130
\
1131
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1132
{\
1133
    uint8_t half[256];\
1134
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1135
    OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1136
}\
1137
\
1138
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1139
{\
1140
    uint8_t full[24*17];\
1141
    uint8_t half[256];\
1142
    copy_block17(full, src, 24, stride, 17);\
1143
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1144
    OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1145
}\
1146
\
1147
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1148
{\
1149
    uint8_t full[24*17];\
1150
    copy_block17(full, src, 24, stride, 17);\
1151
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1152
}\
1153
\
1154
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1155
{\
1156
    uint8_t full[24*17];\
1157
    uint8_t half[256];\
1158
    copy_block17(full, src, 24, stride, 17);\
1159
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1160
    OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1161
}\
1162
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1163
{\
1164
    uint8_t full[24*17];\
1165
    uint8_t halfH[272];\
1166
    uint8_t halfV[256];\
1167
    uint8_t halfHV[256];\
1168
    copy_block17(full, src, 24, stride, 17);\
1169
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1170
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1171
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1172
    OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1173
}\
1174
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1175
{\
1176
    uint8_t full[24*17];\
1177
    uint8_t halfH[272];\
1178
    uint8_t halfHV[256];\
1179
    copy_block17(full, src, 24, stride, 17);\
1180
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1181
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1182
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1183
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1184
}\
1185
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1186
{\
1187
    uint8_t full[24*17];\
1188
    uint8_t halfH[272];\
1189
    uint8_t halfV[256];\
1190
    uint8_t halfHV[256];\
1191
    copy_block17(full, src, 24, stride, 17);\
1192
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1193
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1194
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1195
    OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1196
}\
1197
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1198
{\
1199
    uint8_t full[24*17];\
1200
    uint8_t halfH[272];\
1201
    uint8_t halfHV[256];\
1202
    copy_block17(full, src, 24, stride, 17);\
1203
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1204
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1205
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1206
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1207
}\
1208
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1209
{\
1210
    uint8_t full[24*17];\
1211
    uint8_t halfH[272];\
1212
    uint8_t halfV[256];\
1213
    uint8_t halfHV[256];\
1214
    copy_block17(full, src, 24, stride, 17);\
1215
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1216
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1217
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1218
    OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1219
}\
1220
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1221
{\
1222
    uint8_t full[24*17];\
1223
    uint8_t halfH[272];\
1224
    uint8_t halfHV[256];\
1225
    copy_block17(full, src, 24, stride, 17);\
1226
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1227
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1228
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1229
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1230
}\
1231
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1232
{\
1233
    uint8_t full[24*17];\
1234
    uint8_t halfH[272];\
1235
    uint8_t halfV[256];\
1236
    uint8_t halfHV[256];\
1237
    copy_block17(full, src, 24, stride, 17);\
1238
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
1239
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1240
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1241
    OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1242
}\
1243
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1244
{\
1245
    uint8_t full[24*17];\
1246
    uint8_t halfH[272];\
1247
    uint8_t halfHV[256];\
1248
    copy_block17(full, src, 24, stride, 17);\
1249
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1250
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1251
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1252
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1253
}\
1254
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1255
{\
1256
    uint8_t halfH[272];\
1257
    uint8_t halfHV[256];\
1258
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1259
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1260
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1261
}\
1262
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1263
{\
1264
    uint8_t halfH[272];\
1265
    uint8_t halfHV[256];\
1266
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1267
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1268
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1269
}\
1270
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1271
{\
1272
    uint8_t full[24*17];\
1273
    uint8_t halfH[272];\
1274
    uint8_t halfV[256];\
1275
    uint8_t halfHV[256];\
1276
    copy_block17(full, src, 24, stride, 17);\
1277
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1278
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1279
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1280
    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1281
}\
1282
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1283
{\
1284
    uint8_t full[24*17];\
1285
    uint8_t halfH[272];\
1286
    copy_block17(full, src, 24, stride, 17);\
1287
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1288
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1289
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1290
}\
1291
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1292
{\
1293
    uint8_t full[24*17];\
1294
    uint8_t halfH[272];\
1295
    uint8_t halfV[256];\
1296
    uint8_t halfHV[256];\
1297
    copy_block17(full, src, 24, stride, 17);\
1298
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1299
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1300
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1301
    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1302
}\
1303
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1304
{\
1305
    uint8_t full[24*17];\
1306
    uint8_t halfH[272];\
1307
    copy_block17(full, src, 24, stride, 17);\
1308
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1309
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1310
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1311
}\
1312
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1313
{\
1314
    uint8_t halfH[272];\
1315
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1316
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1317
}
1318
 
1319
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1320
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1321
#define op_put(a, b) a = cm[((b) + 16)>>5]
1322
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1323
 
1324
QPEL_MC(0, put_       , _       , op_put)
1325
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1326
QPEL_MC(0, avg_       , _       , op_avg)
1327
//QPEL_MC(1, avg_no_rnd , _       , op_avg)
1328
#undef op_avg
1329
#undef op_avg_no_rnd
1330
#undef op_put
1331
#undef op_put_no_rnd
1332
 
1333
/**
 * Put (copy) an 8x8 pixel block; wrapper around the 8-bit template routine
 * generated by dsputil_template.c.
 */
void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    put_pixels8_8_c(dst, src, stride, 8);
}
1337
/**
 * Average an 8x8 pixel block into dst; wrapper around the 8-bit template
 * routine generated by dsputil_template.c.
 */
void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    avg_pixels8_8_c(dst, src, stride, 8);
}
1341
/**
 * Put (copy) a 16x16 pixel block; wrapper around the 8-bit template routine.
 */
void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    put_pixels16_8_c(dst, src, stride, 16);
}
1345
/**
 * Average a 16x16 pixel block into dst; wrapper around the 8-bit template
 * routine.
 */
void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    avg_pixels16_8_c(dst, src, stride, 16);
}
1349
 
1350
/* The (0,0) quarter-pel position needs no filtering: alias the mc00 entries
 * directly to the plain block copy/average helpers. */
#define put_qpel8_mc00_c  ff_put_pixels8x8_c
#define avg_qpel8_mc00_c  ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
1356
 
1357
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1358
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1359
    int i;
1360
 
1361
    for(i=0; i
1362
        dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1363
        dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1364
        dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1365
        dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1366
        dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1367
        dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1368
        dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1369
        dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1370
        dst+=dstStride;
1371
        src+=srcStride;
1372
    }
1373
}
1374
 
1375
#if CONFIG_RV40_DECODER
/* RV40 treats the (3,3) quarter-pel position as the plain xy2 (center
 * half-pel) average, so these just forward to the template helpers. */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif /* CONFIG_RV40_DECODER */
1393
 
1394
#if CONFIG_DIRAC_DECODER
/* Generate the put/avg pixel helpers used by the Dirac decoder.
 * src[] carries the candidate source pointers: the plain variants use src[0],
 * the _l2 variants average src[0] and src[1], and the _l4 variants average
 * src[0]..src[3]. The 32-wide variants are built from two 16-wide calls. */
#define DIRAC_MC(OPNAME)\
void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst   , src[0]   , stride, h);\
    OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst   , src[0]   , src[1]   , stride, stride, stride, h);\
    OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst   , src[0]   , src[1]   , src[2]   , src[3]   , stride, stride, stride, stride, stride, h);\
    OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);\
}
DIRAC_MC(put)
DIRAC_MC(avg)
#endif
1438
 
1439
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
1440
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1441
    int i;
1442
 
1443
    for(i=0; i
1444
        const int src_1= src[ -srcStride];
1445
        const int src0 = src[0          ];
1446
        const int src1 = src[  srcStride];
1447
        const int src2 = src[2*srcStride];
1448
        const int src3 = src[3*srcStride];
1449
        const int src4 = src[4*srcStride];
1450
        const int src5 = src[5*srcStride];
1451
        const int src6 = src[6*srcStride];
1452
        const int src7 = src[7*srcStride];
1453
        const int src8 = src[8*srcStride];
1454
        const int src9 = src[9*srcStride];
1455
        dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1456
        dst[1*dstStride]= cm[(9*(src1 + src2) - (src0  + src3) + 8)>>4];
1457
        dst[2*dstStride]= cm[(9*(src2 + src3) - (src1  + src4) + 8)>>4];
1458
        dst[3*dstStride]= cm[(9*(src3 + src4) - (src2  + src5) + 8)>>4];
1459
        dst[4*dstStride]= cm[(9*(src4 + src5) - (src3  + src6) + 8)>>4];
1460
        dst[5*dstStride]= cm[(9*(src5 + src6) - (src4  + src7) + 8)>>4];
1461
        dst[6*dstStride]= cm[(9*(src6 + src7) - (src5  + src8) + 8)>>4];
1462
        dst[7*dstStride]= cm[(9*(src7 + src8) - (src6  + src9) + 8)>>4];
1463
        src++;
1464
        dst++;
1465
    }
1466
}
1467
 
1468
/* mspel (1,0): average the source block with the horizontal half-pel output. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
}
1474
 
1475
/* mspel (2,0): horizontal half-pel filter straight into dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
1479
 
1480
/* mspel (3,0): average the right-shifted source with the horizontal half-pel. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
}
1486
 
1487
/* mspel (0,2): vertical half-pel filter straight into dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
1491
 
1492
/* mspel (1,2): average of the vertical half-pel (halfV) and the H-then-V
 * filtered block (halfHV). halfH is computed one row early (src-stride, 11
 * rows) so the vertical pass on it has the context rows it needs; halfH+8
 * skips that leading row. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
1502
/* mspel (3,2): like mc12 but the vertical half-pel is taken one pixel to the
 * right (src+1). */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
1512
/* mspel (2,2): horizontal half-pel followed by vertical half-pel (center). */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
1518
 
1519
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
1520
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1521
    int x;
1522
    const int strength= ff_h263_loop_filter_strength[qscale];
1523
 
1524
    for(x=0; x<8; x++){
1525
        int d1, d2, ad1;
1526
        int p0= src[x-2*stride];
1527
        int p1= src[x-1*stride];
1528
        int p2= src[x+0*stride];
1529
        int p3= src[x+1*stride];
1530
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1531
 
1532
        if     (d<-2*strength) d1= 0;
1533
        else if(d<-  strength) d1=-2*strength - d;
1534
        else if(d<   strength) d1= d;
1535
        else if(d< 2*strength) d1= 2*strength - d;
1536
        else                   d1= 0;
1537
 
1538
        p1 += d1;
1539
        p2 -= d1;
1540
        if(p1&256) p1= ~(p1>>31);
1541
        if(p2&256) p2= ~(p2>>31);
1542
 
1543
        src[x-1*stride] = p1;
1544
        src[x+0*stride] = p2;
1545
 
1546
        ad1= FFABS(d1)>>1;
1547
 
1548
        d2= av_clip((p0-p3)/4, -ad1, ad1);
1549
 
1550
        src[x-2*stride] = p0 - d2;
1551
        src[x+  stride] = p3 + d2;
1552
    }
1553
    }
1554
}
1555
 
1556
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
1557
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1558
    int y;
1559
    const int strength= ff_h263_loop_filter_strength[qscale];
1560
 
1561
    for(y=0; y<8; y++){
1562
        int d1, d2, ad1;
1563
        int p0= src[y*stride-2];
1564
        int p1= src[y*stride-1];
1565
        int p2= src[y*stride+0];
1566
        int p3= src[y*stride+1];
1567
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1568
 
1569
        if     (d<-2*strength) d1= 0;
1570
        else if(d<-  strength) d1=-2*strength - d;
1571
        else if(d<   strength) d1= d;
1572
        else if(d< 2*strength) d1= 2*strength - d;
1573
        else                   d1= 0;
1574
 
1575
        p1 += d1;
1576
        p2 -= d1;
1577
        if(p1&256) p1= ~(p1>>31);
1578
        if(p2&256) p2= ~(p2>>31);
1579
 
1580
        src[y*stride-1] = p1;
1581
        src[y*stride+0] = p2;
1582
 
1583
        ad1= FFABS(d1)>>1;
1584
 
1585
        d2= av_clip((p0-p3)/4, -ad1, ad1);
1586
 
1587
        src[y*stride-2] = p0 - d2;
1588
        src[y*stride+1] = p3 + d2;
1589
    }
1590
    }
1591
}
1592
 
1593
/* Sum of absolute differences over a 16-pixel-wide block of height h. */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        s += abs(pix1[8] - pix2[8]);
        s += abs(pix1[9] - pix2[9]);
        s += abs(pix1[10] - pix2[10]);
        s += abs(pix1[11] - pix2[11]);
        s += abs(pix1[12] - pix2[12]);
        s += abs(pix1[13] - pix2[13]);
        s += abs(pix1[14] - pix2[14]);
        s += abs(pix1[15] - pix2[15]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

/* SAD vs. a half-pel horizontally interpolated reference (avg of x and x+1). */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
        s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
        s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
        s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
        s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
        s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
        s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
        s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

/* SAD vs. a half-pel vertically interpolated reference (avg of rows y and y+1). */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
        s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
        s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
        s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
        s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
        s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
        s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
        s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
        s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

/* SAD vs. a half-pel diagonally interpolated reference (4-tap average). */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
        s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
        s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
        s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
        s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
        s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
        s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
        s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
        s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

/* Sum of absolute differences over an 8-pixel-wide block of height h. */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

/* 8-wide SAD vs. half-pel horizontally interpolated reference. */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

/* 8-wide SAD vs. half-pel vertically interpolated reference. */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

/* 8-wide SAD vs. half-pel diagonally interpolated reference. */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1794
    MpegEncContext *c = v;
1795
    int score1=0;
1796
    int score2=0;
1797
    int x,y;
1798
 
1799
    for(y=0; y
1800
        for(x=0; x<16; x++){
1801
            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
1802
        }
1803
        if(y+1
1804
            for(x=0; x<15; x++){
1805
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
1806
                             - s1[x+1] + s1[x+1+stride])
1807
                        -FFABS(  s2[x  ] - s2[x  +stride]
1808
                             - s2[x+1] + s2[x+1+stride]);
1809
            }
1810
        }
1811
        s1+= stride;
1812
        s2+= stride;
1813
    }
1814
 
1815
    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1816
    else  return score1 + FFABS(score2)*8;
1817
}
1818
 
1819
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1820
    MpegEncContext *c = v;
1821
    int score1=0;
1822
    int score2=0;
1823
    int x,y;
1824
 
1825
    for(y=0; y
1826
        for(x=0; x<8; x++){
1827
            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
1828
        }
1829
        if(y+1
1830
            for(x=0; x<7; x++){
1831
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
1832
                             - s1[x+1] + s1[x+1+stride])
1833
                        -FFABS(  s2[x  ] - s2[x  +stride]
1834
                             - s2[x+1] + s2[x+1+stride]);
1835
            }
1836
        }
1837
        s1+= stride;
1838
        s2+= stride;
1839
    }
1840
 
1841
    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1842
    else  return score1 + FFABS(score2)*8;
1843
}
1844
 
1845
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
1846
    int i;
1847
    unsigned int sum=0;
1848
 
1849
    for(i=0; i<8*8; i++){
1850
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
1851
        int w= weight[i];
1852
        b>>= RECON_SHIFT;
1853
        av_assert2(-512
1854
 
1855
        sum += (w*b)*(w*b)>>4;
1856
    }
1857
    return sum>>2;
1858
}
1859
 
1860
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
1861
    int i;
1862
 
1863
    for(i=0; i<8*8; i++){
1864
        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
1865
    }
1866
}
1867
 
1868
/* Comparison function for FF_CMP_ZERO: always reports a perfect match. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}

/* Fill cmp[0..5] with the comparison functions selected by the low byte of
 * type (an FF_CMP_* value), taken from the DSPContext's function tables. */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    memset(cmp, 0, sizeof(void*)*6);

    for(i=0; i<6; i++){
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#if CONFIG_DWT
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}

static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
1933
    long i;
1934
    for(i=0; i<=w-(int)sizeof(long); i+=sizeof(long)){
1935
        long a = *(long*)(src+i);
1936
        long b = *(long*)(dst+i);
1937
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
1938
    }
1939
    for(; i
1940
        dst[i+0] += src[i+0];
1941
}
1942
 
1943
static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w){
1944
    long i;
1945
#if !HAVE_FAST_UNALIGNED
1946
    if((long)src2 & (sizeof(long)-1)){
1947
        for(i=0; i+7
1948
            dst[i+0] = src1[i+0]-src2[i+0];
1949
            dst[i+1] = src1[i+1]-src2[i+1];
1950
            dst[i+2] = src1[i+2]-src2[i+2];
1951
            dst[i+3] = src1[i+3]-src2[i+3];
1952
            dst[i+4] = src1[i+4]-src2[i+4];
1953
            dst[i+5] = src1[i+5]-src2[i+5];
1954
            dst[i+6] = src1[i+6]-src2[i+6];
1955
            dst[i+7] = src1[i+7]-src2[i+7];
1956
        }
1957
    }else
1958
#endif
1959
    for(i=0; i<=w-(int)sizeof(long); i+=sizeof(long)){
1960
        long a = *(long*)(src1+i);
1961
        long b = *(long*)(src2+i);
1962
        *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
1963
    }
1964
    for(; i
1965
        dst[i+0] = src1[i+0]-src2[i+0];
1966
}
1967
 
1968
/* HuffYUV median prediction decode: dst[i] = median(left, top, left+top-topleft)
 * + diff[i]; running left/left_top state is read and written through the pointers. */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
        lt= src1[i];
        dst[i]= l;
    }

    *left= l;
    *left_top= lt;
}

/* HuffYUV median prediction encode: dst[i] = src2[i] - median prediction,
 * mirroring add_hfyu_median_prediction_c. */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
        lt= src1[i];
        l= src2[i];
        dst[i]= l - pred;
    }

    *left= l;
    *left_top= lt;
}

/* HuffYUV left prediction decode: running sum of src into dst starting from
 * acc; returns the final accumulator. Main loop is unrolled by two. */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int i;

    for(i=0; i<w-1; i++){
        acc+= src[i];
        dst[i]= acc;
        i++;
        acc+= src[i];
        dst[i]= acc;
    }

    for(; i<w; i++){
        acc+= src[i];
        dst[i]= acc;
    }

    return acc;
}

/* Byte offsets of the B,G,R,A components within a packed 32-bit pixel,
 * dependent on host endianness. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/* HuffYUV left prediction decode for packed BGR32: independent running sums
 * per component; the four accumulators are read and written via the pointers. */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int i;
    int r,g,b,a;
    r= *red;
    g= *green;
    b= *blue;
    a= *alpha;

    for(i=0; i<w; i++){
        b+= src[4*i+B];
        g+= src[4*i+G];
        r+= src[4*i+R];
        a+= src[4*i+A];

        dst[4*i+B]= b;
        dst[4*i+G]= g;
        dst[4*i+R]= r;
        dst[4*i+A]= a;
    }

    *red= r;
    *green= g;
    *blue= b;
    *alpha= a;
}
#undef B
#undef G
#undef R
#undef A

/* Hadamard transform helpers: BUTTERFLY2 writes sum/difference of two inputs,
 * BUTTERFLY1 does the same in place, BUTTERFLYA yields |x+y| + |x-y|. */
#define BUTTERFLY2(o1,o2,i1,i2) \
o1= (i1)+(i2);\
o2= (i1)-(i2);

#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))

/* SATD: 8x8 Hadamard transform of the src-dst difference, summing the
 * absolute transform coefficients (rows pass, then columns pass). */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    av_assert2(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}

/* Intra SATD: Hadamard transform of the source block itself; the DC
 * coefficient (mean) is subtracted from the total at the end. */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    av_assert2(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}

static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2172
    MpegEncContext * const s= (MpegEncContext *)c;
2173
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
2174
 
2175
    av_assert2(h==8);
2176
 
2177
    s->dsp.diff_pixels(temp, src1, src2, stride);
2178
    s->dsp.fdct(temp);
2179
    return s->dsp.sum_abs_dctelem(temp);
2180
}
2181
 
2182
#if CONFIG_GPL
/* One H.264-style integer 8x8 DCT pass over SRC(0..7) into DST(0..7). */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/* SAD in the H.264 8x8 transform domain: row pass in place, column pass
 * accumulating absolute values directly via the DST macro. */
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    int16_t dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif

static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2236
    MpegEncContext * const s= (MpegEncContext *)c;
2237
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
2238
    int sum=0, i;
2239
 
2240
    av_assert2(h==8);
2241
 
2242
    s->dsp.diff_pixels(temp, src1, src2, stride);
2243
    s->dsp.fdct(temp);
2244
 
2245
    for(i=0; i<64; i++)
2246
        sum= FFMAX(sum, FFABS(temp[i]));
2247
 
2248
    return sum;
2249
}
2250
 
2251
static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2252
    MpegEncContext * const s= (MpegEncContext *)c;
2253
    LOCAL_ALIGNED_16(int16_t, temp, [64*2]);
2254
    int16_t * const bak = temp+64;
2255
    int sum=0, i;
2256
 
2257
    av_assert2(h==8);
2258
    s->mb_intra=0;
2259
 
2260
    s->dsp.diff_pixels(temp, src1, src2, stride);
2261
 
2262
    memcpy(bak, temp, 64*sizeof(int16_t));
2263
 
2264
    s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2265
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
2266
    ff_simple_idct_8(temp); //FIXME
2267
 
2268
    for(i=0; i<64; i++)
2269
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
2270
 
2271
    return sum;
2272
}
2273
 
2274
static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2275
    MpegEncContext * const s= (MpegEncContext *)c;
2276
    const uint8_t *scantable= s->intra_scantable.permutated;
2277
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
2278
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
2279
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
2280
    int i, last, run, bits, level, distortion, start_i;
2281
    const int esc_length= s->ac_esc_length;
2282
    uint8_t * length;
2283
    uint8_t * last_length;
2284
 
2285
    av_assert2(h==8);
2286
 
2287
    copy_block8(lsrc1, src1, 8, stride, 8);
2288
    copy_block8(lsrc2, src2, 8, stride, 8);
2289
 
2290
    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
2291
 
2292
    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2293
 
2294
    bits=0;
2295
 
2296
    if (s->mb_intra) {
2297
        start_i = 1;
2298
        length     = s->intra_ac_vlc_length;
2299
        last_length= s->intra_ac_vlc_last_length;
2300
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2301
    } else {
2302
        start_i = 0;
2303
        length     = s->inter_ac_vlc_length;
2304
        last_length= s->inter_ac_vlc_last_length;
2305
    }
2306
 
2307
    if(last>=start_i){
2308
        run=0;
2309
        for(i=start_i; i
2310
            int j= scantable[i];
2311
            level= temp[j];
2312
 
2313
            if(level){
2314
                level+=64;
2315
                if((level&(~127)) == 0){
2316
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
2317
                }else
2318
                    bits+= esc_length;
2319
                run=0;
2320
            }else
2321
                run++;
2322
        }
2323
        i= scantable[last];
2324
 
2325
        level= temp[i] + 64;
2326
 
2327
        av_assert2(level - 64);
2328
 
2329
        if((level&(~127)) == 0){
2330
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2331
        }else
2332
            bits+= esc_length;
2333
 
2334
    }
2335
 
2336
    if(last>=0){
2337
        if(s->mb_intra)
2338
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
2339
        else
2340
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
2341
    }
2342
 
2343
    s->dsp.idct_add(lsrc2, 8, temp);
2344
 
2345
    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
2346
 
2347
    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
2348
}
2349
 
2350
static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2351
    MpegEncContext * const s= (MpegEncContext *)c;
2352
    const uint8_t *scantable= s->intra_scantable.permutated;
2353
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
2354
    int i, last, run, bits, level, start_i;
2355
    const int esc_length= s->ac_esc_length;
2356
    uint8_t * length;
2357
    uint8_t * last_length;
2358
 
2359
    av_assert2(h==8);
2360
 
2361
    s->dsp.diff_pixels(temp, src1, src2, stride);
2362
 
2363
    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2364
 
2365
    bits=0;
2366
 
2367
    if (s->mb_intra) {
2368
        start_i = 1;
2369
        length     = s->intra_ac_vlc_length;
2370
        last_length= s->intra_ac_vlc_last_length;
2371
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2372
    } else {
2373
        start_i = 0;
2374
        length     = s->inter_ac_vlc_length;
2375
        last_length= s->inter_ac_vlc_last_length;
2376
    }
2377
 
2378
    if(last>=start_i){
2379
        run=0;
2380
        for(i=start_i; i
2381
            int j= scantable[i];
2382
            level= temp[j];
2383
 
2384
            if(level){
2385
                level+=64;
2386
                if((level&(~127)) == 0){
2387
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
2388
                }else
2389
                    bits+= esc_length;
2390
                run=0;
2391
            }else
2392
                run++;
2393
        }
2394
        i= scantable[last];
2395
 
2396
        level= temp[i] + 64;
2397
 
2398
        av_assert2(level - 64);
2399
 
2400
        if((level&(~127)) == 0){
2401
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2402
        }else
2403
            bits+= esc_length;
2404
    }
2405
 
2406
    return bits;
2407
}
2408
 
2409
/* Intra vertical SAD: sum of absolute differences between vertically
 * adjacent pixels of one block; instantiated for widths 8 and 16. */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                                            \
    int x,y;                                                                                                \
                                                                                                            \
    for(y=1; y<h; y++){                                                                                     \
        for(x=0; x<size; x+=4){                                                                             \
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])                           \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);                          \
        }                                                                                                   \
        s+= stride;                                                                                         \
    }                                                                                                       \
                                                                                                            \
    return score;                                                                                           \
}
VSAD_INTRA(8)
VSAD_INTRA(16)

/* Inter vertical SAD, 16 wide: absolute vertical-gradient difference
 * between the two blocks. */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x++){
            score+= FFABS(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
        }
        s1+= stride;
        s2+= stride;
    }

    return score;
}

#define SQ(a) ((a)*(a))
/* Intra vertical SSE: squared differences between vertically adjacent
 * pixels; instantiated for widths 8 and 16. */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                                            \
    int x,y;                                                                                                \
                                                                                                            \
    for(y=1; y<h; y++){                                                                                     \
        for(x=0; x<size; x+=4){                                                                             \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])                                 \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);                                \
        }                                                                                                   \
        s+= stride;                                                                                         \
    }                                                                                                       \
                                                                                                            \
    return score;                                                                                           \
}
VSSE_INTRA(8)
VSSE_INTRA(16)

/* Inter vertical SSE, 16 wide: squared vertical-gradient difference
 * between the two blocks. */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x++){
            score+= SQ(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
        }
        s1+= stride;
        s2+= stride;
    }

    return score;
}

/* Sum of squared differences between an int8 array and an int16 array. */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int score=0;
    int i;
    for(i=0; i<size; i++)
        score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
    return score;
}

/* Build a 16x16 comparison function from an 8x8 one by summing the four
 * (or two, when h==8) 8x8 sub-blocks. */
#define WRAPPER8_16_SQ(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    int score=0;\
    score +=name8(s, dst           , src           , stride, 8);\
    score +=name8(s, dst+8         , src+8         , stride, 8);\
    if(h==16){\
        dst += 8*stride;\
        src += 8*stride;\
        score +=name8(s, dst           , src           , stride, 8);\
        score +=name8(s, dst+8         , src+8         , stride, 8);\
    }\
    return score;\
}

WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)

/**
 * Clip one float, given as its raw IEEE-754 bit pattern, to [min, max]
 * where min < 0 < max (mini/maxi are the bit patterns of min/max,
 * maxisign is maxi with the sign bit flipped).
 * With opposite-sign bounds, unsigned comparison of raw bits works:
 * any value whose bits exceed mini has the sign bit set and a larger
 * magnitude than min, i.e. it lies below min; flipping the sign bit
 * orders the non-negative range so values above max compare greater
 * than maxisign.
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                   uint32_t maxi, uint32_t maxisign)
{

    if(a > mini) return mini;
    else if((a^(1U<<31)) > maxisign) return maxi;
    else return a;
}
/**
 * Clip len floats to [*min, *max] for the opposite-sign case
 * (*min < 0 < *max), working on raw IEEE-754 bit patterns via
 * clipf_c_one().  len is assumed to be a multiple of 8 (8-way unroll).
 * NOTE(review): the uint32_t* casts type-pun float storage, which is not
 * strict-aliasing clean; kept byte-compatible with the original code.
 * The scrape had truncated the loop header; bound is len, step 8.
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    int i;
    uint32_t mini = *(uint32_t*)min;
    uint32_t maxi = *(uint32_t*)max;
    uint32_t maxisign = maxi ^ (1U<<31);
    uint32_t *dsti = (uint32_t*)dst;
    const uint32_t *srci = (const uint32_t*)src;

    for (i = 0; i < len; i += 8) {
        dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
        dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
        dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
        dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
        dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
        dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
        dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
        dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
    }
}
/**
 * Clip len floats in src to [min, max], writing to dst.
 * len is assumed to be a multiple of 8 (8-way unrolled loop).
 * The opposite-sign case (min < 0 < max) is dispatched to the
 * bit-pattern fast path; otherwise av_clipf is applied per element.
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i;
    if(min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
    } else {
        for(i=0; i < len; i+=8) {
            dst[i    ] = av_clipf(src[i    ], min, max);
            dst[i + 1] = av_clipf(src[i + 1], min, max);
            dst[i + 2] = av_clipf(src[i + 2], min, max);
            dst[i + 3] = av_clipf(src[i + 3], min, max);
            dst[i + 4] = av_clipf(src[i + 4], min, max);
            dst[i + 5] = av_clipf(src[i + 5], min, max);
            dst[i + 6] = av_clipf(src[i + 6], min, max);
            dst[i + 7] = av_clipf(src[i + 7], min, max);
        }
    }
}
/**
 * Dot product of two int16 vectors.
 * @param order element count
 * @return sum of v1[i]*v2[i]; accumulated in a plain int, matching the
 *         original contract (callers bound the inputs against overflow).
 */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
{
    int res = 0;

    while (order--)
        res += *v1++ * *v2++;

    return res;
}
/**
 * Fused dot product and multiply-add: returns sum of v1[i]*v2[i]
 * (using v1's value BEFORE the update) while updating
 * v1[i] += mul * v3[i] in the same pass.
 * @param order element count
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int res = 0;
    while (order--) {
        res   += *v1 * *v2++;
        *v1++ += mul * *v3++;
    }
    return res;
}
/**
 * Apply a symmetric int16 window to an int16 signal: only the first
 * len/2 window coefficients are stored; window[i] is applied to both
 * input[i] and input[len-i-1].  Products are scaled by Q15 rounding
 * ((x + 2^14) >> 15).
 * @param len total sample count (assumed even — TODO confirm callers)
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    int i;
    int len2 = len >> 1;

    for (i = 0; i < len2; i++) {
        int16_t w       = window[i];
        output[i]       = (MUL16(input[i],       w) + (1 << 14)) >> 15;
        output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
    }
}
/**
 * Clip len int32 samples to [min, max].
 * NOTE(review): do/while with 8-way unrolling — assumes len is a nonzero
 * multiple of 8; len == 0 would wrap the unsigned counter. Kept as is.
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        len -= 8;
    } while (len > 0);
}
/* 8x8 jrev IDCT, then store the clamped result into the destination. */
static void jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct (block);
    put_pixels_clamped_c(block, dest, line_size);
}
/* 8x8 jrev IDCT, then add the clamped result onto the destination. */
static void jref_idct_add(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct (block);
    add_pixels_clamped_c(block, dest, line_size);
}
/* 4x4 lowres jrev IDCT, store clamped (lowres==1 path). */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
/* 4x4 lowres jrev IDCT, add clamped (lowres==1 path). */
static void ff_jref_idct4_add(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}
/* 2x2 lowres jrev IDCT, store clamped (lowres==2 path). */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
/* 2x2 lowres jrev IDCT, add clamped (lowres==2 path). */
static void ff_jref_idct2_add(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}
/* 1x1 lowres "IDCT": only the DC coefficient survives; round and store. */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, int16_t *block)
{
    dest[0] = av_clip_uint8((block[0] + 4)>>3);
}
/* 1x1 lowres "IDCT": round the DC coefficient and add onto dest. */
static void ff_jref_idct1_add(uint8_t *dest, int line_size, int16_t *block)
{
    dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
}
/* init static data */
2647
av_cold void ff_dsputil_static_init(void)
2648
{
2649
    int i;
2650
 
2651
    for(i=0;i<512;i++) {
2652
        ff_squareTbl[i] = (i - 256) * (i - 256);
2653
    }
2654
 
2655
    for(i=0; i<64; i++) ff_inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
2656
}
2657
 
2658
int ff_check_alignment(void){
2659
    static int did_fail=0;
2660
    LOCAL_ALIGNED_16(int, aligned, [4]);
2661
 
2662
    if((intptr_t)aligned & 15){
2663
        if(!did_fail){
2664
#if HAVE_MMX || HAVE_ALTIVEC
2665
            av_log(NULL, AV_LOG_ERROR,
2666
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2667
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
2668
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2669
                "Do not report crashes to FFmpeg developers.\n");
2670
#endif
2671
            did_fail=1;
2672
        }
2673
        return -1;
2674
    }
2675
    return 0;
2676
}
2677
 
2678
/**
 * Initialize a DSPContext with the C reference implementations, then let
 * each architecture override individual entries with optimized versions.
 * Function-pointer selection depends on avctx (bit depth, dct/idct algo,
 * lowres level, codec type).
 */
av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    ff_check_alignment();

#if CONFIG_ENCODERS
    /* forward DCT selection (encoders only) */
    if (avctx->bits_per_raw_sample == 10) {
        c->fdct    = ff_jpeg_fdct_islow_10;
        c->fdct248 = ff_fdct248_islow_10;
    } else {
        if(avctx->dct_algo==FF_DCT_FASTINT) {
            c->fdct    = ff_fdct_ifast;
            c->fdct248 = ff_fdct_ifast248;
        }
        else if(avctx->dct_algo==FF_DCT_FAAN) {
            c->fdct    = ff_faandct;
            c->fdct248 = ff_faandct248;
        }
        else {
            c->fdct    = ff_jpeg_fdct_islow_8; //slow/accurate/default
            c->fdct248 = ff_fdct248_islow_8;
        }
    }
#endif //CONFIG_ENCODERS

    /* inverse DCT selection: lowres uses reduced-size jrev IDCTs */
    if(avctx->lowres==1){
        c->idct_put= ff_jref_idct4_put;
        c->idct_add= ff_jref_idct4_add;
        c->idct    = ff_j_rev_dct4;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==2){
        c->idct_put= ff_jref_idct2_put;
        c->idct_add= ff_jref_idct2_add;
        c->idct    = ff_j_rev_dct2;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==3){
        c->idct_put= ff_jref_idct1_put;
        c->idct_add= ff_jref_idct1_add;
        c->idct    = ff_j_rev_dct1;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else{
        if (avctx->bits_per_raw_sample == 10) {
            c->idct_put              = ff_simple_idct_put_10;
            c->idct_add              = ff_simple_idct_add_10;
            c->idct                  = ff_simple_idct_10;
            c->idct_permutation_type = FF_NO_IDCT_PERM;
        } else if (avctx->bits_per_raw_sample == 12) {
            c->idct_put              = ff_simple_idct_put_12;
            c->idct_add              = ff_simple_idct_add_12;
            c->idct                  = ff_simple_idct_12;
            c->idct_permutation_type = FF_NO_IDCT_PERM;
        } else {
        if(avctx->idct_algo==FF_IDCT_INT){
            c->idct_put= jref_idct_put;
            c->idct_add= jref_idct_add;
            c->idct    = ff_j_rev_dct;
            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_FAAN){
            c->idct_put= ff_faanidct_put;
            c->idct_add= ff_faanidct_add;
            c->idct    = ff_faanidct;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else{ //accurate/default
            c->idct_put = ff_simple_idct_put_8;
            c->idct_add = ff_simple_idct_add_8;
            c->idct     = ff_simple_idct_8;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }
        }
    }

    /* pixel helpers */
    c->diff_pixels = diff_pixels_c;
    c->put_pixels_clamped = put_pixels_clamped_c;
    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
    c->add_pixels_clamped = add_pixels_clamped_c;
    c->sum_abs_dctelem = sum_abs_dctelem_c;
    c->gmc1 = gmc1_c;
    c->gmc = ff_gmc_c;
    c->pix_sum = pix_sum_c;
    c->pix_norm1 = pix_norm1_c;

    c->fill_block_tab[0] = fill_block16_c;
    c->fill_block_tab[1] = fill_block8_c;

    /* TODO [0] 16  [1] 8 */
    c->pix_abs[0][0] = pix_abs16_c;
    c->pix_abs[0][1] = pix_abs16_x2_c;
    c->pix_abs[0][2] = pix_abs16_y2_c;
    c->pix_abs[0][3] = pix_abs16_xy2_c;
    c->pix_abs[1][0] = pix_abs8_c;
    c->pix_abs[1][1] = pix_abs8_x2_c;
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;

    /* third-pel motion compensation (SVQ3); indices 3,7,11+ unused */
    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;

    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;

/* fill one 16-entry quarter-pel MC table from the generated _mcXY_c set */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);
    /* dspfunc(avg_no_rnd_qpel, 0, 16); */

    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);
    /* dspfunc(avg_no_rnd_qpel, 1, 8); */

#undef dspfunc

    c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;

/* [0] = 16-wide variant, [1] = 8x8 variant of a comparison function */
#define SET_CMP_FUNC(name) \
    c->name[0]= name ## 16_c;\
    c->name[1]= name ## 8x8_c;

    SET_CMP_FUNC(hadamard8_diff)
    c->hadamard8_diff[4]= hadamard8_intra16_c;
    c->hadamard8_diff[5]= hadamard8_intra8x8_c;
    SET_CMP_FUNC(dct_sad)
    SET_CMP_FUNC(dct_max)
#if CONFIG_GPL
    SET_CMP_FUNC(dct264_sad)
#endif
    c->sad[0]= pix_abs16_c;
    c->sad[1]= pix_abs8_c;
    c->sse[0]= sse16_c;
    c->sse[1]= sse8_c;
    c->sse[2]= sse4_c;
    SET_CMP_FUNC(quant_psnr)
    SET_CMP_FUNC(rd)
    SET_CMP_FUNC(bit)
    c->vsad[0]= vsad16_c;
    c->vsad[4]= vsad_intra16_c;
    c->vsad[5]= vsad_intra8_c;
    c->vsse[0]= vsse16_c;
    c->vsse[4]= vsse_intra16_c;
    c->vsse[5]= vsse_intra8_c;
    c->nsse[0]= nsse16_c;
    c->nsse[1]= nsse8_c;
#if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER
    ff_dsputil_init_dwt(c);
#endif

    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;

    c->add_bytes= add_bytes_c;
    c->diff_bytes= diff_bytes_c;
    c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
    c->add_hfyu_left_prediction  = add_hfyu_left_prediction_c;
    c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
    c->bswap_buf= bswap_buf;
    c->bswap16_buf = bswap16_buf;

    if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        c->h263_h_loop_filter= h263_h_loop_filter_c;
        c->h263_v_loop_filter= h263_v_loop_filter_c;
    }

    c->try_8x8basis= try_8x8basis_c;
    c->add_8x8basis= add_8x8basis_c;

    c->vector_clipf = vector_clipf_c;
    c->scalarproduct_int16 = scalarproduct_int16_c;
    c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
    c->apply_window_int16 = apply_window_int16_c;
    c->vector_clip_int32 = vector_clip_int32_c;

    c->shrink[0]= av_image_copy_plane;
    c->shrink[1]= ff_shrink22;
    c->shrink[2]= ff_shrink44;
    c->shrink[3]= ff_shrink88;

    c->add_pixels8 = add_pixels8_c;

#undef FUNC
#undef FUNCC
#define FUNC(f, depth) f ## _ ## depth
#define FUNCC(f, depth) f ## _ ## depth ## _c

    c->draw_edges                    = FUNCC(draw_edges, 8);
    c->clear_block                   = FUNCC(clear_block, 8);
    c->clear_blocks                  = FUNCC(clear_blocks, 8);

#define BIT_DEPTH_FUNCS(depth) \
    c->get_pixels                    = FUNCC(get_pixels,   depth);

    /* bit-depth dependent entry points: >8-bit video uses the 16-bit
     * template instantiation of get_pixels */
    switch (avctx->bits_per_raw_sample) {
    case 9:
    case 10:
    case 12:
    case 14:
        BIT_DEPTH_FUNCS(16);
        break;
    default:
        if(avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
            BIT_DEPTH_FUNCS(8);
        }
        break;
    }


    /* per-architecture overrides of the C implementations above */
    if (ARCH_ALPHA)
        ff_dsputil_init_alpha(c, avctx);
    if (ARCH_ARM)
        ff_dsputil_init_arm(c, avctx);
    if (ARCH_BFIN)
        ff_dsputil_init_bfin(c, avctx);
    if (ARCH_PPC)
        ff_dsputil_init_ppc(c, avctx);
    if (ARCH_SH4)
        ff_dsputil_init_sh4(c, avctx);
    if (HAVE_VIS)
        ff_dsputil_init_vis(c, avctx);
    if (ARCH_X86)
        ff_dsputil_init_x86(c, avctx);

    ff_init_scantable_permutation(c->idct_permutation,
                                  c->idct_permutation_type);
}
/* Deprecated public alias for ff_dsputil_init(). */
av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    ff_dsputil_init(c, avctx);
}
/* Library-private alias for ff_dsputil_init(). */
av_cold void avpriv_dsputil_init(DSPContext *c, AVCodecContext *avctx)
{
    ff_dsputil_init(c, avctx);
}