WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/ffmpeg/libavcodec/snowenc.c

Rev	Author	Line No.	Line
4349	Serge	1	/*
		2	* Copyright (C) 2004 Michael Niedermayer
		3	*
		4	* This file is part of FFmpeg.
		5	*
		6	* FFmpeg is free software; you can redistribute it and/or
		7	* modify it under the terms of the GNU Lesser General Public
		8	* License as published by the Free Software Foundation; either
		9	* version 2.1 of the License, or (at your option) any later version.
		10	*
		11	* FFmpeg is distributed in the hope that it will be useful,
		12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
		13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
		14	* Lesser General Public License for more details.
		15	*
		16	* You should have received a copy of the GNU Lesser General Public
		17	* License along with FFmpeg; if not, write to the Free Software
		18	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		19	*/
		20
		21	#include "libavutil/intmath.h"
		22	#include "libavutil/log.h"
		23	#include "libavutil/opt.h"
		24	#include "avcodec.h"
		25	#include "dsputil.h"
		26	#include "internal.h"
		27	#include "snow_dwt.h"
		28	#include "snow.h"
		29
		30	#include "rangecoder.h"
		31	#include "mathops.h"
		32
		33	#include "mpegvideo.h"
		34	#include "h263.h"
		35
		36	static av_cold int encode_init(AVCodecContext *avctx)
		37	{
		38	SnowContext *s = avctx->priv_data;
		39	int plane_index, ret;
		40
		41	if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
		42	av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
		43	"Use vstrict=-2 / -strict -2 to use it anyway.\n");
		44	return -1;
		45	}
		46
		47	if(avctx->prediction_method == DWT_97
		48	&& (avctx->flags & CODEC_FLAG_QSCALE)
		49	&& avctx->global_quality == 0){
		50	av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
		51	return -1;
		52	}
		53
		54	s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
		55
		56	s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
		57	s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
		58
		59	for(plane_index=0; plane_index<3; plane_index++){
		60	s->plane[plane_index].diag_mc= 1;
		61	s->plane[plane_index].htaps= 6;
		62	s->plane[plane_index].hcoeff[0]= 40;
		63	s->plane[plane_index].hcoeff[1]= -10;
		64	s->plane[plane_index].hcoeff[2]= 2;
		65	s->plane[plane_index].fast_mc= 1;
		66	}
		67
		68	if ((ret = ff_snow_common_init(avctx)) < 0) {
		69	ff_snow_common_end(avctx->priv_data);
		70	return ret;
		71	}
		72	ff_snow_alloc_blocks(s);
		73
		74	s->version=0;
		75
		76	s->m.avctx = avctx;
		77	s->m.flags = avctx->flags;
		78	s->m.bit_rate= avctx->bit_rate;
		79
		80	s->m.me.temp =
		81	s->m.me.scratchpad= av_mallocz((avctx->width+64)2162sizeof(uint8_t));
		82	s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
		83	s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
		84	s->m.obmc_scratchpad= av_mallocz(MB_SIZEMB_SIZE12*sizeof(uint32_t));
		85	if (!s->m.me.scratchpad \|\| !s->m.me.map \|\| !s->m.me.score_map \|\| !s->m.obmc_scratchpad)
		86	return AVERROR(ENOMEM);
		87
		88	ff_h263_encode_init(&s->m); //mv_penalty
		89
		90	s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
		91
		92	if(avctx->flags&CODEC_FLAG_PASS1){
		93	if(!avctx->stats_out)
		94	avctx->stats_out = av_mallocz(256);
		95
		96	if (!avctx->stats_out)
		97	return AVERROR(ENOMEM);
		98	}
		99	if((avctx->flags&CODEC_FLAG_PASS2) \|\| !(avctx->flags&CODEC_FLAG_QSCALE)){
		100	if(ff_rate_control_init(&s->m) < 0)
		101	return -1;
		102	}
		103	s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE\|CODEC_FLAG_PASS2));
		104
		105	switch(avctx->pix_fmt){
		106	case AV_PIX_FMT_YUV444P:
		107	// case AV_PIX_FMT_YUV422P:
		108	case AV_PIX_FMT_YUV420P:
		109	// case AV_PIX_FMT_YUV411P:
		110	case AV_PIX_FMT_YUV410P:
		111	s->nb_planes = 3;
		112	s->colorspace_type= 0;
		113	break;
		114	case AV_PIX_FMT_GRAY8:
		115	s->nb_planes = 1;
		116	s->colorspace_type = 1;
		117	break;
		118	/* case AV_PIX_FMT_RGB32:
		119	s->colorspace= 1;
		120	break;*/
		121	default:
		122	av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
		123	return -1;
		124	}
		125	avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
		126
		127	ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
		128	ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
		129
		130	s->input_picture = av_frame_alloc();
		131	if (!s->input_picture)
		132	return AVERROR(ENOMEM);
		133	if ((ret = ff_get_buffer(s->avctx, s->input_picture, AV_GET_BUFFER_FLAG_REF)) < 0)
		134	return ret;
		135
		136	if(s->avctx->me_method == ME_ITER){
		137	int i;
		138	int size= s->b_width * s->b_height << 2*s->block_max_depth;
		139	for(i=0; imax_ref_frames; i++){
		140	s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
		141	s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
		142	if (!s->ref_mvs[i] \|\| !s->ref_scores[i])
		143	return AVERROR(ENOMEM);
		144	}
		145	}
		146
		147	return 0;
		148	}
		149
		150	//near copy & paste from dsputil, FIXME
		151	static int pix_sum(uint8_t * pix, int line_size, int w, int h)
		152	{
		153	int s, i, j;
		154
		155	s = 0;
		156	for (i = 0; i < h; i++) {
		157	for (j = 0; j < w; j++) {
		158	s += pix[0];
		159	pix ++;
		160	}
		161	pix += line_size - w;
		162	}
		163	return s;
		164	}
		165
		166	//near copy & paste from dsputil, FIXME
		167	static int pix_norm1(uint8_t * pix, int line_size, int w)
		168	{
		169	int s, i, j;
		170	uint32_t *sq = ff_squareTbl + 256;
		171
		172	s = 0;
		173	for (i = 0; i < w; i++) {
		174	for (j = 0; j < w; j ++) {
		175	s += sq[pix[0]];
		176	pix ++;
		177	}
		178	pix += line_size - w;
		179	}
		180	return s;
		181	}
		182
		183	static inline int get_penalty_factor(int lambda, int lambda2, int type){
		184	switch(type&0xFF){
		185	default:
		186	case FF_CMP_SAD:
		187	return lambda>>FF_LAMBDA_SHIFT;
		188	case FF_CMP_DCT:
		189	return (3*lambda)>>(FF_LAMBDA_SHIFT+1);
		190	case FF_CMP_W53:
		191	return (4*lambda)>>(FF_LAMBDA_SHIFT);
		192	case FF_CMP_W97:
		193	return (2*lambda)>>(FF_LAMBDA_SHIFT);
		194	case FF_CMP_SATD:
		195	case FF_CMP_DCT264:
		196	return (2*lambda)>>FF_LAMBDA_SHIFT;
		197	case FF_CMP_RD:
		198	case FF_CMP_PSNR:
		199	case FF_CMP_SSE:
		200	case FF_CMP_NSSE:
		201	return lambda2>>FF_LAMBDA_SHIFT;
		202	case FF_CMP_BIT:
		203	return 1;
		204	}
		205	}
		206
		207	//FIXME copy&paste
/* Aliases for rows of the local predictor array P[10][2] used by
 * encode_q_branch(): P_LEFT, P_TOP and P_TOPRIGHT receive the mx/my of the
 * left, top and top-right neighbour blocks, P_MEDIAN their component-wise
 * mid_pred(). P_MV1 and FLAG_QPEL are not referenced in the visible part of
 * this file. */
		208	#define P_LEFT P[1]
		209	#define P_TOP P[2]
		210	#define P_TOPRIGHT P[3]
		211	#define P_MEDIAN P[4]
		212	#define P_MV1 P[9]
		213	#define FLAG_QPEL 1 //must be 1
		214
		215	static int encode_q_branch(SnowContext *s, int level, int x, int y){
		216	uint8_t p_buffer[1024];
		217	uint8_t i_buffer[1024];
		218	uint8_t p_state[sizeof(s->block_state)];
		219	uint8_t i_state[sizeof(s->block_state)];
		220	RangeCoder pc, ic;
		221	uint8_t *pbbak= s->c.bytestream;
		222	uint8_t *pbbak_start= s->c.bytestream_start;
		223	int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
		224	const int w= s->b_width << s->block_max_depth;
		225	const int h= s->b_height << s->block_max_depth;
		226	const int rem_depth= s->block_max_depth - level;
		227	const int index= (x + y*w) << rem_depth;
		228	const int block_w= 1<<(LOG2_MB_SIZE - level);
		229	int trx= (x+1)<
		230	int try= (y+1)<
		231	const BlockNode *left = x ? &s->block[index-1] : &null_block;
		232	const BlockNode *top = y ? &s->block[index-w] : &null_block;
		233	const BlockNode *right = trxblock[index+1] : &null_block;
		234	const BlockNode *bottom= tryblock[index+w] : &null_block;
		235	const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
		236	const BlockNode *tr = y && trxblock[index-w+(1<
		237	int pl = left->color[0];
		238	int pcb= left->color[1];
		239	int pcr= left->color[2];
		240	int pmx, pmy;
		241	int mx=0, my=0;
		242	int l,cr,cb;
		243	const int stride= s->current_picture->linesize[0];
		244	const int uvstride= s->current_picture->linesize[1];
		245	uint8_t current_data[3]= { s->input_picture->data[0] + (x + y stride)*block_w,
		246	s->input_picture->data[1] + ((xblock_w)>>s->chroma_h_shift) + ((yuvstride*block_w)>>s->chroma_v_shift),
		247	s->input_picture->data[2] + ((xblock_w)>>s->chroma_h_shift) + ((yuvstride*block_w)>>s->chroma_v_shift)};
		248	int P[10][2];
		249	int16_t last_mv[3][2];
		250	int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
		251	const int shift= 1+qpel;
		252	MotionEstContext *c= &s->m.me;
		253	int ref_context= av_log2(2left->ref) + av_log2(2top->ref);
		254	int mx_context= av_log2(2*FFABS(left->mx - top->mx));
		255	int my_context= av_log2(2*FFABS(left->my - top->my));
		256	int s_context= 2left->level + 2top->level + tl->level + tr->level;
		257	int ref, best_ref, ref_score, ref_mx, ref_my;
		258
		259	av_assert0(sizeof(s->block_state) >= 256);
		260	if(s->keyframe){
		261	set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
		262	return 0;
		263	}
		264
		265	// clip predictors / edge ?
		266
		267	P_LEFT[0]= left->mx;
		268	P_LEFT[1]= left->my;
		269	P_TOP [0]= top->mx;
		270	P_TOP [1]= top->my;
		271	P_TOPRIGHT[0]= tr->mx;
		272	P_TOPRIGHT[1]= tr->my;
		273
		274	last_mv[0][0]= s->block[index].mx;
		275	last_mv[0][1]= s->block[index].my;
		276	last_mv[1][0]= right->mx;
		277	last_mv[1][1]= right->my;
		278	last_mv[2][0]= bottom->mx;
		279	last_mv[2][1]= bottom->my;
		280
		281	s->m.mb_stride=2;
		282	s->m.mb_x=
		283	s->m.mb_y= 0;
		284	c->skip= 0;
		285
		286	av_assert1(c-> stride == stride);
		287	av_assert1(c->uvstride == uvstride);
		288
		289	c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
		290	c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
		291	c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
		292	c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
		293
		294	c->xmin = - x*block_w - 16+3;
		295	c->ymin = - y*block_w - 16+3;
		296	c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
		297	c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
		298
		299	if(P_LEFT[0] > (c->xmax<xmax<
		300	if(P_LEFT[1] > (c->ymax<ymax<
		301	if(P_TOP[0] > (c->xmax<xmax<
		302	if(P_TOP[1] > (c->ymax<ymax<
		303	if(P_TOPRIGHT[0] < (c->xmin<xmin<
		304	if(P_TOPRIGHT[0] > (c->xmax<xmax<
		305	if(P_TOPRIGHT[1] > (c->ymax<ymax<
		306
		307	P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
		308	P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
		309
		310	if (!y) {
		311	c->pred_x= P_LEFT[0];
		312	c->pred_y= P_LEFT[1];
		313	} else {
		314	c->pred_x = P_MEDIAN[0];
		315	c->pred_y = P_MEDIAN[1];
		316	}
		317
		318	score= INT_MAX;
		319	best_ref= 0;
		320	for(ref=0; refref_frames; ref++){
		321	init_ref(c, current_data, s->last_picture[ref]->data, NULL, block_wx, block_wy, 0);
		322
		323	ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /ref_index/ 0, last_mv,
		324	(1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
		325
		326	av_assert2(ref_mx >= c->xmin);
		327	av_assert2(ref_mx <= c->xmax);
		328	av_assert2(ref_my >= c->ymin);
		329	av_assert2(ref_my <= c->ymax);
		330
		331	ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
		332	ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
		333	ref_score+= 2av_log2(2ref)*c->penalty_factor;
		334	if(s->ref_mvs[ref]){
		335	s->ref_mvs[ref][index][0]= ref_mx;
		336	s->ref_mvs[ref][index][1]= ref_my;
		337	s->ref_scores[ref][index]= ref_score;
		338	}
		339	if(score > ref_score){
		340	score= ref_score;
		341	best_ref= ref;
		342	mx= ref_mx;
		343	my= ref_my;
		344	}
		345	}
		346	//FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
		347
		348	// subpel search
		349	base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
		350	pc= s->c;
		351	pc.bytestream_start=
		352	pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
		353	memcpy(p_state, s->block_state, sizeof(s->block_state));
		354
		355	if(level!=s->block_max_depth)
		356	put_rac(&pc, &p_state[4 + s_context], 1);
		357	put_rac(&pc, &p_state[1 + left->type + top->type], 0);
		358	if(s->ref_frames > 1)
		359	put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
		360	pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
		361	put_symbol(&pc, &p_state[128 + 32(mx_context + 16!!best_ref)], mx - pmx, 1);
		362	put_symbol(&pc, &p_state[128 + 32(my_context + 16!!best_ref)], my - pmy, 1);
		363	p_len= pc.bytestream - pc.bytestream_start;
		364	score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
		365
		366	block_s= block_w*block_w;
		367	sum = pix_sum(current_data[0], stride, block_w, block_w);
		368	l= (sum + block_s/2)/block_s;
		369	iscore = pix_norm1(current_data[0], stride, block_w) - 2lsum + llblock_s;
		370
		371	if (s->nb_planes > 2) {
		372	block_s= block_w*block_w>>(s->chroma_h_shift + s->chroma_v_shift);
		373	sum = pix_sum(current_data[1], uvstride, block_w>>s->chroma_h_shift, block_w>>s->chroma_v_shift);
		374	cb= (sum + block_s/2)/block_s;
		375	// iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2cbsum + cbcbblock_s;
		376	sum = pix_sum(current_data[2], uvstride, block_w>>s->chroma_h_shift, block_w>>s->chroma_v_shift);
		377	cr= (sum + block_s/2)/block_s;
		378	// iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2crsum + crcrblock_s;
		379	}else
		380	cb = cr = 0;
		381
		382	ic= s->c;
		383	ic.bytestream_start=
		384	ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
		385	memcpy(i_state, s->block_state, sizeof(s->block_state));
		386	if(level!=s->block_max_depth)
		387	put_rac(&ic, &i_state[4 + s_context], 1);
		388	put_rac(&ic, &i_state[1 + left->type + top->type], 1);
		389	put_symbol(&ic, &i_state[32], l-pl , 1);
		390	if (s->nb_planes > 2) {
		391	put_symbol(&ic, &i_state[64], cb-pcb, 1);
		392	put_symbol(&ic, &i_state[96], cr-pcr, 1);
		393	}
		394	i_len= ic.bytestream - ic.bytestream_start;
		395	iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
		396
		397	// assert(score==256256256*64-1);
		398	av_assert1(iscore < 255255256 + s->lambda2*10);
		399	av_assert1(iscore >= 0);
		400	av_assert1(l>=0 && l<=255);
		401	av_assert1(pl>=0 && pl<=255);
		402
		403	if(level==0){
		404	int varc= iscore >> 8;
		405	int vard= score >> 8;
		406	if (vard <= 64 \|\| vard < varc)
		407	c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
		408	else
		409	c->scene_change_score+= s->m.qscale;
		410	}
		411
		412	if(level!=s->block_max_depth){
		413	put_rac(&s->c, &s->block_state[4 + s_context], 0);
		414	score2 = encode_q_branch(s, level+1, 2x+0, 2y+0);
		415	score2+= encode_q_branch(s, level+1, 2x+1, 2y+0);
		416	score2+= encode_q_branch(s, level+1, 2x+0, 2y+1);
		417	score2+= encode_q_branch(s, level+1, 2x+1, 2y+1);
		418	score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
		419
		420	if(score2 < score && score2 < iscore)
		421	return score2;
		422	}
		423
		424	if(iscore < score){
		425	pred_mv(s, &pmx, &pmy, 0, left, top, tr);
		426	memcpy(pbbak, i_buffer, i_len);
		427	s->c= ic;
		428	s->c.bytestream_start= pbbak_start;
		429	s->c.bytestream= pbbak + i_len;
		430	set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
		431	memcpy(s->block_state, i_state, sizeof(s->block_state));
		432	return iscore;
		433	}else{
		434	memcpy(pbbak, p_buffer, p_len);
		435	s->c= pc;
		436	s->c.bytestream_start= pbbak_start;
		437	s->c.bytestream= pbbak + p_len;
		438	set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
		439	memcpy(s->block_state, p_state, sizeof(s->block_state));
		440	return score;
		441	}
		442	}
		443
		444	static void encode_q_branch2(SnowContext *s, int level, int x, int y){
		445	const int w= s->b_width << s->block_max_depth;
		446	const int rem_depth= s->block_max_depth - level;
		447	const int index= (x + y*w) << rem_depth;
		448	int trx= (x+1)<
		449	BlockNode *b= &s->block[index];
		450	const BlockNode *left = x ? &s->block[index-1] : &null_block;
		451	const BlockNode *top = y ? &s->block[index-w] : &null_block;
		452	const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
		453	const BlockNode *tr = y && trxblock[index-w+(1<
		454	int pl = left->color[0];
		455	int pcb= left->color[1];
		456	int pcr= left->color[2];
		457	int pmx, pmy;
		458	int ref_context= av_log2(2left->ref) + av_log2(2top->ref);
		459	int mx_context= av_log2(2FFABS(left->mx - top->mx)) + 16!!b->ref;
		460	int my_context= av_log2(2FFABS(left->my - top->my)) + 16!!b->ref;
		461	int s_context= 2left->level + 2top->level + tl->level + tr->level;
		462
		463	if(s->keyframe){
		464	set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
		465	return;
		466	}
		467
		468	if(level!=s->block_max_depth){
		469	if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
		470	put_rac(&s->c, &s->block_state[4 + s_context], 1);
		471	}else{
		472	put_rac(&s->c, &s->block_state[4 + s_context], 0);
		473	encode_q_branch2(s, level+1, 2x+0, 2y+0);
		474	encode_q_branch2(s, level+1, 2x+1, 2y+0);
		475	encode_q_branch2(s, level+1, 2x+0, 2y+1);
		476	encode_q_branch2(s, level+1, 2x+1, 2y+1);
		477	return;
		478	}
		479	}
		480	if(b->type & BLOCK_INTRA){
		481	pred_mv(s, &pmx, &pmy, 0, left, top, tr);
		482	put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
		483	put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
		484	if (s->nb_planes > 2) {
		485	put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
		486	put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
		487	}
		488	set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
		489	}else{
		490	pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
		491	put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
		492	if(s->ref_frames > 1)
		493	put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
		494	put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
		495	put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
		496	set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
		497	}
		498	}
		499
		500	static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
		501	int i, x2, y2;
		502	Plane *p= &s->plane[plane_index];
		503	const int block_size = MB_SIZE >> s->block_max_depth;
		504	const int block_w = plane_index ? block_size>>s->chroma_h_shift : block_size;
		505	const int block_h = plane_index ? block_size>>s->chroma_v_shift : block_size;
		506	const uint8_t *obmc = plane_index ? ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth];
		507	const int obmc_stride= plane_index ? (2block_size)>>s->chroma_h_shift : 2block_size;
		508	const int ref_stride= s->current_picture->linesize[plane_index];
		509	uint8_t *src= s-> input_picture->data[plane_index];
		510	IDWTELEM dst= (IDWTELEM)s->m.obmc_scratchpad + plane_indexblock_sizeblock_size*4; //FIXME change to unsigned
		511	const int b_stride = s->b_width << s->block_max_depth;
		512	const int w= p->width;
		513	const int h= p->height;
		514	int index= mb_x + mb_y*b_stride;
		515	BlockNode *b= &s->block[index];
		516	BlockNode backup= *b;
		517	int ab=0;
		518	int aa=0;
		519
		520	av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc stuff above
		521
		522	b->type\|= BLOCK_INTRA;
		523	b->color[plane_index]= 0;
		524	memset(dst, 0, obmc_strideobmc_stridesizeof(IDWTELEM));
		525
		526	for(i=0; i<4; i++){
		527	int mb_x2= mb_x + (i &1) - 1;
		528	int mb_y2= mb_y + (i>>1) - 1;
		529	int x= block_w*mb_x2 + block_w/2;
		530	int y= block_h*mb_y2 + block_h/2;
		531
		532	add_yblock(s, 0, NULL, dst + (i&1)block_w + (i>>1)obmc_stride*block_h, NULL, obmc,
		533	x, y, block_w, block_h, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
		534
		535	for(y2= FFMAX(y, 0); y2
		536	for(x2= FFMAX(x, 0); x2
		537	int index= x2-(block_wmb_x - block_w/2) + (y2-(block_hmb_y - block_h/2))*obmc_stride;
		538	int obmc_v= obmc[index];
		539	int d;
		540	if(y<0) obmc_v += obmc[index + block_h*obmc_stride];
		541	if(x<0) obmc_v += obmc[index + block_w];
		542	if(y+block_h>h) obmc_v += obmc[index - block_h*obmc_stride];
		543	if(x+block_w>w) obmc_v += obmc[index - block_w];
		544	//FIXME precalculate this or simplify it somehow else
		545
		546	d = -dst[index] + (1<<(FRAC_BITS-1));
		547	dst[index] = d;
		548	ab += (src[x2 + y2ref_stride] - (d>>FRAC_BITS)) obmc_v;
		549	aa += obmc_v * obmc_v; //FIXME precalculate this
		550	}
		551	}
		552	}
		553	*b= backup;
		554
		555	return av_clip( ROUNDED_DIV(ab<
		556	}
		557
		558	static inline int get_block_bits(SnowContext *s, int x, int y, int w){
		559	const int b_stride = s->b_width << s->block_max_depth;
		560	const int b_height = s->b_height<< s->block_max_depth;
		561	int index= x + y*b_stride;
		562	const BlockNode *b = &s->block[index];
		563	const BlockNode *left = x ? &s->block[index-1] : &null_block;
		564	const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
		565	const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
		566	const BlockNode *tr = y && x+wblock[index-b_stride+w] : tl;
		567	int dmx, dmy;
		568	// int mx_context= av_log2(2*FFABS(left->mx - top->mx));
		569	// int my_context= av_log2(2*FFABS(left->my - top->my));
		570
		571	if(x<0 \|\| x>=b_stride \|\| y>=b_height)
		572	return 0;
		573	/*
		574	1 0 0
		575	01X 1-2 1
		576	001XX 3-6 2-3
		577	0001XXX 7-14 4-7
		578	00001XXXX 15-30 8-15
		579	*/
		580	//FIXME try accurate rate
		581	//FIXME intra and inter predictors if surrounding blocks are not the same type
		582	if(b->type & BLOCK_INTRA){
		583	return 3+2( av_log2(2FFABS(left->color[0] - b->color[0]))
		584	+ av_log2(2*FFABS(left->color[1] - b->color[1]))
		585	+ av_log2(2*FFABS(left->color[2] - b->color[2])));
		586	}else{
		587	pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
		588	dmx-= b->mx;
		589	dmy-= b->my;
		590	return 2(1 + av_log2(2FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
		591	+ av_log2(2*FFABS(dmy))
		592	+ av_log2(2*b->ref));
		593	}
		594	}
		595
		596	static int get_block_rd(SnowContext s, int mb_x, int mb_y, int plane_index, uint8_t (obmc_edged)[MB_SIZE * 2]){
		597	Plane *p= &s->plane[plane_index];
		598	const int block_size = MB_SIZE >> s->block_max_depth;
		599	const int block_w = plane_index ? block_size>>s->chroma_h_shift : block_size;
		600	const int block_h = plane_index ? block_size>>s->chroma_v_shift : block_size;
		601	const int obmc_stride= plane_index ? (2block_size)>>s->chroma_h_shift : 2block_size;
		602	const int ref_stride= s->current_picture->linesize[plane_index];
		603	uint8_t *dst= s->current_picture->data[plane_index];
		604	uint8_t *src= s-> input_picture->data[plane_index];
		605	IDWTELEM pred= (IDWTELEM)s->m.obmc_scratchpad + plane_indexblock_sizeblock_size*4;
		606	uint8_t *cur = s->scratchbuf;
		607	uint8_t *tmp = s->emu_edge_buffer;
		608	const int b_stride = s->b_width << s->block_max_depth;
		609	const int b_height = s->b_height<< s->block_max_depth;
		610	const int w= p->width;
		611	const int h= p->height;
		612	int distortion;
		613	int rate= 0;
		614	const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
		615	int sx= block_w*mb_x - block_w/2;
		616	int sy= block_h*mb_y - block_h/2;
		617	int x0= FFMAX(0,-sx);
		618	int y0= FFMAX(0,-sy);
		619	int x1= FFMIN(block_w*2, w-sx);
		620	int y1= FFMIN(block_h*2, h-sy);
		621	int i,x,y;
		622
		623	av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc and square assumtions below chckinhg only block_w
		624
		625	ff_snow_pred_block(s, cur, tmp, ref_stride, sx, sy, block_w2, block_h2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
		626
		627	for(y=y0; y
		628	const uint8_t *obmc1= obmc_edged[y];
		629	const IDWTELEM pred1 = pred + yobmc_stride;
		630	uint8_t cur1 = cur + yref_stride;
		631	uint8_t dst1 = dst + sx + (sy+y)ref_stride;
		632	for(x=x0; x
		633	#if FRAC_BITS >= LOG2_OBMC_MAX
		634	int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
		635	#else
		636	int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
		637	#endif
		638	v = (v + pred1[x]) >> FRAC_BITS;
		639	if(v&(~255)) v= ~(v>>31);
		640	dst1[x] = v;
		641	}
		642	}
		643
		644	/* copy the regions where obmc[] = (uint8_t)256 */
		645	if(LOG2_OBMC_MAX == 8
		646	&& (mb_x == 0 \|\| mb_x == b_stride-1)
		647	&& (mb_y == 0 \|\| mb_y == b_height-1)){
		648	if(mb_x == 0)
		649	x1 = block_w;
		650	else
		651	x0 = block_w;
		652	if(mb_y == 0)
		653	y1 = block_h;
		654	else
		655	y0 = block_h;
		656	for(y=y0; y
		657	memcpy(dst + sx+x0 + (sy+y)ref_stride, cur + x0 + yref_stride, x1-x0);
		658	}
		659
		660	if(block_w==16){
		661	/* FIXME rearrange dsputil to fit 32x32 cmp functions */
		662	/* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
		663	/* FIXME cmps overlap but do not cover the wavelet's whole support.
		664	* So improving the score of one block is not strictly guaranteed
		665	* to improve the score of the whole frame, thus iterative motion
		666	* estimation does not always converge. */
		667	if(s->avctx->me_cmp == FF_CMP_W97)
		668	distortion = ff_w97_32_c(&s->m, src + sx + syref_stride, dst + sx + syref_stride, ref_stride, 32);
		669	else if(s->avctx->me_cmp == FF_CMP_W53)
		670	distortion = ff_w53_32_c(&s->m, src + sx + syref_stride, dst + sx + syref_stride, ref_stride, 32);
		671	else{
		672	distortion = 0;
		673	for(i=0; i<4; i++){
		674	int off = sx+16(i&1) + (sy+16(i>>1))*ref_stride;
		675	distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
		676	}
		677	}
		678	}else{
		679	av_assert2(block_w==8);
		680	distortion = s->dsp.me_cmp[0](&s->m, src + sx + syref_stride, dst + sx + syref_stride, ref_stride, block_w*2);
		681	}
		682
		683	if(plane_index==0){
		684	for(i=0; i<4; i++){
		685	/* ..RRr
		686	* .RXx.
		687	* rxx..
		688	*/
		689	rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
		690	}
		691	if(mb_x == b_stride-2)
		692	rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
		693	}
		694	return distortion + rate*penalty_factor;
		695	}
		696
		697	static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
		698	int i, y2;
		699	Plane *p= &s->plane[plane_index];
		700	const int block_size = MB_SIZE >> s->block_max_depth;
		701	const int block_w = plane_index ? block_size>>s->chroma_h_shift : block_size;
		702	const int block_h = plane_index ? block_size>>s->chroma_v_shift : block_size;
		703	const uint8_t *obmc = plane_index ? ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth];
		704	const int obmc_stride= plane_index ? (2block_size)>>s->chroma_h_shift : 2block_size;
		705	const int ref_stride= s->current_picture->linesize[plane_index];
		706	uint8_t *dst= s->current_picture->data[plane_index];
		707	uint8_t *src= s-> input_picture->data[plane_index];
		708	//FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
		709	// const has only been removed from zero_dst to suppress a warning
		710	static IDWTELEM zero_dst[4096]; //FIXME
		711	const int b_stride = s->b_width << s->block_max_depth;
		712	const int w= p->width;
		713	const int h= p->height;
		714	int distortion= 0;
		715	int rate= 0;
		716	const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
		717
		718	av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc and square assumtions below
		719
		720	for(i=0; i<9; i++){
		721	int mb_x2= mb_x + (i%3) - 1;
		722	int mb_y2= mb_y + (i/3) - 1;
		723	int x= block_w*mb_x2 + block_w/2;
		724	int y= block_h*mb_y2 + block_h/2;
		725
		726	add_yblock(s, 0, NULL, zero_dst, dst, obmc,
		727	x, y, block_w, block_h, w, h, /dst_stride/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
		728
		729	//FIXME find a cleaner/simpler way to skip the outside stuff
		730	for(y2= y; y2<0; y2++)
		731	memcpy(dst + x + y2ref_stride, src + x + y2ref_stride, block_w);
		732	for(y2= h; y2
		733	memcpy(dst + x + y2ref_stride, src + x + y2ref_stride, block_w);
		734	if(x<0){
		735	for(y2= y; y2
		736	memcpy(dst + x + y2ref_stride, src + x + y2ref_stride, -x);
		737	}
		738	if(x+block_w > w){
		739	for(y2= y; y2
		740	memcpy(dst + w + y2ref_stride, src + w + y2ref_stride, x+block_w - w);
		741	}
		742
		743	av_assert1(block_w== 8 \|\| block_w==16);
		744	distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + yref_stride, dst + x + yref_stride, ref_stride, block_h);
		745	}
		746
		747	if(plane_index==0){
		748	BlockNode b= &s->block[mb_x+mb_yb_stride];
		749	int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
		750
		751	/* ..RRRr
		752	* .RXXx.
		753	* .RXXx.
		754	* rxxx.
		755	*/
		756	if(merged)
		757	rate = get_block_bits(s, mb_x, mb_y, 2);
		758	for(i=merged?4:0; i<9; i++){
		759	static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
		760	rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
		761	}
		762	}
		763	return distortion + rate*penalty_factor;
		764	}
		765
		766	static int encode_subband_c0run(SnowContext s, SubBand b, const IDWTELEM src, const IDWTELEM parent, int stride, int orientation){
		767	const int w= b->width;
		768	const int h= b->height;
		769	int x, y;
		770
		771	if(1){
		772	int run=0;
		773	int *runs = s->run_buffer;
		774	int run_index=0;
		775	int max_index;
		776
		777	for(y=0; y
		778	for(x=0; x
		779	int v, p=0;
		780	int /ll=0, /l=0, lt=0, t=0, rt=0;
		781	v= src[x + y*stride];
		782
		783	if(y){
		784	t= src[x + (y-1)*stride];
		785	if(x){
		786	lt= src[x - 1 + (y-1)*stride];
		787	}
		788	if(x + 1 < w){
		789	rt= src[x + 1 + (y-1)*stride];
		790	}
		791	}
		792	if(x){
		793	l= src[x - 1 + y*stride];
		794	/*if(x > 1){
		795	if(orientation==1) ll= src[y + (x-2)*stride];
		796	else ll= src[x - 2 + y*stride];
		797	}*/
		798	}
		799	if(parent){
		800	int px= x>>1;
		801	int py= y>>1;
		802	if(pxparent->width && pyparent->height)
		803	p= parent[px + py2stride];
		804	}
		805	if(!(/ll\|/l\|lt\|t\|rt\|p)){
		806	if(v){
		807	runs[run_index++]= run;
		808	run=0;
		809	}else{
		810	run++;
		811	}
		812	}
		813	}
		814	}
		815	max_index= run_index;
		816	runs[run_index++]= run;
		817	run_index=0;
		818	run= runs[run_index++];
		819
		820	put_symbol2(&s->c, b->state[30], max_index, 0);
		821	if(run_index <= max_index)
		822	put_symbol2(&s->c, b->state[1], run, 3);
		823
		824	for(y=0; y
		825	if(s->c.bytestream_end - s->c.bytestream < w*40){
		826	av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
		827	return -1;
		828	}
		829	for(x=0; x
		830	int v, p=0;
		831	int /ll=0, /l=0, lt=0, t=0, rt=0;
		832	v= src[x + y*stride];
		833
		834	if(y){
		835	t= src[x + (y-1)*stride];
		836	if(x){
		837	lt= src[x - 1 + (y-1)*stride];
		838	}
		839	if(x + 1 < w){
		840	rt= src[x + 1 + (y-1)*stride];
		841	}
		842	}
		843	if(x){
		844	l= src[x - 1 + y*stride];
		845	/*if(x > 1){
		846	if(orientation==1) ll= src[y + (x-2)*stride];
		847	else ll= src[x - 2 + y*stride];
		848	}*/
		849	}
		850	if(parent){
		851	int px= x>>1;
		852	int py= y>>1;
		853	if(pxparent->width && pyparent->height)
		854	p= parent[px + py2stride];
		855	}
		856	if(/ll\|/l\|lt\|t\|rt\|p){
		857	int context= av_log2(/FFABS(ll) + /3FFABS(l) + FFABS(lt) + 2FFABS(t) + FFABS(rt) + FFABS(p));
		858
		859	put_rac(&s->c, &b->state[0][context], !!v);
		860	}else{
		861	if(!run){
		862	run= runs[run_index++];
		863
		864	if(run_index <= max_index)
		865	put_symbol2(&s->c, b->state[1], run, 3);
		866	av_assert2(v);
		867	}else{
		868	run--;
		869	av_assert2(!v);
		870	}
		871	}
		872	if(v){
		873	int context= av_log2(/FFABS(ll) + /3FFABS(l) + FFABS(lt) + 2FFABS(t) + FFABS(rt) + FFABS(p));
		874	int l2= 2*FFABS(l) + (l<0);
		875	int t2= 2*FFABS(t) + (t<0);
		876
		877	put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
		878	put_rac(&s->c, &b->state[0][16 + 1 + 3 + ff_quant3bA[l2&0xFF] + 3*ff_quant3bA[t2&0xFF]], v<0);
		879	}
		880	}
		881	}
		882	}
		883	return 0;
		884	}
		885
		886	static int encode_subband(SnowContext s, SubBand b, const IDWTELEM src, const IDWTELEM parent, int stride, int orientation){
		887	// encode_subband_qtree(s, b, src, parent, stride, orientation);
		888	// encode_subband_z0run(s, b, src, parent, stride, orientation);
		889	return encode_subband_c0run(s, b, src, parent, stride, orientation);
		890	// encode_subband_dzr(s, b, src, parent, stride, orientation);
		891	}
		892
		893	static av_always_inline int check_block(SnowContext s, int mb_x, int mb_y, int p[3], int intra, uint8_t (obmc_edged)[MB_SIZE * 2], int *best_rd){
		894	const int b_stride= s->b_width << s->block_max_depth;
		895	BlockNode block= &s->block[mb_x + mb_y b_stride];
		896	BlockNode backup= *block;
		897	unsigned value;
		898	int rd, index;
		899
		900	av_assert2(mb_x>=0 && mb_y>=0);
		901	av_assert2(mb_x
		902
		903	if(intra){
		904	block->color[0] = p[0];
		905	block->color[1] = p[1];
		906	block->color[2] = p[2];
		907	block->type \|= BLOCK_INTRA;
		908	}else{
		909	index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
		910	value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
		911	if(s->me_cache[index] == value)
		912	return 0;
		913	s->me_cache[index]= value;
		914
		915	block->mx= p[0];
		916	block->my= p[1];
		917	block->type &= ~BLOCK_INTRA;
		918	}
		919
		920	rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
		921
		922	//FIXME chroma
		923	if(rd < *best_rd){
		924	*best_rd= rd;
		925	return 1;
		926	}else{
		927	*block= backup;
		928	return 0;
		929	}
		930	}
		931
		932	/* special case for int[2] args we discard afterwards,
		933	* fixes compilation problem with gcc 2.95 */
		934	static av_always_inline int check_block_inter(SnowContext s, int mb_x, int mb_y, int p0, int p1, uint8_t (obmc_edged)[MB_SIZE * 2], int *best_rd){
		935	int p[2] = {p0, p1};
		936	return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
		937	}
		938
		939	static av_always_inline int check_4block_inter(SnowContext s, int mb_x, int mb_y, int p0, int p1, int ref, int best_rd){
		940	const int b_stride= s->b_width << s->block_max_depth;
		941	BlockNode block= &s->block[mb_x + mb_y b_stride];
		942	BlockNode backup[4];
		943	unsigned value;
		944	int rd, index;
		945
		946	/* We don't initialize backup[] during variable declaration, because
		947	* that fails to compile on MSVC: "cannot convert from 'BlockNode' to
		948	* 'int16_t'". */
		949	backup[0] = block[0];
		950	backup[1] = block[1];
		951	backup[2] = block[b_stride];
		952	backup[3] = block[b_stride + 1];
		953
		954	av_assert2(mb_x>=0 && mb_y>=0);
		955	av_assert2(mb_x
		956	av_assert2(((mb_x\|mb_y)&1) == 0);
		957
		958	index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
		959	value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
		960	if(s->me_cache[index] == value)
		961	return 0;
		962	s->me_cache[index]= value;
		963
		964	block->mx= p0;
		965	block->my= p1;
		966	block->ref= ref;
		967	block->type &= ~BLOCK_INTRA;
		968	block[1]= block[b_stride]= block[b_stride+1]= *block;
		969
		970	rd= get_4block_rd(s, mb_x, mb_y, 0);
		971
		972	//FIXME chroma
		973	if(rd < *best_rd){
		974	*best_rd= rd;
		975	return 1;
		976	}else{
		977	block[0]= backup[0];
		978	block[1]= backup[1];
		979	block[b_stride]= backup[2];
		980	block[b_stride+1]= backup[3];
		981	return 0;
		982	}
		983	}
		984
		985	static void iterative_me(SnowContext *s){
		986	int pass, mb_x, mb_y;
		987	const int b_width = s->b_width << s->block_max_depth;
		988	const int b_height= s->b_height << s->block_max_depth;
		989	const int b_stride= b_width;
		990	int color[3];
		991
		992	{
		993	RangeCoder r = s->c;
		994	uint8_t state[sizeof(s->block_state)];
		995	memcpy(state, s->block_state, sizeof(s->block_state));
		996	for(mb_y= 0; mb_yb_height; mb_y++)
		997	for(mb_x= 0; mb_xb_width; mb_x++)
		998	encode_q_branch(s, 0, mb_x, mb_y);
		999	s->c = r;
		1000	memcpy(s->block_state, state, sizeof(s->block_state));
		1001	}
		1002
		1003	for(pass=0; pass<25; pass++){
		1004	int change= 0;
		1005
		1006	for(mb_y= 0; mb_y
		1007	for(mb_x= 0; mb_x
		1008	int dia_change, i, j, ref;
		1009	int best_rd= INT_MAX, ref_rd;
		1010	BlockNode backup, ref_b;
		1011	const int index= mb_x + mb_y * b_stride;
		1012	BlockNode *block= &s->block[index];
		1013	BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
		1014	BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
		1015	BlockNode *rb = mb_x+1block[index +1] : NULL;
		1016	BlockNode *bb = mb_y+1block[index+b_stride ] : NULL;
		1017	BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
		1018	BlockNode *trb= mb_x+1block[index-b_stride+1] : NULL;
		1019	BlockNode *blb= mb_x && mb_y+1block[index+b_stride-1] : NULL;
		1020	BlockNode *brb= mb_x+1block[index+b_stride+1] : NULL;
		1021	const int b_w= (MB_SIZE >> s->block_max_depth);
		1022	uint8_t obmc_edged[MB_SIZE * 2][MB_SIZE * 2];
		1023
		1024	if(pass && (block->type & BLOCK_OPT))
		1025	continue;
		1026	block->type \|= BLOCK_OPT;
		1027
		1028	backup= *block;
		1029
		1030	if(!s->me_cache_generation)
		1031	memset(s->me_cache, 0, sizeof(s->me_cache));
		1032	s->me_cache_generation += 1<<22;
		1033
		1034	//FIXME precalculate
		1035	{
		1036	int x, y;
		1037	for (y = 0; y < b_w * 2; y++)
		1038	memcpy(obmc_edged[y], ff_obmc_tab[s->block_max_depth] + y * b_w * 2, b_w * 2);
		1039	if(mb_x==0)
		1040	for(y=0; y
		1041	memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
		1042	if(mb_x==b_stride-1)
		1043	for(y=0; y
		1044	memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
		1045	if(mb_y==0){
		1046	for(x=0; x
		1047	obmc_edged[0][x] += obmc_edged[b_w-1][x];
		1048	for(y=1; y
		1049	memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
		1050	}
		1051	if(mb_y==b_height-1){
		1052	for(x=0; x
		1053	obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
		1054	for(y=b_w; y
		1055	memcpy(obmc_edged[y], obmc_edged[b_w2-1], b_w2);
		1056	}
		1057	}
		1058
		1059	//skip stuff outside the picture
		1060	if(mb_x==0 \|\| mb_y==0 \|\| mb_x==b_width-1 \|\| mb_y==b_height-1){
		1061	uint8_t *src= s-> input_picture->data[0];
		1062	uint8_t *dst= s->current_picture->data[0];
		1063	const int stride= s->current_picture->linesize[0];
		1064	const int block_w= MB_SIZE >> s->block_max_depth;
		1065	const int block_h= MB_SIZE >> s->block_max_depth;
		1066	const int sx= block_w*mb_x - block_w/2;
		1067	const int sy= block_h*mb_y - block_h/2;
		1068	const int w= s->plane[0].width;
		1069	const int h= s->plane[0].height;
		1070	int y;
		1071
		1072	for(y=sy; y<0; y++)
		1073	memcpy(dst + sx + ystride, src + sx + ystride, block_w*2);
		1074	for(y=h; y
		1075	memcpy(dst + sx + ystride, src + sx + ystride, block_w*2);
		1076	if(sx<0){
		1077	for(y=sy; y
		1078	memcpy(dst + sx + ystride, src + sx + ystride, -sx);
		1079	}
		1080	if(sx+block_w*2 > w){
		1081	for(y=sy; y
		1082	memcpy(dst + w + ystride, src + w + ystride, sx+block_w*2 - w);
		1083	}
		1084	}
		1085
		1086	// intra(black) = neighbors' contribution to the current block
		1087	for(i=0; i < s->nb_planes; i++)
		1088	color[i]= get_dc(s, mb_x, mb_y, i);
		1089
		1090	// get previous score (cannot be cached due to OBMC)
		1091	if(pass > 0 && (block->type&BLOCK_INTRA)){
		1092	int color0[3]= {block->color[0], block->color[1], block->color[2]};
		1093	check_block(s, mb_x, mb_y, color0, 1, obmc_edged, &best_rd);
		1094	}else
		1095	check_block_inter(s, mb_x, mb_y, block->mx, block->my, obmc_edged, &best_rd);
		1096
		1097	ref_b= *block;
		1098	ref_rd= best_rd;
		1099	for(ref=0; ref < s->ref_frames; ref++){
		1100	int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
		1101	if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
		1102	continue;
		1103	block->ref= ref;
		1104	best_rd= INT_MAX;
		1105
		1106	check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], obmc_edged, &best_rd);
		1107	check_block_inter(s, mb_x, mb_y, 0, 0, obmc_edged, &best_rd);
		1108	if(tb)
		1109	check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], obmc_edged, &best_rd);
		1110	if(lb)
		1111	check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], obmc_edged, &best_rd);
		1112	if(rb)
		1113	check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], obmc_edged, &best_rd);
		1114	if(bb)
		1115	check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], obmc_edged, &best_rd);
		1116
		1117	/* fullpel ME */
		1118	//FIXME avoid subpel interpolation / round to nearest integer
		1119	do{
		1120	dia_change=0;
		1121	for(i=0; iavctx->dia_size, 1); i++){
		1122	for(j=0; j
		1123	dia_change \|= check_block_inter(s, mb_x, mb_y, block->mx+4(i-j), block->my+(4j), obmc_edged, &best_rd);
		1124	dia_change \|= check_block_inter(s, mb_x, mb_y, block->mx-4(i-j), block->my-(4j), obmc_edged, &best_rd);
		1125	dia_change \|= check_block_inter(s, mb_x, mb_y, block->mx+4(i-j), block->my-(4j), obmc_edged, &best_rd);
		1126	dia_change \|= check_block_inter(s, mb_x, mb_y, block->mx-4(i-j), block->my+(4j), obmc_edged, &best_rd);
		1127	}
		1128	}
		1129	}while(dia_change);
		1130	/* subpel ME */
		1131	do{
		1132	static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
		1133	dia_change=0;
		1134	for(i=0; i<8; i++)
		1135	dia_change \|= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], obmc_edged, &best_rd);
		1136	}while(dia_change);
		1137	//FIXME or try the standard 2 pass qpel or similar
		1138
		1139	mvr[0][0]= block->mx;
		1140	mvr[0][1]= block->my;
		1141	if(ref_rd > best_rd){
		1142	ref_rd= best_rd;
		1143	ref_b= *block;
		1144	}
		1145	}
		1146	best_rd= ref_rd;
		1147	*block= ref_b;
		1148	check_block(s, mb_x, mb_y, color, 1, obmc_edged, &best_rd);
		1149	//FIXME RD style color selection
		1150	if(!same_block(block, &backup)){
		1151	if(tb ) tb ->type &= ~BLOCK_OPT;
		1152	if(lb ) lb ->type &= ~BLOCK_OPT;
		1153	if(rb ) rb ->type &= ~BLOCK_OPT;
		1154	if(bb ) bb ->type &= ~BLOCK_OPT;
		1155	if(tlb) tlb->type &= ~BLOCK_OPT;
		1156	if(trb) trb->type &= ~BLOCK_OPT;
		1157	if(blb) blb->type &= ~BLOCK_OPT;
		1158	if(brb) brb->type &= ~BLOCK_OPT;
		1159	change ++;
		1160	}
		1161	}
		1162	}
		1163	av_log(s->avctx, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
		1164	if(!change)
		1165	break;
		1166	}
		1167
		1168	if(s->block_max_depth == 1){
		1169	int change= 0;
		1170	for(mb_y= 0; mb_y
		1171	for(mb_x= 0; mb_x
		1172	int i;
		1173	int best_rd, init_rd;
		1174	const int index= mb_x + mb_y * b_stride;
		1175	BlockNode *b[4];
		1176
		1177	b[0]= &s->block[index];
		1178	b[1]= b[0]+1;
		1179	b[2]= b[0]+b_stride;
		1180	b[3]= b[2]+1;
		1181	if(same_block(b[0], b[1]) &&
		1182	same_block(b[0], b[2]) &&
		1183	same_block(b[0], b[3]))
		1184	continue;
		1185
		1186	if(!s->me_cache_generation)
		1187	memset(s->me_cache, 0, sizeof(s->me_cache));
		1188	s->me_cache_generation += 1<<22;
		1189
		1190	init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
		1191
		1192	//FIXME more multiref search?
		1193	check_4block_inter(s, mb_x, mb_y,
		1194	(b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
		1195	(b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
		1196
		1197	for(i=0; i<4; i++)
		1198	if(!(b[i]->type&BLOCK_INTRA))
		1199	check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
		1200
		1201	if(init_rd != best_rd)
		1202	change++;
		1203	}
		1204	}
		1205	av_log(s->avctx, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
		1206	}
		1207	}
		1208
		1209	static void encode_blocks(SnowContext *s, int search){
		1210	int x, y;
		1211	int w= s->b_width;
		1212	int h= s->b_height;
		1213
		1214	if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
		1215	iterative_me(s);
		1216
		1217	for(y=0; y
		1218	if(s->c.bytestream_end - s->c.bytestream < wMB_SIZEMB_SIZE*3){ //FIXME nicer limit
		1219	av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
		1220	return;
		1221	}
		1222	for(x=0; x
		1223	if(s->avctx->me_method == ME_ITER \|\| !search)
		1224	encode_q_branch2(s, 0, x, y);
		1225	else
		1226	encode_q_branch (s, 0, x, y);
		1227	}
		1228	}
		1229	}
		1230
		1231	static void quantize(SnowContext s, SubBand b, IDWTELEM dst, DWTELEM src, int stride, int bias){
		1232	const int w= b->width;
		1233	const int h= b->height;
		1234	const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
		1235	const int qmul= ff_qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
		1236	int x,y, thres1, thres2;
		1237
		1238	if(s->qlog == LOSSLESS_QLOG){
		1239	for(y=0; y
		1240	for(x=0; x
		1241	dst[x + ystride]= src[x + ystride];
		1242	return;
		1243	}
		1244
		1245	bias= bias ? 0 : (3*qmul)>>3;
		1246	thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
		1247	thres2= 2*thres1;
		1248
		1249	if(!bias){
		1250	for(y=0; y
		1251	for(x=0; x
		1252	int i= src[x + y*stride];
		1253
		1254	if((unsigned)(i+thres1) > thres2){
		1255	if(i>=0){
		1256	i<<= QEXPSHIFT;
		1257	i/= qmul; //FIXME optimize
		1258	dst[x + y*stride]= i;
		1259	}else{
		1260	i= -i;
		1261	i<<= QEXPSHIFT;
		1262	i/= qmul; //FIXME optimize
		1263	dst[x + y*stride]= -i;
		1264	}
		1265	}else
		1266	dst[x + y*stride]= 0;
		1267	}
		1268	}
		1269	}else{
		1270	for(y=0; y
		1271	for(x=0; x
		1272	int i= src[x + y*stride];
		1273
		1274	if((unsigned)(i+thres1) > thres2){
		1275	if(i>=0){
		1276	i<<= QEXPSHIFT;
		1277	i= (i + bias) / qmul; //FIXME optimize
		1278	dst[x + y*stride]= i;
		1279	}else{
		1280	i= -i;
		1281	i<<= QEXPSHIFT;
		1282	i= (i + bias) / qmul; //FIXME optimize
		1283	dst[x + y*stride]= -i;
		1284	}
		1285	}else
		1286	dst[x + y*stride]= 0;
		1287	}
		1288	}
		1289	}
		1290	}
		1291
		1292	static void dequantize(SnowContext s, SubBand b, IDWTELEM *src, int stride){
		1293	const int w= b->width;
		1294	const int h= b->height;
		1295	const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
		1296	const int qmul= ff_qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
		1297	const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
		1298	int x,y;
		1299
		1300	if(s->qlog == LOSSLESS_QLOG) return;
		1301
		1302	for(y=0; y
		1303	for(x=0; x
		1304	int i= src[x + y*stride];
		1305	if(i<0){
		1306	src[x + ystride]= -((-iqmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
		1307	}else if(i>0){
		1308	src[x + ystride]= (( iqmul + qadd)>>(QEXPSHIFT));
		1309	}
		1310	}
		1311	}
		1312	}
		1313
		1314	static void decorrelate(SnowContext s, SubBand b, IDWTELEM *src, int stride, int inverse, int use_median){
		1315	const int w= b->width;
		1316	const int h= b->height;
		1317	int x,y;
		1318
		1319	for(y=h-1; y>=0; y--){
		1320	for(x=w-1; x>=0; x--){
		1321	int i= x + y*stride;
		1322
		1323	if(x){
		1324	if(use_median){
		1325	if(y && x+1
		1326	else src[i] -= src[i - 1];
		1327	}else{
		1328	if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
		1329	else src[i] -= src[i - 1];
		1330	}
		1331	}else{
		1332	if(y) src[i] -= src[i - stride];
		1333	}
		1334	}
		1335	}
		1336	}
		1337
		1338	static void correlate(SnowContext s, SubBand b, IDWTELEM *src, int stride, int inverse, int use_median){
		1339	const int w= b->width;
		1340	const int h= b->height;
		1341	int x,y;
		1342
		1343	for(y=0; y
		1344	for(x=0; x
		1345	int i= x + y*stride;
		1346
		1347	if(x){
		1348	if(use_median){
		1349	if(y && x+1
		1350	else src[i] += src[i - 1];
		1351	}else{
		1352	if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
		1353	else src[i] += src[i - 1];
		1354	}
		1355	}else{
		1356	if(y) src[i] += src[i - stride];
		1357	}
		1358	}
		1359	}
		1360	}
		1361
		1362	static void encode_qlogs(SnowContext *s){
		1363	int plane_index, level, orientation;
		1364
		1365	for(plane_index=0; plane_indexnb_planes, 2); plane_index++){
		1366	for(level=0; levelspatial_decomposition_count; level++){
		1367	for(orientation=level ? 1:0; orientation<4; orientation++){
		1368	if(orientation==2) continue;
		1369	put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
		1370	}
		1371	}
		1372	}
		1373	}
		1374
		1375	static void encode_header(SnowContext *s){
		1376	int plane_index, i;
		1377	uint8_t kstate[32];
		1378
		1379	memset(kstate, MID_STATE, sizeof(kstate));
		1380
		1381	put_rac(&s->c, kstate, s->keyframe);
		1382	if(s->keyframe \|\| s->always_reset){
		1383	ff_snow_reset_contexts(s);
		1384	s->last_spatial_decomposition_type=
		1385	s->last_qlog=
		1386	s->last_qbias=
		1387	s->last_mv_scale=
		1388	s->last_block_max_depth= 0;
		1389	for(plane_index=0; plane_index<2; plane_index++){
		1390	Plane *p= &s->plane[plane_index];
		1391	p->last_htaps=0;
		1392	p->last_diag_mc=0;
		1393	memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
		1394	}
		1395	}
		1396	if(s->keyframe){
		1397	put_symbol(&s->c, s->header_state, s->version, 0);
		1398	put_rac(&s->c, s->header_state, s->always_reset);
		1399	put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
		1400	put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
		1401	put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
		1402	put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
		1403	if (s->nb_planes > 2) {
		1404	put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
		1405	put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
		1406	}
		1407	put_rac(&s->c, s->header_state, s->spatial_scalability);
		1408	// put_rac(&s->c, s->header_state, s->rate_scalability);
		1409	put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
		1410
		1411	encode_qlogs(s);
		1412	}
		1413
		1414	if(!s->keyframe){
		1415	int update_mc=0;
		1416	for(plane_index=0; plane_indexnb_planes, 2); plane_index++){
		1417	Plane *p= &s->plane[plane_index];
		1418	update_mc \|= p->last_htaps != p->htaps;
		1419	update_mc \|= p->last_diag_mc != p->diag_mc;
		1420	update_mc \|= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
		1421	}
		1422	put_rac(&s->c, s->header_state, update_mc);
		1423	if(update_mc){
		1424	for(plane_index=0; plane_indexnb_planes, 2); plane_index++){
		1425	Plane *p= &s->plane[plane_index];
		1426	put_rac(&s->c, s->header_state, p->diag_mc);
		1427	put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
		1428	for(i= p->htaps/2; i; i--)
		1429	put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
		1430	}
		1431	}
		1432	if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
		1433	put_rac(&s->c, s->header_state, 1);
		1434	put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
		1435	encode_qlogs(s);
		1436	}else
		1437	put_rac(&s->c, s->header_state, 0);
		1438	}
		1439
		1440	put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
		1441	put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
		1442	put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
		1443	put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
		1444	put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
		1445
		1446	}
		1447
		1448	static void update_last_header_values(SnowContext *s){
		1449	int plane_index;
		1450
		1451	if(!s->keyframe){
		1452	for(plane_index=0; plane_index<2; plane_index++){
		1453	Plane *p= &s->plane[plane_index];
		1454	p->last_diag_mc= p->diag_mc;
		1455	p->last_htaps = p->htaps;
		1456	memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
		1457	}
		1458	}
		1459
		1460	s->last_spatial_decomposition_type = s->spatial_decomposition_type;
		1461	s->last_qlog = s->qlog;
		1462	s->last_qbias = s->qbias;
		1463	s->last_mv_scale = s->mv_scale;
		1464	s->last_block_max_depth = s->block_max_depth;
		1465	s->last_spatial_decomposition_count = s->spatial_decomposition_count;
		1466	}
		1467
		1468	static int qscale2qlog(int qscale){
		1469	return rint(QROOT*log2(qscale / (float)FF_QP2LAMBDA))
		1470	+ 61*QROOT/8; ///< 64 > 60
		1471	}
		1472
		1473	static int ratecontrol_1pass(SnowContext s, AVFrame pict)
		1474	{
		1475	/* Estimate the frame's complexity as a sum of weighted dwt coefficients.
		1476	* FIXME we know exact mv bits at this point,
		1477	* but ratecontrol isn't set up to include them. */
		1478	uint32_t coef_sum= 0;
		1479	int level, orientation, delta_qlog;
		1480
		1481	for(level=0; levelspatial_decomposition_count; level++){
		1482	for(orientation=level ? 1 : 0; orientation<4; orientation++){
		1483	SubBand *b= &s->plane[0].band[level][orientation];
		1484	IDWTELEM *buf= b->ibuf;
		1485	const int w= b->width;
		1486	const int h= b->height;
		1487	const int stride= b->stride;
		1488	const int qlog= av_clip(2QROOT + b->qlog, 0, QROOT16);
		1489	const int qmul= ff_qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
		1490	const int qdiv= (1<<16)/qmul;
		1491	int x, y;
		1492	//FIXME this is ugly
		1493	for(y=0; y
		1494	for(x=0; x
		1495	buf[x+ystride]= b->buf[x+ystride];
		1496	if(orientation==0)
		1497	decorrelate(s, b, buf, stride, 1, 0);
		1498	for(y=0; y
		1499	for(x=0; x
		1500	coef_sum+= abs(buf[x+ystride]) qdiv >> 16;
		1501	}
		1502	}
		1503
		1504	/* ugly, ratecontrol just takes a sqrt again */
		1505	coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
		1506	av_assert0(coef_sum < INT_MAX);
		1507
		1508	if(pict->pict_type == AV_PICTURE_TYPE_I){
		1509	s->m.current_picture.mb_var_sum= coef_sum;
		1510	s->m.current_picture.mc_mb_var_sum= 0;
		1511	}else{
		1512	s->m.current_picture.mc_mb_var_sum= coef_sum;
		1513	s->m.current_picture.mb_var_sum= 0;
		1514	}
		1515
		1516	pict->quality= ff_rate_estimate_qscale(&s->m, 1);
		1517	if (pict->quality < 0)
		1518	return INT_MIN;
		1519	s->lambda= pict->quality * 3/2;
		1520	delta_qlog= qscale2qlog(pict->quality) - s->qlog;
		1521	s->qlog+= delta_qlog;
		1522	return delta_qlog;
		1523	}
		1524
		1525	static void calculate_visual_weight(SnowContext s, Plane p){
		1526	int width = p->width;
		1527	int height= p->height;
		1528	int level, orientation, x, y;
		1529
		1530	for(level=0; levelspatial_decomposition_count; level++){
		1531	for(orientation=level ? 1 : 0; orientation<4; orientation++){
		1532	SubBand *b= &p->band[level][orientation];
		1533	IDWTELEM *ibuf= b->ibuf;
		1534	int64_t error=0;
		1535
		1536	memset(s->spatial_idwt_buffer, 0, sizeof(s->spatial_idwt_buffer)width*height);
		1537	ibuf[b->width/2 + b->height/2b->stride]= 25616;
		1538	ff_spatial_idwt(s->spatial_idwt_buffer, s->temp_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
		1539	for(y=0; y
		1540	for(x=0; x
		1541	int64_t d= s->spatial_idwt_buffer[x + ywidth]16;
		1542	error += d*d;
		1543	}
		1544	}
		1545
		1546	b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
		1547	}
		1548	}
		1549	}
		1550
		1551	static int encode_frame(AVCodecContext avctx, AVPacket pkt,
		1552	AVFrame pict, int got_packet)
		1553	{
		1554	SnowContext *s = avctx->priv_data;
		1555	RangeCoder * const c= &s->c;
		1556	AVFrame *pic = pict;
		1557	const int width= s->avctx->width;
		1558	const int height= s->avctx->height;
		1559	int level, orientation, plane_index, i, y, ret;
		1560	uint8_t rc_header_bak[sizeof(s->header_state)];
		1561	uint8_t rc_block_bak[sizeof(s->block_state)];
		1562
		1563	if ((ret = ff_alloc_packet2(avctx, pkt, s->b_widths->b_heightMB_SIZEMB_SIZE3 + FF_MIN_BUFFER_SIZE)) < 0)
		1564	return ret;
		1565
		1566	ff_init_range_encoder(c, pkt->data, pkt->size);
		1567	ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
		1568
		1569	for(i=0; i < s->nb_planes; i++){
		1570	int hshift= i ? s->chroma_h_shift : 0;
		1571	int vshift= i ? s->chroma_v_shift : 0;
		1572	for(y=0; y<(height>>vshift); y++)
		1573	memcpy(&s->input_picture->data[i][y * s->input_picture->linesize[i]],
		1574	&pict->data[i][y * pict->linesize[i]],
		1575	width>>hshift);
		1576	s->dsp.draw_edges(s->input_picture->data[i], s->input_picture->linesize[i],
		1577	width >> hshift, height >> vshift,
		1578	EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
		1579	EDGE_TOP \| EDGE_BOTTOM);
		1580
		1581	}
		1582	emms_c();
		1583	s->new_picture = pict;
		1584
		1585	s->m.picture_number= avctx->frame_number;
		1586	if(avctx->flags&CODEC_FLAG_PASS2){
		1587	s->m.pict_type = pic->pict_type = s->m.rc_context.entry[avctx->frame_number].new_pict_type;
		1588	s->keyframe = pic->pict_type == AV_PICTURE_TYPE_I;
		1589	if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
		1590	pic->quality = ff_rate_estimate_qscale(&s->m, 0);
		1591	if (pic->quality < 0)
		1592	return -1;
		1593	}
		1594	}else{
		1595	s->keyframe= avctx->gop_size==0 \|\| avctx->frame_number % avctx->gop_size == 0;
		1596	s->m.pict_type = pic->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
		1597	}
		1598
		1599	if(s->pass1_rc && avctx->frame_number == 0)
		1600	pic->quality = 2*FF_QP2LAMBDA;
		1601	if (pic->quality) {
		1602	s->qlog = qscale2qlog(pic->quality);
		1603	s->lambda = pic->quality * 3/2;
		1604	}
		1605	if (s->qlog < 0 \|\| (!pic->quality && (avctx->flags & CODEC_FLAG_QSCALE))) {
		1606	s->qlog= LOSSLESS_QLOG;
		1607	s->lambda = 0;
		1608	}//else keep previous frame's qlog until after motion estimation
		1609
		1610	ff_snow_frame_start(s);
		1611	avctx->coded_frame= s->current_picture;
		1612
		1613	s->m.current_picture_ptr= &s->m.current_picture;
		1614	s->m.last_picture.f.pts = s->m.current_picture.f.pts;
		1615	s->m.current_picture.f.pts = pict->pts;
		1616	if(pic->pict_type == AV_PICTURE_TYPE_P){
		1617	int block_width = (width +15)>>4;
		1618	int block_height= (height+15)>>4;
		1619	int stride= s->current_picture->linesize[0];
		1620
		1621	av_assert0(s->current_picture->data[0]);
		1622	av_assert0(s->last_picture[0]->data[0]);
		1623
		1624	s->m.avctx= s->avctx;
		1625	s->m.current_picture.f.data[0] = s->current_picture->data[0];
		1626	s->m. last_picture.f.data[0] = s->last_picture[0]->data[0];
		1627	s->m. new_picture.f.data[0] = s-> input_picture->data[0];
		1628	s->m. last_picture_ptr= &s->m. last_picture;
		1629	s->m.linesize=
		1630	s->m. last_picture.f.linesize[0] =
		1631	s->m. new_picture.f.linesize[0] =
		1632	s->m.current_picture.f.linesize[0] = stride;
		1633	s->m.uvlinesize= s->current_picture->linesize[1];
		1634	s->m.width = width;
		1635	s->m.height= height;
		1636	s->m.mb_width = block_width;
		1637	s->m.mb_height= block_height;
		1638	s->m.mb_stride= s->m.mb_width+1;
		1639	s->m.b8_stride= 2*s->m.mb_width+1;
		1640	s->m.f_code=1;
		1641	s->m.pict_type = pic->pict_type;
		1642	s->m.me_method= s->avctx->me_method;
		1643	s->m.me.scene_change_score=0;
		1644	s->m.flags= s->avctx->flags;
		1645	s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
		1646	s->m.out_format= FMT_H263;
		1647	s->m.unrestricted_mv= 1;
		1648
		1649	s->m.lambda = s->lambda;
		1650	s->m.qscale= (s->m.lambda139 + FF_LAMBDA_SCALE64) >> (FF_LAMBDA_SHIFT + 7);
		1651	s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
		1652
		1653	s->m.dsp= s->dsp; //move
		1654	s->m.hdsp = s->hdsp;
		1655	ff_init_me(&s->m);
		1656	s->hdsp = s->m.hdsp;
		1657	s->dsp= s->m.dsp;
		1658	}
		1659
		1660	if(s->pass1_rc){
		1661	memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
		1662	memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
		1663	}
		1664
		1665	redo_frame:
		1666
		1667	if (pic->pict_type == AV_PICTURE_TYPE_I)
		1668	s->spatial_decomposition_count= 5;
		1669	else
		1670	s->spatial_decomposition_count= 5;
		1671
		1672	while( !(width >>(s->chroma_h_shift + s->spatial_decomposition_count))
		1673	\|\| !(height>>(s->chroma_v_shift + s->spatial_decomposition_count)))
		1674	s->spatial_decomposition_count--;
		1675
		1676	if (s->spatial_decomposition_count <= 0) {
		1677	av_log(avctx, AV_LOG_ERROR, "Resolution too low\n");
		1678	return AVERROR(EINVAL);
		1679	}
		1680
		1681	s->m.pict_type = pic->pict_type;
		1682	s->qbias = pic->pict_type == AV_PICTURE_TYPE_P ? 2 : 0;
		1683
		1684	ff_snow_common_init_after_header(avctx);
		1685
		1686	if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
		1687	for(plane_index=0; plane_index < s->nb_planes; plane_index++){
		1688	calculate_visual_weight(s, &s->plane[plane_index]);
		1689	}
		1690	}
		1691
		1692	encode_header(s);
		1693	s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
		1694	encode_blocks(s, 1);
		1695	s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
		1696
		1697	for(plane_index=0; plane_index < s->nb_planes; plane_index++){
		1698	Plane *p= &s->plane[plane_index];
		1699	int w= p->width;
		1700	int h= p->height;
		1701	int x, y;
		1702	// int bits= put_bits_count(&s->c.pb);
		1703
		1704	if (!s->memc_only) {
		1705	//FIXME optimize
		1706	if(pict->data[plane_index]) //FIXME gray hack
		1707	for(y=0; y
		1708	for(x=0; x
		1709	s->spatial_idwt_buffer[yw + x]= pict->data[plane_index][ypict->linesize[plane_index] + x]<
		1710	}
		1711	}
		1712	predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
		1713
		1714	if( plane_index==0
		1715	&& pic->pict_type == AV_PICTURE_TYPE_P
		1716	&& !(avctx->flags&CODEC_FLAG_PASS2)
		1717	&& s->m.me.scene_change_score > s->avctx->scenechange_threshold){
		1718	ff_init_range_encoder(c, pkt->data, pkt->size);
		1719	ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
		1720	pic->pict_type= AV_PICTURE_TYPE_I;
		1721	s->keyframe=1;
		1722	s->current_picture->key_frame=1;
		1723	goto redo_frame;
		1724	}
		1725
		1726	if(s->qlog == LOSSLESS_QLOG){
		1727	for(y=0; y
		1728	for(x=0; x
		1729	s->spatial_dwt_buffer[yw + x]= (s->spatial_idwt_buffer[yw + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
		1730	}
		1731	}
		1732	}else{
		1733	for(y=0; y
		1734	for(x=0; x
		1735	s->spatial_dwt_buffer[yw + x]=s->spatial_idwt_buffer[yw + x]<
		1736	}
		1737	}
		1738	}
		1739
		1740	ff_spatial_dwt(s->spatial_dwt_buffer, s->temp_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
		1741
		1742	if(s->pass1_rc && plane_index==0){
		1743	int delta_qlog = ratecontrol_1pass(s, pic);
		1744	if (delta_qlog <= INT_MIN)
		1745	return -1;
		1746	if(delta_qlog){
		1747	//reordering qlog in the bitstream would eliminate this reset
		1748	ff_init_range_encoder(c, pkt->data, pkt->size);
		1749	memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
		1750	memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
		1751	encode_header(s);
		1752	encode_blocks(s, 0);
		1753	}
		1754	}
		1755
		1756	for(level=0; levelspatial_decomposition_count; level++){
		1757	for(orientation=level ? 1 : 0; orientation<4; orientation++){
		1758	SubBand *b= &p->band[level][orientation];
		1759
		1760	quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
		1761	if(orientation==0)
		1762	decorrelate(s, b, b->ibuf, b->stride, pic->pict_type == AV_PICTURE_TYPE_P, 0);
		1763	if (!s->no_bitstream)
		1764	encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
		1765	av_assert0(b->parent==NULL \|\| b->parent->stride == b->stride*2);
		1766	if(orientation==0)
		1767	correlate(s, b, b->ibuf, b->stride, 1, 0);
		1768	}
		1769	}
		1770
		1771	for(level=0; levelspatial_decomposition_count; level++){
		1772	for(orientation=level ? 1 : 0; orientation<4; orientation++){
		1773	SubBand *b= &p->band[level][orientation];
		1774
		1775	dequantize(s, b, b->ibuf, b->stride);
		1776	}
		1777	}
		1778
		1779	ff_spatial_idwt(s->spatial_idwt_buffer, s->temp_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
		1780	if(s->qlog == LOSSLESS_QLOG){
		1781	for(y=0; y
		1782	for(x=0; x
		1783	s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
		1784	}
		1785	}
		1786	}
		1787	predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
		1788	}else{
		1789	//ME/MC only
		1790	if(pic->pict_type == AV_PICTURE_TYPE_I){
		1791	for(y=0; y
		1792	for(x=0; x
		1793	s->current_picture->data[plane_index][y*s->current_picture->linesize[plane_index] + x]=
		1794	pict->data[plane_index][y*pict->linesize[plane_index] + x];
		1795	}
		1796	}
		1797	}else{
		1798	memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)wh);
		1799	predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
		1800	}
		1801	}
		1802	if(s->avctx->flags&CODEC_FLAG_PSNR){
		1803	int64_t error= 0;
		1804
		1805	if(pict->data[plane_index]) //FIXME gray hack
		1806	for(y=0; y
		1807	for(x=0; x
		1808	int d= s->current_picture->data[plane_index][ys->current_picture->linesize[plane_index] + x] - pict->data[plane_index][ypict->linesize[plane_index] + x];
		1809	error += d*d;
		1810	}
		1811	}
		1812	s->avctx->error[plane_index] += error;
		1813	s->current_picture->error[plane_index] = error;
		1814	}
		1815
		1816	}
		1817
		1818	update_last_header_values(s);
		1819
		1820	ff_snow_release_buffer(avctx);
		1821
		1822	s->current_picture->coded_picture_number = avctx->frame_number;
		1823	s->current_picture->pict_type = pict->pict_type;
		1824	s->current_picture->quality = pict->quality;
		1825	s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
		1826	s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
		1827	s->m.current_picture.f.display_picture_number =
		1828	s->m.current_picture.f.coded_picture_number = avctx->frame_number;
		1829	s->m.current_picture.f.quality = pic->quality;
		1830	s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
		1831	if(s->pass1_rc)
		1832	if (ff_rate_estimate_qscale(&s->m, 0) < 0)
		1833	return -1;
		1834	if(avctx->flags&CODEC_FLAG_PASS1)
		1835	ff_write_pass1_stats(&s->m);
		1836	s->m.last_pict_type = s->m.pict_type;
		1837	avctx->frame_bits = s->m.frame_bits;
		1838	avctx->mv_bits = s->m.mv_bits;
		1839	avctx->misc_bits = s->m.misc_bits;
		1840	avctx->p_tex_bits = s->m.p_tex_bits;
		1841
		1842	emms_c();
		1843
		1844	pkt->size = ff_rac_terminate(c);
		1845	if (avctx->coded_frame->key_frame)
		1846	pkt->flags \|= AV_PKT_FLAG_KEY;
		1847	*got_packet = 1;
		1848
		1849	return 0;
		1850	}
		1851
		1852	static av_cold int encode_end(AVCodecContext *avctx)
		1853	{
		1854	SnowContext *s = avctx->priv_data;
		1855
		1856	ff_snow_common_end(s);
		1857	ff_rate_control_uninit(&s->m);
		1858	av_frame_free(&s->input_picture);
		1859	av_free(avctx->stats_out);
		1860
		1861	return 0;
		1862	}
		1863
		1864	#define OFFSET(x) offsetof(SnowContext, x)
		1865	#define VE AV_OPT_FLAG_VIDEO_PARAM \| AV_OPT_FLAG_ENCODING_PARAM
		1866	static const AVOption options[] = {
		1867	{ "memc_only", "Only do ME/MC (I frames -> ref, P frame -> ME+MC).", OFFSET(memc_only), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
		1868	{ "no_bitstream", "Skip final bitstream writeout.", OFFSET(no_bitstream), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
		1869	{ NULL },
		1870	};
		1871
		1872	static const AVClass snowenc_class = {
		1873	.class_name = "snow encoder",
		1874	.item_name = av_default_item_name,
		1875	.option = options,
		1876	.version = LIBAVUTIL_VERSION_INT,
		1877	};
		1878
		1879	AVCodec ff_snow_encoder = {
		1880	.name = "snow",
		1881	.long_name = NULL_IF_CONFIG_SMALL("Snow"),
		1882	.type = AVMEDIA_TYPE_VIDEO,
		1883	.id = AV_CODEC_ID_SNOW,
		1884	.priv_data_size = sizeof(SnowContext),
		1885	.init = encode_init,
		1886	.encode2 = encode_frame,
		1887	.close = encode_end,
		1888	.pix_fmts = (const enum AVPixelFormat[]){
		1889	AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV444P,
		1890	AV_PIX_FMT_GRAY8,
		1891	AV_PIX_FMT_NONE
		1892	},
		1893	.priv_class = &snowenc_class,
		1894	};
		1895
		1896
		1897	#ifdef TEST
		1898	#undef malloc
		1899	#undef free
		1900	#undef printf
		1901
		1902	#include "libavutil/lfg.h"
		1903	#include "libavutil/mathematics.h"
		1904
		1905	int main(void){
		1906	#define width 256
		1907	#define height 256
		1908	int buffer[2][width*height];
		1909	SnowContext s;
		1910	int i;
		1911	AVLFG prng;
		1912	s.spatial_decomposition_count=6;
		1913	s.spatial_decomposition_type=1;
		1914
		1915	s.temp_dwt_buffer = av_mallocz(width * sizeof(DWTELEM));
		1916	s.temp_idwt_buffer = av_mallocz(width * sizeof(IDWTELEM));
		1917
		1918	av_lfg_init(&prng, 1);
		1919
		1920	printf("testing 5/3 DWT\n");
		1921	for(i=0; i
		1922	buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
		1923
		1924	ff_spatial_dwt(buffer[0], s.temp_dwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
		1925	ff_spatial_idwt((IDWTELEM*)buffer[0], s.temp_idwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
		1926
		1927	for(i=0; i
		1928	if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
		1929
		1930	printf("testing 9/7 DWT\n");
		1931	s.spatial_decomposition_type=0;
		1932	for(i=0; i
		1933	buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
		1934
		1935	ff_spatial_dwt(buffer[0], s.temp_dwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
		1936	ff_spatial_idwt((IDWTELEM*)buffer[0], s.temp_idwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
		1937
		1938	for(i=0; i
		1939	if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
		1940
		1941	{
		1942	int level, orientation, x, y;
		1943	int64_t errors[8][4];
		1944	int64_t g=0;
		1945
		1946	memset(errors, 0, sizeof(errors));
		1947	s.spatial_decomposition_count=3;
		1948	s.spatial_decomposition_type=0;
		1949	for(level=0; level
		1950	for(orientation=level ? 1 : 0; orientation<4; orientation++){
		1951	int w= width >> (s.spatial_decomposition_count-level);
		1952	int h= height >> (s.spatial_decomposition_count-level);
		1953	int stride= width << (s.spatial_decomposition_count-level);
		1954	DWTELEM *buf= buffer[0];
		1955	int64_t error=0;
		1956
		1957	if(orientation&1) buf+=w;
		1958	if(orientation>1) buf+=stride>>1;
		1959
		1960	memset(buffer[0], 0, sizeof(int)widthheight);
		1961	buf[w/2 + h/2stride]= 256256;
		1962	ff_spatial_idwt((IDWTELEM*)buffer[0], s.temp_idwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
		1963	for(y=0; y
		1964	for(x=0; x
		1965	int64_t d= buffer[0][x + y*width];
		1966	error += d*d;
		1967	if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
		1968	}
		1969	if(FFABS(height/2-y)<9 && level==2) printf("\n");
		1970	}
		1971	error= (int)(sqrt(error)+0.5);
		1972	errors[level][orientation]= error;
		1973	if(g) g=av_gcd(g, error);
		1974	else g= error;
		1975	}
		1976	}
		1977	printf("static int const visual_weight[][4]={\n");
		1978	for(level=0; level
		1979	printf(" {");
		1980	for(orientation=0; orientation<4; orientation++){
		1981	printf("%8"PRId64",", errors[level][orientation]/g);
		1982	}
		1983	printf("},\n");
		1984	}
		1985	printf("};\n");
		1986	{
		1987	int level=2;
		1988	int w= width >> (s.spatial_decomposition_count-level);
		1989	//int h= height >> (s.spatial_decomposition_count-level);
		1990	int stride= width << (s.spatial_decomposition_count-level);
		1991	DWTELEM *buf= buffer[0];
		1992	int64_t error=0;
		1993
		1994	buf+=w;
		1995	buf+=stride>>1;
		1996
		1997	memset(buffer[0], 0, sizeof(int)widthheight);
		1998	for(y=0; y
		1999	for(x=0; x
		2000	int tab[4]={0,2,3,1};
		2001	buffer[0][x+widthy]= 256256tab[(x&1) + 2(y&1)];
		2002	}
		2003	}
		2004	ff_spatial_dwt(buffer[0], s.temp_dwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
		2005	for(y=0; y
		2006	for(x=0; x
		2007	int64_t d= buffer[0][x + y*width];
		2008	error += d*d;
		2009	if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
		2010	}
		2011	if(FFABS(height/2-y)<9) printf("\n");
		2012	}
		2013	}
		2014
		2015	}
		2016	return 0;
		2017	}
		2018	#endif /* TEST */

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/ffmpeg/libavcodec/snowenc.c @ 4930 – Rev