WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libavcodec/diracdec.c

Rev	Author	Line No.	Line
6147	serge	1	/*
		2	* Copyright (C) 2007 Marco Gerards
		3	* Copyright (C) 2009 David Conrad
		4	* Copyright (C) 2011 Jordi Ortiz
		5	*
		6	* This file is part of FFmpeg.
		7	*
		8	* FFmpeg is free software; you can redistribute it and/or
		9	* modify it under the terms of the GNU Lesser General Public
		10	* License as published by the Free Software Foundation; either
		11	* version 2.1 of the License, or (at your option) any later version.
		12	*
		13	* FFmpeg is distributed in the hope that it will be useful,
		14	* but WITHOUT ANY WARRANTY; without even the implied warranty of
		15	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
		16	* Lesser General Public License for more details.
		17	*
		18	* You should have received a copy of the GNU Lesser General Public
		19	* License along with FFmpeg; if not, write to the Free Software
		20	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
		21	*/
		22
		23	/**
		24	* @file
		25	* Dirac Decoder
		26	* @author Marco Gerards, David Conrad, Jordi Ortiz
		27	*/
		28
		29	#include "avcodec.h"
		30	#include "get_bits.h"
		31	#include "bytestream.h"
		32	#include "internal.h"
		33	#include "golomb.h"
		34	#include "dirac_arith.h"
		35	#include "mpeg12data.h"
		36	#include "libavcodec/mpegvideo.h"
		37	#include "mpegvideoencdsp.h"
		38	#include "dirac_dwt.h"
		39	#include "dirac.h"
		40	#include "diracdsp.h"
		41	#include "videodsp.h"
		42
		43	/**
		44	* The spec limits the number of wavelet decompositions to 4 for both
		45	* level 1 (VC-2) and 128 (long-gop default).
		46	* 5 decompositions is the maximum before >16-bit buffers are needed.
		47	* Schroedinger allows this for DD 9,7 and 13,7 wavelets only, limiting
		48	* the others to 4 decompositions (or 3 for the fidelity filter).
		49	*
		50	* We use this instead of MAX_DECOMPOSITIONS to save some memory.
		51	*/
		52	#define MAX_DWT_LEVELS 5
		53
		54	/**
		55	* The spec limits this to 3 for frame coding, but in practice can be as high as 6
		56	*/
		57	#define MAX_REFERENCE_FRAMES 8
		58	#define MAX_DELAY 5 /* limit for main profile for frame coding (TODO: field coding) */
		59	#define MAX_FRAMES (MAX_REFERENCE_FRAMES + MAX_DELAY + 1)
		60	#define MAX_QUANT 68 /* max quant for VC-2 */
		61	#define MAX_BLOCKSIZE 32 /* maximum xblen/yblen we support */
		62
		63	/**
		64	* DiracBlock->ref flags, if set then the block does MC from the given ref
		65	*/
		66	#define DIRAC_REF_MASK_REF1 1
		67	#define DIRAC_REF_MASK_REF2 2
		68	#define DIRAC_REF_MASK_GLOBAL 4
		69
		70	/**
		71	* Value of Picture.reference when Picture is not a reference picture, but
		72	* is held for delayed output.
		73	*/
		74	#define DELAYED_PIC_REF 4
		75
		76	#define CALC_PADDING(size, depth) \
		77	(((size + (1 << depth) - 1) >> depth) << depth)
		78
		79	#define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
		80
		81	typedef struct {
		82	AVFrame *avframe;
		83	int interpolated[3]; /* 1 if hpel[] is valid */
		84	uint8_t *hpel[3][4];
		85	uint8_t *hpel_base[3][4];
		86	int reference;
		87	} DiracFrame;
		88
		89	typedef struct {
		90	union {
		91	int16_t mv[2][2];
		92	int16_t dc[3];
		93	} u; /* anonymous unions aren't in C99 :( */
		94	uint8_t ref;
		95	} DiracBlock;
		96
		97	typedef struct SubBand {
		98	int level;
		99	int orientation;
		100	int stride;
		101	int width;
		102	int height;
		103	int quant;
		104	IDWTELEM *ibuf;
		105	struct SubBand *parent;
		106
		107	/* for low delay */
		108	unsigned length;
		109	const uint8_t *coeff_data;
		110	} SubBand;
		111
		112	typedef struct Plane {
		113	int width;
		114	int height;
		115	ptrdiff_t stride;
		116
		117	int idwt_width;
		118	int idwt_height;
		119	int idwt_stride;
		120	IDWTELEM *idwt_buf;
		121	IDWTELEM *idwt_buf_base;
		122	IDWTELEM *idwt_tmp;
		123
		124	/* block length */
		125	uint8_t xblen;
		126	uint8_t yblen;
		127	/* block separation (block n+1 starts after this many pixels in block n) */
		128	uint8_t xbsep;
		129	uint8_t ybsep;
		130	/* amount of overspill on each edge (half of the overlap between blocks) */
		131	uint8_t xoffset;
		132	uint8_t yoffset;
		133
		134	SubBand band[MAX_DWT_LEVELS][4];
		135	} Plane;
		136
		137	typedef struct DiracContext {
		138	AVCodecContext *avctx;
		139	MpegvideoEncDSPContext mpvencdsp;
		140	VideoDSPContext vdsp;
		141	DiracDSPContext diracdsp;
		142	GetBitContext gb;
		143	dirac_source_params source;
		144	int seen_sequence_header;
		145	int frame_number; /* number of the next frame to display */
		146	Plane plane[3];
		147	int chroma_x_shift;
		148	int chroma_y_shift;
		149
		150	int zero_res; /* zero residue flag */
		151	int is_arith; /* whether coeffs use arith or golomb coding */
		152	int low_delay; /* use the low delay syntax */
		153	int globalmc_flag; /* use global motion compensation */
		154	int num_refs; /* number of reference pictures */
		155
		156	/* wavelet decoding */
		157	unsigned wavelet_depth; /* depth of the IDWT */
		158	unsigned wavelet_idx;
		159
		160	/**
		161	* schroedinger older than 1.0.8 doesn't store
		162	* quant delta if only one codebook exists in a band
		163	*/
		164	unsigned old_delta_quant;
		165	unsigned codeblock_mode;
		166
		167	struct {
		168	unsigned width;
		169	unsigned height;
		170	} codeblock[MAX_DWT_LEVELS+1];
		171
		172	struct {
		173	unsigned num_x; /* number of horizontal slices */
		174	unsigned num_y; /* number of vertical slices */
		175	AVRational bytes; /* average bytes per slice */
		176	uint8_t quant[MAX_DWT_LEVELS][4]; /* [DIRAC_STD] E.1 */
		177	} lowdelay;
		178
		179	struct {
		180	int pan_tilt[2]; /* pan/tilt vector */
		181	int zrs[2][2]; /* zoom/rotate/shear matrix */
		182	int perspective[2]; /* perspective vector */
		183	unsigned zrs_exp;
		184	unsigned perspective_exp;
		185	} globalmc[2];
		186
		187	/* motion compensation */
		188	uint8_t mv_precision; /* [DIRAC_STD] REFS_WT_PRECISION */
		189	int16_t weight[2]; /* [DIRAC_STD] REF1_WT and REF2_WT */
		190	unsigned weight_log2denom; /* [DIRAC_STD] REFS_WT_PRECISION */
		191
		192	int blwidth; /* number of blocks (horizontally) */
		193	int blheight; /* number of blocks (vertically) */
		194	int sbwidth; /* number of superblocks (horizontally) */
		195	int sbheight; /* number of superblocks (vertically) */
		196
		197	uint8_t *sbsplit;
		198	DiracBlock *blmotion;
		199
		200	uint8_t *edge_emu_buffer[4];
		201	uint8_t *edge_emu_buffer_base;
		202
		203	uint16_t mctmp; / buffer holding the MC data multiplied by OBMC weights */
		204	uint8_t *mcscratch;
		205	int buffer_stride;
		206
		207	DECLARE_ALIGNED(16, uint8_t, obmc_weight)[3][MAX_BLOCKSIZE*MAX_BLOCKSIZE];
		208
		209	void (put_pixels_tab[4])(uint8_t dst, const uint8_t *src[5], int stride, int h);
		210	void (avg_pixels_tab[4])(uint8_t dst, const uint8_t *src[5], int stride, int h);
		211	void (add_obmc)(uint16_t dst, const uint8_t src, int stride, const uint8_t obmc_weight, int yblen);
		212	dirac_weight_func weight_func;
		213	dirac_biweight_func biweight_func;
		214
		215	DiracFrame *current_picture;
		216	DiracFrame *ref_pics[2];
		217
		218	DiracFrame *ref_frames[MAX_REFERENCE_FRAMES+1];
		219	DiracFrame *delay_frames[MAX_DELAY+1];
		220	DiracFrame all_frames[MAX_FRAMES];
		221	} DiracContext;
		222
		223	/**
		224	* Dirac Specification ->
		225	* Parse code values. 9.6.1 Table 9.1
		226	*/
		227	enum dirac_parse_code {
		228	pc_seq_header = 0x00,
		229	pc_eos = 0x10,
		230	pc_aux_data = 0x20,
		231	pc_padding = 0x30,
		232	};
		233
		234	enum dirac_subband {
		235	subband_ll = 0,
		236	subband_hl = 1,
		237	subband_lh = 2,
		238	subband_hh = 3,
		239	subband_nb,
		240	};
		241
		242	static const uint8_t default_qmat[][4][4] = {
		243	{ { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
		244	{ { 4, 2, 2, 0}, { 0, 4, 4, 2}, { 0, 5, 5, 3}, { 0, 7, 7, 5} },
		245	{ { 5, 3, 3, 0}, { 0, 4, 4, 1}, { 0, 5, 5, 2}, { 0, 6, 6, 3} },
		246	{ { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
		247	{ { 8, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0}, { 0, 4, 4, 0} },
		248	{ { 0, 4, 4, 8}, { 0, 8, 8, 12}, { 0, 13, 13, 17}, { 0, 17, 17, 21} },
		249	{ { 3, 1, 1, 0}, { 0, 4, 4, 2}, { 0, 6, 6, 5}, { 0, 9, 9, 7} },
		250	};
		251
		252	static const int qscale_tab[MAX_QUANT+1] = {
		253	4, 5, 6, 7, 8, 10, 11, 13,
		254	16, 19, 23, 27, 32, 38, 45, 54,
		255	64, 76, 91, 108, 128, 152, 181, 215,
		256	256, 304, 362, 431, 512, 609, 724, 861,
		257	1024, 1218, 1448, 1722, 2048, 2435, 2896, 3444,
		258	4096, 4871, 5793, 6889, 8192, 9742, 11585, 13777,
		259	16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109,
		260	65536, 77936
		261	};
		262
		263	static const int qoffset_intra_tab[MAX_QUANT+1] = {
		264	1, 2, 3, 4, 4, 5, 6, 7,
		265	8, 10, 12, 14, 16, 19, 23, 27,
		266	32, 38, 46, 54, 64, 76, 91, 108,
		267	128, 152, 181, 216, 256, 305, 362, 431,
		268	512, 609, 724, 861, 1024, 1218, 1448, 1722,
		269	2048, 2436, 2897, 3445, 4096, 4871, 5793, 6889,
		270	8192, 9742, 11585, 13777, 16384, 19484, 23171, 27555,
		271	32768, 38968
		272	};
		273
		274	static const int qoffset_inter_tab[MAX_QUANT+1] = {
		275	1, 2, 2, 3, 3, 4, 4, 5,
		276	6, 7, 9, 10, 12, 14, 17, 20,
		277	24, 29, 34, 41, 48, 57, 68, 81,
		278	96, 114, 136, 162, 192, 228, 272, 323,
		279	384, 457, 543, 646, 768, 913, 1086, 1292,
		280	1536, 1827, 2172, 2583, 3072, 3653, 4344, 5166,
		281	6144, 7307, 8689, 10333, 12288, 14613, 17378, 20666,
		282	24576, 29226
		283	};
		284
		285	/* magic number division by 3 from schroedinger */
		286	static inline int divide3(int x)
		287	{
		288	return ((x+1)*21845 + 10922) >> 16;
		289	}
		290
		291	static DiracFrame remove_frame(DiracFrame framelist[], int picnum)
		292	{
		293	DiracFrame *remove_pic = NULL;
		294	int i, remove_idx = -1;
		295
		296	for (i = 0; framelist[i]; i++)
		297	if (framelist[i]->avframe->display_picture_number == picnum) {
		298	remove_pic = framelist[i];
		299	remove_idx = i;
		300	}
		301
		302	if (remove_pic)
		303	for (i = remove_idx; framelist[i]; i++)
		304	framelist[i] = framelist[i+1];
		305
		306	return remove_pic;
		307	}
		308
		309	static int add_frame(DiracFrame framelist[], int maxframes, DiracFrame frame)
		310	{
		311	int i;
		312	for (i = 0; i < maxframes; i++)
		313	if (!framelist[i]) {
		314	framelist[i] = frame;
		315	return 0;
		316	}
		317	return -1;
		318	}
		319
		320	static int alloc_sequence_buffers(DiracContext *s)
		321	{
		322	int sbwidth = DIVRNDUP(s->source.width, 4);
		323	int sbheight = DIVRNDUP(s->source.height, 4);
		324	int i, w, h, top_padding;
		325
		326	/* todo: think more about this / use or set Plane here */
		327	for (i = 0; i < 3; i++) {
		328	int max_xblen = MAX_BLOCKSIZE >> (i ? s->chroma_x_shift : 0);
		329	int max_yblen = MAX_BLOCKSIZE >> (i ? s->chroma_y_shift : 0);
		330	w = s->source.width >> (i ? s->chroma_x_shift : 0);
		331	h = s->source.height >> (i ? s->chroma_y_shift : 0);
		332
		333	/* we allocate the max we support here since num decompositions can
		334	* change from frame to frame. Stride is aligned to 16 for SIMD, and
		335	* 1<0) in arith decoding
		336	* MAX_BLOCKSIZE padding for MC: blocks can spill up to half of that
		337	* on each side */
		338	top_padding = FFMAX(1<
		339	w = FFALIGN(CALC_PADDING(w, MAX_DWT_LEVELS), 8); /* FIXME: Should this be 16 for SSE??? */
		340	h = top_padding + CALC_PADDING(h, MAX_DWT_LEVELS) + max_yblen/2;
		341
		342	s->plane[i].idwt_buf_base = av_mallocz_array((w+max_xblen), h * sizeof(IDWTELEM));
		343	s->plane[i].idwt_tmp = av_malloc_array((w+16), sizeof(IDWTELEM));
		344	s->plane[i].idwt_buf = s->plane[i].idwt_buf_base + top_padding*w;
		345	if (!s->plane[i].idwt_buf_base \|\| !s->plane[i].idwt_tmp)
		346	return AVERROR(ENOMEM);
		347	}
		348
		349	/* fixme: allocate using real stride here */
		350	s->sbsplit = av_malloc_array(sbwidth, sbheight);
		351	s->blmotion = av_malloc_array(sbwidth, sbheight * 16 * sizeof(*s->blmotion));
		352
		353	if (!s->sbsplit \|\| !s->blmotion)
		354	return AVERROR(ENOMEM);
		355	return 0;
		356	}
		357
		358	static int alloc_buffers(DiracContext *s, int stride)
		359	{
		360	int w = s->source.width;
		361	int h = s->source.height;
		362
		363	av_assert0(stride >= w);
		364	stride += 64;
		365
		366	if (s->buffer_stride >= stride)
		367	return 0;
		368	s->buffer_stride = 0;
		369
		370	av_freep(&s->edge_emu_buffer_base);
		371	memset(s->edge_emu_buffer, 0, sizeof(s->edge_emu_buffer));
		372	av_freep(&s->mctmp);
		373	av_freep(&s->mcscratch);
		374
		375	s->edge_emu_buffer_base = av_malloc_array(stride, MAX_BLOCKSIZE);
		376
		377	s->mctmp = av_malloc_array((stride+MAX_BLOCKSIZE), (h+MAX_BLOCKSIZE) * sizeof(*s->mctmp));
		378	s->mcscratch = av_malloc_array(stride, MAX_BLOCKSIZE);
		379
		380	if (!s->edge_emu_buffer_base \|\| !s->mctmp \|\| !s->mcscratch)
		381	return AVERROR(ENOMEM);
		382
		383	s->buffer_stride = stride;
		384	return 0;
		385	}
		386
		387	static void free_sequence_buffers(DiracContext *s)
		388	{
		389	int i, j, k;
		390
		391	for (i = 0; i < MAX_FRAMES; i++) {
		392	if (s->all_frames[i].avframe->data[0]) {
		393	av_frame_unref(s->all_frames[i].avframe);
		394	memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
		395	}
		396
		397	for (j = 0; j < 3; j++)
		398	for (k = 1; k < 4; k++)
		399	av_freep(&s->all_frames[i].hpel_base[j][k]);
		400	}
		401
		402	memset(s->ref_frames, 0, sizeof(s->ref_frames));
		403	memset(s->delay_frames, 0, sizeof(s->delay_frames));
		404
		405	for (i = 0; i < 3; i++) {
		406	av_freep(&s->plane[i].idwt_buf_base);
		407	av_freep(&s->plane[i].idwt_tmp);
		408	}
		409
		410	s->buffer_stride = 0;
		411	av_freep(&s->sbsplit);
		412	av_freep(&s->blmotion);
		413	av_freep(&s->edge_emu_buffer_base);
		414
		415	av_freep(&s->mctmp);
		416	av_freep(&s->mcscratch);
		417	}
		418
		419	static av_cold int dirac_decode_init(AVCodecContext *avctx)
		420	{
		421	DiracContext *s = avctx->priv_data;
		422	int i;
		423
		424	s->avctx = avctx;
		425	s->frame_number = -1;
		426
		427	ff_diracdsp_init(&s->diracdsp);
		428	ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
		429	ff_videodsp_init(&s->vdsp, 8);
		430
		431	for (i = 0; i < MAX_FRAMES; i++) {
		432	s->all_frames[i].avframe = av_frame_alloc();
		433	if (!s->all_frames[i].avframe) {
		434	while (i > 0)
		435	av_frame_free(&s->all_frames[--i].avframe);
		436	return AVERROR(ENOMEM);
		437	}
		438	}
		439
		440	return 0;
		441	}
		442
		443	static void dirac_decode_flush(AVCodecContext *avctx)
		444	{
		445	DiracContext *s = avctx->priv_data;
		446	free_sequence_buffers(s);
		447	s->seen_sequence_header = 0;
		448	s->frame_number = -1;
		449	}
		450
		451	static av_cold int dirac_decode_end(AVCodecContext *avctx)
		452	{
		453	DiracContext *s = avctx->priv_data;
		454	int i;
		455
		456	dirac_decode_flush(avctx);
		457	for (i = 0; i < MAX_FRAMES; i++)
		458	av_frame_free(&s->all_frames[i].avframe);
		459
		460	return 0;
		461	}
		462
		463	#define SIGN_CTX(x) (CTX_SIGN_ZERO + ((x) > 0) - ((x) < 0))
		464
		465	static inline void coeff_unpack_arith(DiracArith *c, int qfactor, int qoffset,
		466	SubBand b, IDWTELEM buf, int x, int y)
		467	{
		468	int coeff, sign;
		469	int sign_pred = 0;
		470	int pred_ctx = CTX_ZPZN_F1;
		471
		472	/* Check if the parent subband has a 0 in the corresponding position */
		473	if (b->parent)
		474	pred_ctx += !!b->parent->ibuf[b->parent->stride * (y>>1) + (x>>1)] << 1;
		475
		476	if (b->orientation == subband_hl)
		477	sign_pred = buf[-b->stride];
		478
		479	/* Determine if the pixel has only zeros in its neighbourhood */
		480	if (x) {
		481	pred_ctx += !(buf[-1] \| buf[-b->stride] \| buf[-1-b->stride]);
		482	if (b->orientation == subband_lh)
		483	sign_pred = buf[-1];
		484	} else {
		485	pred_ctx += !buf[-b->stride];
		486	}
		487
		488	coeff = dirac_get_arith_uint(c, pred_ctx, CTX_COEFF_DATA);
		489	if (coeff) {
		490	coeff = (coeff * qfactor + qoffset + 2) >> 2;
		491	sign = dirac_get_arith_bit(c, SIGN_CTX(sign_pred));
		492	coeff = (coeff ^ -sign) + sign;
		493	}
		494	*buf = coeff;
		495	}
		496
		497	static inline int coeff_unpack_golomb(GetBitContext *gb, int qfactor, int qoffset)
		498	{
		499	int sign, coeff;
		500
		501	coeff = svq3_get_ue_golomb(gb);
		502	if (coeff) {
		503	coeff = (coeff * qfactor + qoffset + 2) >> 2;
		504	sign = get_bits1(gb);
		505	coeff = (coeff ^ -sign) + sign;
		506	}
		507	return coeff;
		508	}
		509
		510	/**
		511	* Decode the coeffs in the rectangle defined by left, right, top, bottom
		512	* [DIRAC_STD] 13.4.3.2 Codeblock unpacking loop. codeblock()
		513	*/
		514	static inline void codeblock(DiracContext s, SubBand b,
		515	GetBitContext gb, DiracArith c,
		516	int left, int right, int top, int bottom,
		517	int blockcnt_one, int is_arith)
		518	{
		519	int x, y, zero_block;
		520	int qoffset, qfactor;
		521	IDWTELEM *buf;
		522
		523	/* check for any coded coefficients in this codeblock */
		524	if (!blockcnt_one) {
		525	if (is_arith)
		526	zero_block = dirac_get_arith_bit(c, CTX_ZERO_BLOCK);
		527	else
		528	zero_block = get_bits1(gb);
		529
		530	if (zero_block)
		531	return;
		532	}
		533
		534	if (s->codeblock_mode && !(s->old_delta_quant && blockcnt_one)) {
		535	int quant = b->quant;
		536	if (is_arith)
		537	quant += dirac_get_arith_int(c, CTX_DELTA_Q_F, CTX_DELTA_Q_DATA);
		538	else
		539	quant += dirac_get_se_golomb(gb);
		540	if (quant < 0) {
		541	av_log(s->avctx, AV_LOG_ERROR, "Invalid quant\n");
		542	return;
		543	}
		544	b->quant = quant;
		545	}
		546
		547	b->quant = FFMIN(b->quant, MAX_QUANT);
		548
		549	qfactor = qscale_tab[b->quant];
		550	/* TODO: context pointer? */
		551	if (!s->num_refs)
		552	qoffset = qoffset_intra_tab[b->quant];
		553	else
		554	qoffset = qoffset_inter_tab[b->quant];
		555
		556	buf = b->ibuf + top * b->stride;
		557	for (y = top; y < bottom; y++) {
		558	for (x = left; x < right; x++) {
		559	/* [DIRAC_STD] 13.4.4 Subband coefficients. coeff_unpack() */
		560	if (is_arith)
		561	coeff_unpack_arith(c, qfactor, qoffset, b, buf+x, x, y);
		562	else
		563	buf[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
		564	}
		565	buf += b->stride;
		566	}
		567	}
		568
		569	/**
		570	* Dirac Specification ->
		571	* 13.3 intra_dc_prediction(band)
		572	*/
		573	static inline void intra_dc_prediction(SubBand *b)
		574	{
		575	IDWTELEM *buf = b->ibuf;
		576	int x, y;
		577
		578	for (x = 1; x < b->width; x++)
		579	buf[x] += buf[x-1];
		580	buf += b->stride;
		581
		582	for (y = 1; y < b->height; y++) {
		583	buf[0] += buf[-b->stride];
		584
		585	for (x = 1; x < b->width; x++) {
		586	int pred = buf[x - 1] + buf[x - b->stride] + buf[x - b->stride-1];
		587	buf[x] += divide3(pred);
		588	}
		589	buf += b->stride;
		590	}
		591	}
		592
		593	/**
		594	* Dirac Specification ->
		595	* 13.4.2 Non-skipped subbands. subband_coeffs()
		596	*/
		597	static av_always_inline void decode_subband_internal(DiracContext s, SubBand b, int is_arith)
		598	{
		599	int cb_x, cb_y, left, right, top, bottom;
		600	DiracArith c;
		601	GetBitContext gb;
		602	int cb_width = s->codeblock[b->level + (b->orientation != subband_ll)].width;
		603	int cb_height = s->codeblock[b->level + (b->orientation != subband_ll)].height;
		604	int blockcnt_one = (cb_width + cb_height) == 2;
		605
		606	if (!b->length)
		607	return;
		608
		609	init_get_bits8(&gb, b->coeff_data, b->length);
		610
		611	if (is_arith)
		612	ff_dirac_init_arith_decoder(&c, &gb, b->length);
		613
		614	top = 0;
		615	for (cb_y = 0; cb_y < cb_height; cb_y++) {
		616	bottom = (b->height * (cb_y+1LL)) / cb_height;
		617	left = 0;
		618	for (cb_x = 0; cb_x < cb_width; cb_x++) {
		619	right = (b->width * (cb_x+1LL)) / cb_width;
		620	codeblock(s, b, &gb, &c, left, right, top, bottom, blockcnt_one, is_arith);
		621	left = right;
		622	}
		623	top = bottom;
		624	}
		625
		626	if (b->orientation == subband_ll && s->num_refs == 0)
		627	intra_dc_prediction(b);
		628	}
		629
		630	static int decode_subband_arith(AVCodecContext avctx, void b)
		631	{
		632	DiracContext *s = avctx->priv_data;
		633	decode_subband_internal(s, b, 1);
		634	return 0;
		635	}
		636
		637	static int decode_subband_golomb(AVCodecContext avctx, void arg)
		638	{
		639	DiracContext *s = avctx->priv_data;
		640	SubBand **b = arg;
		641	decode_subband_internal(s, *b, 0);
		642	return 0;
		643	}
		644
		645	/**
		646	* Dirac Specification ->
		647	* [DIRAC_STD] 13.4.1 core_transform_data()
		648	*/
		649	static void decode_component(DiracContext *s, int comp)
		650	{
		651	AVCodecContext *avctx = s->avctx;
		652	SubBand bands[3MAX_DWT_LEVELS+1];
		653	enum dirac_subband orientation;
		654	int level, num_bands = 0;
		655
		656	/* Unpack all subbands at all levels. */
		657	for (level = 0; level < s->wavelet_depth; level++) {
		658	for (orientation = !!level; orientation < 4; orientation++) {
		659	SubBand *b = &s->plane[comp].band[level][orientation];
		660	bands[num_bands++] = b;
		661
		662	align_get_bits(&s->gb);
		663	/* [DIRAC_STD] 13.4.2 subband() */
		664	b->length = svq3_get_ue_golomb(&s->gb);
		665	if (b->length) {
		666	b->quant = svq3_get_ue_golomb(&s->gb);
		667	align_get_bits(&s->gb);
		668	b->coeff_data = s->gb.buffer + get_bits_count(&s->gb)/8;
		669	b->length = FFMIN(b->length, FFMAX(get_bits_left(&s->gb)/8, 0));
		670	skip_bits_long(&s->gb, b->length*8);
		671	}
		672	}
		673	/* arithmetic coding has inter-level dependencies, so we can only execute one level at a time */
		674	if (s->is_arith)
		675	avctx->execute(avctx, decode_subband_arith, &s->plane[comp].band[level][!!level],
		676	NULL, 4-!!level, sizeof(SubBand));
		677	}
		678	/* golomb coding has no inter-level dependencies, so we can execute all subbands in parallel */
		679	if (!s->is_arith)
		680	avctx->execute(avctx, decode_subband_golomb, bands, NULL, num_bands, sizeof(SubBand*));
		681	}
		682
		683	/* [DIRAC_STD] 13.5.5.2 Luma slice subband data. luma_slice_band(level,orient,sx,sy) --> if b2 == NULL */
		684	/* [DIRAC_STD] 13.5.5.3 Chroma slice subband data. chroma_slice_band(level,orient,sx,sy) --> if b2 != NULL */
		685	static void lowdelay_subband(DiracContext s, GetBitContext gb, int quant,
		686	int slice_x, int slice_y, int bits_end,
		687	SubBand b1, SubBand b2)
		688	{
		689	int left = b1->width * slice_x / s->lowdelay.num_x;
		690	int right = b1->width *(slice_x+1) / s->lowdelay.num_x;
		691	int top = b1->height * slice_y / s->lowdelay.num_y;
		692	int bottom = b1->height *(slice_y+1) / s->lowdelay.num_y;
		693
		694	int qfactor = qscale_tab[FFMIN(quant, MAX_QUANT)];
		695	int qoffset = qoffset_intra_tab[FFMIN(quant, MAX_QUANT)];
		696
		697	IDWTELEM buf1 = b1->ibuf + top b1->stride;
		698	IDWTELEM buf2 = b2 ? b2->ibuf + top b2->stride : NULL;
		699	int x, y;
		700	/* we have to constantly check for overread since the spec explicitly
		701	requires this, with the meaning that all remaining coeffs are set to 0 */
		702	if (get_bits_count(gb) >= bits_end)
		703	return;
		704
		705	for (y = top; y < bottom; y++) {
		706	for (x = left; x < right; x++) {
		707	buf1[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
		708	if (get_bits_count(gb) >= bits_end)
		709	return;
		710	if (buf2) {
		711	buf2[x] = coeff_unpack_golomb(gb, qfactor, qoffset);
		712	if (get_bits_count(gb) >= bits_end)
		713	return;
		714	}
		715	}
		716	buf1 += b1->stride;
		717	if (buf2)
		718	buf2 += b2->stride;
		719	}
		720	}
		721
		722	struct lowdelay_slice {
		723	GetBitContext gb;
		724	int slice_x;
		725	int slice_y;
		726	int bytes;
		727	};
		728
		729
		730	/**
		731	* Dirac Specification ->
		732	* 13.5.2 Slices. slice(sx,sy)
		733	*/
		734	static int decode_lowdelay_slice(AVCodecContext avctx, void arg)
		735	{
		736	DiracContext *s = avctx->priv_data;
		737	struct lowdelay_slice *slice = arg;
		738	GetBitContext *gb = &slice->gb;
		739	enum dirac_subband orientation;
		740	int level, quant, chroma_bits, chroma_end;
		741
		742	int quant_base = get_bits(gb, 7); /[DIRAC_STD] qindex /
		743	int length_bits = av_log2(8 * slice->bytes)+1;
		744	int luma_bits = get_bits_long(gb, length_bits);
		745	int luma_end = get_bits_count(gb) + FFMIN(luma_bits, get_bits_left(gb));
		746
		747	/* [DIRAC_STD] 13.5.5.2 luma_slice_band */
		748	for (level = 0; level < s->wavelet_depth; level++)
		749	for (orientation = !!level; orientation < 4; orientation++) {
		750	quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
		751	lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, luma_end,
		752	&s->plane[0].band[level][orientation], NULL);
		753	}
		754
		755	/* consume any unused bits from luma */
		756	skip_bits_long(gb, get_bits_count(gb) - luma_end);
		757
		758	chroma_bits = 8*slice->bytes - 7 - length_bits - luma_bits;
		759	chroma_end = get_bits_count(gb) + FFMIN(chroma_bits, get_bits_left(gb));
		760	/* [DIRAC_STD] 13.5.5.3 chroma_slice_band */
		761	for (level = 0; level < s->wavelet_depth; level++)
		762	for (orientation = !!level; orientation < 4; orientation++) {
		763	quant = FFMAX(quant_base - s->lowdelay.quant[level][orientation], 0);
		764	lowdelay_subband(s, gb, quant, slice->slice_x, slice->slice_y, chroma_end,
		765	&s->plane[1].band[level][orientation],
		766	&s->plane[2].band[level][orientation]);
		767	}
		768
		769	return 0;
		770	}
		771
		772	/**
		773	* Dirac Specification ->
		774	* 13.5.1 low_delay_transform_data()
		775	*/
		776	static int decode_lowdelay(DiracContext *s)
		777	{
		778	AVCodecContext *avctx = s->avctx;
		779	int slice_x, slice_y, bytes, bufsize;
		780	const uint8_t *buf;
		781	struct lowdelay_slice *slices;
		782	int slice_num = 0;
		783
		784	slices = av_mallocz_array(s->lowdelay.num_x, s->lowdelay.num_y * sizeof(struct lowdelay_slice));
		785	if (!slices)
		786	return AVERROR(ENOMEM);
		787
		788	align_get_bits(&s->gb);
		789	/[DIRAC_STD] 13.5.2 Slices. slice(sx,sy) /
		790	buf = s->gb.buffer + get_bits_count(&s->gb)/8;
		791	bufsize = get_bits_left(&s->gb);
		792
		793	for (slice_y = 0; bufsize > 0 && slice_y < s->lowdelay.num_y; slice_y++)
		794	for (slice_x = 0; bufsize > 0 && slice_x < s->lowdelay.num_x; slice_x++) {
		795	bytes = (slice_num+1) * s->lowdelay.bytes.num / s->lowdelay.bytes.den
		796	- slice_num * s->lowdelay.bytes.num / s->lowdelay.bytes.den;
		797
		798	slices[slice_num].bytes = bytes;
		799	slices[slice_num].slice_x = slice_x;
		800	slices[slice_num].slice_y = slice_y;
		801	init_get_bits(&slices[slice_num].gb, buf, bufsize);
		802	slice_num++;
		803
		804	buf += bytes;
		805	if (bufsize/8 >= bytes)
		806	bufsize -= bytes*8;
		807	else
		808	bufsize = 0;
		809	}
		810
		811	avctx->execute(avctx, decode_lowdelay_slice, slices, NULL, slice_num,
		812	sizeof(struct lowdelay_slice)); /* [DIRAC_STD] 13.5.2 Slices */
		813	intra_dc_prediction(&s->plane[0].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
		814	intra_dc_prediction(&s->plane[1].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
		815	intra_dc_prediction(&s->plane[2].band[0][0]); /* [DIRAC_STD] 13.3 intra_dc_prediction() */
		816	av_free(slices);
		817	return 0;
		818	}
		819
		820	static void init_planes(DiracContext *s)
		821	{
		822	int i, w, h, level, orientation;
		823
		824	for (i = 0; i < 3; i++) {
		825	Plane *p = &s->plane[i];
		826
		827	p->width = s->source.width >> (i ? s->chroma_x_shift : 0);
		828	p->height = s->source.height >> (i ? s->chroma_y_shift : 0);
		829	p->idwt_width = w = CALC_PADDING(p->width , s->wavelet_depth);
		830	p->idwt_height = h = CALC_PADDING(p->height, s->wavelet_depth);
		831	p->idwt_stride = FFALIGN(p->idwt_width, 8);
		832
		833	for (level = s->wavelet_depth-1; level >= 0; level--) {
		834	w = w>>1;
		835	h = h>>1;
		836	for (orientation = !!level; orientation < 4; orientation++) {
		837	SubBand *b = &p->band[level][orientation];
		838
		839	b->ibuf = p->idwt_buf;
		840	b->level = level;
		841	b->stride = p->idwt_stride << (s->wavelet_depth - level);
		842	b->width = w;
		843	b->height = h;
		844	b->orientation = orientation;
		845
		846	if (orientation & 1)
		847	b->ibuf += w;
		848	if (orientation > 1)
		849	b->ibuf += b->stride>>1;
		850
		851	if (level)
		852	b->parent = &p->band[level-1][orientation];
		853	}
		854	}
		855
		856	if (i > 0) {
		857	p->xblen = s->plane[0].xblen >> s->chroma_x_shift;
		858	p->yblen = s->plane[0].yblen >> s->chroma_y_shift;
		859	p->xbsep = s->plane[0].xbsep >> s->chroma_x_shift;
		860	p->ybsep = s->plane[0].ybsep >> s->chroma_y_shift;
		861	}
		862
		863	p->xoffset = (p->xblen - p->xbsep)/2;
		864	p->yoffset = (p->yblen - p->ybsep)/2;
		865	}
		866	}
		867
		868	/**
		869	* Unpack the motion compensation parameters
		870	* Dirac Specification ->
		871	* 11.2 Picture prediction data. picture_prediction()
		872	*/
		873	static int dirac_unpack_prediction_parameters(DiracContext *s)
		874	{
		875	static const uint8_t default_blen[] = { 4, 12, 16, 24 };
		876
		877	GetBitContext *gb = &s->gb;
		878	unsigned idx, ref;
		879
		880	align_get_bits(gb);
		881	/* [DIRAC_STD] 11.2.2 Block parameters. block_parameters() */
		882	/* Luma and Chroma are equal. 11.2.3 */
		883	idx = svq3_get_ue_golomb(gb); /* [DIRAC_STD] index */
		884
		885	if (idx > 4) {
		886	av_log(s->avctx, AV_LOG_ERROR, "Block prediction index too high\n");
		887	return AVERROR_INVALIDDATA;
		888	}
		889
		890	if (idx == 0) {
		891	s->plane[0].xblen = svq3_get_ue_golomb(gb);
		892	s->plane[0].yblen = svq3_get_ue_golomb(gb);
		893	s->plane[0].xbsep = svq3_get_ue_golomb(gb);
		894	s->plane[0].ybsep = svq3_get_ue_golomb(gb);
		895	} else {
		896	/[DIRAC_STD] preset_block_params(index). Table 11.1 /
		897	s->plane[0].xblen = default_blen[idx-1];
		898	s->plane[0].yblen = default_blen[idx-1];
		899	s->plane[0].xbsep = 4 * idx;
		900	s->plane[0].ybsep = 4 * idx;
		901	}
		902	/*[DIRAC_STD] 11.2.4 motion_data_dimensions()
		903	Calculated in function dirac_unpack_block_motion_data */
		904
		905	if (s->plane[0].xblen % (1 << s->chroma_x_shift) != 0 \|\|
		906	s->plane[0].yblen % (1 << s->chroma_y_shift) != 0 \|\|
		907	!s->plane[0].xblen \|\| !s->plane[0].yblen) {
		908	av_log(s->avctx, AV_LOG_ERROR,
		909	"invalid x/y block length (%d/%d) for x/y chroma shift (%d/%d)\n",
		910	s->plane[0].xblen, s->plane[0].yblen, s->chroma_x_shift, s->chroma_y_shift);
		911	return AVERROR_INVALIDDATA;
		912	}
		913	if (!s->plane[0].xbsep \|\| !s->plane[0].ybsep \|\| s->plane[0].xbsep < s->plane[0].xblen/2 \|\| s->plane[0].ybsep < s->plane[0].yblen/2) {
		914	av_log(s->avctx, AV_LOG_ERROR, "Block separation too small\n");
		915	return AVERROR_INVALIDDATA;
		916	}
		917	if (s->plane[0].xbsep > s->plane[0].xblen \|\| s->plane[0].ybsep > s->plane[0].yblen) {
		918	av_log(s->avctx, AV_LOG_ERROR, "Block separation greater than size\n");
		919	return AVERROR_INVALIDDATA;
		920	}
		921	if (FFMAX(s->plane[0].xblen, s->plane[0].yblen) > MAX_BLOCKSIZE) {
		922	av_log(s->avctx, AV_LOG_ERROR, "Unsupported large block size\n");
		923	return AVERROR_PATCHWELCOME;
		924	}
		925
		926	/*[DIRAC_STD] 11.2.5 Motion vector precision. motion_vector_precision()
		927	Read motion vector precision */
		928	s->mv_precision = svq3_get_ue_golomb(gb);
		929	if (s->mv_precision > 3) {
		930	av_log(s->avctx, AV_LOG_ERROR, "MV precision finer than eighth-pel\n");
		931	return AVERROR_INVALIDDATA;
		932	}
		933
		934	/*[DIRAC_STD] 11.2.6 Global motion. global_motion()
		935	Read the global motion compensation parameters */
		936	s->globalmc_flag = get_bits1(gb);
		937	if (s->globalmc_flag) {
		938	memset(s->globalmc, 0, sizeof(s->globalmc));
		939	/* [DIRAC_STD] pan_tilt(gparams) */
		940	for (ref = 0; ref < s->num_refs; ref++) {
		941	if (get_bits1(gb)) {
		942	s->globalmc[ref].pan_tilt[0] = dirac_get_se_golomb(gb);
		943	s->globalmc[ref].pan_tilt[1] = dirac_get_se_golomb(gb);
		944	}
		945	/* [DIRAC_STD] zoom_rotate_shear(gparams)
		946	zoom/rotation/shear parameters */
		947	if (get_bits1(gb)) {
		948	s->globalmc[ref].zrs_exp = svq3_get_ue_golomb(gb);
		949	s->globalmc[ref].zrs[0][0] = dirac_get_se_golomb(gb);
		950	s->globalmc[ref].zrs[0][1] = dirac_get_se_golomb(gb);
		951	s->globalmc[ref].zrs[1][0] = dirac_get_se_golomb(gb);
		952	s->globalmc[ref].zrs[1][1] = dirac_get_se_golomb(gb);
		953	} else {
		954	s->globalmc[ref].zrs[0][0] = 1;
		955	s->globalmc[ref].zrs[1][1] = 1;
		956	}
		957	/* [DIRAC_STD] perspective(gparams) */
		958	if (get_bits1(gb)) {
		959	s->globalmc[ref].perspective_exp = svq3_get_ue_golomb(gb);
		960	s->globalmc[ref].perspective[0] = dirac_get_se_golomb(gb);
		961	s->globalmc[ref].perspective[1] = dirac_get_se_golomb(gb);
		962	}
		963	}
		964	}
		965
		966	/*[DIRAC_STD] 11.2.7 Picture prediction mode. prediction_mode()
		967	Picture prediction mode, not currently used. */
		968	if (svq3_get_ue_golomb(gb)) {
		969	av_log(s->avctx, AV_LOG_ERROR, "Unknown picture prediction mode\n");
		970	return AVERROR_INVALIDDATA;
		971	}
		972
		973	/* [DIRAC_STD] 11.2.8 Reference picture weight. reference_picture_weights()
		974	just data read, weight calculation will be done later on. */
		975	s->weight_log2denom = 1;
		976	s->weight[0] = 1;
		977	s->weight[1] = 1;
		978
		979	if (get_bits1(gb)) {
		980	s->weight_log2denom = svq3_get_ue_golomb(gb);
		981	s->weight[0] = dirac_get_se_golomb(gb);
		982	if (s->num_refs == 2)
		983	s->weight[1] = dirac_get_se_golomb(gb);
		984	}
		985	return 0;
		986	}
		987
		988	/**
		989	* Dirac Specification ->
		990	* 11.3 Wavelet transform data. wavelet_transform()
		991	*/
		992	static int dirac_unpack_idwt_params(DiracContext *s)
		993	{
		994	GetBitContext *gb = &s->gb;
		995	int i, level;
		996	unsigned tmp;
		997
		998	#define CHECKEDREAD(dst, cond, errmsg) \
		999	tmp = svq3_get_ue_golomb(gb); \
		1000	if (cond) { \
		1001	av_log(s->avctx, AV_LOG_ERROR, errmsg); \
		1002	return AVERROR_INVALIDDATA; \
		1003	}\
		1004	dst = tmp;
		1005
		1006	align_get_bits(gb);
		1007
		1008	s->zero_res = s->num_refs ? get_bits1(gb) : 0;
		1009	if (s->zero_res)
		1010	return 0;
		1011
		1012	/[DIRAC_STD] 11.3.1 Transform parameters. transform_parameters() /
		1013	CHECKEDREAD(s->wavelet_idx, tmp > 6, "wavelet_idx is too big\n")
		1014
		1015	CHECKEDREAD(s->wavelet_depth, tmp > MAX_DWT_LEVELS \|\| tmp < 1, "invalid number of DWT decompositions\n")
		1016
		1017	if (!s->low_delay) {
		1018	/* Codeblock parameters (core syntax only) */
		1019	if (get_bits1(gb)) {
		1020	for (i = 0; i <= s->wavelet_depth; i++) {
		1021	CHECKEDREAD(s->codeblock[i].width , tmp < 1 \|\| tmp > (s->avctx->width >>s->wavelet_depth-i), "codeblock width invalid\n")
		1022	CHECKEDREAD(s->codeblock[i].height, tmp < 1 \|\| tmp > (s->avctx->height>>s->wavelet_depth-i), "codeblock height invalid\n")
		1023	}
		1024
		1025	CHECKEDREAD(s->codeblock_mode, tmp > 1, "unknown codeblock mode\n")
		1026	} else
		1027	for (i = 0; i <= s->wavelet_depth; i++)
		1028	s->codeblock[i].width = s->codeblock[i].height = 1;
		1029	} else {
		1030	/* Slice parameters + quantization matrix*/
		1031	/[DIRAC_STD] 11.3.4 Slice coding Parameters (low delay syntax only). slice_parameters() /
		1032	s->lowdelay.num_x = svq3_get_ue_golomb(gb);
		1033	s->lowdelay.num_y = svq3_get_ue_golomb(gb);
		1034	s->lowdelay.bytes.num = svq3_get_ue_golomb(gb);
		1035	s->lowdelay.bytes.den = svq3_get_ue_golomb(gb);
		1036
		1037	if (s->lowdelay.bytes.den <= 0) {
		1038	av_log(s->avctx,AV_LOG_ERROR,"Invalid lowdelay.bytes.den\n");
		1039	return AVERROR_INVALIDDATA;
		1040	}
		1041
		1042	/* [DIRAC_STD] 11.3.5 Quantisation matrices (low-delay syntax). quant_matrix() */
		1043	if (get_bits1(gb)) {
		1044	av_log(s->avctx,AV_LOG_DEBUG,"Low Delay: Has Custom Quantization Matrix!\n");
		1045	/* custom quantization matrix */
		1046	s->lowdelay.quant[0][0] = svq3_get_ue_golomb(gb);
		1047	for (level = 0; level < s->wavelet_depth; level++) {
		1048	s->lowdelay.quant[level][1] = svq3_get_ue_golomb(gb);
		1049	s->lowdelay.quant[level][2] = svq3_get_ue_golomb(gb);
		1050	s->lowdelay.quant[level][3] = svq3_get_ue_golomb(gb);
		1051	}
		1052	} else {
		1053	if (s->wavelet_depth > 4) {
		1054	av_log(s->avctx,AV_LOG_ERROR,"Mandatory custom low delay matrix missing for depth %d\n", s->wavelet_depth);
		1055	return AVERROR_INVALIDDATA;
		1056	}
		1057	/* default quantization matrix */
		1058	for (level = 0; level < s->wavelet_depth; level++)
		1059	for (i = 0; i < 4; i++) {
		1060	s->lowdelay.quant[level][i] = default_qmat[s->wavelet_idx][level][i];
		1061	/* haar with no shift differs for different depths */
		1062	if (s->wavelet_idx == 3)
		1063	s->lowdelay.quant[level][i] += 4*(s->wavelet_depth-1 - level);
		1064	}
		1065	}
		1066	}
		1067	return 0;
		1068	}
		1069
		1070	static inline int pred_sbsplit(uint8_t *sbsplit, int stride, int x, int y)
		1071	{
		1072	static const uint8_t avgsplit[7] = { 0, 0, 1, 1, 1, 2, 2 };
		1073
		1074	if (!(x\|y))
		1075	return 0;
		1076	else if (!y)
		1077	return sbsplit[-1];
		1078	else if (!x)
		1079	return sbsplit[-stride];
		1080
		1081	return avgsplit[sbsplit[-1] + sbsplit[-stride] + sbsplit[-stride-1]];
		1082	}
		1083
		1084	static inline int pred_block_mode(DiracBlock *block, int stride, int x, int y, int refmask)
		1085	{
		1086	int pred;
		1087
		1088	if (!(x\|y))
		1089	return 0;
		1090	else if (!y)
		1091	return block[-1].ref & refmask;
		1092	else if (!x)
		1093	return block[-stride].ref & refmask;
		1094
		1095	/* return the majority */
		1096	pred = (block[-1].ref & refmask) + (block[-stride].ref & refmask) + (block[-stride-1].ref & refmask);
		1097	return (pred >> 1) & refmask;
		1098	}
		1099
		1100	static inline void pred_block_dc(DiracBlock *block, int stride, int x, int y)
		1101	{
		1102	int i, n = 0;
		1103
		1104	memset(block->u.dc, 0, sizeof(block->u.dc));
		1105
		1106	if (x && !(block[-1].ref & 3)) {
		1107	for (i = 0; i < 3; i++)
		1108	block->u.dc[i] += block[-1].u.dc[i];
		1109	n++;
		1110	}
		1111
		1112	if (y && !(block[-stride].ref & 3)) {
		1113	for (i = 0; i < 3; i++)
		1114	block->u.dc[i] += block[-stride].u.dc[i];
		1115	n++;
		1116	}
		1117
		1118	if (x && y && !(block[-1-stride].ref & 3)) {
		1119	for (i = 0; i < 3; i++)
		1120	block->u.dc[i] += block[-1-stride].u.dc[i];
		1121	n++;
		1122	}
		1123
		1124	if (n == 2) {
		1125	for (i = 0; i < 3; i++)
		1126	block->u.dc[i] = (block->u.dc[i]+1)>>1;
		1127	} else if (n == 3) {
		1128	for (i = 0; i < 3; i++)
		1129	block->u.dc[i] = divide3(block->u.dc[i]);
		1130	}
		1131	}
		1132
		1133	static inline void pred_mv(DiracBlock *block, int stride, int x, int y, int ref)
		1134	{
		1135	int16_t *pred[3];
		1136	int refmask = ref+1;
		1137	int mask = refmask \| DIRAC_REF_MASK_GLOBAL; /* exclude gmc blocks */
		1138	int n = 0;
		1139
		1140	if (x && (block[-1].ref & mask) == refmask)
		1141	pred[n++] = block[-1].u.mv[ref];
		1142
		1143	if (y && (block[-stride].ref & mask) == refmask)
		1144	pred[n++] = block[-stride].u.mv[ref];
		1145
		1146	if (x && y && (block[-stride-1].ref & mask) == refmask)
		1147	pred[n++] = block[-stride-1].u.mv[ref];
		1148
		1149	switch (n) {
		1150	case 0:
		1151	block->u.mv[ref][0] = 0;
		1152	block->u.mv[ref][1] = 0;
		1153	break;
		1154	case 1:
		1155	block->u.mv[ref][0] = pred[0][0];
		1156	block->u.mv[ref][1] = pred[0][1];
		1157	break;
		1158	case 2:
		1159	block->u.mv[ref][0] = (pred[0][0] + pred[1][0] + 1) >> 1;
		1160	block->u.mv[ref][1] = (pred[0][1] + pred[1][1] + 1) >> 1;
		1161	break;
		1162	case 3:
		1163	block->u.mv[ref][0] = mid_pred(pred[0][0], pred[1][0], pred[2][0]);
		1164	block->u.mv[ref][1] = mid_pred(pred[0][1], pred[1][1], pred[2][1]);
		1165	break;
		1166	}
		1167	}
		1168
		1169	static void global_mv(DiracContext s, DiracBlock block, int x, int y, int ref)
		1170	{
		1171	int ez = s->globalmc[ref].zrs_exp;
		1172	int ep = s->globalmc[ref].perspective_exp;
		1173	int (*A)[2] = s->globalmc[ref].zrs;
		1174	int *b = s->globalmc[ref].pan_tilt;
		1175	int *c = s->globalmc[ref].perspective;
		1176
		1177	int m = (1<
		1178	int mx = m * ((A[0][0] * x + A[0][1]*y) + (1<
		1179	int my = m * ((A[1][0] * x + A[1][1]*y) + (1<
		1180
		1181	block->u.mv[ref][0] = (mx + (1<<(ez+ep))) >> (ez+ep);
		1182	block->u.mv[ref][1] = (my + (1<<(ez+ep))) >> (ez+ep);
		1183	}
		1184
		1185	static void decode_block_params(DiracContext s, DiracArith arith[8], DiracBlock block,
		1186	int stride, int x, int y)
		1187	{
		1188	int i;
		1189
		1190	block->ref = pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF1);
		1191	block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF1);
		1192
		1193	if (s->num_refs == 2) {
		1194	block->ref \|= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_REF2);
		1195	block->ref ^= dirac_get_arith_bit(arith, CTX_PMODE_REF2) << 1;
		1196	}
		1197
		1198	if (!block->ref) {
		1199	pred_block_dc(block, stride, x, y);
		1200	for (i = 0; i < 3; i++)
		1201	block->u.dc[i] += dirac_get_arith_int(arith+1+i, CTX_DC_F1, CTX_DC_DATA);
		1202	return;
		1203	}
		1204
		1205	if (s->globalmc_flag) {
		1206	block->ref \|= pred_block_mode(block, stride, x, y, DIRAC_REF_MASK_GLOBAL);
		1207	block->ref ^= dirac_get_arith_bit(arith, CTX_GLOBAL_BLOCK) << 2;
		1208	}
		1209
		1210	for (i = 0; i < s->num_refs; i++)
		1211	if (block->ref & (i+1)) {
		1212	if (block->ref & DIRAC_REF_MASK_GLOBAL) {
		1213	global_mv(s, block, x, y, i);
		1214	} else {
		1215	pred_mv(block, stride, x, y, i);
		1216	block->u.mv[i][0] += dirac_get_arith_int(arith + 4 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
		1217	block->u.mv[i][1] += dirac_get_arith_int(arith + 5 + 2 * i, CTX_MV_F1, CTX_MV_DATA);
		1218	}
		1219	}
		1220	}
		1221
		1222	/**
		1223	* Copies the current block to the other blocks covered by the current superblock split mode
		1224	*/
		1225	static void propagate_block_data(DiracBlock *block, int stride, int size)
		1226	{
		1227	int x, y;
		1228	DiracBlock *dst = block;
		1229
		1230	for (x = 1; x < size; x++)
		1231	dst[x] = *block;
		1232
		1233	for (y = 1; y < size; y++) {
		1234	dst += stride;
		1235	for (x = 0; x < size; x++)
		1236	dst[x] = *block;
		1237	}
		1238	}
		1239
		1240	/**
		1241	* Dirac Specification ->
		1242	* 12. Block motion data syntax
		1243	*/
		1244	static int dirac_unpack_block_motion_data(DiracContext *s)
		1245	{
		1246	GetBitContext *gb = &s->gb;
		1247	uint8_t *sbsplit = s->sbsplit;
		1248	int i, x, y, q, p;
		1249	DiracArith arith[8];
		1250
		1251	align_get_bits(gb);
		1252
		1253	/* [DIRAC_STD] 11.2.4 and 12.2.1 Number of blocks and superblocks */
		1254	s->sbwidth = DIVRNDUP(s->source.width, 4*s->plane[0].xbsep);
		1255	s->sbheight = DIVRNDUP(s->source.height, 4*s->plane[0].ybsep);
		1256	s->blwidth = 4 * s->sbwidth;
		1257	s->blheight = 4 * s->sbheight;
		1258
		1259	/* [DIRAC_STD] 12.3.1 Superblock splitting modes. superblock_split_modes()
		1260	decode superblock split modes */
		1261	ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb)); /* svq3_get_ue_golomb(gb) is the length */
		1262	for (y = 0; y < s->sbheight; y++) {
		1263	for (x = 0; x < s->sbwidth; x++) {
		1264	unsigned int split = dirac_get_arith_uint(arith, CTX_SB_F1, CTX_SB_DATA);
		1265	if (split > 2)
		1266	return AVERROR_INVALIDDATA;
		1267	sbsplit[x] = (split + pred_sbsplit(sbsplit+x, s->sbwidth, x, y)) % 3;
		1268	}
		1269	sbsplit += s->sbwidth;
		1270	}
		1271
		1272	/* setup arith decoding */
		1273	ff_dirac_init_arith_decoder(arith, gb, svq3_get_ue_golomb(gb));
		1274	for (i = 0; i < s->num_refs; i++) {
		1275	ff_dirac_init_arith_decoder(arith + 4 + 2 * i, gb, svq3_get_ue_golomb(gb));
		1276	ff_dirac_init_arith_decoder(arith + 5 + 2 * i, gb, svq3_get_ue_golomb(gb));
		1277	}
		1278	for (i = 0; i < 3; i++)
		1279	ff_dirac_init_arith_decoder(arith+1+i, gb, svq3_get_ue_golomb(gb));
		1280
		1281	for (y = 0; y < s->sbheight; y++)
		1282	for (x = 0; x < s->sbwidth; x++) {
		1283	int blkcnt = 1 << s->sbsplit[y * s->sbwidth + x];
		1284	int step = 4 >> s->sbsplit[y * s->sbwidth + x];
		1285
		1286	for (q = 0; q < blkcnt; q++)
		1287	for (p = 0; p < blkcnt; p++) {
		1288	int bx = 4 * x + p*step;
		1289	int by = 4 * y + q*step;
		1290	DiracBlock block = &s->blmotion[bys->blwidth + bx];
		1291	decode_block_params(s, arith, block, s->blwidth, bx, by);
		1292	propagate_block_data(block, s->blwidth, step);
		1293	}
		1294	}
		1295
		1296	return 0;
		1297	}
		1298
		1299	static int weight(int i, int blen, int offset)
		1300	{
		1301	#define ROLLOFF(i) offset == 1 ? ((i) ? 5 : 3) : \
		1302	(1 + (6(i) + offset - 1) / (2offset - 1))
		1303
		1304	if (i < 2*offset)
		1305	return ROLLOFF(i);
		1306	else if (i > blen-1 - 2*offset)
		1307	return ROLLOFF(blen-1 - i);
		1308	return 8;
		1309	}
		1310
		1311	static void init_obmc_weight_row(Plane p, uint8_t obmc_weight, int stride,
		1312	int left, int right, int wy)
		1313	{
		1314	int x;
		1315	for (x = 0; left && x < p->xblen >> 1; x++)
		1316	obmc_weight[x] = wy*8;
		1317	for (; x < p->xblen >> right; x++)
		1318	obmc_weight[x] = wy*weight(x, p->xblen, p->xoffset);
		1319	for (; x < p->xblen; x++)
		1320	obmc_weight[x] = wy*8;
		1321	for (; x < stride; x++)
		1322	obmc_weight[x] = 0;
		1323	}
		1324
		1325	static void init_obmc_weight(Plane p, uint8_t obmc_weight, int stride,
		1326	int left, int right, int top, int bottom)
		1327	{
		1328	int y;
		1329	for (y = 0; top && y < p->yblen >> 1; y++) {
		1330	init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
		1331	obmc_weight += stride;
		1332	}
		1333	for (; y < p->yblen >> bottom; y++) {
		1334	int wy = weight(y, p->yblen, p->yoffset);
		1335	init_obmc_weight_row(p, obmc_weight, stride, left, right, wy);
		1336	obmc_weight += stride;
		1337	}
		1338	for (; y < p->yblen; y++) {
		1339	init_obmc_weight_row(p, obmc_weight, stride, left, right, 8);
		1340	obmc_weight += stride;
		1341	}
		1342	}
		1343
		1344	static void init_obmc_weights(DiracContext s, Plane p, int by)
		1345	{
		1346	int top = !by;
		1347	int bottom = by == s->blheight-1;
		1348
		1349	/* don't bother re-initing for rows 2 to blheight-2, the weights don't change */
		1350	if (top \|\| bottom \|\| by == 1) {
		1351	init_obmc_weight(p, s->obmc_weight[0], MAX_BLOCKSIZE, 1, 0, top, bottom);
		1352	init_obmc_weight(p, s->obmc_weight[1], MAX_BLOCKSIZE, 0, 0, top, bottom);
		1353	init_obmc_weight(p, s->obmc_weight[2], MAX_BLOCKSIZE, 0, 1, top, bottom);
		1354	}
		1355	}
		1356
		1357	static const uint8_t epel_weights[4][4][4] = {
		1358	{{ 16, 0, 0, 0 },
		1359	{ 12, 4, 0, 0 },
		1360	{ 8, 8, 0, 0 },
		1361	{ 4, 12, 0, 0 }},
		1362	{{ 12, 0, 4, 0 },
		1363	{ 9, 3, 3, 1 },
		1364	{ 6, 6, 2, 2 },
		1365	{ 3, 9, 1, 3 }},
		1366	{{ 8, 0, 8, 0 },
		1367	{ 6, 2, 6, 2 },
		1368	{ 4, 4, 4, 4 },
		1369	{ 2, 6, 2, 6 }},
		1370	{{ 4, 0, 12, 0 },
		1371	{ 3, 1, 9, 3 },
		1372	{ 2, 2, 6, 6 },
		1373	{ 1, 3, 3, 9 }}
		1374	};
		1375
		1376	/**
		1377	* For block x,y, determine which of the hpel planes to do bilinear
		1378	* interpolation from and set src[] to the location in each hpel plane
		1379	* to MC from.
		1380	*
		1381	* @return the index of the put_dirac_pixels_tab function to use
		1382	* 0 for 1 plane (fpel,hpel), 1 for 2 planes (qpel), 2 for 4 planes (qpel), and 3 for epel
		1383	*/
		1384	static int mc_subpel(DiracContext s, DiracBlock block, const uint8_t *src[5],
		1385	int x, int y, int ref, int plane)
		1386	{
		1387	Plane *p = &s->plane[plane];
		1388	uint8_t **ref_hpel = s->ref_pics[ref]->hpel[plane];
		1389	int motion_x = block->u.mv[ref][0];
		1390	int motion_y = block->u.mv[ref][1];
		1391	int mx, my, i, epel, nplanes = 0;
		1392
		1393	if (plane) {
		1394	motion_x >>= s->chroma_x_shift;
		1395	motion_y >>= s->chroma_y_shift;
		1396	}
		1397
		1398	mx = motion_x & ~(-1U << s->mv_precision);
		1399	my = motion_y & ~(-1U << s->mv_precision);
		1400	motion_x >>= s->mv_precision;
		1401	motion_y >>= s->mv_precision;
		1402	/* normalize subpel coordinates to epel */
		1403	/* TODO: template this function? */
		1404	mx <<= 3 - s->mv_precision;
		1405	my <<= 3 - s->mv_precision;
		1406
		1407	x += motion_x;
		1408	y += motion_y;
		1409	epel = (mx\|my)&1;
		1410
		1411	/* hpel position */
		1412	if (!((mx\|my)&3)) {
		1413	nplanes = 1;
		1414	src[0] = ref_hpel[(my>>1)+(mx>>2)] + y*p->stride + x;
		1415	} else {
		1416	/* qpel or epel */
		1417	nplanes = 4;
		1418	for (i = 0; i < 4; i++)
		1419	src[i] = ref_hpel[i] + y*p->stride + x;
		1420
		1421	/* if we're interpolating in the right/bottom halves, adjust the planes as needed
		1422	we increment x/y because the edge changes for half of the pixels */
		1423	if (mx > 4) {
		1424	src[0] += 1;
		1425	src[2] += 1;
		1426	x++;
		1427	}
		1428	if (my > 4) {
		1429	src[0] += p->stride;
		1430	src[1] += p->stride;
		1431	y++;
		1432	}
		1433
		1434	/* hpel planes are:
		1435	[0]: F [1]: H
		1436	[2]: V [3]: C */
		1437	if (!epel) {
		1438	/* check if we really only need 2 planes since either mx or my is
		1439	a hpel position. (epel weights of 0 handle this there) */
		1440	if (!(mx&3)) {
		1441	/* mx == 0: average [0] and [2]
		1442	mx == 4: average [1] and [3] */
		1443	src[!mx] = src[2 + !!mx];
		1444	nplanes = 2;
		1445	} else if (!(my&3)) {
		1446	src[0] = src[(my>>1) ];
		1447	src[1] = src[(my>>1)+1];
		1448	nplanes = 2;
		1449	}
		1450	} else {
		1451	/* adjust the ordering if needed so the weights work */
		1452	if (mx > 4) {
		1453	FFSWAP(const uint8_t *, src[0], src[1]);
		1454	FFSWAP(const uint8_t *, src[2], src[3]);
		1455	}
		1456	if (my > 4) {
		1457	FFSWAP(const uint8_t *, src[0], src[2]);
		1458	FFSWAP(const uint8_t *, src[1], src[3]);
		1459	}
		1460	src[4] = epel_weights[my&3][mx&3];
		1461	}
		1462	}
		1463
		1464	/* fixme: v/h _edge_pos */
		1465	if (x + p->xblen > p->width +EDGE_WIDTH/2 \|\|
		1466	y + p->yblen > p->height+EDGE_WIDTH/2 \|\|
		1467	x < 0 \|\| y < 0) {
		1468	for (i = 0; i < nplanes; i++) {
		1469	s->vdsp.emulated_edge_mc(s->edge_emu_buffer[i], src[i],
		1470	p->stride, p->stride,
		1471	p->xblen, p->yblen, x, y,
		1472	p->width+EDGE_WIDTH/2, p->height+EDGE_WIDTH/2);
		1473	src[i] = s->edge_emu_buffer[i];
		1474	}
		1475	}
		1476	return (nplanes>>1) + epel;
		1477	}
		1478
		1479	static void add_dc(uint16_t *dst, int dc, int stride,
		1480	uint8_t *obmc_weight, int xblen, int yblen)
		1481	{
		1482	int x, y;
		1483	dc += 128;
		1484
		1485	for (y = 0; y < yblen; y++) {
		1486	for (x = 0; x < xblen; x += 2) {
		1487	dst[x ] += dc * obmc_weight[x ];
		1488	dst[x+1] += dc * obmc_weight[x+1];
		1489	}
		1490	dst += stride;
		1491	obmc_weight += MAX_BLOCKSIZE;
		1492	}
		1493	}
		1494
		1495	static void block_mc(DiracContext s, DiracBlock block,
		1496	uint16_t mctmp, uint8_t obmc_weight,
		1497	int plane, int dstx, int dsty)
		1498	{
		1499	Plane *p = &s->plane[plane];
		1500	const uint8_t *src[5];
		1501	int idx;
		1502
		1503	switch (block->ref&3) {
		1504	case 0: /* DC */
		1505	add_dc(mctmp, block->u.dc[plane], p->stride, obmc_weight, p->xblen, p->yblen);
		1506	return;
		1507	case 1:
		1508	case 2:
		1509	idx = mc_subpel(s, block, src, dstx, dsty, (block->ref&3)-1, plane);
		1510	s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
		1511	if (s->weight_func)
		1512	s->weight_func(s->mcscratch, p->stride, s->weight_log2denom,
		1513	s->weight[0] + s->weight[1], p->yblen);
		1514	break;
		1515	case 3:
		1516	idx = mc_subpel(s, block, src, dstx, dsty, 0, plane);
		1517	s->put_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
		1518	idx = mc_subpel(s, block, src, dstx, dsty, 1, plane);
		1519	if (s->biweight_func) {
		1520	/* fixme: +32 is a quick hack */
		1521	s->put_pixels_tab[idx](s->mcscratch + 32, src, p->stride, p->yblen);
		1522	s->biweight_func(s->mcscratch, s->mcscratch+32, p->stride, s->weight_log2denom,
		1523	s->weight[0], s->weight[1], p->yblen);
		1524	} else
		1525	s->avg_pixels_tab[idx](s->mcscratch, src, p->stride, p->yblen);
		1526	break;
		1527	}
		1528	s->add_obmc(mctmp, s->mcscratch, p->stride, obmc_weight, p->yblen);
		1529	}
		1530
		1531	static void mc_row(DiracContext s, DiracBlock block, uint16_t *mctmp, int plane, int dsty)
		1532	{
		1533	Plane *p = &s->plane[plane];
		1534	int x, dstx = p->xbsep - p->xoffset;
		1535
		1536	block_mc(s, block, mctmp, s->obmc_weight[0], plane, -p->xoffset, dsty);
		1537	mctmp += p->xbsep;
		1538
		1539	for (x = 1; x < s->blwidth-1; x++) {
		1540	block_mc(s, block+x, mctmp, s->obmc_weight[1], plane, dstx, dsty);
		1541	dstx += p->xbsep;
		1542	mctmp += p->xbsep;
		1543	}
		1544	block_mc(s, block+x, mctmp, s->obmc_weight[2], plane, dstx, dsty);
		1545	}
		1546
		1547	static void select_dsp_funcs(DiracContext *s, int width, int height, int xblen, int yblen)
		1548	{
		1549	int idx = 0;
		1550	if (xblen > 8)
		1551	idx = 1;
		1552	if (xblen > 16)
		1553	idx = 2;
		1554
		1555	memcpy(s->put_pixels_tab, s->diracdsp.put_dirac_pixels_tab[idx], sizeof(s->put_pixels_tab));
		1556	memcpy(s->avg_pixels_tab, s->diracdsp.avg_dirac_pixels_tab[idx], sizeof(s->avg_pixels_tab));
		1557	s->add_obmc = s->diracdsp.add_dirac_obmc[idx];
		1558	if (s->weight_log2denom > 1 \|\| s->weight[0] != 1 \|\| s->weight[1] != 1) {
		1559	s->weight_func = s->diracdsp.weight_dirac_pixels_tab[idx];
		1560	s->biweight_func = s->diracdsp.biweight_dirac_pixels_tab[idx];
		1561	} else {
		1562	s->weight_func = NULL;
		1563	s->biweight_func = NULL;
		1564	}
		1565	}
		1566
		1567	static int interpolate_refplane(DiracContext s, DiracFrame ref, int plane, int width, int height)
		1568	{
		1569	/* chroma allocates an edge of 8 when subsampled
		1570	which for 4:2:2 means an h edge of 16 and v edge of 8
		1571	just use 8 for everything for the moment */
		1572	int i, edge = EDGE_WIDTH/2;
		1573
		1574	ref->hpel[plane][0] = ref->avframe->data[plane];
		1575	s->mpvencdsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP \| EDGE_BOTTOM); /* EDGE_TOP \| EDGE_BOTTOM values just copied to make it build, this needs to be ensured */
		1576
		1577	/* no need for hpel if we only have fpel vectors */
		1578	if (!s->mv_precision)
		1579	return 0;
		1580
		1581	for (i = 1; i < 4; i++) {
		1582	if (!ref->hpel_base[plane][i])
		1583	ref->hpel_base[plane][i] = av_malloc((height+2edge) ref->avframe->linesize[plane] + 32);
		1584	if (!ref->hpel_base[plane][i]) {
		1585	return AVERROR(ENOMEM);
		1586	}
		1587	/* we need to be 16-byte aligned even for chroma */
		1588	ref->hpel[plane][i] = ref->hpel_base[plane][i] + edge*ref->avframe->linesize[plane] + 16;
		1589	}
		1590
		1591	if (!ref->interpolated[plane]) {
		1592	s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2],
		1593	ref->hpel[plane][3], ref->hpel[plane][0],
		1594	ref->avframe->linesize[plane], width, height);
		1595	s->mpvencdsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP \| EDGE_BOTTOM);
		1596	s->mpvencdsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP \| EDGE_BOTTOM);
		1597	s->mpvencdsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP \| EDGE_BOTTOM);
		1598	}
		1599	ref->interpolated[plane] = 1;
		1600
		1601	return 0;
		1602	}
		1603
		1604	/**
		1605	* Dirac Specification ->
		1606	* 13.0 Transform data syntax. transform_data()
		1607	*/
		1608	static int dirac_decode_frame_internal(DiracContext *s)
		1609	{
		1610	DWTContext d;
		1611	int y, i, comp, dsty;
		1612	int ret;
		1613
		1614	if (s->low_delay) {
		1615	/* [DIRAC_STD] 13.5.1 low_delay_transform_data() */
		1616	for (comp = 0; comp < 3; comp++) {
		1617	Plane *p = &s->plane[comp];
		1618	memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
		1619	}
		1620	if (!s->zero_res) {
		1621	if ((ret = decode_lowdelay(s)) < 0)
		1622	return ret;
		1623	}
		1624	}
		1625
		1626	for (comp = 0; comp < 3; comp++) {
		1627	Plane *p = &s->plane[comp];
		1628	uint8_t *frame = s->current_picture->avframe->data[comp];
		1629
		1630	/* FIXME: small resolutions */
		1631	for (i = 0; i < 4; i++)
		1632	s->edge_emu_buffer[i] = s->edge_emu_buffer_base + i*FFALIGN(p->width, 16);
		1633
		1634	if (!s->zero_res && !s->low_delay)
		1635	{
		1636	memset(p->idwt_buf, 0, p->idwt_stride * p->idwt_height * sizeof(IDWTELEM));
		1637	decode_component(s, comp); /* [DIRAC_STD] 13.4.1 core_transform_data() */
		1638	}
		1639	ret = ff_spatial_idwt_init2(&d, p->idwt_buf, p->idwt_width, p->idwt_height, p->idwt_stride,
		1640	s->wavelet_idx+2, s->wavelet_depth, p->idwt_tmp);
		1641	if (ret < 0)
		1642	return ret;
		1643
		1644	if (!s->num_refs) { /* intra */
		1645	for (y = 0; y < p->height; y += 16) {
		1646	ff_spatial_idwt_slice2(&d, y+16); /* decode */
		1647	s->diracdsp.put_signed_rect_clamped(frame + y*p->stride, p->stride,
		1648	p->idwt_buf + y*p->idwt_stride, p->idwt_stride, p->width, 16);
		1649	}
		1650	} else { /* inter */
		1651	int rowheight = p->ybsep*p->stride;
		1652
		1653	select_dsp_funcs(s, p->width, p->height, p->xblen, p->yblen);
		1654
		1655	for (i = 0; i < s->num_refs; i++) {
		1656	int ret = interpolate_refplane(s, s->ref_pics[i], comp, p->width, p->height);
		1657	if (ret < 0)
		1658	return ret;
		1659	}
		1660
		1661	memset(s->mctmp, 0, 4p->yoffsetp->stride);
		1662
		1663	dsty = -p->yoffset;
		1664	for (y = 0; y < s->blheight; y++) {
		1665	int h = 0,
		1666	start = FFMAX(dsty, 0);
		1667	uint16_t mctmp = s->mctmp + yrowheight;
		1668	DiracBlock blocks = s->blmotion + ys->blwidth;
		1669
		1670	init_obmc_weights(s, p, y);
		1671
		1672	if (y == s->blheight-1 \|\| start+p->ybsep > p->height)
		1673	h = p->height - start;
		1674	else
		1675	h = p->ybsep - (start - dsty);
		1676	if (h < 0)
		1677	break;
		1678
		1679	memset(mctmp+2p->yoffsetp->stride, 0, 2*rowheight);
		1680	mc_row(s, blocks, mctmp, comp, dsty);
		1681
		1682	mctmp += (start - dsty)*p->stride + p->xoffset;
		1683	ff_spatial_idwt_slice2(&d, start + h); /* decode */
		1684	s->diracdsp.add_rect_clamped(frame + start*p->stride, mctmp, p->stride,
		1685	p->idwt_buf + start*p->idwt_stride, p->idwt_stride, p->width, h);
		1686
		1687	dsty += p->ybsep;
		1688	}
		1689	}
		1690	}
		1691
		1692
		1693	return 0;
		1694	}
		1695
		1696	static int get_buffer_with_edge(AVCodecContext avctx, AVFrame f, int flags)
		1697	{
		1698	int ret, i;
		1699	int chroma_x_shift, chroma_y_shift;
		1700	avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_x_shift, &chroma_y_shift);
		1701
		1702	f->width = avctx->width + 2 * EDGE_WIDTH;
		1703	f->height = avctx->height + 2 * EDGE_WIDTH + 2;
		1704	ret = ff_get_buffer(avctx, f, flags);
		1705	if (ret < 0)
		1706	return ret;
		1707
		1708	for (i = 0; f->data[i]; i++) {
		1709	int offset = (EDGE_WIDTH >> (i && i<3 ? chroma_y_shift : 0)) *
		1710	f->linesize[i] + 32;
		1711	f->data[i] += offset;
		1712	}
		1713	f->width = avctx->width;
		1714	f->height = avctx->height;
		1715
		1716	return 0;
		1717	}
		1718
		1719	/**
		1720	* Dirac Specification ->
		1721	* 11.1.1 Picture Header. picture_header()
		1722	*/
		1723	static int dirac_decode_picture_header(DiracContext *s)
		1724	{
		1725	unsigned retire, picnum;
		1726	int i, j, ret;
		1727	int64_t refdist, refnum;
		1728	GetBitContext *gb = &s->gb;
		1729
		1730	/* [DIRAC_STD] 11.1.1 Picture Header. picture_header() PICTURE_NUM */
		1731	picnum = s->current_picture->avframe->display_picture_number = get_bits_long(gb, 32);
		1732
		1733
		1734	av_log(s->avctx,AV_LOG_DEBUG,"PICTURE_NUM: %d\n",picnum);
		1735
		1736	/* if this is the first keyframe after a sequence header, start our
		1737	reordering from here */
		1738	if (s->frame_number < 0)
		1739	s->frame_number = picnum;
		1740
		1741	s->ref_pics[0] = s->ref_pics[1] = NULL;
		1742	for (i = 0; i < s->num_refs; i++) {
		1743	refnum = (picnum + dirac_get_se_golomb(gb)) & 0xFFFFFFFF;
		1744	refdist = INT64_MAX;
		1745
		1746	/* find the closest reference to the one we want */
		1747	/* Jordi: this is needed if the referenced picture hasn't yet arrived */
		1748	for (j = 0; j < MAX_REFERENCE_FRAMES && refdist; j++)
		1749	if (s->ref_frames[j]
		1750	&& FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum) < refdist) {
		1751	s->ref_pics[i] = s->ref_frames[j];
		1752	refdist = FFABS(s->ref_frames[j]->avframe->display_picture_number - refnum);
		1753	}
		1754
		1755	if (!s->ref_pics[i] \|\| refdist)
		1756	av_log(s->avctx, AV_LOG_DEBUG, "Reference not found\n");
		1757
		1758	/* if there were no references at all, allocate one */
		1759	if (!s->ref_pics[i])
		1760	for (j = 0; j < MAX_FRAMES; j++)
		1761	if (!s->all_frames[j].avframe->data[0]) {
		1762	s->ref_pics[i] = &s->all_frames[j];
		1763	get_buffer_with_edge(s->avctx, s->ref_pics[i]->avframe, AV_GET_BUFFER_FLAG_REF);
		1764	break;
		1765	}
		1766
		1767	if (!s->ref_pics[i]) {
		1768	av_log(s->avctx, AV_LOG_ERROR, "Reference could not be allocated\n");
		1769	return AVERROR_INVALIDDATA;
		1770	}
		1771
		1772	}
		1773
		1774	/* retire the reference frames that are not used anymore */
		1775	if (s->current_picture->reference) {
		1776	retire = (picnum + dirac_get_se_golomb(gb)) & 0xFFFFFFFF;
		1777	if (retire != picnum) {
		1778	DiracFrame *retire_pic = remove_frame(s->ref_frames, retire);
		1779
		1780	if (retire_pic)
		1781	retire_pic->reference &= DELAYED_PIC_REF;
		1782	else
		1783	av_log(s->avctx, AV_LOG_DEBUG, "Frame to retire not found\n");
		1784	}
		1785
		1786	/* if reference array is full, remove the oldest as per the spec */
		1787	while (add_frame(s->ref_frames, MAX_REFERENCE_FRAMES, s->current_picture)) {
		1788	av_log(s->avctx, AV_LOG_ERROR, "Reference frame overflow\n");
		1789	remove_frame(s->ref_frames, s->ref_frames[0]->avframe->display_picture_number)->reference &= DELAYED_PIC_REF;
		1790	}
		1791	}
		1792
		1793	if (s->num_refs) {
		1794	ret = dirac_unpack_prediction_parameters(s); /* [DIRAC_STD] 11.2 Picture Prediction Data. picture_prediction() */
		1795	if (ret < 0)
		1796	return ret;
		1797	ret = dirac_unpack_block_motion_data(s); /* [DIRAC_STD] 12. Block motion data syntax */
		1798	if (ret < 0)
		1799	return ret;
		1800	}
		1801	ret = dirac_unpack_idwt_params(s); /* [DIRAC_STD] 11.3 Wavelet transform data */
		1802	if (ret < 0)
		1803	return ret;
		1804
		1805	init_planes(s);
		1806	return 0;
		1807	}
		1808
		1809	static int get_delayed_pic(DiracContext s, AVFrame picture, int *got_frame)
		1810	{
		1811	DiracFrame *out = s->delay_frames[0];
		1812	int i, out_idx = 0;
		1813	int ret;
		1814
		1815	/* find frame with lowest picture number */
		1816	for (i = 1; s->delay_frames[i]; i++)
		1817	if (s->delay_frames[i]->avframe->display_picture_number < out->avframe->display_picture_number) {
		1818	out = s->delay_frames[i];
		1819	out_idx = i;
		1820	}
		1821
		1822	for (i = out_idx; s->delay_frames[i]; i++)
		1823	s->delay_frames[i] = s->delay_frames[i+1];
		1824
		1825	if (out) {
		1826	out->reference ^= DELAYED_PIC_REF;
		1827	*got_frame = 1;
		1828	if((ret = av_frame_ref(picture, out->avframe)) < 0)
		1829	return ret;
		1830	}
		1831
		1832	return 0;
		1833	}
		1834
		1835	/**
		1836	* Dirac Specification ->
		1837	* 9.6 Parse Info Header Syntax. parse_info()
		1838	* 4 byte start code + byte parse code + 4 byte size + 4 byte previous size
		1839	*/
		1840	#define DATA_UNIT_HEADER_SIZE 13
		1841
		1842	/* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3
		1843	inside the function parse_sequence() */
		1844	static int dirac_decode_data_unit(AVCodecContext avctx, const uint8_t buf, int size)
		1845	{
		1846	DiracContext *s = avctx->priv_data;
		1847	DiracFrame *pic = NULL;
		1848	int ret, i, parse_code;
		1849	unsigned tmp;
		1850
		1851	if (size < DATA_UNIT_HEADER_SIZE)
		1852	return AVERROR_INVALIDDATA;
		1853
		1854	parse_code = buf[4];
		1855
		1856	init_get_bits(&s->gb, &buf[13], 8*(size - DATA_UNIT_HEADER_SIZE));
		1857
		1858	if (parse_code == pc_seq_header) {
		1859	if (s->seen_sequence_header)
		1860	return 0;
		1861
		1862	/* [DIRAC_STD] 10. Sequence header */
		1863	ret = avpriv_dirac_parse_sequence_header(avctx, &s->gb, &s->source);
		1864	if (ret < 0)
		1865	return ret;
		1866
		1867	avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_x_shift, &s->chroma_y_shift);
		1868
		1869	ret = alloc_sequence_buffers(s);
		1870	if (ret < 0)
		1871	return ret;
		1872
		1873	s->seen_sequence_header = 1;
		1874	} else if (parse_code == pc_eos) { /* [DIRAC_STD] End of Sequence */
		1875	free_sequence_buffers(s);
		1876	s->seen_sequence_header = 0;
		1877	} else if (parse_code == pc_aux_data) {
		1878	if (buf[13] == 1) { /* encoder implementation/version */
		1879	int ver[3];
		1880	/* versions older than 1.0.8 don't store quant delta for
		1881	subbands with only one codeblock */
		1882	if (sscanf(buf+14, "Schroedinger %d.%d.%d", ver, ver+1, ver+2) == 3)
		1883	if (ver[0] == 1 && ver[1] == 0 && ver[2] <= 7)
		1884	s->old_delta_quant = 1;
		1885	}
		1886	} else if (parse_code & 0x8) { /* picture data unit */
		1887	if (!s->seen_sequence_header) {
		1888	av_log(avctx, AV_LOG_DEBUG, "Dropping frame without sequence header\n");
		1889	return AVERROR_INVALIDDATA;
		1890	}
		1891
		1892	/* find an unused frame */
		1893	for (i = 0; i < MAX_FRAMES; i++)
		1894	if (s->all_frames[i].avframe->data[0] == NULL)
		1895	pic = &s->all_frames[i];
		1896	if (!pic) {
		1897	av_log(avctx, AV_LOG_ERROR, "framelist full\n");
		1898	return AVERROR_INVALIDDATA;
		1899	}
		1900
		1901	av_frame_unref(pic->avframe);
		1902
		1903	/* [DIRAC_STD] Defined in 9.6.1 ... */
		1904	tmp = parse_code & 0x03; /* [DIRAC_STD] num_refs() */
		1905	if (tmp > 2) {
		1906	av_log(avctx, AV_LOG_ERROR, "num_refs of 3\n");
		1907	return AVERROR_INVALIDDATA;
		1908	}
		1909	s->num_refs = tmp;
		1910	s->is_arith = (parse_code & 0x48) == 0x08; /* [DIRAC_STD] using_ac() */
		1911	s->low_delay = (parse_code & 0x88) == 0x88; /* [DIRAC_STD] is_low_delay() */
		1912	pic->reference = (parse_code & 0x0C) == 0x0C; /* [DIRAC_STD] is_reference() */
		1913	pic->avframe->key_frame = s->num_refs == 0; /* [DIRAC_STD] is_intra() */
		1914	pic->avframe->pict_type = s->num_refs + 1; /* Definition of AVPictureType in avutil.h */
		1915
		1916	if ((ret = get_buffer_with_edge(avctx, pic->avframe, (parse_code & 0x0C) == 0x0C ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
		1917	return ret;
		1918	s->current_picture = pic;
		1919	s->plane[0].stride = pic->avframe->linesize[0];
		1920	s->plane[1].stride = pic->avframe->linesize[1];
		1921	s->plane[2].stride = pic->avframe->linesize[2];
		1922
		1923	if (alloc_buffers(s, FFMAX3(FFABS(s->plane[0].stride), FFABS(s->plane[1].stride), FFABS(s->plane[2].stride))) < 0)
		1924	return AVERROR(ENOMEM);
		1925
		1926	/* [DIRAC_STD] 11.1 Picture parse. picture_parse() */
		1927	ret = dirac_decode_picture_header(s);
		1928	if (ret < 0)
		1929	return ret;
		1930
		1931	/* [DIRAC_STD] 13.0 Transform data syntax. transform_data() */
		1932	ret = dirac_decode_frame_internal(s);
		1933	if (ret < 0)
		1934	return ret;
		1935	}
		1936	return 0;
		1937	}
		1938
		1939	static int dirac_decode_frame(AVCodecContext avctx, void data, int got_frame, AVPacket pkt)
		1940	{
		1941	DiracContext *s = avctx->priv_data;
		1942	AVFrame *picture = data;
		1943	uint8_t *buf = pkt->data;
		1944	int buf_size = pkt->size;
		1945	int i, buf_idx = 0;
		1946	int ret;
		1947	unsigned data_unit_size;
		1948
		1949	/* release unused frames */
		1950	for (i = 0; i < MAX_FRAMES; i++)
		1951	if (s->all_frames[i].avframe->data[0] && !s->all_frames[i].reference) {
		1952	av_frame_unref(s->all_frames[i].avframe);
		1953	memset(s->all_frames[i].interpolated, 0, sizeof(s->all_frames[i].interpolated));
		1954	}
		1955
		1956	s->current_picture = NULL;
		1957	*got_frame = 0;
		1958
		1959	/* end of stream, so flush delayed pics */
		1960	if (buf_size == 0)
		1961	return get_delayed_pic(s, (AVFrame *)data, got_frame);
		1962
		1963	for (;;) {
		1964	/*[DIRAC_STD] Here starts the code from parse_info() defined in 9.6
		1965	[DIRAC_STD] PARSE_INFO_PREFIX = "BBCD" as defined in ISO/IEC 646
		1966	BBCD start code search */
		1967	for (; buf_idx + DATA_UNIT_HEADER_SIZE < buf_size; buf_idx++) {
		1968	if (buf[buf_idx ] == 'B' && buf[buf_idx+1] == 'B' &&
		1969	buf[buf_idx+2] == 'C' && buf[buf_idx+3] == 'D')
		1970	break;
		1971	}
		1972	/* BBCD found or end of data */
		1973	if (buf_idx + DATA_UNIT_HEADER_SIZE >= buf_size)
		1974	break;
		1975
		1976	data_unit_size = AV_RB32(buf+buf_idx+5);
		1977	if (data_unit_size > buf_size - buf_idx \|\| !data_unit_size) {
		1978	if(data_unit_size > buf_size - buf_idx)
		1979	av_log(s->avctx, AV_LOG_ERROR,
		1980	"Data unit with size %d is larger than input buffer, discarding\n",
		1981	data_unit_size);
		1982	buf_idx += 4;
		1983	continue;
		1984	}
		1985	/* [DIRAC_STD] dirac_decode_data_unit makes reference to the while defined in 9.3 inside the function parse_sequence() */
		1986	ret = dirac_decode_data_unit(avctx, buf+buf_idx, data_unit_size);
		1987	if (ret < 0)
		1988	{
		1989	av_log(s->avctx, AV_LOG_ERROR,"Error in dirac_decode_data_unit\n");
		1990	return ret;
		1991	}
		1992	buf_idx += data_unit_size;
		1993	}
		1994
		1995	if (!s->current_picture)
		1996	return buf_size;
		1997
		1998	if (s->current_picture->avframe->display_picture_number > s->frame_number) {
		1999	DiracFrame *delayed_frame = remove_frame(s->delay_frames, s->frame_number);
		2000
		2001	s->current_picture->reference \|= DELAYED_PIC_REF;
		2002
		2003	if (add_frame(s->delay_frames, MAX_DELAY, s->current_picture)) {
		2004	int min_num = s->delay_frames[0]->avframe->display_picture_number;
		2005	/* Too many delayed frames, so we display the frame with the lowest pts */
		2006	av_log(avctx, AV_LOG_ERROR, "Delay frame overflow\n");
		2007
		2008	for (i = 1; s->delay_frames[i]; i++)
		2009	if (s->delay_frames[i]->avframe->display_picture_number < min_num)
		2010	min_num = s->delay_frames[i]->avframe->display_picture_number;
		2011
		2012	delayed_frame = remove_frame(s->delay_frames, min_num);
		2013	add_frame(s->delay_frames, MAX_DELAY, s->current_picture);
		2014	}
		2015
		2016	if (delayed_frame) {
		2017	delayed_frame->reference ^= DELAYED_PIC_REF;
		2018	if((ret=av_frame_ref(data, delayed_frame->avframe)) < 0)
		2019	return ret;
		2020	*got_frame = 1;
		2021	}
		2022	} else if (s->current_picture->avframe->display_picture_number == s->frame_number) {
		2023	/* The right frame at the right time :-) */
		2024	if((ret=av_frame_ref(data, s->current_picture->avframe)) < 0)
		2025	return ret;
		2026	*got_frame = 1;
		2027	}
		2028
		2029	if (*got_frame)
		2030	s->frame_number = picture->display_picture_number + 1;
		2031
		2032	return buf_idx;
		2033	}
		2034
		2035	AVCodec ff_dirac_decoder = {
		2036	.name = "dirac",
		2037	.long_name = NULL_IF_CONFIG_SMALL("BBC Dirac VC-2"),
		2038	.type = AVMEDIA_TYPE_VIDEO,
		2039	.id = AV_CODEC_ID_DIRAC,
		2040	.priv_data_size = sizeof(DiracContext),
		2041	.init = dirac_decode_init,
		2042	.close = dirac_decode_end,
		2043	.decode = dirac_decode_frame,
		2044	.capabilities = AV_CODEC_CAP_DELAY,
		2045	.flush = dirac_decode_flush,
		2046	};

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/ffmpeg/ffmpeg-2.8/libavcodec/diracdec.c – Rev 6147