Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4349 | Serge | 1 | /* |
2 | * VC3/DNxHD encoder |
||
3 | * Copyright (c) 2007 Baptiste Coudurier |
||
4 | * Copyright (c) 2011 MirriAd Ltd |
||
5 | * |
||
6 | * VC-3 encoder funded by the British Broadcasting Corporation |
||
7 | * 10 bit support added by MirriAd Ltd, Joseph Artsimovich |
||
8 | * |
||
9 | * This file is part of FFmpeg. |
||
10 | * |
||
11 | * FFmpeg is free software; you can redistribute it and/or |
||
12 | * modify it under the terms of the GNU Lesser General Public |
||
13 | * License as published by the Free Software Foundation; either |
||
14 | * version 2.1 of the License, or (at your option) any later version. |
||
15 | * |
||
16 | * FFmpeg is distributed in the hope that it will be useful, |
||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
19 | * Lesser General Public License for more details. |
||
20 | * |
||
21 | * You should have received a copy of the GNU Lesser General Public |
||
22 | * License along with FFmpeg; if not, write to the Free Software |
||
23 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
24 | */ |
||
25 | |||
26 | #define RC_VARIANCE 1 // use variance or ssd for fast rc |
||
27 | |||
28 | #include "libavutil/attributes.h" |
||
29 | #include "libavutil/internal.h" |
||
30 | #include "libavutil/opt.h" |
||
31 | #include "avcodec.h" |
||
32 | #include "dsputil.h" |
||
33 | #include "internal.h" |
||
34 | #include "mpegvideo.h" |
||
35 | #include "dnxhdenc.h" |
||
36 | |||
37 | #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM |
||
38 | #define DNX10BIT_QMAT_SHIFT 18 // The largest value that will not lead to overflow for 10bit samples. |
||
39 | |||
40 | static const AVOption options[]={ |
||
41 | {"nitris_compat", "encode with Avid Nitris compatibility", offsetof(DNXHDEncContext, nitris_compat), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, VE}, |
||
42 | {NULL} |
||
43 | }; |
||
44 | |||
45 | static const AVClass dnxhd_class = { |
||
46 | .class_name = "dnxhd", |
||
47 | .item_name = av_default_item_name, |
||
48 | .option = options, |
||
49 | .version = LIBAVUTIL_VERSION_INT, |
||
50 | }; |
||
51 | |||
52 | #define LAMBDA_FRAC_BITS 10 |
||
53 | |||
54 | static void dnxhd_8bit_get_pixels_8x4_sym(int16_t *av_restrict block, const uint8_t *pixels, int line_size) |
||
55 | { |
||
56 | int i; |
||
57 | for (i = 0; i < 4; i++) { |
||
58 | block[0] = pixels[0]; block[1] = pixels[1]; |
||
59 | block[2] = pixels[2]; block[3] = pixels[3]; |
||
60 | block[4] = pixels[4]; block[5] = pixels[5]; |
||
61 | block[6] = pixels[6]; block[7] = pixels[7]; |
||
62 | pixels += line_size; |
||
63 | block += 8; |
||
64 | } |
||
65 | memcpy(block, block - 8, sizeof(*block) * 8); |
||
66 | memcpy(block + 8, block - 16, sizeof(*block) * 8); |
||
67 | memcpy(block + 16, block - 24, sizeof(*block) * 8); |
||
68 | memcpy(block + 24, block - 32, sizeof(*block) * 8); |
||
69 | } |
||
70 | |||
71 | static av_always_inline void dnxhd_10bit_get_pixels_8x4_sym(int16_t *av_restrict block, const uint8_t *pixels, int line_size) |
||
72 | { |
||
73 | int i; |
||
74 | const uint16_t* pixels16 = (const uint16_t*)pixels; |
||
75 | line_size >>= 1; |
||
76 | |||
77 | for (i = 0; i < 4; i++) { |
||
78 | block[0] = pixels16[0]; block[1] = pixels16[1]; |
||
79 | block[2] = pixels16[2]; block[3] = pixels16[3]; |
||
80 | block[4] = pixels16[4]; block[5] = pixels16[5]; |
||
81 | block[6] = pixels16[6]; block[7] = pixels16[7]; |
||
82 | pixels16 += line_size; |
||
83 | block += 8; |
||
84 | } |
||
85 | memcpy(block, block - 8, sizeof(*block) * 8); |
||
86 | memcpy(block + 8, block - 16, sizeof(*block) * 8); |
||
87 | memcpy(block + 16, block - 24, sizeof(*block) * 8); |
||
88 | memcpy(block + 24, block - 32, sizeof(*block) * 8); |
||
89 | } |
||
90 | |||
91 | static int dnxhd_10bit_dct_quantize(MpegEncContext *ctx, int16_t *block, |
||
92 | int n, int qscale, int *overflow) |
||
93 | { |
||
94 | const uint8_t *scantable= ctx->intra_scantable.scantable; |
||
95 | const int *qmat = n<4 ? ctx->q_intra_matrix[qscale] : ctx->q_chroma_intra_matrix[qscale]; |
||
96 | int last_non_zero = 0; |
||
97 | int i; |
||
98 | |||
99 | ctx->dsp.fdct(block); |
||
100 | |||
101 | // Divide by 4 with rounding, to compensate scaling of DCT coefficients |
||
102 | block[0] = (block[0] + 2) >> 2; |
||
103 | |||
104 | for (i = 1; i < 64; ++i) { |
||
105 | int j = scantable[i]; |
||
106 | int sign = block[j] >> 31; |
||
107 | int level = (block[j] ^ sign) - sign; |
||
108 | level = level * qmat[j] >> DNX10BIT_QMAT_SHIFT; |
||
109 | block[j] = (level ^ sign) - sign; |
||
110 | if (level) |
||
111 | last_non_zero = i; |
||
112 | } |
||
113 | |||
114 | return last_non_zero; |
||
115 | } |
||
116 | |||
117 | static av_cold int dnxhd_init_vlc(DNXHDEncContext *ctx) |
||
118 | { |
||
119 | int i, j, level, run; |
||
120 | int max_level = 1<<(ctx->cid_table->bit_depth+2); |
||
121 | |||
122 | FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->vlc_codes, max_level*4*sizeof(*ctx->vlc_codes), fail); |
||
123 | FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->vlc_bits, max_level*4*sizeof(*ctx->vlc_bits) , fail); |
||
124 | FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->run_codes, 63*2, fail); |
||
125 | FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->run_bits, 63, fail); |
||
126 | |||
127 | ctx->vlc_codes += max_level*2; |
||
128 | ctx->vlc_bits += max_level*2; |
||
129 | for (level = -max_level; level < max_level; level++) { |
||
130 | for (run = 0; run < 2; run++) { |
||
131 | int index = (level<<1)|run; |
||
132 | int sign, offset = 0, alevel = level; |
||
133 | |||
134 | MASK_ABS(sign, alevel); |
||
135 | if (alevel > 64) { |
||
136 | offset = (alevel-1)>>6; |
||
137 | alevel -= offset<<6; |
||
138 | } |
||
139 | for (j = 0; j < 257; j++) { |
||
140 | if (ctx->cid_table->ac_level[j] >> 1 == alevel && |
||
141 | (!offset || (ctx->cid_table->ac_flags[j] & 1) && offset) && |
||
142 | (!run || (ctx->cid_table->ac_flags[j] & 2) && run)) { |
||
143 | av_assert1(!ctx->vlc_codes[index]); |
||
144 | if (alevel) { |
||
145 | ctx->vlc_codes[index] = (ctx->cid_table->ac_codes[j]<<1)|(sign&1); |
||
146 | ctx->vlc_bits [index] = ctx->cid_table->ac_bits[j]+1; |
||
147 | } else { |
||
148 | ctx->vlc_codes[index] = ctx->cid_table->ac_codes[j]; |
||
149 | ctx->vlc_bits [index] = ctx->cid_table->ac_bits [j]; |
||
150 | } |
||
151 | break; |
||
152 | } |
||
153 | } |
||
154 | av_assert0(!alevel || j < 257); |
||
155 | if (offset) { |
||
156 | ctx->vlc_codes[index] = (ctx->vlc_codes[index]< |
||
157 | ctx->vlc_bits [index]+= ctx->cid_table->index_bits; |
||
158 | } |
||
159 | } |
||
160 | } |
||
161 | for (i = 0; i < 62; i++) { |
||
162 | int run = ctx->cid_table->run[i]; |
||
163 | av_assert0(run < 63); |
||
164 | ctx->run_codes[run] = ctx->cid_table->run_codes[i]; |
||
165 | ctx->run_bits [run] = ctx->cid_table->run_bits[i]; |
||
166 | } |
||
167 | return 0; |
||
168 | fail: |
||
169 | return -1; |
||
170 | } |
||
171 | |||
172 | static av_cold int dnxhd_init_qmat(DNXHDEncContext *ctx, int lbias, int cbias) |
||
173 | { |
||
174 | // init first elem to 1 to avoid div by 0 in convert_matrix |
||
175 | uint16_t weight_matrix[64] = {1,}; // convert_matrix needs uint16_t* |
||
176 | int qscale, i; |
||
177 | const uint8_t *luma_weight_table = ctx->cid_table->luma_weight; |
||
178 | const uint8_t *chroma_weight_table = ctx->cid_table->chroma_weight; |
||
179 | |||
180 | FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l, (ctx->m.avctx->qmax+1) * 64 * sizeof(int), fail); |
||
181 | FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c, (ctx->m.avctx->qmax+1) * 64 * sizeof(int), fail); |
||
182 | FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_l16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t), fail); |
||
183 | FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->qmatrix_c16, (ctx->m.avctx->qmax+1) * 64 * 2 * sizeof(uint16_t), fail); |
||
184 | |||
185 | if (ctx->cid_table->bit_depth == 8) { |
||
186 | for (i = 1; i < 64; i++) { |
||
187 | int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]]; |
||
188 | weight_matrix[j] = ctx->cid_table->luma_weight[i]; |
||
189 | } |
||
190 | ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_l, ctx->qmatrix_l16, weight_matrix, |
||
191 | ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1); |
||
192 | for (i = 1; i < 64; i++) { |
||
193 | int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]]; |
||
194 | weight_matrix[j] = ctx->cid_table->chroma_weight[i]; |
||
195 | } |
||
196 | ff_convert_matrix(&ctx->m.dsp, ctx->qmatrix_c, ctx->qmatrix_c16, weight_matrix, |
||
197 | ctx->m.intra_quant_bias, 1, ctx->m.avctx->qmax, 1); |
||
198 | |||
199 | for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) { |
||
200 | for (i = 0; i < 64; i++) { |
||
201 | ctx->qmatrix_l [qscale] [i] <<= 2; ctx->qmatrix_c [qscale] [i] <<= 2; |
||
202 | ctx->qmatrix_l16[qscale][0][i] <<= 2; ctx->qmatrix_l16[qscale][1][i] <<= 2; |
||
203 | ctx->qmatrix_c16[qscale][0][i] <<= 2; ctx->qmatrix_c16[qscale][1][i] <<= 2; |
||
204 | } |
||
205 | } |
||
206 | } else { |
||
207 | // 10-bit |
||
208 | for (qscale = 1; qscale <= ctx->m.avctx->qmax; qscale++) { |
||
209 | for (i = 1; i < 64; i++) { |
||
210 | int j = ctx->m.dsp.idct_permutation[ff_zigzag_direct[i]]; |
||
211 | |||
212 | // The quantization formula from the VC-3 standard is: |
||
213 | // quantized = sign(block[i]) * floor(abs(block[i]/s) * p / (qscale * weight_table[i])) |
||
214 | // Where p is 32 for 8-bit samples and 8 for 10-bit ones. |
||
215 | // The s factor compensates scaling of DCT coefficients done by the DCT routines, |
||
216 | // and therefore is not present in standard. It's 8 for 8-bit samples and 4 for 10-bit ones. |
||
217 | // We want values of ctx->qtmatrix_l and ctx->qtmatrix_r to be: |
||
218 | // ((1 << DNX10BIT_QMAT_SHIFT) * (p / s)) / (qscale * weight_table[i]) |
||
219 | // For 10-bit samples, p / s == 2 |
||
220 | ctx->qmatrix_l[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) / (qscale * luma_weight_table[i]); |
||
221 | ctx->qmatrix_c[qscale][j] = (1 << (DNX10BIT_QMAT_SHIFT + 1)) / (qscale * chroma_weight_table[i]); |
||
222 | } |
||
223 | } |
||
224 | } |
||
225 | |||
226 | ctx->m.q_chroma_intra_matrix16 = ctx->qmatrix_c16; |
||
227 | ctx->m.q_chroma_intra_matrix = ctx->qmatrix_c; |
||
228 | ctx->m.q_intra_matrix16 = ctx->qmatrix_l16; |
||
229 | ctx->m.q_intra_matrix = ctx->qmatrix_l; |
||
230 | |||
231 | return 0; |
||
232 | fail: |
||
233 | return -1; |
||
234 | } |
||
235 | |||
236 | static av_cold int dnxhd_init_rc(DNXHDEncContext *ctx) |
||
237 | { |
||
238 | FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_rc, 8160*ctx->m.avctx->qmax*sizeof(RCEntry), fail); |
||
239 | if (ctx->m.avctx->mb_decision != FF_MB_DECISION_RD) |
||
240 | FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_cmp, ctx->m.mb_num*sizeof(RCCMPEntry), fail); |
||
241 | |||
242 | ctx->frame_bits = (ctx->cid_table->coding_unit_size - 640 - 4 - ctx->min_padding) * 8; |
||
243 | ctx->qscale = 1; |
||
244 | ctx->lambda = 2< |
||
245 | return 0; |
||
246 | fail: |
||
247 | return -1; |
||
248 | } |
||
249 | |||
250 | static av_cold int dnxhd_encode_init(AVCodecContext *avctx) |
||
251 | { |
||
252 | DNXHDEncContext *ctx = avctx->priv_data; |
||
253 | int i, index, bit_depth; |
||
254 | |||
255 | switch (avctx->pix_fmt) { |
||
256 | case AV_PIX_FMT_YUV422P: |
||
257 | bit_depth = 8; |
||
258 | break; |
||
259 | case AV_PIX_FMT_YUV422P10: |
||
260 | bit_depth = 10; |
||
261 | break; |
||
262 | default: |
||
263 | av_log(avctx, AV_LOG_ERROR, "pixel format is incompatible with DNxHD\n"); |
||
264 | return -1; |
||
265 | } |
||
266 | |||
267 | ctx->cid = ff_dnxhd_find_cid(avctx, bit_depth); |
||
268 | if (!ctx->cid) { |
||
269 | av_log(avctx, AV_LOG_ERROR, "video parameters incompatible with DNxHD. Valid DNxHD profiles:\n"); |
||
270 | ff_dnxhd_print_profiles(avctx, AV_LOG_ERROR); |
||
271 | return -1; |
||
272 | } |
||
273 | av_log(avctx, AV_LOG_DEBUG, "cid %d\n", ctx->cid); |
||
274 | |||
275 | index = ff_dnxhd_get_cid_table(ctx->cid); |
||
276 | av_assert0(index >= 0); |
||
277 | ctx->cid_table = &ff_dnxhd_cid_table[index]; |
||
278 | |||
279 | ctx->m.avctx = avctx; |
||
280 | ctx->m.mb_intra = 1; |
||
281 | ctx->m.h263_aic = 1; |
||
282 | |||
283 | avctx->bits_per_raw_sample = ctx->cid_table->bit_depth; |
||
284 | |||
285 | ff_dct_common_init(&ctx->m); |
||
286 | ff_dct_encode_init(&ctx->m); |
||
287 | |||
288 | if (!ctx->m.dct_quantize) |
||
289 | ctx->m.dct_quantize = ff_dct_quantize_c; |
||
290 | |||
291 | if (ctx->cid_table->bit_depth == 10) { |
||
292 | ctx->m.dct_quantize = dnxhd_10bit_dct_quantize; |
||
293 | ctx->get_pixels_8x4_sym = dnxhd_10bit_get_pixels_8x4_sym; |
||
294 | ctx->block_width_l2 = 4; |
||
295 | } else { |
||
296 | ctx->get_pixels_8x4_sym = dnxhd_8bit_get_pixels_8x4_sym; |
||
297 | ctx->block_width_l2 = 3; |
||
298 | } |
||
299 | |||
300 | if (ARCH_X86) |
||
301 | ff_dnxhdenc_init_x86(ctx); |
||
302 | |||
303 | ctx->m.mb_height = (avctx->height + 15) / 16; |
||
304 | ctx->m.mb_width = (avctx->width + 15) / 16; |
||
305 | |||
306 | if (avctx->flags & CODEC_FLAG_INTERLACED_DCT) { |
||
307 | ctx->interlaced = 1; |
||
308 | ctx->m.mb_height /= 2; |
||
309 | } |
||
310 | |||
311 | ctx->m.mb_num = ctx->m.mb_height * ctx->m.mb_width; |
||
312 | |||
313 | if (avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS) |
||
314 | ctx->m.intra_quant_bias = avctx->intra_quant_bias; |
||
315 | if (dnxhd_init_qmat(ctx, ctx->m.intra_quant_bias, 0) < 0) // XXX tune lbias/cbias |
||
316 | return -1; |
||
317 | |||
318 | // Avid Nitris hardware decoder requires a minimum amount of padding in the coding unit payload |
||
319 | if (ctx->nitris_compat) |
||
320 | ctx->min_padding = 1600; |
||
321 | |||
322 | if (dnxhd_init_vlc(ctx) < 0) |
||
323 | return -1; |
||
324 | if (dnxhd_init_rc(ctx) < 0) |
||
325 | return -1; |
||
326 | |||
327 | FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->slice_size, ctx->m.mb_height*sizeof(uint32_t), fail); |
||
328 | FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->slice_offs, ctx->m.mb_height*sizeof(uint32_t), fail); |
||
329 | FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_bits, ctx->m.mb_num *sizeof(uint16_t), fail); |
||
330 | FF_ALLOCZ_OR_GOTO(ctx->m.avctx, ctx->mb_qscale, ctx->m.mb_num *sizeof(uint8_t), fail); |
||
331 | |||
332 | ctx->frame.key_frame = 1; |
||
333 | ctx->frame.pict_type = AV_PICTURE_TYPE_I; |
||
334 | ctx->m.avctx->coded_frame = &ctx->frame; |
||
335 | |||
336 | if (avctx->thread_count > MAX_THREADS) { |
||
337 | av_log(avctx, AV_LOG_ERROR, "too many threads\n"); |
||
338 | return -1; |
||
339 | } |
||
340 | |||
341 | ctx->thread[0] = ctx; |
||
342 | for (i = 1; i < avctx->thread_count; i++) { |
||
343 | ctx->thread[i] = av_malloc(sizeof(DNXHDEncContext)); |
||
344 | memcpy(ctx->thread[i], ctx, sizeof(DNXHDEncContext)); |
||
345 | } |
||
346 | |||
347 | return 0; |
||
348 | fail: //for FF_ALLOCZ_OR_GOTO |
||
349 | return -1; |
||
350 | } |
||
351 | |||
352 | static int dnxhd_write_header(AVCodecContext *avctx, uint8_t *buf) |
||
353 | { |
||
354 | DNXHDEncContext *ctx = avctx->priv_data; |
||
355 | static const uint8_t header_prefix[5] = { 0x00,0x00,0x02,0x80,0x01 }; |
||
356 | |||
357 | memset(buf, 0, 640); |
||
358 | |||
359 | memcpy(buf, header_prefix, 5); |
||
360 | buf[5] = ctx->interlaced ? ctx->cur_field+2 : 0x01; |
||
361 | buf[6] = 0x80; // crc flag off |
||
362 | buf[7] = 0xa0; // reserved |
||
363 | AV_WB16(buf + 0x18, avctx->height>>ctx->interlaced); // ALPF |
||
364 | AV_WB16(buf + 0x1a, avctx->width); // SPL |
||
365 | AV_WB16(buf + 0x1d, avctx->height>>ctx->interlaced); // NAL |
||
366 | |||
367 | buf[0x21] = ctx->cid_table->bit_depth == 10 ? 0x58 : 0x38; |
||
368 | buf[0x22] = 0x88 + (ctx->interlaced<<2); |
||
369 | AV_WB32(buf + 0x28, ctx->cid); // CID |
||
370 | buf[0x2c] = ctx->interlaced ? 0 : 0x80; |
||
371 | |||
372 | buf[0x5f] = 0x01; // UDL |
||
373 | |||
374 | buf[0x167] = 0x02; // reserved |
||
375 | AV_WB16(buf + 0x16a, ctx->m.mb_height * 4 + 4); // MSIPS |
||
376 | buf[0x16d] = ctx->m.mb_height; // Ns |
||
377 | buf[0x16f] = 0x10; // reserved |
||
378 | |||
379 | ctx->msip = buf + 0x170; |
||
380 | return 0; |
||
381 | } |
||
382 | |||
383 | static av_always_inline void dnxhd_encode_dc(DNXHDEncContext *ctx, int diff) |
||
384 | { |
||
385 | int nbits; |
||
386 | if (diff < 0) { |
||
387 | nbits = av_log2_16bit(-2*diff); |
||
388 | diff--; |
||
389 | } else { |
||
390 | nbits = av_log2_16bit(2*diff); |
||
391 | } |
||
392 | put_bits(&ctx->m.pb, ctx->cid_table->dc_bits[nbits] + nbits, |
||
393 | (ctx->cid_table->dc_codes[nbits]< |
||
394 | } |
||
395 | |||
396 | static av_always_inline void dnxhd_encode_block(DNXHDEncContext *ctx, int16_t *block, int last_index, int n) |
||
397 | { |
||
398 | int last_non_zero = 0; |
||
399 | int slevel, i, j; |
||
400 | |||
401 | dnxhd_encode_dc(ctx, block[0] - ctx->m.last_dc[n]); |
||
402 | ctx->m.last_dc[n] = block[0]; |
||
403 | |||
404 | for (i = 1; i <= last_index; i++) { |
||
405 | j = ctx->m.intra_scantable.permutated[i]; |
||
406 | slevel = block[j]; |
||
407 | if (slevel) { |
||
408 | int run_level = i - last_non_zero - 1; |
||
409 | int rlevel = (slevel<<1)|!!run_level; |
||
410 | put_bits(&ctx->m.pb, ctx->vlc_bits[rlevel], ctx->vlc_codes[rlevel]); |
||
411 | if (run_level) |
||
412 | put_bits(&ctx->m.pb, ctx->run_bits[run_level], ctx->run_codes[run_level]); |
||
413 | last_non_zero = i; |
||
414 | } |
||
415 | } |
||
416 | put_bits(&ctx->m.pb, ctx->vlc_bits[0], ctx->vlc_codes[0]); // EOB |
||
417 | } |
||
418 | |||
419 | static av_always_inline void dnxhd_unquantize_c(DNXHDEncContext *ctx, int16_t *block, int n, int qscale, int last_index) |
||
420 | { |
||
421 | const uint8_t *weight_matrix; |
||
422 | int level; |
||
423 | int i; |
||
424 | |||
425 | weight_matrix = (n&2) ? ctx->cid_table->chroma_weight : ctx->cid_table->luma_weight; |
||
426 | |||
427 | for (i = 1; i <= last_index; i++) { |
||
428 | int j = ctx->m.intra_scantable.permutated[i]; |
||
429 | level = block[j]; |
||
430 | if (level) { |
||
431 | if (level < 0) { |
||
432 | level = (1-2*level) * qscale * weight_matrix[i]; |
||
433 | if (ctx->cid_table->bit_depth == 10) { |
||
434 | if (weight_matrix[i] != 8) |
||
435 | level += 8; |
||
436 | level >>= 4; |
||
437 | } else { |
||
438 | if (weight_matrix[i] != 32) |
||
439 | level += 32; |
||
440 | level >>= 6; |
||
441 | } |
||
442 | level = -level; |
||
443 | } else { |
||
444 | level = (2*level+1) * qscale * weight_matrix[i]; |
||
445 | if (ctx->cid_table->bit_depth == 10) { |
||
446 | if (weight_matrix[i] != 8) |
||
447 | level += 8; |
||
448 | level >>= 4; |
||
449 | } else { |
||
450 | if (weight_matrix[i] != 32) |
||
451 | level += 32; |
||
452 | level >>= 6; |
||
453 | } |
||
454 | } |
||
455 | block[j] = level; |
||
456 | } |
||
457 | } |
||
458 | } |
||
459 | |||
460 | static av_always_inline int dnxhd_ssd_block(int16_t *qblock, int16_t *block) |
||
461 | { |
||
462 | int score = 0; |
||
463 | int i; |
||
464 | for (i = 0; i < 64; i++) |
||
465 | score += (block[i] - qblock[i]) * (block[i] - qblock[i]); |
||
466 | return score; |
||
467 | } |
||
468 | |||
469 | static av_always_inline int dnxhd_calc_ac_bits(DNXHDEncContext *ctx, int16_t *block, int last_index) |
||
470 | { |
||
471 | int last_non_zero = 0; |
||
472 | int bits = 0; |
||
473 | int i, j, level; |
||
474 | for (i = 1; i <= last_index; i++) { |
||
475 | j = ctx->m.intra_scantable.permutated[i]; |
||
476 | level = block[j]; |
||
477 | if (level) { |
||
478 | int run_level = i - last_non_zero - 1; |
||
479 | bits += ctx->vlc_bits[(level<<1)|!!run_level]+ctx->run_bits[run_level]; |
||
480 | last_non_zero = i; |
||
481 | } |
||
482 | } |
||
483 | return bits; |
||
484 | } |
||
485 | |||
486 | static av_always_inline void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y) |
||
487 | { |
||
488 | const int bs = ctx->block_width_l2; |
||
489 | const int bw = 1 << bs; |
||
490 | const uint8_t *ptr_y = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize) + (mb_x << bs+1); |
||
491 | const uint8_t *ptr_u = ctx->thread[0]->src[1] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs); |
||
492 | const uint8_t *ptr_v = ctx->thread[0]->src[2] + ((mb_y << 4) * ctx->m.uvlinesize) + (mb_x << bs); |
||
493 | DSPContext *dsp = &ctx->m.dsp; |
||
494 | |||
495 | dsp->get_pixels(ctx->blocks[0], ptr_y, ctx->m.linesize); |
||
496 | dsp->get_pixels(ctx->blocks[1], ptr_y + bw, ctx->m.linesize); |
||
497 | dsp->get_pixels(ctx->blocks[2], ptr_u, ctx->m.uvlinesize); |
||
498 | dsp->get_pixels(ctx->blocks[3], ptr_v, ctx->m.uvlinesize); |
||
499 | |||
500 | if (mb_y+1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) { |
||
501 | if (ctx->interlaced) { |
||
502 | ctx->get_pixels_8x4_sym(ctx->blocks[4], ptr_y + ctx->dct_y_offset, ctx->m.linesize); |
||
503 | ctx->get_pixels_8x4_sym(ctx->blocks[5], ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize); |
||
504 | ctx->get_pixels_8x4_sym(ctx->blocks[6], ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize); |
||
505 | ctx->get_pixels_8x4_sym(ctx->blocks[7], ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize); |
||
506 | } else { |
||
507 | dsp->clear_block(ctx->blocks[4]); |
||
508 | dsp->clear_block(ctx->blocks[5]); |
||
509 | dsp->clear_block(ctx->blocks[6]); |
||
510 | dsp->clear_block(ctx->blocks[7]); |
||
511 | } |
||
512 | } else { |
||
513 | dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset, ctx->m.linesize); |
||
514 | dsp->get_pixels(ctx->blocks[5], ptr_y + ctx->dct_y_offset + bw, ctx->m.linesize); |
||
515 | dsp->get_pixels(ctx->blocks[6], ptr_u + ctx->dct_uv_offset, ctx->m.uvlinesize); |
||
516 | dsp->get_pixels(ctx->blocks[7], ptr_v + ctx->dct_uv_offset, ctx->m.uvlinesize); |
||
517 | } |
||
518 | } |
||
519 | |||
520 | static av_always_inline int dnxhd_switch_matrix(DNXHDEncContext *ctx, int i) |
||
521 | { |
||
522 | const static uint8_t component[8]={0,0,1,2,0,0,1,2}; |
||
523 | return component[i]; |
||
524 | } |
||
525 | |||
526 | static int dnxhd_calc_bits_thread(AVCodecContext *avctx, void *arg, int jobnr, int threadnr) |
||
527 | { |
||
528 | DNXHDEncContext *ctx = avctx->priv_data; |
||
529 | int mb_y = jobnr, mb_x; |
||
530 | int qscale = ctx->qscale; |
||
531 | LOCAL_ALIGNED_16(int16_t, block, [64]); |
||
532 | ctx = ctx->thread[threadnr]; |
||
533 | |||
534 | ctx->m.last_dc[0] = |
||
535 | ctx->m.last_dc[1] = |
||
536 | ctx->m.last_dc[2] = 1 << (ctx->cid_table->bit_depth + 2); |
||
537 | |||
538 | for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) { |
||
539 | unsigned mb = mb_y * ctx->m.mb_width + mb_x; |
||
540 | int ssd = 0; |
||
541 | int ac_bits = 0; |
||
542 | int dc_bits = 0; |
||
543 | int i; |
||
544 | |||
545 | dnxhd_get_blocks(ctx, mb_x, mb_y); |
||
546 | |||
547 | for (i = 0; i < 8; i++) { |
||
548 | int16_t *src_block = ctx->blocks[i]; |
||
549 | int overflow, nbits, diff, last_index; |
||
550 | int n = dnxhd_switch_matrix(ctx, i); |
||
551 | |||
552 | memcpy(block, src_block, 64*sizeof(*block)); |
||
553 | last_index = ctx->m.dct_quantize(&ctx->m, block, 4&(2*i), qscale, &overflow); |
||
554 | ac_bits += dnxhd_calc_ac_bits(ctx, block, last_index); |
||
555 | |||
556 | diff = block[0] - ctx->m.last_dc[n]; |
||
557 | if (diff < 0) nbits = av_log2_16bit(-2*diff); |
||
558 | else nbits = av_log2_16bit( 2*diff); |
||
559 | |||
560 | av_assert1(nbits < ctx->cid_table->bit_depth + 4); |
||
561 | dc_bits += ctx->cid_table->dc_bits[nbits] + nbits; |
||
562 | |||
563 | ctx->m.last_dc[n] = block[0]; |
||
564 | |||
565 | if (avctx->mb_decision == FF_MB_DECISION_RD || !RC_VARIANCE) { |
||
566 | dnxhd_unquantize_c(ctx, block, i, qscale, last_index); |
||
567 | ctx->m.dsp.idct(block); |
||
568 | ssd += dnxhd_ssd_block(block, src_block); |
||
569 | } |
||
570 | } |
||
571 | ctx->mb_rc[qscale][mb].ssd = ssd; |
||
572 | ctx->mb_rc[qscale][mb].bits = ac_bits+dc_bits+12+8*ctx->vlc_bits[0]; |
||
573 | } |
||
574 | return 0; |
||
575 | } |
||
576 | |||
577 | static int dnxhd_encode_thread(AVCodecContext *avctx, void *arg, int jobnr, int threadnr) |
||
578 | { |
||
579 | DNXHDEncContext *ctx = avctx->priv_data; |
||
580 | int mb_y = jobnr, mb_x; |
||
581 | ctx = ctx->thread[threadnr]; |
||
582 | init_put_bits(&ctx->m.pb, (uint8_t *)arg + 640 + ctx->slice_offs[jobnr], ctx->slice_size[jobnr]); |
||
583 | |||
584 | ctx->m.last_dc[0] = |
||
585 | ctx->m.last_dc[1] = |
||
586 | ctx->m.last_dc[2] = 1 << (ctx->cid_table->bit_depth + 2); |
||
587 | for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) { |
||
588 | unsigned mb = mb_y * ctx->m.mb_width + mb_x; |
||
589 | int qscale = ctx->mb_qscale[mb]; |
||
590 | int i; |
||
591 | |||
592 | put_bits(&ctx->m.pb, 12, qscale<<1); |
||
593 | |||
594 | dnxhd_get_blocks(ctx, mb_x, mb_y); |
||
595 | |||
596 | for (i = 0; i < 8; i++) { |
||
597 | int16_t *block = ctx->blocks[i]; |
||
598 | int overflow, n = dnxhd_switch_matrix(ctx, i); |
||
599 | int last_index = ctx->m.dct_quantize(&ctx->m, block, 4&(2*i), qscale, &overflow); |
||
600 | //START_TIMER; |
||
601 | dnxhd_encode_block(ctx, block, last_index, n); |
||
602 | //STOP_TIMER("encode_block"); |
||
603 | } |
||
604 | } |
||
605 | if (put_bits_count(&ctx->m.pb)&31) |
||
606 | put_bits(&ctx->m.pb, 32-(put_bits_count(&ctx->m.pb)&31), 0); |
||
607 | flush_put_bits(&ctx->m.pb); |
||
608 | return 0; |
||
609 | } |
||
610 | |||
611 | static void dnxhd_setup_threads_slices(DNXHDEncContext *ctx) |
||
612 | { |
||
613 | int mb_y, mb_x; |
||
614 | int offset = 0; |
||
615 | for (mb_y = 0; mb_y < ctx->m.mb_height; mb_y++) { |
||
616 | int thread_size; |
||
617 | ctx->slice_offs[mb_y] = offset; |
||
618 | ctx->slice_size[mb_y] = 0; |
||
619 | for (mb_x = 0; mb_x < ctx->m.mb_width; mb_x++) { |
||
620 | unsigned mb = mb_y * ctx->m.mb_width + mb_x; |
||
621 | ctx->slice_size[mb_y] += ctx->mb_bits[mb]; |
||
622 | } |
||
623 | ctx->slice_size[mb_y] = (ctx->slice_size[mb_y]+31)&~31; |
||
624 | ctx->slice_size[mb_y] >>= 3; |
||
625 | thread_size = ctx->slice_size[mb_y]; |
||
626 | offset += thread_size; |
||
627 | } |
||
628 | } |
||
629 | |||
630 | static int dnxhd_mb_var_thread(AVCodecContext *avctx, void *arg, int jobnr, int threadnr) |
||
631 | { |
||
632 | DNXHDEncContext *ctx = avctx->priv_data; |
||
633 | int mb_y = jobnr, mb_x, x, y; |
||
634 | int partial_last_row = (mb_y == ctx->m.mb_height - 1) && |
||
635 | ((avctx->height >> ctx->interlaced) & 0xF); |
||
636 | |||
637 | ctx = ctx->thread[threadnr]; |
||
638 | if (ctx->cid_table->bit_depth == 8) { |
||
639 | uint8_t *pix = ctx->thread[0]->src[0] + ((mb_y<<4) * ctx->m.linesize); |
||
640 | for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x, pix += 16) { |
||
641 | unsigned mb = mb_y * ctx->m.mb_width + mb_x; |
||
642 | int sum; |
||
643 | int varc; |
||
644 | |||
645 | if (!partial_last_row && mb_x * 16 <= avctx->width - 16) { |
||
646 | sum = ctx->m.dsp.pix_sum(pix, ctx->m.linesize); |
||
647 | varc = ctx->m.dsp.pix_norm1(pix, ctx->m.linesize); |
||
648 | } else { |
||
649 | int bw = FFMIN(avctx->width - 16 * mb_x, 16); |
||
650 | int bh = FFMIN((avctx->height >> ctx->interlaced) - 16 * mb_y, 16); |
||
651 | sum = varc = 0; |
||
652 | for (y = 0; y < bh; y++) { |
||
653 | for (x = 0; x < bw; x++) { |
||
654 | uint8_t val = pix[x + y * ctx->m.linesize]; |
||
655 | sum += val; |
||
656 | varc += val * val; |
||
657 | } |
||
658 | } |
||
659 | } |
||
660 | varc = (varc - (((unsigned)sum * sum) >> 8) + 128) >> 8; |
||
661 | |||
662 | ctx->mb_cmp[mb].value = varc; |
||
663 | ctx->mb_cmp[mb].mb = mb; |
||
664 | } |
||
665 | } else { // 10-bit |
||
666 | int const linesize = ctx->m.linesize >> 1; |
||
667 | for (mb_x = 0; mb_x < ctx->m.mb_width; ++mb_x) { |
||
668 | uint16_t *pix = (uint16_t*)ctx->thread[0]->src[0] + ((mb_y << 4) * linesize) + (mb_x << 4); |
||
669 | unsigned mb = mb_y * ctx->m.mb_width + mb_x; |
||
670 | int sum = 0; |
||
671 | int sqsum = 0; |
||
672 | int mean, sqmean; |
||
673 | int i, j; |
||
674 | // Macroblocks are 16x16 pixels, unlike DCT blocks which are 8x8. |
||
675 | for (i = 0; i < 16; ++i) { |
||
676 | for (j = 0; j < 16; ++j) { |
||
677 | // Turn 16-bit pixels into 10-bit ones. |
||
678 | int const sample = (unsigned)pix[j] >> 6; |
||
679 | sum += sample; |
||
680 | sqsum += sample * sample; |
||
681 | // 2^10 * 2^10 * 16 * 16 = 2^28, which is less than INT_MAX |
||
682 | } |
||
683 | pix += linesize; |
||
684 | } |
||
685 | mean = sum >> 8; // 16*16 == 2^8 |
||
686 | sqmean = sqsum >> 8; |
||
687 | ctx->mb_cmp[mb].value = sqmean - mean * mean; |
||
688 | ctx->mb_cmp[mb].mb = mb; |
||
689 | } |
||
690 | } |
||
691 | return 0; |
||
692 | } |
||
693 | |||
694 | static int dnxhd_encode_rdo(AVCodecContext *avctx, DNXHDEncContext *ctx) |
||
695 | { |
||
696 | int lambda, up_step, down_step; |
||
697 | int last_lower = INT_MAX, last_higher = 0; |
||
698 | int x, y, q; |
||
699 | |||
700 | for (q = 1; q < avctx->qmax; q++) { |
||
701 | ctx->qscale = q; |
||
702 | avctx->execute2(avctx, dnxhd_calc_bits_thread, NULL, NULL, ctx->m.mb_height); |
||
703 | } |
||
704 | up_step = down_step = 2< |
||
705 | lambda = ctx->lambda; |
||
706 | |||
707 | for (;;) { |
||
708 | int bits = 0; |
||
709 | int end = 0; |
||
710 | if (lambda == last_higher) { |
||
711 | lambda++; |
||
712 | end = 1; // need to set final qscales/bits |
||
713 | } |
||
714 | for (y = 0; y < ctx->m.mb_height; y++) { |
||
715 | for (x = 0; x < ctx->m.mb_width; x++) { |
||
716 | unsigned min = UINT_MAX; |
||
717 | int qscale = 1; |
||
718 | int mb = y*ctx->m.mb_width+x; |
||
719 | for (q = 1; q < avctx->qmax; q++) { |
||
720 | unsigned score = ctx->mb_rc[q][mb].bits*lambda+ |
||
721 | ((unsigned)ctx->mb_rc[q][mb].ssd< |
||
722 | if (score < min) { |
||
723 | min = score; |
||
724 | qscale = q; |
||
725 | } |
||
726 | } |
||
727 | bits += ctx->mb_rc[qscale][mb].bits; |
||
728 | ctx->mb_qscale[mb] = qscale; |
||
729 | ctx->mb_bits[mb] = ctx->mb_rc[qscale][mb].bits; |
||
730 | } |
||
731 | bits = (bits+31)&~31; // padding |
||
732 | if (bits > ctx->frame_bits) |
||
733 | break; |
||
734 | } |
||
735 | //av_dlog(ctx->m.avctx, "lambda %d, up %u, down %u, bits %d, frame %d\n", |
||
736 | // lambda, last_higher, last_lower, bits, ctx->frame_bits); |
||
737 | if (end) { |
||
738 | if (bits > ctx->frame_bits) |
||
739 | return -1; |
||
740 | break; |
||
741 | } |
||
742 | if (bits < ctx->frame_bits) { |
||
743 | last_lower = FFMIN(lambda, last_lower); |
||
744 | if (last_higher != 0) |
||
745 | lambda = (lambda+last_higher)>>1; |
||
746 | else |
||
747 | lambda -= down_step; |
||
748 | down_step = FFMIN((int64_t)down_step*5, INT_MAX); |
||
749 | up_step = 1< |
||
750 | lambda = FFMAX(1, lambda); |
||
751 | if (lambda == last_lower) |
||
752 | break; |
||
753 | } else { |
||
754 | last_higher = FFMAX(lambda, last_higher); |
||
755 | if (last_lower != INT_MAX) |
||
756 | lambda = (lambda+last_lower)>>1; |
||
757 | else if ((int64_t)lambda + up_step > INT_MAX) |
||
758 | return -1; |
||
759 | else |
||
760 | lambda += up_step; |
||
761 | up_step = FFMIN((int64_t)up_step*5, INT_MAX); |
||
762 | down_step = 1< |
||
763 | } |
||
764 | } |
||
765 | //av_dlog(ctx->m.avctx, "out lambda %d\n", lambda); |
||
766 | ctx->lambda = lambda; |
||
767 | return 0; |
||
768 | } |
||
769 | |||
770 | static int dnxhd_find_qscale(DNXHDEncContext *ctx) |
||
771 | { |
||
772 | int bits = 0; |
||
773 | int up_step = 1; |
||
774 | int down_step = 1; |
||
775 | int last_higher = 0; |
||
776 | int last_lower = INT_MAX; |
||
777 | int qscale; |
||
778 | int x, y; |
||
779 | |||
780 | qscale = ctx->qscale; |
||
781 | for (;;) { |
||
782 | bits = 0; |
||
783 | ctx->qscale = qscale; |
||
784 | // XXX avoid recalculating bits |
||
785 | ctx->m.avctx->execute2(ctx->m.avctx, dnxhd_calc_bits_thread, NULL, NULL, ctx->m.mb_height); |
||
786 | for (y = 0; y < ctx->m.mb_height; y++) { |
||
787 | for (x = 0; x < ctx->m.mb_width; x++) |
||
788 | bits += ctx->mb_rc[qscale][y*ctx->m.mb_width+x].bits; |
||
789 | bits = (bits+31)&~31; // padding |
||
790 | if (bits > ctx->frame_bits) |
||
791 | break; |
||
792 | } |
||
793 | //av_dlog(ctx->m.avctx, "%d, qscale %d, bits %d, frame %d, higher %d, lower %d\n", |
||
794 | // ctx->m.avctx->frame_number, qscale, bits, ctx->frame_bits, last_higher, last_lower); |
||
795 | if (bits < ctx->frame_bits) { |
||
796 | if (qscale == 1) |
||
797 | return 1; |
||
798 | if (last_higher == qscale - 1) { |
||
799 | qscale = last_higher; |
||
800 | break; |
||
801 | } |
||
802 | last_lower = FFMIN(qscale, last_lower); |
||
803 | if (last_higher != 0) |
||
804 | qscale = (qscale+last_higher)>>1; |
||
805 | else |
||
806 | qscale -= down_step++; |
||
807 | if (qscale < 1) |
||
808 | qscale = 1; |
||
809 | up_step = 1; |
||
810 | } else { |
||
811 | if (last_lower == qscale + 1) |
||
812 | break; |
||
813 | last_higher = FFMAX(qscale, last_higher); |
||
814 | if (last_lower != INT_MAX) |
||
815 | qscale = (qscale+last_lower)>>1; |
||
816 | else |
||
817 | qscale += up_step++; |
||
818 | down_step = 1; |
||
819 | if (qscale >= ctx->m.avctx->qmax) |
||
820 | return -1; |
||
821 | } |
||
822 | } |
||
823 | //av_dlog(ctx->m.avctx, "out qscale %d\n", qscale); |
||
824 | ctx->qscale = qscale; |
||
825 | return 0; |
||
826 | } |
||
827 | |||
/* Radix sort parameters: RADIX_PASSES digits of BUCKET_BITS bits each. */
#define BUCKET_BITS  8
#define RADIX_PASSES 4
#define NBUCKETS     (1 << BUCKET_BITS)

/**
 * Map the BUCKET_BITS-wide digit of value found at bit position shift to a
 * bucket index. The digit is mirrored (NBUCKETS - 1 - digit), so larger
 * digits land in lower-numbered buckets.
 */
static inline int get_bucket(int value, int shift)
{
    const int digit = (value >> shift) & (NBUCKETS - 1);

    return (NBUCKETS - 1) - digit;
}
||
838 | |||
839 | static void radix_count(const RCCMPEntry *data, int size, int buckets[RADIX_PASSES][NBUCKETS]) |
||
840 | { |
||
841 | int i, j; |
||
842 | memset(buckets, 0, sizeof(buckets[0][0]) * RADIX_PASSES * NBUCKETS); |
||
843 | for (i = 0; i < size; i++) { |
||
844 | int v = data[i].value; |
||
845 | for (j = 0; j < RADIX_PASSES; j++) { |
||
846 | buckets[j][get_bucket(v, 0)]++; |
||
847 | v >>= BUCKET_BITS; |
||
848 | } |
||
849 | av_assert1(!v); |
||
850 | } |
||
851 | for (j = 0; j < RADIX_PASSES; j++) { |
||
852 | int offset = size; |
||
853 | for (i = NBUCKETS - 1; i >= 0; i--) |
||
854 | buckets[j][i] = offset -= buckets[j][i]; |
||
855 | av_assert1(!buckets[j][0]); |
||
856 | } |
||
857 | } |
||
858 | |||
859 | static void radix_sort_pass(RCCMPEntry *dst, const RCCMPEntry *data, int size, int buckets[NBUCKETS], int pass) |
||
860 | { |
||
861 | int shift = pass * BUCKET_BITS; |
||
862 | int i; |
||
863 | for (i = 0; i < size; i++) { |
||
864 | int v = get_bucket(data[i].value, shift); |
||
865 | int pos = buckets[v]++; |
||
866 | dst[pos] = data[i]; |
||
867 | } |
||
868 | } |
||
869 | |||
870 | static void radix_sort(RCCMPEntry *data, int size) |
||
871 | { |
||
872 | int buckets[RADIX_PASSES][NBUCKETS]; |
||
873 | RCCMPEntry *tmp = av_malloc(sizeof(*tmp) * size); |
||
874 | radix_count(data, size, buckets); |
||
875 | radix_sort_pass(tmp, data, size, buckets[0], 0); |
||
876 | radix_sort_pass(data, tmp, size, buckets[1], 1); |
||
877 | if (buckets[2][NBUCKETS - 1] || buckets[3][NBUCKETS - 1]) { |
||
878 | radix_sort_pass(tmp, data, size, buckets[2], 2); |
||
879 | radix_sort_pass(data, tmp, size, buckets[3], 3); |
||
880 | } |
||
881 | av_free(tmp); |
||
882 | } |
||
883 | |||
884 | static int dnxhd_encode_fast(AVCodecContext *avctx, DNXHDEncContext *ctx) |
||
885 | { |
||
886 | int max_bits = 0; |
||
887 | int ret, x, y; |
||
888 | if ((ret = dnxhd_find_qscale(ctx)) < 0) |
||
889 | return -1; |
||
890 | for (y = 0; y < ctx->m.mb_height; y++) { |
||
891 | for (x = 0; x < ctx->m.mb_width; x++) { |
||
892 | int mb = y*ctx->m.mb_width+x; |
||
893 | int delta_bits; |
||
894 | ctx->mb_qscale[mb] = ctx->qscale; |
||
895 | ctx->mb_bits[mb] = ctx->mb_rc[ctx->qscale][mb].bits; |
||
896 | max_bits += ctx->mb_rc[ctx->qscale][mb].bits; |
||
897 | if (!RC_VARIANCE) { |
||
898 | delta_bits = ctx->mb_rc[ctx->qscale][mb].bits-ctx->mb_rc[ctx->qscale+1][mb].bits; |
||
899 | ctx->mb_cmp[mb].mb = mb; |
||
900 | ctx->mb_cmp[mb].value = delta_bits ? |
||
901 | ((ctx->mb_rc[ctx->qscale][mb].ssd-ctx->mb_rc[ctx->qscale+1][mb].ssd)*100)/delta_bits |
||
902 | : INT_MIN; //avoid increasing qscale |
||
903 | } |
||
904 | } |
||
905 | max_bits += 31; //worst padding |
||
906 | } |
||
907 | if (!ret) { |
||
908 | if (RC_VARIANCE) |
||
909 | avctx->execute2(avctx, dnxhd_mb_var_thread, NULL, NULL, ctx->m.mb_height); |
||
910 | radix_sort(ctx->mb_cmp, ctx->m.mb_num); |
||
911 | for (x = 0; x < ctx->m.mb_num && max_bits > ctx->frame_bits; x++) { |
||
912 | int mb = ctx->mb_cmp[x].mb; |
||
913 | max_bits -= ctx->mb_rc[ctx->qscale][mb].bits - ctx->mb_rc[ctx->qscale+1][mb].bits; |
||
914 | ctx->mb_qscale[mb] = ctx->qscale+1; |
||
915 | ctx->mb_bits[mb] = ctx->mb_rc[ctx->qscale+1][mb].bits; |
||
916 | } |
||
917 | } |
||
918 | return 0; |
||
919 | } |
||
920 | |||
921 | static void dnxhd_load_picture(DNXHDEncContext *ctx, const AVFrame *frame) |
||
922 | { |
||
923 | int i; |
||
924 | |||
925 | for (i = 0; i < 3; i++) { |
||
926 | ctx->frame.data[i] = frame->data[i]; |
||
927 | ctx->frame.linesize[i] = frame->linesize[i]; |
||
928 | } |
||
929 | |||
930 | for (i = 0; i < ctx->m.avctx->thread_count; i++) { |
||
931 | ctx->thread[i]->m.linesize = ctx->frame.linesize[0]< |
||
932 | ctx->thread[i]->m.uvlinesize = ctx->frame.linesize[1]< |
||
933 | ctx->thread[i]->dct_y_offset = ctx->m.linesize *8; |
||
934 | ctx->thread[i]->dct_uv_offset = ctx->m.uvlinesize*8; |
||
935 | } |
||
936 | |||
937 | ctx->frame.interlaced_frame = frame->interlaced_frame; |
||
938 | ctx->cur_field = frame->interlaced_frame && !frame->top_field_first; |
||
939 | } |
||
940 | |||
941 | static int dnxhd_encode_picture(AVCodecContext *avctx, AVPacket *pkt, |
||
942 | const AVFrame *frame, int *got_packet) |
||
943 | { |
||
944 | DNXHDEncContext *ctx = avctx->priv_data; |
||
945 | int first_field = 1; |
||
946 | int offset, i, ret; |
||
947 | uint8_t *buf; |
||
948 | |||
949 | if ((ret = ff_alloc_packet2(avctx, pkt, ctx->cid_table->frame_size)) < 0) |
||
950 | return ret; |
||
951 | buf = pkt->data; |
||
952 | |||
953 | dnxhd_load_picture(ctx, frame); |
||
954 | |||
955 | encode_coding_unit: |
||
956 | for (i = 0; i < 3; i++) { |
||
957 | ctx->src[i] = ctx->frame.data[i]; |
||
958 | if (ctx->interlaced && ctx->cur_field) |
||
959 | ctx->src[i] += ctx->frame.linesize[i]; |
||
960 | } |
||
961 | |||
962 | dnxhd_write_header(avctx, buf); |
||
963 | |||
964 | if (avctx->mb_decision == FF_MB_DECISION_RD) |
||
965 | ret = dnxhd_encode_rdo(avctx, ctx); |
||
966 | else |
||
967 | ret = dnxhd_encode_fast(avctx, ctx); |
||
968 | if (ret < 0) { |
||
969 | av_log(avctx, AV_LOG_ERROR, |
||
970 | "picture could not fit ratecontrol constraints, increase qmax\n"); |
||
971 | return -1; |
||
972 | } |
||
973 | |||
974 | dnxhd_setup_threads_slices(ctx); |
||
975 | |||
976 | offset = 0; |
||
977 | for (i = 0; i < ctx->m.mb_height; i++) { |
||
978 | AV_WB32(ctx->msip + i * 4, offset); |
||
979 | offset += ctx->slice_size[i]; |
||
980 | av_assert1(!(ctx->slice_size[i] & 3)); |
||
981 | } |
||
982 | |||
983 | avctx->execute2(avctx, dnxhd_encode_thread, buf, NULL, ctx->m.mb_height); |
||
984 | |||
985 | av_assert1(640 + offset + 4 <= ctx->cid_table->coding_unit_size); |
||
986 | memset(buf + 640 + offset, 0, ctx->cid_table->coding_unit_size - 4 - offset - 640); |
||
987 | |||
988 | AV_WB32(buf + ctx->cid_table->coding_unit_size - 4, 0x600DC0DE); // EOF |
||
989 | |||
990 | if (ctx->interlaced && first_field) { |
||
991 | first_field = 0; |
||
992 | ctx->cur_field ^= 1; |
||
993 | buf += ctx->cid_table->coding_unit_size; |
||
994 | goto encode_coding_unit; |
||
995 | } |
||
996 | |||
997 | ctx->frame.quality = ctx->qscale*FF_QP2LAMBDA; |
||
998 | |||
999 | pkt->flags |= AV_PKT_FLAG_KEY; |
||
1000 | *got_packet = 1; |
||
1001 | return 0; |
||
1002 | } |
||
1003 | |||
1004 | static av_cold int dnxhd_encode_end(AVCodecContext *avctx) |
||
1005 | { |
||
1006 | DNXHDEncContext *ctx = avctx->priv_data; |
||
1007 | int max_level = 1<<(ctx->cid_table->bit_depth+2); |
||
1008 | int i; |
||
1009 | |||
1010 | av_free(ctx->vlc_codes-max_level*2); |
||
1011 | av_free(ctx->vlc_bits -max_level*2); |
||
1012 | av_freep(&ctx->run_codes); |
||
1013 | av_freep(&ctx->run_bits); |
||
1014 | |||
1015 | av_freep(&ctx->mb_bits); |
||
1016 | av_freep(&ctx->mb_qscale); |
||
1017 | av_freep(&ctx->mb_rc); |
||
1018 | av_freep(&ctx->mb_cmp); |
||
1019 | av_freep(&ctx->slice_size); |
||
1020 | av_freep(&ctx->slice_offs); |
||
1021 | |||
1022 | av_freep(&ctx->qmatrix_c); |
||
1023 | av_freep(&ctx->qmatrix_l); |
||
1024 | av_freep(&ctx->qmatrix_c16); |
||
1025 | av_freep(&ctx->qmatrix_l16); |
||
1026 | |||
1027 | for (i = 1; i < avctx->thread_count; i++) |
||
1028 | av_freep(&ctx->thread[i]); |
||
1029 | |||
1030 | return 0; |
||
1031 | } |
||
1032 | |||
1033 | static const AVCodecDefault dnxhd_defaults[] = { |
||
1034 | { "qmax", "1024" }, /* Maximum quantization scale factor allowed for VC-3 */ |
||
1035 | { NULL }, |
||
1036 | }; |
||
1037 | |||
1038 | AVCodec ff_dnxhd_encoder = { |
||
1039 | .name = "dnxhd", |
||
1040 | .long_name = NULL_IF_CONFIG_SMALL("VC3/DNxHD"), |
||
1041 | .type = AVMEDIA_TYPE_VIDEO, |
||
1042 | .id = AV_CODEC_ID_DNXHD, |
||
1043 | .priv_data_size = sizeof(DNXHDEncContext), |
||
1044 | .init = dnxhd_encode_init, |
||
1045 | .encode2 = dnxhd_encode_picture, |
||
1046 | .close = dnxhd_encode_end, |
||
1047 | .capabilities = CODEC_CAP_SLICE_THREADS, |
||
1048 | .pix_fmts = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV422P, |
||
1049 | AV_PIX_FMT_YUV422P10, |
||
1050 | AV_PIX_FMT_NONE }, |
||
1051 | .priv_class = &dnxhd_class, |
||
1052 | .defaults = dnxhd_defaults, |
||
1053 | };>(ctx-><(ctx->=>>>>> |