Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
6147 serge 1
/*
2
 * Apple ProRes encoder
3
 *
4
 * Copyright (c) 2012 Konstantin Shishkov
5
 *
6
 * This encoder appears to be based on Anatoliy Wassermans considering
7
 * similarities in the bugs.
8
 *
9
 * This file is part of FFmpeg.
10
 *
11
 * FFmpeg is free software; you can redistribute it and/or
12
 * modify it under the terms of the GNU Lesser General Public
13
 * License as published by the Free Software Foundation; either
14
 * version 2.1 of the License, or (at your option) any later version.
15
 *
16
 * FFmpeg is distributed in the hope that it will be useful,
17
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19
 * Lesser General Public License for more details.
20
 *
21
 * You should have received a copy of the GNU Lesser General Public
22
 * License along with FFmpeg; if not, write to the Free Software
23
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24
 */
25
 
26
#include "libavutil/opt.h"
27
#include "libavutil/pixdesc.h"
28
#include "avcodec.h"
29
#include "fdctdsp.h"
30
#include "put_bits.h"
31
#include "bytestream.h"
32
#include "internal.h"
33
#include "proresdata.h"
34
 
35
#define CFACTOR_Y422 2
36
#define CFACTOR_Y444 3
37
 
38
#define MAX_MBS_PER_SLICE 8
39
 
40
#define MAX_PLANES 4
41
 
42
/*
 * Profile identifiers. The non-negative values are listed in the same order
 * as the entries of prores_profile_info[]; AUTO is a negative sentinel.
 */
enum {
    PRORES_PROFILE_AUTO     = -1,
    PRORES_PROFILE_PROXY    =  0,
    PRORES_PROFILE_LT       =  1,
    PRORES_PROFILE_STANDARD =  2,
    PRORES_PROFILE_HQ       =  3,
    PRORES_PROFILE_4444     =  4,
};
50
 
51
/*
 * Quantisation matrix identifiers, listed in the same order as the rows of
 * prores_quant_matrices[].
 */
enum {
    QUANT_MAT_PROXY    = 0,
    QUANT_MAT_LT       = 1,
    QUANT_MAT_STANDARD = 2,
    QUANT_MAT_HQ       = 3,
    QUANT_MAT_DEFAULT  = 4,
};
58
 
59
static const uint8_t prores_quant_matrices[][64] = {
60
    { // proxy
61
         4,  7,  9, 11, 13, 14, 15, 63,
62
         7,  7, 11, 12, 14, 15, 63, 63,
63
         9, 11, 13, 14, 15, 63, 63, 63,
64
        11, 11, 13, 14, 63, 63, 63, 63,
65
        11, 13, 14, 63, 63, 63, 63, 63,
66
        13, 14, 63, 63, 63, 63, 63, 63,
67
        13, 63, 63, 63, 63, 63, 63, 63,
68
        63, 63, 63, 63, 63, 63, 63, 63,
69
    },
70
    { // LT
71
         4,  5,  6,  7,  9, 11, 13, 15,
72
         5,  5,  7,  8, 11, 13, 15, 17,
73
         6,  7,  9, 11, 13, 15, 15, 17,
74
         7,  7,  9, 11, 13, 15, 17, 19,
75
         7,  9, 11, 13, 14, 16, 19, 23,
76
         9, 11, 13, 14, 16, 19, 23, 29,
77
         9, 11, 13, 15, 17, 21, 28, 35,
78
        11, 13, 16, 17, 21, 28, 35, 41,
79
    },
80
    { // standard
81
         4,  4,  5,  5,  6,  7,  7,  9,
82
         4,  4,  5,  6,  7,  7,  9,  9,
83
         5,  5,  6,  7,  7,  9,  9, 10,
84
         5,  5,  6,  7,  7,  9,  9, 10,
85
         5,  6,  7,  7,  8,  9, 10, 12,
86
         6,  7,  7,  8,  9, 10, 12, 15,
87
         6,  7,  7,  9, 10, 11, 14, 17,
88
         7,  7,  9, 10, 11, 14, 17, 21,
89
    },
90
    { // high quality
91
         4,  4,  4,  4,  4,  4,  4,  4,
92
         4,  4,  4,  4,  4,  4,  4,  4,
93
         4,  4,  4,  4,  4,  4,  4,  4,
94
         4,  4,  4,  4,  4,  4,  4,  5,
95
         4,  4,  4,  4,  4,  4,  5,  5,
96
         4,  4,  4,  4,  4,  5,  5,  6,
97
         4,  4,  4,  4,  5,  5,  6,  7,
98
         4,  4,  4,  4,  5,  6,  7,  7,
99
    },
100
    { // codec default
101
         4,  4,  4,  4,  4,  4,  4,  4,
102
         4,  4,  4,  4,  4,  4,  4,  4,
103
         4,  4,  4,  4,  4,  4,  4,  4,
104
         4,  4,  4,  4,  4,  4,  4,  4,
105
         4,  4,  4,  4,  4,  4,  4,  4,
106
         4,  4,  4,  4,  4,  4,  4,  4,
107
         4,  4,  4,  4,  4,  4,  4,  4,
108
         4,  4,  4,  4,  4,  4,  4,  4,
109
    },
110
};
111
 
112
#define NUM_MB_LIMITS 4

/*
 * Macroblock-count thresholds in ascending order; the trailing comment on
 * each entry gives the largest picture size it covers.
 */
static const int prores_mb_limits[NUM_MB_LIMITS] = {
    [0] = 1620, /* up to 720x576   */
    [1] = 2700, /* up to 960x720   */
    [2] = 6075, /* up to 1440x1080 */
    [3] = 9216, /* up to 2048x1152 */
};
119
 
120
static const struct prores_profile {
121
    const char *full_name;
122
    uint32_t    tag;
123
    int         min_quant;
124
    int         max_quant;
125
    int         br_tab[NUM_MB_LIMITS];
126
    int         quant;
127
} prores_profile_info[5] = {
128
    {
129
        .full_name = "proxy",
130
        .tag       = MKTAG('a', 'p', 'c', 'o'),
131
        .min_quant = 4,
132
        .max_quant = 8,
133
        .br_tab    = { 300, 242, 220, 194 },
134
        .quant     = QUANT_MAT_PROXY,
135
    },
136
    {
137
        .full_name = "LT",
138
        .tag       = MKTAG('a', 'p', 'c', 's'),
139
        .min_quant = 1,
140
        .max_quant = 9,
141
        .br_tab    = { 720, 560, 490, 440 },
142
        .quant     = QUANT_MAT_LT,
143
    },
144
    {
145
        .full_name = "standard",
146
        .tag       = MKTAG('a', 'p', 'c', 'n'),
147
        .min_quant = 1,
148
        .max_quant = 6,
149
        .br_tab    = { 1050, 808, 710, 632 },
150
        .quant     = QUANT_MAT_STANDARD,
151
    },
152
    {
153
        .full_name = "high quality",
154
        .tag       = MKTAG('a', 'p', 'c', 'h'),
155
        .min_quant = 1,
156
        .max_quant = 6,
157
        .br_tab    = { 1566, 1216, 1070, 950 },
158
        .quant     = QUANT_MAT_HQ,
159
    },
160
    {
161
        .full_name = "4444",
162
        .tag       = MKTAG('a', 'p', '4', 'h'),
163
        .min_quant = 1,
164
        .max_quant = 6,
165
        .br_tab    = { 2350, 1828, 1600, 1425 },
166
        .quant     = QUANT_MAT_HQ,
167
    }
168
};
169
 
170
/* Number of trellis node slots reserved per slice position. */
#define TRELLIS_WIDTH 16
/*
 * Score value that marks a trellis path as unusable. Parenthesized so the
 * macro expands as a single operand inside any surrounding expression.
 */
#define SCORE_LIMIT   (INT_MAX / 2)
172
 
173
/* One candidate (slice, quantiser) state in the per-row quantiser search. */
struct TrellisNode {
    int prev_node; /* index of the predecessor node, or -1 while unset */
    int quant;     /* quantiser this node stands for */
    int bits;      /* bits accumulated along the path ending here */
    int score;     /* error score accumulated along the path ending here */
};
179
 
180
#define MAX_STORED_Q 16
181
 
182
typedef struct ProresThreadData {
183
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
184
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16 * 16];
185
    int16_t custom_q[64];
186
    struct TrellisNode *nodes;
187
} ProresThreadData;
188
 
189
typedef struct ProresContext {
190
    AVClass *class;
191
    DECLARE_ALIGNED(16, int16_t, blocks)[MAX_PLANES][64 * 4 * MAX_MBS_PER_SLICE];
192
    DECLARE_ALIGNED(16, uint16_t, emu_buf)[16*16];
193
    int16_t quants[MAX_STORED_Q][64];
194
    int16_t custom_q[64];
195
    const uint8_t *quant_mat;
196
    const uint8_t *scantable;
197
 
198
    void (*fdct)(FDCTDSPContext *fdsp, const uint16_t *src,
199
                 int linesize, int16_t *block);
200
    FDCTDSPContext fdsp;
201
 
202
    const AVFrame *pic;
203
    int mb_width, mb_height;
204
    int mbs_per_slice;
205
    int num_chroma_blocks, chroma_factor;
206
    int slices_width;
207
    int slices_per_picture;
208
    int pictures_per_frame; // 1 for progressive, 2 for interlaced
209
    int cur_picture_idx;
210
    int num_planes;
211
    int bits_per_mb;
212
    int force_quant;
213
    int alpha_bits;
214
    int warn;
215
 
216
    char *vendor;
217
    int quant_sel;
218
 
219
    int frame_size_upper_bound;
220
 
221
    int profile;
222
    const struct prores_profile *profile_info;
223
 
224
    int *slice_q;
225
 
226
    ProresThreadData *tdata;
227
} ProresContext;
228
 
229
/**
 * Run the forward DCT over every macroblock of one slice plane.
 *
 * Macroblocks lying entirely right of the plane are zero-filled.
 * Macroblocks that cross the right or bottom edge are first copied into
 * emu_buf, with the last valid column and row replicated, and transformed
 * from there.
 *
 * @param ctx            encoder context providing fdct and fdsp
 * @param src            top-left sample of the slice within the plane
 * @param linesize       source stride in bytes
 * @param x              horizontal sample position of the slice
 * @param y              vertical sample position of the slice
 * @param w              plane width in samples
 * @param h              plane height in samples
 * @param blocks         output, 64 coefficients per 8x8 block
 * @param emu_buf        16x16 scratch for edge macroblocks
 * @param mbs_per_slice  macroblocks in this slice
 * @param blocks_per_mb  8x8 blocks per macroblock in this plane
 * @param is_chroma      selects the chroma block ordering when nonzero
 */
static void get_slice_data(ProresContext *ctx, const uint16_t *src,
                           int linesize, int x, int y, int w, int h,
                           int16_t *blocks, uint16_t *emu_buf,
                           int mbs_per_slice, int blocks_per_mb, int is_chroma)
{
    const int mb_width = 4 * blocks_per_mb;
    int mb;

    for (mb = 0; mb < mbs_per_slice; mb++, src += mb_width, x += mb_width) {
        const uint16_t *in;
        int in_stride, down;
        int offs[4];
        int nb_offs = 0;
        int b;

        /* nothing left inside the plane: clear the remaining blocks */
        if (x >= w) {
            memset(blocks, 0, 64 * (mbs_per_slice - mb) * blocks_per_mb
                              * sizeof(*blocks));
            return;
        }

        if (x + mb_width > w || y + 16 > h) {
            const int bw = FFMIN(w - x, mb_width);
            const int bh = FFMIN(h - y, 16);
            int row, col, pix;

            in        = emu_buf;
            in_stride = 16 * sizeof(*emu_buf);

            for (row = 0; row < bh; row++) {
                memcpy(emu_buf + row * 16,
                       (const uint8_t*)src + row * linesize,
                       bw * sizeof(*src));
                /* stretch the last valid sample to the right */
                pix = emu_buf[row * 16 + bw - 1];
                for (col = bw; col < mb_width; col++)
                    emu_buf[row * 16 + col] = pix;
            }
            /* repeat the last valid row downwards */
            for (; row < 16; row++)
                memcpy(emu_buf + row * 16,
                       emu_buf + (bh - 1) * 16,
                       mb_width * sizeof(*emu_buf));
        } else {
            in        = src;
            in_stride = linesize;
        }

        /* the stride is in bytes and samples are 16-bit, so this is 8 rows */
        down = in_stride * 4;

        if (!is_chroma) {
            offs[nb_offs++] = 0;
            if (blocks_per_mb > 2)
                offs[nb_offs++] = 8;
            offs[nb_offs++] = down;
            if (blocks_per_mb > 2)
                offs[nb_offs++] = down + 8;
        } else {
            offs[nb_offs++] = 0;
            offs[nb_offs++] = down;
            if (blocks_per_mb > 2) {
                offs[nb_offs++] = 8;
                offs[nb_offs++] = down + 8;
            }
        }

        for (b = 0; b < nb_offs; b++, blocks += 64)
            ctx->fdct(&ctx->fdsp, in + offs[b], in_stride, blocks);
    }
}
299
 
300
/**
 * Copy one slice of the alpha plane into blocks as 16 rows of
 * 16 * mbs_per_slice samples, rescaling each sample and replicating the
 * last valid column and row where the slice crosses the plane edge.
 *
 * @param ctx            encoder context (not read here)
 * @param src            top-left alpha sample of the slice
 * @param linesize       source stride in bytes
 * @param x              horizontal sample position of the slice
 * @param y              vertical sample position of the slice
 * @param w              plane width in samples
 * @param h              plane height in samples
 * @param blocks         output sample rows
 * @param mbs_per_slice  macroblocks in this slice
 * @param abits          alpha depth; 8 shifts samples right by 2, any other
 *                       value expands them with (v << 6) | (v >> 4)
 */
static void get_alpha_data(ProresContext *ctx, const uint16_t *src,
                           int linesize, int x, int y, int w, int h,
                           int16_t *blocks, int mbs_per_slice, int abits)
{
    const int slice_width = 16 * mbs_per_slice;
    const int copy_w      = FFMIN(w - x, slice_width);
    const int copy_h      = FFMIN(h - y, 16);
    int row, col;

    for (row = 0; row < copy_h;
         row++, blocks += slice_width, src += linesize >> 1) {
        memcpy(blocks, src, copy_w * sizeof(*src));

        for (col = 0; col < copy_w; col++) {
            if (abits == 8)
                blocks[col] >>= 2;
            else
                blocks[col] = (blocks[col] << 6) | (blocks[col] >> 4);
        }

        /* pad to the right with the last converted sample */
        for (col = copy_w; col < slice_width; col++)
            blocks[col] = blocks[copy_w - 1];
    }

    /* pad downwards by repeating the previous row */
    for (; row < 16; row++, blocks += slice_width)
        memcpy(blocks, blocks - slice_width, slice_width * sizeof(*blocks));
}
327
 
328
/**
329
 * Write an unsigned rice/exp golomb codeword.
330
 */
331
static inline void encode_vlc_codeword(PutBitContext *pb, unsigned codebook, int val)
332
{
333
    unsigned int rice_order, exp_order, switch_bits, switch_val;
334
    int exponent;
335
 
336
    /* number of prefix bits to switch between Rice and expGolomb */
337
    switch_bits = (codebook & 3) + 1;
338
    rice_order  =  codebook >> 5;       /* rice code order */
339
    exp_order   = (codebook >> 2) & 7;  /* exp golomb code order */
340
 
341
    switch_val  = switch_bits << rice_order;
342
 
343
    if (val >= switch_val) {
344
        val -= switch_val - (1 << exp_order);
345
        exponent = av_log2(val);
346
 
347
        put_bits(pb, exponent - exp_order + switch_bits, 0);
348
        put_bits(pb, exponent + 1, val);
349
    } else {
350
        exponent = val >> rice_order;
351
 
352
        if (exponent)
353
            put_bits(pb, exponent, 0);
354
        put_bits(pb, 1, 1);
355
        if (rice_order)
356
            put_sbits(pb, rice_order, val);
357
    }
358
}
359
 
360
/*
 * Sign mask of a signed int: -1 (all bits set) for negative values,
 * 0 otherwise. Written as a comparison so the result does not depend on how
 * the implementation right-shifts negative values.
 */
#define GET_SIGN(x)  (-((x) < 0))
/*
 * Fold a signed value into a non-negative code: 2*x for x >= 0 and
 * -2*x - 1 for x < 0. Uses a multiply because left-shifting a negative
 * value is undefined behaviour. The argument is evaluated twice.
 */
#define MAKE_CODE(x) (((x) * 2) ^ GET_SIGN(x))
362
 
363
/**
 * Code the DC coefficient of every block in a slice plane.
 *
 * The first DC is written with FIRST_DC_CB. Each later DC is written as a
 * difference from the previous one, sign-adjusted by the sign of the
 * previous difference, with a codebook chosen from the previous code.
 *
 * @param pb                bit writer
 * @param blocks            coefficients, 64 per block, DC at index 0
 * @param blocks_per_slice  number of blocks to code
 * @param scale             DC quantiser
 */
static void encode_dcs(PutBitContext *pb, int16_t *blocks,
                       int blocks_per_slice, int scale)
{
    int prev_dc   = (blocks[0] - 0x4000) / scale;
    int prev_sign = 0;
    int codebook  = 3;
    int n;

    encode_vlc_codeword(pb, FIRST_DC_CB, MAKE_CODE(prev_dc));

    for (n = 1; n < blocks_per_slice; n++) {
        const int dc      = (blocks[64 * n] - 0x4000) / scale;
        const int diff    = dc - prev_dc;
        const int delta   = (diff ^ prev_sign) - prev_sign;
        const int code    = MAKE_CODE(delta);
        const int next_cb = (code + (code & 1)) >> 1;

        encode_vlc_codeword(pb, ff_prores_dc_codebook[codebook], code);

        codebook  = FFMIN(next_cb, 3);
        prev_sign = GET_SIGN(diff);
        prev_dc   = dc;
    }
}
388
 
389
/**
 * Code the AC coefficients of a slice plane as (run, level, sign) triples.
 *
 * Coefficients are visited one scan position at a time across all blocks of
 * the slice. The codebooks for the next run and level are picked from the
 * run and level just written.
 *
 * @param pb                 bit writer
 * @param blocks             coefficients, 64 per block
 * @param blocks_per_slice   number of blocks in the slice plane
 * @param plane_size_factor  not used by this function
 * @param scan               scan order, indexed by scan position
 * @param qmat               quantisation matrix, indexed by coefficient position
 */
static void encode_acs(PutBitContext *pb, int16_t *blocks,
                       int blocks_per_slice,
                       int plane_size_factor,
                       const uint8_t *scan, const int16_t *qmat)
{
    const int max_coeffs = blocks_per_slice << 6;
    int run_cb = ff_prores_run_to_cb_index[4];
    int lev_cb = ff_prores_lev_to_cb_index[2];
    int run    = 0;
    int pos, idx;

    for (pos = 1; pos < 64; pos++) {
        const int coef = scan[pos];

        for (idx = coef; idx < max_coeffs; idx += 64) {
            const int level = blocks[idx] / qmat[coef];
            int abs_level;

            if (!level) {
                run++;
                continue;
            }

            abs_level = FFABS(level);
            encode_vlc_codeword(pb, ff_prores_ac_codebook[run_cb], run);
            encode_vlc_codeword(pb, ff_prores_ac_codebook[lev_cb],
                                abs_level - 1);
            put_sbits(pb, 1, GET_SIGN(level));

            run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
            lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
            run    = 0;
        }
    }
}
422
 
423
/**
 * Write the DC and AC data of one slice plane and flush the bit writer.
 *
 * @param ctx                encoder context (supplies the scan table)
 * @param pb                 bit writer
 * @param src                not used by this function
 * @param linesize           not used by this function
 * @param mbs_per_slice      macroblocks in the slice
 * @param blocks             transformed coefficients of the plane
 * @param blocks_per_mb      8x8 blocks per macroblock in this plane
 * @param plane_size_factor  forwarded to encode_acs()
 * @param qmat               quantisation matrix; qmat[0] scales the DCs
 * @return number of whole bytes written for this plane
 */
static int encode_slice_plane(ProresContext *ctx, PutBitContext *pb,
                              const uint16_t *src, int linesize,
                              int mbs_per_slice, int16_t *blocks,
                              int blocks_per_mb, int plane_size_factor,
                              const int16_t *qmat)
{
    const int start_bits       = put_bits_count(pb);
    const int blocks_per_slice = mbs_per_slice * blocks_per_mb;

    encode_dcs(pb, blocks, blocks_per_slice, qmat[0]);
    encode_acs(pb, blocks, blocks_per_slice, plane_size_factor,
               ctx->scantable, qmat);
    flush_put_bits(pb);

    return (put_bits_count(pb) - start_bits) >> 3;
}
441
 
442
/**
 * Write the difference between two alpha samples.
 *
 * The difference is reduced modulo 2^abits and re-centred. Small non-zero
 * differences use a short form (flag 0, magnitude - 1, sign); everything
 * else uses the long form (flag 1 followed by abits bits).
 */
static void put_alpha_diff(PutBitContext *pb, int cur, int prev, int abits)
{
    const int dbits = (abits == 8) ? 4 : 7;
    const int dsize = 1 << (dbits - 1);
    int diff = av_mod_uintp2(cur - prev, abits);

    if (diff >= (1 << abits) - dsize)
        diff -= 1 << abits;

    if (diff < -dsize || diff > dsize || !diff) {
        put_bits(pb, 1, 1);
        put_bits(pb, abits, diff);
        return;
    }

    put_bits(pb, 1, 0);
    put_bits(pb, dbits - 1, FFABS(diff) - 1);
    put_bits(pb, 1, diff < 0);
}
460
 
461
/**
 * Write the length of a run of identical alpha samples.
 *
 * A zero run is a single 1 bit. Otherwise a 0 bit is followed by the run in
 * 4 bits when it is below 16, or in 15 bits when it is not.
 */
static void put_alpha_run(PutBitContext *pb, int run)
{
    if (!run) {
        put_bits(pb, 1, 1);
        return;
    }

    put_bits(pb, 1, 0);
    put_bits(pb, run < 0x10 ? 4 : 15, run);
}
473
 
474
// todo alpha quantisation for high quants
475
static int encode_alpha_plane(ProresContext *ctx, PutBitContext *pb,
476
                              int mbs_per_slice, uint16_t *blocks,
477
                              int quant)
478
{
479
    const int abits = ctx->alpha_bits;
480
    const int mask  = (1 << abits) - 1;
481
    const int num_coeffs = mbs_per_slice * 256;
482
    int saved_pos = put_bits_count(pb);
483
    int prev = mask, cur;
484
    int idx = 0;
485
    int run = 0;
486
 
487
    cur = blocks[idx++];
488
    put_alpha_diff(pb, cur, prev, abits);
489
    prev = cur;
490
    do {
491
        cur = blocks[idx++];
492
        if (cur != prev) {
493
            put_alpha_run (pb, run);
494
            put_alpha_diff(pb, cur, prev, abits);
495
            prev = cur;
496
            run  = 0;
497
        } else {
498
            run++;
499
        }
500
    } while (idx < num_coeffs);
501
    if (run)
502
        put_alpha_run(pb, run);
503
    flush_put_bits(pb);
504
    return (put_bits_count(pb) - saved_pos) >> 3;
505
}
506
 
507
static int encode_slice(AVCodecContext *avctx, const AVFrame *pic,
508
                        PutBitContext *pb,
509
                        int sizes[4], int x, int y, int quant,
510
                        int mbs_per_slice)
511
{
512
    ProresContext *ctx = avctx->priv_data;
513
    int i, xp, yp;
514
    int total_size = 0;
515
    const uint16_t *src;
516
    int slice_width_factor = av_log2(mbs_per_slice);
517
    int num_cblocks, pwidth, linesize, line_add;
518
    int plane_factor, is_chroma;
519
    uint16_t *qmat;
520
 
521
    if (ctx->pictures_per_frame == 1)
522
        line_add = 0;
523
    else
524
        line_add = ctx->cur_picture_idx ^ !pic->top_field_first;
525
 
526
    if (ctx->force_quant) {
527
        qmat = ctx->quants[0];
528
    } else if (quant < MAX_STORED_Q) {
529
        qmat = ctx->quants[quant];
530
    } else {
531
        qmat = ctx->custom_q;
532
        for (i = 0; i < 64; i++)
533
            qmat[i] = ctx->quant_mat[i] * quant;
534
    }
535
 
536
    for (i = 0; i < ctx->num_planes; i++) {
537
        is_chroma    = (i == 1 || i == 2);
538
        plane_factor = slice_width_factor + 2;
539
        if (is_chroma)
540
            plane_factor += ctx->chroma_factor - 3;
541
        if (!is_chroma || ctx->chroma_factor == CFACTOR_Y444) {
542
            xp          = x << 4;
543
            yp          = y << 4;
544
            num_cblocks = 4;
545
            pwidth      = avctx->width;
546
        } else {
547
            xp          = x << 3;
548
            yp          = y << 4;
549
            num_cblocks = 2;
550
            pwidth      = avctx->width >> 1;
551
        }
552
 
553
        linesize = pic->linesize[i] * ctx->pictures_per_frame;
554
        src = (const uint16_t*)(pic->data[i] + yp * linesize +
555
                                line_add * pic->linesize[i]) + xp;
556
 
557
        if (i < 3) {
558
            get_slice_data(ctx, src, linesize, xp, yp,
559
                           pwidth, avctx->height / ctx->pictures_per_frame,
560
                           ctx->blocks[0], ctx->emu_buf,
561
                           mbs_per_slice, num_cblocks, is_chroma);
562
            sizes[i] = encode_slice_plane(ctx, pb, src, linesize,
563
                                          mbs_per_slice, ctx->blocks[0],
564
                                          num_cblocks, plane_factor,
565
                                          qmat);
566
        } else {
567
            get_alpha_data(ctx, src, linesize, xp, yp,
568
                           pwidth, avctx->height / ctx->pictures_per_frame,
569
                           ctx->blocks[0], mbs_per_slice, ctx->alpha_bits);
570
            sizes[i] = encode_alpha_plane(ctx, pb, mbs_per_slice,
571
                                          ctx->blocks[0], quant);
572
        }
573
        total_size += sizes[i];
574
        if (put_bits_left(pb) < 0) {
575
            av_log(avctx, AV_LOG_ERROR,
576
                   "Underestimated required buffer size.\n");
577
            return AVERROR_BUG;
578
        }
579
    }
580
    return total_size;
581
}
582
 
583
/**
 * Return the number of bits encode_vlc_codeword() writes for val with the
 * given codebook, without writing anything.
 */
static inline int estimate_vlc(unsigned codebook, int val)
{
    const unsigned int switch_bits = (codebook & 3) + 1;
    const unsigned int rice_order  =  codebook >> 5;
    const unsigned int exp_order   = (codebook >> 2) & 7;
    const unsigned int switch_val  = switch_bits << rice_order;
    int exponent;

    /* Rice code: unary quotient, stop bit, rice_order remainder bits */
    if (val < switch_val)
        return (val >> rice_order) + rice_order + 1;

    /* exp-Golomb code: zero prefix plus exponent + 1 value bits */
    val -= switch_val - (1 << exp_order);
    exponent = av_log2(val);

    return exponent * 2 - exp_order + switch_bits + 1;
}
604
 
605
static int estimate_dcs(int *error, int16_t *blocks, int blocks_per_slice,
606
                        int scale)
607
{
608
    int i;
609
    int codebook = 3, code, dc, prev_dc, delta, sign, new_sign;
610
    int bits;
611
 
612
    prev_dc  = (blocks[0] - 0x4000) / scale;
613
    bits     = estimate_vlc(FIRST_DC_CB, MAKE_CODE(prev_dc));
614
    sign     = 0;
615
    codebook = 3;
616
    blocks  += 64;
617
    *error  += FFABS(blocks[0] - 0x4000) % scale;
618
 
619
    for (i = 1; i < blocks_per_slice; i++, blocks += 64) {
620
        dc       = (blocks[0] - 0x4000) / scale;
621
        *error  += FFABS(blocks[0] - 0x4000) % scale;
622
        delta    = dc - prev_dc;
623
        new_sign = GET_SIGN(delta);
624
        delta    = (delta ^ sign) - sign;
625
        code     = MAKE_CODE(delta);
626
        bits    += estimate_vlc(ff_prores_dc_codebook[codebook], code);
627
        codebook = (code + (code & 1)) >> 1;
628
        codebook = FFMIN(codebook, 3);
629
        sign     = new_sign;
630
        prev_dc  = dc;
631
    }
632
 
633
    return bits;
634
}
635
 
636
static int estimate_acs(int *error, int16_t *blocks, int blocks_per_slice,
637
                        int plane_size_factor,
638
                        const uint8_t *scan, const int16_t *qmat)
639
{
640
    int idx, i;
641
    int run, level, run_cb, lev_cb;
642
    int max_coeffs, abs_level;
643
    int bits = 0;
644
 
645
    max_coeffs = blocks_per_slice << 6;
646
    run_cb     = ff_prores_run_to_cb_index[4];
647
    lev_cb     = ff_prores_lev_to_cb_index[2];
648
    run        = 0;
649
 
650
    for (i = 1; i < 64; i++) {
651
        for (idx = scan[i]; idx < max_coeffs; idx += 64) {
652
            level   = blocks[idx] / qmat[scan[i]];
653
            *error += FFABS(blocks[idx]) % qmat[scan[i]];
654
            if (level) {
655
                abs_level = FFABS(level);
656
                bits += estimate_vlc(ff_prores_ac_codebook[run_cb], run);
657
                bits += estimate_vlc(ff_prores_ac_codebook[lev_cb],
658
                                     abs_level - 1) + 1;
659
 
660
                run_cb = ff_prores_run_to_cb_index[FFMIN(run, 15)];
661
                lev_cb = ff_prores_lev_to_cb_index[FFMIN(abs_level, 9)];
662
                run    = 0;
663
            } else {
664
                run++;
665
            }
666
        }
667
    }
668
 
669
    return bits;
670
}
671
 
672
/**
 * Estimate the coded size of one slice plane for a given matrix.
 *
 * @param ctx                encoder context (supplies the scan table)
 * @param error              accumulator for the quantisation error
 * @param plane              index into td->blocks
 * @param src                not used by this function
 * @param linesize           not used by this function
 * @param mbs_per_slice      macroblocks in the slice
 * @param blocks_per_mb      8x8 blocks per macroblock in this plane
 * @param plane_size_factor  forwarded to estimate_acs()
 * @param qmat               quantisation matrix; qmat[0] scales the DCs
 * @param td                 thread data holding the transformed blocks
 * @return estimated number of bits, rounded up to a multiple of 8
 */
static int estimate_slice_plane(ProresContext *ctx, int *error, int plane,
                                const uint16_t *src, int linesize,
                                int mbs_per_slice,
                                int blocks_per_mb, int plane_size_factor,
                                const int16_t *qmat, ProresThreadData *td)
{
    const int blocks_per_slice = mbs_per_slice * blocks_per_mb;
    int16_t *coeffs = td->blocks[plane];
    int bits;

    bits  = estimate_dcs(error, coeffs, blocks_per_slice, qmat[0]);
    bits += estimate_acs(error, coeffs, blocks_per_slice,
                         plane_size_factor, ctx->scantable, qmat);

    return FFALIGN(bits, 8);
}
689
 
690
/**
 * Return the number of bits put_alpha_diff() writes for the same arguments:
 * abits + 1 for the long form, dbits + 1 for the short form.
 */
static int est_alpha_diff(int cur, int prev, int abits)
{
    const int dbits = (abits == 8) ? 4 : 7;
    const int dsize = 1 << (dbits - 1);
    int diff = av_mod_uintp2(cur - prev, abits);

    if (diff >= (1 << abits) - dsize)
        diff -= 1 << abits;

    return (diff < -dsize || diff > dsize || !diff) ? abits + 1 : dbits + 1;
}
704
 
705
/* Bit cost of one run as written by put_alpha_run(): flag bit plus payload. */
static int est_alpha_run(int run)
{
    if (!run)
        return 1;
    return 1 + (run < 0x10 ? 4 : 15);
}

/**
 * Estimate the coded size of the alpha samples of one slice.
 *
 * Walks the samples the same way encode_alpha_plane() does and sums the
 * cost of every difference and run it would write.
 *
 * *error is left untouched: this function does not quantise the samples,
 * and the caller has already accumulated the other planes' error in it.
 *
 * @param ctx            encoder context (supplies alpha_bits)
 * @param error          accumulator for the quantisation error (not modified)
 * @param src            not used by this function
 * @param linesize       not used by this function
 * @param mbs_per_slice  macroblocks in the slice (256 samples each)
 * @param quant          not used by this function
 * @param blocks         alpha samples of the slice
 * @return estimated number of bits
 */
static int estimate_alpha_plane(ProresContext *ctx, int *error,
                                const uint16_t *src, int linesize,
                                int mbs_per_slice, int quant,
                                int16_t *blocks)
{
    const int abits = ctx->alpha_bits;
    const int mask  = (1 << abits) - 1;
    const int num_coeffs = mbs_per_slice * 256;
    int prev = mask, cur;
    int idx = 0;
    int run = 0;
    int bits;

    cur = blocks[idx++];
    bits = est_alpha_diff(cur, prev, abits);
    prev = cur;
    do {
        cur = blocks[idx++];
        if (cur != prev) {
            bits += est_alpha_run(run);
            bits += est_alpha_diff(cur, prev, abits);
            prev = cur;
            run  = 0;
        } else {
            run++;
        }
    } while (idx < num_coeffs);

    /* a trailing run is only written when it is non-empty */
    if (run)
        bits += est_alpha_run(run);

    return bits;
}
748
 
749
static int find_slice_quant(AVCodecContext *avctx,
750
                            int trellis_node, int x, int y, int mbs_per_slice,
751
                            ProresThreadData *td)
752
{
753
    ProresContext *ctx = avctx->priv_data;
754
    int i, q, pq, xp, yp;
755
    const uint16_t *src;
756
    int slice_width_factor = av_log2(mbs_per_slice);
757
    int num_cblocks[MAX_PLANES], pwidth;
758
    int plane_factor[MAX_PLANES], is_chroma[MAX_PLANES];
759
    const int min_quant = ctx->profile_info->min_quant;
760
    const int max_quant = ctx->profile_info->max_quant;
761
    int error, bits, bits_limit;
762
    int mbs, prev, cur, new_score;
763
    int slice_bits[TRELLIS_WIDTH], slice_score[TRELLIS_WIDTH];
764
    int overquant;
765
    uint16_t *qmat;
766
    int linesize[4], line_add;
767
 
768
    if (ctx->pictures_per_frame == 1)
769
        line_add = 0;
770
    else
771
        line_add = ctx->cur_picture_idx ^ !ctx->pic->top_field_first;
772
    mbs = x + mbs_per_slice;
773
 
774
    for (i = 0; i < ctx->num_planes; i++) {
775
        is_chroma[i]    = (i == 1 || i == 2);
776
        plane_factor[i] = slice_width_factor + 2;
777
        if (is_chroma[i])
778
            plane_factor[i] += ctx->chroma_factor - 3;
779
        if (!is_chroma[i] || ctx->chroma_factor == CFACTOR_Y444) {
780
            xp             = x << 4;
781
            yp             = y << 4;
782
            num_cblocks[i] = 4;
783
            pwidth         = avctx->width;
784
        } else {
785
            xp             = x << 3;
786
            yp             = y << 4;
787
            num_cblocks[i] = 2;
788
            pwidth         = avctx->width >> 1;
789
        }
790
 
791
        linesize[i] = ctx->pic->linesize[i] * ctx->pictures_per_frame;
792
        src = (const uint16_t *)(ctx->pic->data[i] + yp * linesize[i] +
793
                                 line_add * ctx->pic->linesize[i]) + xp;
794
 
795
        if (i < 3) {
796
            get_slice_data(ctx, src, linesize[i], xp, yp,
797
                           pwidth, avctx->height / ctx->pictures_per_frame,
798
                           td->blocks[i], td->emu_buf,
799
                           mbs_per_slice, num_cblocks[i], is_chroma[i]);
800
        } else {
801
            get_alpha_data(ctx, src, linesize[i], xp, yp,
802
                           pwidth, avctx->height / ctx->pictures_per_frame,
803
                           td->blocks[i], mbs_per_slice, ctx->alpha_bits);
804
        }
805
    }
806
 
807
    for (q = min_quant; q < max_quant + 2; q++) {
808
        td->nodes[trellis_node + q].prev_node = -1;
809
        td->nodes[trellis_node + q].quant     = q;
810
    }
811
 
812
    // todo: maybe perform coarser quantising to fit into frame size when needed
813
    for (q = min_quant; q <= max_quant; q++) {
814
        bits  = 0;
815
        error = 0;
816
        for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
817
            bits += estimate_slice_plane(ctx, &error, i,
818
                                         src, linesize[i],
819
                                         mbs_per_slice,
820
                                         num_cblocks[i], plane_factor[i],
821
                                         ctx->quants[q], td);
822
        }
823
        if (ctx->alpha_bits)
824
            bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
825
                                         mbs_per_slice, q, td->blocks[3]);
826
        if (bits > 65000 * 8)
827
            error = SCORE_LIMIT;
828
 
829
        slice_bits[q]  = bits;
830
        slice_score[q] = error;
831
    }
832
    if (slice_bits[max_quant] <= ctx->bits_per_mb * mbs_per_slice) {
833
        slice_bits[max_quant + 1]  = slice_bits[max_quant];
834
        slice_score[max_quant + 1] = slice_score[max_quant] + 1;
835
        overquant = max_quant;
836
    } else {
837
        for (q = max_quant + 1; q < 128; q++) {
838
            bits  = 0;
839
            error = 0;
840
            if (q < MAX_STORED_Q) {
841
                qmat = ctx->quants[q];
842
            } else {
843
                qmat = td->custom_q;
844
                for (i = 0; i < 64; i++)
845
                    qmat[i] = ctx->quant_mat[i] * q;
846
            }
847
            for (i = 0; i < ctx->num_planes - !!ctx->alpha_bits; i++) {
848
                bits += estimate_slice_plane(ctx, &error, i,
849
                                             src, linesize[i],
850
                                             mbs_per_slice,
851
                                             num_cblocks[i], plane_factor[i],
852
                                             qmat, td);
853
            }
854
            if (ctx->alpha_bits)
855
                bits += estimate_alpha_plane(ctx, &error, src, linesize[3],
856
                                             mbs_per_slice, q, td->blocks[3]);
857
            if (bits <= ctx->bits_per_mb * mbs_per_slice)
858
                break;
859
        }
860
 
861
        slice_bits[max_quant + 1]  = bits;
862
        slice_score[max_quant + 1] = error;
863
        overquant = q;
864
    }
865
    td->nodes[trellis_node + max_quant + 1].quant = overquant;
866
 
867
    bits_limit = mbs * ctx->bits_per_mb;
868
    for (pq = min_quant; pq < max_quant + 2; pq++) {
869
        prev = trellis_node - TRELLIS_WIDTH + pq;
870
 
871
        for (q = min_quant; q < max_quant + 2; q++) {
872
            cur = trellis_node + q;
873
 
874
            bits  = td->nodes[prev].bits + slice_bits[q];
875
            error = slice_score[q];
876
            if (bits > bits_limit)
877
                error = SCORE_LIMIT;
878
 
879
            if (td->nodes[prev].score < SCORE_LIMIT && error < SCORE_LIMIT)
880
                new_score = td->nodes[prev].score + error;
881
            else
882
                new_score = SCORE_LIMIT;
883
            if (td->nodes[cur].prev_node == -1 ||
884
                td->nodes[cur].score >= new_score) {
885
 
886
                td->nodes[cur].bits      = bits;
887
                td->nodes[cur].score     = new_score;
888
                td->nodes[cur].prev_node = prev;
889
            }
890
        }
891
    }
892
 
893
    error = td->nodes[trellis_node + min_quant].score;
894
    pq    = trellis_node + min_quant;
895
    for (q = min_quant + 1; q < max_quant + 2; q++) {
896
        if (td->nodes[trellis_node + q].score <= error) {
897
            error = td->nodes[trellis_node + q].score;
898
            pq    = trellis_node + q;
899
        }
900
    }
901
 
902
    return pq;
903
}
904
 
905
/**
 * Worker that selects the quantisers for one macroblock row.
 *
 * Every slice of row @p jobnr is passed to find_slice_quant(), each one
 * using the next trellis column. The node index returned for the last slice
 * is then followed backwards through the prev_node links, storing one
 * quantiser per slice into ctx->slice_q.
 *
 * @param avctx    codec context; priv_data is the ProresContext
 * @param arg      unused
 * @param jobnr    macroblock row to process
 * @param threadnr index of the per-thread scratch data in ctx->tdata
 * @return always 0
 */
static int find_quant_thread(AVCodecContext *avctx, void *arg,
                             int jobnr, int threadnr)
{
    ProresContext *ctx   = avctx->priv_data;
    ProresThreadData *td = &ctx->tdata[threadnr];
    const int mb_row     = jobnr;
    int slice_mbs        = ctx->mbs_per_slice;
    int node             = 0;
    int mb_x             = 0;
    int slice_idx        = 0;
    int row_base;

    // forward pass: one trellis column per slice of this row
    while (mb_x < ctx->mb_width) {
        // shrink the slice until it fits into what is left of the row
        while (ctx->mb_width - mb_x < slice_mbs)
            slice_mbs >>= 1;

        node = find_slice_quant(avctx,
                                (slice_idx + 1) * TRELLIS_WIDTH,
                                mb_x, mb_row, slice_mbs, td);

        mb_x += slice_mbs;
        slice_idx++;
    }

    // backward pass: follow the chosen path from the last slice to the first
    row_base = mb_row * ctx->slices_width;
    for (slice_idx = ctx->slices_width; slice_idx-- > 0; ) {
        ctx->slice_q[slice_idx + row_base] = td->nodes[node].quant;
        node = td->nodes[node].prev_node;
    }

    return 0;
}
928
 
929
static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
930
                        const AVFrame *pic, int *got_packet)
931
{
932
    ProresContext *ctx = avctx->priv_data;
933
    uint8_t *orig_buf, *buf, *slice_hdr, *slice_sizes, *tmp;
934
    uint8_t *picture_size_pos;
935
    PutBitContext pb;
936
    int x, y, i, mb, q = 0;
937
    int sizes[4] = { 0 };
938
    int slice_hdr_size = 2 + 2 * (ctx->num_planes - 1);
939
    int frame_size, picture_size, slice_size;
940
    int pkt_size, ret;
941
    int max_slice_size = (ctx->frame_size_upper_bound - 200) / (ctx->pictures_per_frame * ctx->slices_per_picture + 1);
942
    uint8_t frame_flags;
943
 
944
    ctx->pic = pic;
945
    pkt_size = ctx->frame_size_upper_bound;
946
 
947
    if ((ret = ff_alloc_packet2(avctx, pkt, pkt_size + AV_INPUT_BUFFER_MIN_SIZE, 0)) < 0)
948
        return ret;
949
 
950
    orig_buf = pkt->data;
951
 
952
    // frame atom
953
    orig_buf += 4;                              // frame size
954
    bytestream_put_be32  (&orig_buf, FRAME_ID); // frame container ID
955
    buf = orig_buf;
956
 
957
    // frame header
958
    tmp = buf;
959
    buf += 2;                                   // frame header size will be stored here
960
    bytestream_put_be16  (&buf, 0);             // version 1
961
    bytestream_put_buffer(&buf, ctx->vendor, 4);
962
    bytestream_put_be16  (&buf, avctx->width);
963
    bytestream_put_be16  (&buf, avctx->height);
964
 
965
    frame_flags = ctx->chroma_factor << 6;
966
    if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
967
        frame_flags |= pic->top_field_first ? 0x04 : 0x08;
968
    bytestream_put_byte  (&buf, frame_flags);
969
 
970
    bytestream_put_byte  (&buf, 0);             // reserved
971
    bytestream_put_byte  (&buf, avctx->color_primaries);
972
    bytestream_put_byte  (&buf, avctx->color_trc);
973
    bytestream_put_byte  (&buf, avctx->colorspace);
974
    bytestream_put_byte  (&buf, 0x40 | (ctx->alpha_bits >> 3));
975
    bytestream_put_byte  (&buf, 0);             // reserved
976
    if (ctx->quant_sel != QUANT_MAT_DEFAULT) {
977
        bytestream_put_byte  (&buf, 0x03);      // matrix flags - both matrices are present
978
        // luma quantisation matrix
979
        for (i = 0; i < 64; i++)
980
            bytestream_put_byte(&buf, ctx->quant_mat[i]);
981
        // chroma quantisation matrix
982
        for (i = 0; i < 64; i++)
983
            bytestream_put_byte(&buf, ctx->quant_mat[i]);
984
    } else {
985
        bytestream_put_byte  (&buf, 0x00);      // matrix flags - default matrices are used
986
    }
987
    bytestream_put_be16  (&tmp, buf - orig_buf); // write back frame header size
988
 
989
    for (ctx->cur_picture_idx = 0;
990
         ctx->cur_picture_idx < ctx->pictures_per_frame;
991
         ctx->cur_picture_idx++) {
992
        // picture header
993
        picture_size_pos = buf + 1;
994
        bytestream_put_byte  (&buf, 0x40);          // picture header size (in bits)
995
        buf += 4;                                   // picture data size will be stored here
996
        bytestream_put_be16  (&buf, ctx->slices_per_picture);
997
        bytestream_put_byte  (&buf, av_log2(ctx->mbs_per_slice) << 4); // slice width and height in MBs
998
 
999
        // seek table - will be filled during slice encoding
1000
        slice_sizes = buf;
1001
        buf += ctx->slices_per_picture * 2;
1002
 
1003
        // slices
1004
        if (!ctx->force_quant) {
1005
            ret = avctx->execute2(avctx, find_quant_thread, (void*)pic, NULL,
1006
                                  ctx->mb_height);
1007
            if (ret)
1008
                return ret;
1009
        }
1010
 
1011
        for (y = 0; y < ctx->mb_height; y++) {
1012
            int mbs_per_slice = ctx->mbs_per_slice;
1013
            for (x = mb = 0; x < ctx->mb_width; x += mbs_per_slice, mb++) {
1014
                q = ctx->force_quant ? ctx->force_quant
1015
                                     : ctx->slice_q[mb + y * ctx->slices_width];
1016
 
1017
                while (ctx->mb_width - x < mbs_per_slice)
1018
                    mbs_per_slice >>= 1;
1019
 
1020
                bytestream_put_byte(&buf, slice_hdr_size << 3);
1021
                slice_hdr = buf;
1022
                buf += slice_hdr_size - 1;
1023
                if (pkt_size <= buf - orig_buf + 2 * max_slice_size) {
1024
                    uint8_t *start = pkt->data;
1025
                    // Recompute new size according to max_slice_size
1026
                    // and deduce delta
1027
                    int delta = 200 + (ctx->pictures_per_frame *
1028
                                ctx->slices_per_picture + 1) *
1029
                                max_slice_size - pkt_size;
1030
 
1031
                    delta = FFMAX(delta, 2 * max_slice_size);
1032
                    ctx->frame_size_upper_bound += delta;
1033
 
1034
                    if (!ctx->warn) {
1035
                        avpriv_request_sample(avctx,
1036
                                              "Packet too small: is %i,"
1037
                                              " needs %i (slice: %i). "
1038
                                              "Correct allocation",
1039
                                              pkt_size, delta, max_slice_size);
1040
                        ctx->warn = 1;
1041
                    }
1042
 
1043
                    ret = av_grow_packet(pkt, delta);
1044
                    if (ret < 0)
1045
                        return ret;
1046
 
1047
                    pkt_size += delta;
1048
                    // restore pointers
1049
                    orig_buf         = pkt->data + (orig_buf         - start);
1050
                    buf              = pkt->data + (buf              - start);
1051
                    picture_size_pos = pkt->data + (picture_size_pos - start);
1052
                    slice_sizes      = pkt->data + (slice_sizes      - start);
1053
                    slice_hdr        = pkt->data + (slice_hdr        - start);
1054
                    tmp              = pkt->data + (tmp              - start);
1055
                }
1056
                init_put_bits(&pb, buf, (pkt_size - (buf - orig_buf)));
1057
                ret = encode_slice(avctx, pic, &pb, sizes, x, y, q,
1058
                                   mbs_per_slice);
1059
                if (ret < 0)
1060
                    return ret;
1061
 
1062
                bytestream_put_byte(&slice_hdr, q);
1063
                slice_size = slice_hdr_size + sizes[ctx->num_planes - 1];
1064
                for (i = 0; i < ctx->num_planes - 1; i++) {
1065
                    bytestream_put_be16(&slice_hdr, sizes[i]);
1066
                    slice_size += sizes[i];
1067
                }
1068
                bytestream_put_be16(&slice_sizes, slice_size);
1069
                buf += slice_size - slice_hdr_size;
1070
                if (max_slice_size < slice_size)
1071
                    max_slice_size = slice_size;
1072
            }
1073
        }
1074
 
1075
        picture_size = buf - (picture_size_pos - 1);
1076
        bytestream_put_be32(&picture_size_pos, picture_size);
1077
    }
1078
 
1079
    orig_buf -= 8;
1080
    frame_size = buf - orig_buf;
1081
    bytestream_put_be32(&orig_buf, frame_size);
1082
 
1083
    pkt->size   = frame_size;
1084
    pkt->flags |= AV_PKT_FLAG_KEY;
1085
    *got_packet = 1;
1086
 
1087
    return 0;
1088
}
1089
 
1090
/**
 * Release everything encode_init() allocated in the private context:
 * the trellis node array of every thread, the per-thread data array and
 * the per-slice quantiser table.
 *
 * @param avctx codec context; priv_data is the ProresContext
 * @return always 0
 */
static av_cold int encode_close(AVCodecContext *avctx)
{
    ProresContext *ctx = avctx->priv_data;

    if (ctx->tdata) {
        int t = avctx->thread_count;

        // the node arrays hang off tdata, so they have to go first
        while (t-- > 0)
            av_freep(&ctx->tdata[t].nodes);
    }

    av_freep(&ctx->tdata);
    av_freep(&ctx->slice_q);

    return 0;
}
1104
 
1105
/**
 * Copy an 8x8 block of 16-bit samples into @p block and run the forward DCT
 * of @p fdsp on it in place.
 *
 * @param fdsp     DSP context providing the fdct() callback
 * @param src      top-left sample of the block
 * @param linesize distance between source rows; it is halved to step in
 *                 16-bit samples
 * @param block    destination for the 64 coefficients, stored row by row
 */
static void prores_fdct(FDCTDSPContext *fdsp, const uint16_t *src,
                        int linesize, int16_t *block)
{
    const int stride    = linesize >> 1;
    const uint16_t *row = src;
    int16_t *out        = block;
    int r, c;

    for (r = 0; r < 8; r++, row += stride, out += 8)
        for (c = 0; c < 8; c++)
            out[c] = row[c];

    fdsp->fdct(block);
}
1118
 
1119
/**
 * Initialise the encoder.
 *
 * Validates the user options, selects the profile and quantisation matrix,
 * derives the macroblock/slice layout and sets up one of two modes:
 *  - rate-controlled: quantiser tables for every stored quantiser plus the
 *    per-slice quantiser array and per-thread trellis nodes are allocated;
 *  - fixed quantiser (avctx->global_quality / FF_QP2LAMBDA non-zero): a
 *    single quantiser table is built and bits_per_mb is estimated from it.
 * Finally an upper bound for the coded frame size is computed.
 *
 * @param avctx codec context; priv_data is the ProresContext
 * @return 0 on success, AVERROR(EINVAL) or AVERROR_INVALIDDATA for rejected
 *         options, AVERROR(ENOMEM) when an allocation fails
 */
static av_cold int encode_init(AVCodecContext *avctx)
{
    ProresContext *ctx = avctx->priv_data;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(avctx->pix_fmt);
    int mps;
    int i, j;
    int min_quant, max_quant;
    int interlaced = !!(avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT);

    avctx->bits_per_raw_sample = 10;
#if FF_API_CODED_FRAME
FF_DISABLE_DEPRECATION_WARNINGS
    avctx->coded_frame->pict_type = AV_PICTURE_TYPE_I;
    avctx->coded_frame->key_frame = 1;
FF_ENABLE_DEPRECATION_WARNINGS
#endif

    ctx->fdct      = prores_fdct;
    ctx->scantable = interlaced ? ff_prores_interlaced_scan
                                : ff_prores_progressive_scan;
    ff_fdctdsp_init(&ctx->fdsp, avctx);

    mps = ctx->mbs_per_slice;
    if (mps & (mps - 1)) {
        av_log(avctx, AV_LOG_ERROR,
               "there should be an integer power of two MBs per slice\n");
        return AVERROR(EINVAL);
    }
    if (ctx->profile == PRORES_PROFILE_AUTO) {
        // alpha or non-subsampled chroma selects 4444, anything else HQ
        ctx->profile = (desc->flags & AV_PIX_FMT_FLAG_ALPHA ||
                        !(desc->log2_chroma_w + desc->log2_chroma_h))
                     ? PRORES_PROFILE_4444 : PRORES_PROFILE_HQ;
        av_log(avctx, AV_LOG_INFO, "Autoselected %s. It can be overridden "
               "through -profile option.\n", ctx->profile == PRORES_PROFILE_4444
               ? "4:4:4:4 profile because of the used input colorspace"
               : "HQ profile to keep best quality");
    }
    if (desc->flags & AV_PIX_FMT_FLAG_ALPHA) {
        if (ctx->profile != PRORES_PROFILE_4444) {
            // force alpha and warn
            av_log(avctx, AV_LOG_WARNING, "Profile selected will not "
                   "encode alpha. Override with -profile if needed.\n");
            ctx->alpha_bits = 0;
        }
        if (ctx->alpha_bits & 7) {
            av_log(avctx, AV_LOG_ERROR, "alpha bits should be 0, 8 or 16\n");
            return AVERROR(EINVAL);
        }
        avctx->bits_per_coded_sample = 32;
    } else {
        ctx->alpha_bits = 0;
    }

    ctx->chroma_factor = avctx->pix_fmt == AV_PIX_FMT_YUV422P10
                         ? CFACTOR_Y422
                         : CFACTOR_Y444;
    ctx->profile_info  = prores_profile_info + ctx->profile;
    ctx->num_planes    = 3 + !!ctx->alpha_bits;

    ctx->mb_width      = FFALIGN(avctx->width,  16) >> 4;

    // an interlaced frame is coded as two pictures of half the height
    if (interlaced)
        ctx->mb_height = FFALIGN(avctx->height, 32) >> 5;
    else
        ctx->mb_height = FFALIGN(avctx->height, 16) >> 4;

    // full-size slices, plus one shrinking power-of-two slice for every bit
    // set in the number of macroblocks left over at the end of a row
    ctx->slices_width  = ctx->mb_width / mps;
    ctx->slices_width += av_popcount(ctx->mb_width - ctx->slices_width * mps);
    ctx->slices_per_picture = ctx->mb_height * ctx->slices_width;
    ctx->pictures_per_frame = 1 + interlaced;

    if (ctx->quant_sel == -1)
        ctx->quant_mat = prores_quant_matrices[ctx->profile_info->quant];
    else
        ctx->quant_mat = prores_quant_matrices[ctx->quant_sel];

    // the vendor string option may be unset (NULL); reject that as well
    // instead of handing NULL to strlen()
    if (!ctx->vendor || strlen(ctx->vendor) != 4) {
        av_log(avctx, AV_LOG_ERROR, "vendor ID should be 4 bytes\n");
        return AVERROR_INVALIDDATA;
    }

    ctx->force_quant = avctx->global_quality / FF_QP2LAMBDA;
    if (!ctx->force_quant) {
        if (!ctx->bits_per_mb) {
            // pick the bitrate table entry for the first size limit that
            // covers the frame; the last entry is used if none does
            for (i = 0; i < NUM_MB_LIMITS - 1; i++)
                if (prores_mb_limits[i] >= ctx->mb_width * ctx->mb_height *
                                           ctx->pictures_per_frame)
                    break;
            ctx->bits_per_mb   = ctx->profile_info->br_tab[i];
        } else if (ctx->bits_per_mb < 128) {
            av_log(avctx, AV_LOG_ERROR, "too few bits per MB, please set at least 128\n");
            return AVERROR_INVALIDDATA;
        }

        min_quant = ctx->profile_info->min_quant;
        max_quant = ctx->profile_info->max_quant;
        // precompute the scaled matrix for every stored quantiser
        for (i = min_quant; i < MAX_STORED_Q; i++) {
            for (j = 0; j < 64; j++)
                ctx->quants[i][j] = ctx->quant_mat[j] * i;
        }

        // overflow-checked array allocations
        ctx->slice_q = av_malloc_array(ctx->slices_per_picture,
                                       sizeof(*ctx->slice_q));
        if (!ctx->slice_q) {
            encode_close(avctx);
            return AVERROR(ENOMEM);
        }

        // zero-initialised so that encode_close() can run safely even when
        // only some of the node arrays below have been allocated
        ctx->tdata = av_calloc(avctx->thread_count, sizeof(*ctx->tdata));
        if (!ctx->tdata) {
            encode_close(avctx);
            return AVERROR(ENOMEM);
        }

        for (j = 0; j < avctx->thread_count; j++) {
            ctx->tdata[j].nodes = av_malloc_array(ctx->slices_width + 1,
                                                  TRELLIS_WIDTH *
                                                  sizeof(*ctx->tdata->nodes));
            if (!ctx->tdata[j].nodes) {
                encode_close(avctx);
                return AVERROR(ENOMEM);
            }
            // first trellis column: no predecessor, no cost
            for (i = min_quant; i < max_quant + 2; i++) {
                ctx->tdata[j].nodes[i].prev_node = -1;
                ctx->tdata[j].nodes[i].bits      = 0;
                ctx->tdata[j].nodes[i].score     = 0;
            }
        }
    } else {
        int ls = 0;

        if (ctx->force_quant > 64) {
            av_log(avctx, AV_LOG_ERROR, "too large quantiser, maximum is 64\n");
            return AVERROR_INVALIDDATA;
        }

        // a single table for the fixed quantiser; ls accumulates a
        // per-coefficient bit estimate used to derive bits_per_mb
        for (j = 0; j < 64; j++) {
            ctx->quants[0][j] = ctx->quant_mat[j] * ctx->force_quant;
            ls += av_log2((1 << 11)  / ctx->quants[0][j]) * 2 + 1;
        }

        ctx->bits_per_mb = ls * 8;
        if (ctx->chroma_factor == CFACTOR_Y444)
            ctx->bits_per_mb += ls * 4;
    }

    ctx->frame_size_upper_bound = (ctx->pictures_per_frame *
                                   ctx->slices_per_picture + 1) *
                                  (2 + 2 * ctx->num_planes +
                                   (mps * ctx->bits_per_mb) / 8)
                                  + 200;

    if (ctx->alpha_bits) {
         // The alpha plane is run-coded and might exceed the bit budget.
         ctx->frame_size_upper_bound += (ctx->pictures_per_frame *
                                         ctx->slices_per_picture + 1) *
         /* num pixels per slice */     ((ctx->mbs_per_slice * 256 *
         /* bits per pixel */             (1 + ctx->alpha_bits + 1) + 7) >> 3);
    }

    avctx->codec_tag   = ctx->profile_info->tag;

    av_log(avctx, AV_LOG_DEBUG,
           "profile %d, %d slices, interlacing: %s, %d bits per MB\n",
           ctx->profile, ctx->slices_per_picture * ctx->pictures_per_frame,
           interlaced ? "yes" : "no", ctx->bits_per_mb);
    av_log(avctx, AV_LOG_DEBUG, "frame size upper bound: %d\n",
           ctx->frame_size_upper_bound);

    return 0;
}
1289
 
1290
// byte offset of a ProresContext member, for the AVOption table
#define OFFSET(x) offsetof(ProresContext, x)
// option flags shared by every entry; parenthesised so that the expansion
// stays a single operand wherever it is used
#define VE     (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
1292
 
1293
static const AVOption options[] = {
1294
    { "mbs_per_slice", "macroblocks per slice", OFFSET(mbs_per_slice),
1295
        AV_OPT_TYPE_INT, { .i64 = 8 }, 1, MAX_MBS_PER_SLICE, VE },
1296
    { "profile",       NULL, OFFSET(profile), AV_OPT_TYPE_INT,
1297
        { .i64 = PRORES_PROFILE_AUTO },
1298
        PRORES_PROFILE_AUTO, PRORES_PROFILE_4444, VE, "profile" },
1299
    { "auto",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_AUTO },
1300
        0, 0, VE, "profile" },
1301
    { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_PROXY },
1302
        0, 0, VE, "profile" },
1303
    { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_LT },
1304
        0, 0, VE, "profile" },
1305
    { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_STANDARD },
1306
        0, 0, VE, "profile" },
1307
    { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_HQ },
1308
        0, 0, VE, "profile" },
1309
    { "4444",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = PRORES_PROFILE_4444 },
1310
        0, 0, VE, "profile" },
1311
    { "vendor", "vendor ID", OFFSET(vendor),
1312
        AV_OPT_TYPE_STRING, { .str = "Lavc" }, CHAR_MIN, CHAR_MAX, VE },
1313
    { "bits_per_mb", "desired bits per macroblock", OFFSET(bits_per_mb),
1314
        AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 8192, VE },
1315
    { "quant_mat", "quantiser matrix", OFFSET(quant_sel), AV_OPT_TYPE_INT,
1316
        { .i64 = -1 }, -1, QUANT_MAT_DEFAULT, VE, "quant_mat" },
1317
    { "auto",          NULL, 0, AV_OPT_TYPE_CONST, { .i64 = -1 },
1318
        0, 0, VE, "quant_mat" },
1319
    { "proxy",         NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_PROXY },
1320
        0, 0, VE, "quant_mat" },
1321
    { "lt",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_LT },
1322
        0, 0, VE, "quant_mat" },
1323
    { "standard",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_STANDARD },
1324
        0, 0, VE, "quant_mat" },
1325
    { "hq",            NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_HQ },
1326
        0, 0, VE, "quant_mat" },
1327
    { "default",       NULL, 0, AV_OPT_TYPE_CONST, { .i64 = QUANT_MAT_DEFAULT },
1328
        0, 0, VE, "quant_mat" },
1329
    { "alpha_bits", "bits for alpha plane", OFFSET(alpha_bits), AV_OPT_TYPE_INT,
1330
        { .i64 = 16 }, 0, 16, VE },
1331
    { NULL }
1332
};
1333
 
1334
static const AVClass proresenc_class = {
1335
    .class_name = "ProRes encoder",
1336
    .item_name  = av_default_item_name,
1337
    .option     = options,
1338
    .version    = LIBAVUTIL_VERSION_INT,
1339
};
1340
 
1341
AVCodec ff_prores_ks_encoder = {
1342
    .name           = "prores_ks",
1343
    .long_name      = NULL_IF_CONFIG_SMALL("Apple ProRes (iCodec Pro)"),
1344
    .type           = AVMEDIA_TYPE_VIDEO,
1345
    .id             = AV_CODEC_ID_PRORES,
1346
    .priv_data_size = sizeof(ProresContext),
1347
    .init           = encode_init,
1348
    .close          = encode_close,
1349
    .encode2        = encode_frame,
1350
    .capabilities   = AV_CODEC_CAP_SLICE_THREADS,
1351
    .pix_fmts       = (const enum AVPixelFormat[]) {
1352
                          AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10,
1353
                          AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_NONE
1354
                      },
1355
    .priv_class     = &proresenc_class,
1356
};