Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
6147 serge 1
/*
2
 * Cinepak encoder (c) 2011 Tomas Härdin
3
 * http://titan.codemill.se/~tomhar/cinepakenc.patch
4
 *
5
 * Fixes and improvements, vintage decoders compatibility
6
 *  (c) 2013, 2014 Rl, Aetey Global Technologies AB
7
 
8
Permission is hereby granted, free of charge, to any person obtaining a
9
copy of this software and associated documentation files (the "Software"),
10
to deal in the Software without restriction, including without limitation
11
the rights to use, copy, modify, merge, publish, distribute, sublicense,
12
and/or sell copies of the Software, and to permit persons to whom the
13
Software is furnished to do so, subject to the following conditions:
14
 
15
The above copyright notice and this permission notice shall be included
16
in all copies or substantial portions of the Software.
17
 
18
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
22
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
23
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24
OTHER DEALINGS IN THE SOFTWARE.
25
 
26
 * TODO:
27
 * - optimize: color space conversion, ...
28
 * - implement options to set the min/max number of strips?
29
 * MAYBE:
30
 * - "optimally" split the frame into several non-regular areas
31
 *   using a separate codebook pair for each area and approximating
32
 *   the area by several rectangular strips (generally not full width ones)
33
 *   (use quadtree splitting? a simple fixed-granularity grid?)
34
 *
35
 *
36
 * version 2014-01-23 Rl
37
 * - added option handling for flexibility
38
 *
39
 * version 2014-01-21 Rl
40
 * - believe it or not, now we get even smaller files, with better quality
41
 *   (which means I missed an optimization earlier :)
42
 *
43
 * version 2014-01-20 Rl
44
 * - made the encoder compatible with vintage decoders
45
 *   and added some yet unused code for possible future
46
 *   incremental codebook updates
47
 * - fixed a small memory leak
48
 *
49
 * version 2013-04-28 Rl
50
 * - bugfixed codebook optimization logic
51
 *
52
 * version 2013-02-14 Rl
53
 * "Valentine's Day" version:
54
 * - made strip division more robust
55
 * - minimized bruteforcing the number of strips,
56
 *   (costs some R/D but speeds up compression a lot), the heuristic
57
 *   assumption is that score as a function of the number of strips has
58
 *   one wide minimum which moves slowly, of course not fully true
59
 * - simplified codebook generation,
60
 *   the old code was meant for other optimizations than we actually do
61
 * - optimized the codebook generation / error estimation for MODE_MC
62
 *
63
 * version 2013-02-12 Rl
64
 * - separated codebook training sets, avoided the transfer of wasted bytes,
65
 *   which yields both better quality and smaller files
66
 * - now using the correct colorspace (TODO: move conversion to libswscale)
67
 *
68
 * version 2013-02-08 Rl
69
 * - fixes/optimization in multistrip encoding and codebook size choice,
70
 *   quality/bitrate is now better than that of the binary proprietary encoder
71
 */
72
 
73
#include "libavutil/intreadwrite.h"
74
#include "avcodec.h"
75
#include "libavutil/lfg.h"
76
#include "elbg.h"
77
#include "internal.h"
78
 
79
#include "libavutil/avassert.h"
80
#include "libavutil/opt.h"
81
 
82
/* Header sizes, in bytes, used when sizing and writing the output buffers. */
#define CVID_HEADER_SIZE  10
#define STRIP_HEADER_SIZE 12
#define CHUNK_HEADER_SIZE 4

/* Macroblocks are 4x4. */
#define MB_SIZE 4
#define MB_AREA (MB_SIZE*MB_SIZE)

/* A vector has six or four entries depending on the format. */
#define VECTOR_MAX 6
/* Size of a codebook. */
#define CODEBOOK_MAX 256

/*
 * Limits on the number of strips per frame.
 *
 * MAX_STRIPS limits the maximum quality you can reach when you want high
 *            quality on high resolutions; having fewer choices regarding
 *            the number of strips speeds up encoding (obviously).
 * MIN_STRIPS limits the minimum efficiently encodable bit rate on low
 *            resolutions; having more strips speeds up encoding the frame
 *            (this is less obvious).
 *
 * The numbers are only used for brute force optimization for the first
 * frame, for the following frames they are adaptively readjusted.
 *
 * NOTE the decoder in ffmpeg has its own arbitrary limitation on the number
 * of strips, currently 32.
 */
#define MAX_STRIPS  32
#define MIN_STRIPS  1
102
 
103
/*
 * Per-strip encoding mode.
 *
 * MODE_V1_ONLY: every macroblock is coded with a single V1 vector.
 * MODE_V1_V4:   each macroblock is coded either as V1 or as V4.
 * MODE_MC:      each macroblock is skipped, or coded as V1 or V4.
 * MODE_COUNT:   number of modes above, not a mode itself.
 */
typedef enum {
    MODE_V1_ONLY = 0,
    MODE_V1_V4   = 1,
    MODE_MC      = 2,

    MODE_COUNT   = 3,
} CinepakMode;
110
 
111
/*
 * How a single macroblock is (to be) encoded.
 * ENC_UNCERTAIN is the "not decided" marker tested by CERTAIN().
 */
typedef enum {
    ENC_V1        = 0,
    ENC_V4        = 1,
    ENC_SKIP      = 2,

    ENC_UNCERTAIN = 3
} mb_encoding;
118
 
119
typedef struct {
120
    int v1_vector;                  //index into v1 codebook
121
    int v1_error;                   //error when using V1 encoding
122
    int v4_vector[4];               //indices into v4 codebooks
123
    int v4_error;                   //error when using V4 encoding
124
    int skip_error;                 //error when block is skipped (aka copied from last frame)
125
    mb_encoding best_encoding;      //last result from calculate_mode_score()
126
} mb_info;
127
 
128
typedef struct {
129
    int v1_codebook[CODEBOOK_MAX*VECTOR_MAX];
130
    int v4_codebook[CODEBOOK_MAX*VECTOR_MAX];
131
    int v1_size;
132
    int v4_size;
133
    CinepakMode mode;
134
} strip_info;
135
 
136
typedef struct {
137
    const AVClass *class;
138
    AVCodecContext *avctx;
139
    unsigned char *pict_bufs[4], *strip_buf, *frame_buf;
140
    AVFrame *last_frame;
141
    AVFrame *best_frame;
142
    AVFrame *scratch_frame;
143
    AVFrame *input_frame;
144
    enum AVPixelFormat pix_fmt;
145
    int w, h;
146
    int frame_buf_size;
147
    int curframe, keyint;
148
    AVLFG randctx;
149
    uint64_t lambda;
150
    int *codebook_input;
151
    int *codebook_closest;
152
    mb_info *mb;                                //MB RD state
153
    int min_strips;          //the current limit
154
    int max_strips;          //the current limit
155
#ifdef CINEPAKENC_DEBUG
156
    mb_info *best_mb;                           //TODO: remove. only used for printing stats
157
    int num_v1_mode, num_v4_mode, num_mc_mode;
158
    int num_v1_encs, num_v4_encs, num_skips;
159
#endif
160
// options
161
    int max_extra_cb_iterations;
162
    int skip_empty_cb;
163
    int min_min_strips;
164
    int max_max_strips;
165
    int strip_number_delta_range;
166
} CinepakEncContext;
167
 
168
#define OFFSET(x) offsetof(CinepakEncContext, x)
169
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
170
static const AVOption options[] = {
171
    { "max_extra_cb_iterations", "Max extra codebook recalculation passes, more is better and slower", OFFSET(max_extra_cb_iterations), AV_OPT_TYPE_INT, { .i64 = 2 }, 0, INT_MAX, VE },
172
    { "skip_empty_cb", "Avoid wasting bytes, ignore vintage MacOS decoder", OFFSET(skip_empty_cb), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
173
    { "max_strips", "Limit strips/frame, vintage compatible is 1..3, otherwise the more the better", OFFSET(max_max_strips), AV_OPT_TYPE_INT, { .i64 = 3 }, MIN_STRIPS, MAX_STRIPS, VE },
174
    { "min_strips", "Enforce min strips/frame, more is worse and faster, must be <= max_strips", OFFSET(min_min_strips), AV_OPT_TYPE_INT, { .i64 = MIN_STRIPS }, MIN_STRIPS, MAX_STRIPS, VE },
175
    { "strip_number_adaptivity", "How fast the strip number adapts, more is slightly better, much slower", OFFSET(strip_number_delta_range), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_STRIPS-MIN_STRIPS, VE },
176
    { NULL },
177
};
178
 
179
static const AVClass cinepak_class = {
180
    .class_name = "cinepak",
181
    .item_name  = av_default_item_name,
182
    .option     = options,
183
    .version    = LIBAVUTIL_VERSION_INT,
184
};
185
 
186
/**
 * Initialize the encoder.
 *
 * Rejects dimensions that are not multiples of four and a min_strips option
 * larger than max_strips, then allocates the work frames, the codebook
 * training buffers, the picture buffers, the strip/frame output buffers and
 * the per-macroblock state, and finally points the work frames at the
 * picture buffers.
 *
 * @param avctx codec context whose priv_data is the CinepakEncContext
 * @return 0 on success, AVERROR(EINVAL) for bad dimensions or options,
 *         AVERROR(ENOMEM) if an allocation fails (whatever had been
 *         allocated is released first)
 */
static av_cold int cinepak_encode_init(AVCodecContext *avctx)
{
    CinepakEncContext *s = avctx->priv_data;
    const int is_rgb   = avctx->pix_fmt == AV_PIX_FMT_RGB24;
    const int vec_len  = is_rgb ? 6 : 4;   /* values per codebook vector */
    const int num_bufs = is_rgb ? 4 : 3;   /* RGB24 needs an extra input frame */
    AVFrame *frames[4];
    int i, pixels, mb_count, strip_buf_size, frame_buf_size;

    if ((avctx->width & 3) || (avctx->height & 3)) {
        av_log(avctx, AV_LOG_ERROR, "width and height must be multiples of four (got %ix%i)\n",
               avctx->width, avctx->height);
        return AVERROR(EINVAL);
    }

    if (s->min_min_strips > s->max_max_strips) {
        av_log(avctx, AV_LOG_ERROR, "minimal number of strips can not exceed maximal (got %i and %i)\n",
               s->min_min_strips, s->max_max_strips);
        return AVERROR(EINVAL);
    }

    s->last_frame = av_frame_alloc();
    if (!s->last_frame)
        return AVERROR(ENOMEM);

    s->best_frame = av_frame_alloc();
    if (!s->best_frame)
        goto enomem;

    s->scratch_frame = av_frame_alloc();
    if (!s->scratch_frame)
        goto enomem;

    if (is_rgb) {
        s->input_frame = av_frame_alloc();
        if (!s->input_frame)
            goto enomem;
    }

    pixels = avctx->width * avctx->height;

    s->codebook_input = av_malloc((sizeof(int) * vec_len * pixels) >> 2);
    if (!s->codebook_input)
        goto enomem;

    s->codebook_closest = av_malloc((sizeof(int) * pixels) >> 2);
    if (!s->codebook_closest)
        goto enomem;

    for (i = 0; i < num_bufs; i++) {
        s->pict_bufs[i] = av_malloc((vec_len * pixels) >> 2);
        if (!s->pict_bufs[i])
            goto enomem;
    }

    mb_count = pixels / MB_AREA;

    /*
     * The largest possible chunk is 0x31 with all MBs encoded in V4 mode
     * and full codebooks being replaced in INTER mode, which is 34 bits
     * per MB, and 2*256 extra flag bits per strip.
     */
    strip_buf_size = STRIP_HEADER_SIZE + 3 * CHUNK_HEADER_SIZE + 2 * VECTOR_MAX * CODEBOOK_MAX + 4 * (mb_count + (mb_count + 15) / 16) + (2 * CODEBOOK_MAX)/8;

    frame_buf_size = CVID_HEADER_SIZE + s->max_max_strips * strip_buf_size;

    s->strip_buf = av_malloc(strip_buf_size);
    if (!s->strip_buf)
        goto enomem;

    s->frame_buf = av_malloc(frame_buf_size);
    if (!s->frame_buf)
        goto enomem;

    s->mb = av_malloc_array(mb_count, sizeof(mb_info));
    if (!s->mb)
        goto enomem;

#ifdef CINEPAKENC_DEBUG
    s->best_mb = av_malloc_array(mb_count, sizeof(mb_info));
    if (!s->best_mb)
        goto enomem;
#endif

    av_lfg_init(&s->randctx, 1);
    s->avctx          = avctx;
    s->w              = avctx->width;
    s->h              = avctx->height;
    s->frame_buf_size = frame_buf_size;
    s->curframe       = 0;
    s->keyint         = avctx->keyint_min;
    s->pix_fmt        = avctx->pix_fmt;

    /*
     * Set up the AVFrames: frame i lives in pict_bufs[i]. For RGB24 the
     * two quarter-size chroma planes follow the luma plane in the same
     * buffer.
     */
    frames[0] = s->last_frame;
    frames[1] = s->best_frame;
    frames[2] = s->scratch_frame;
    frames[3] = s->input_frame;     /* only touched when is_rgb */

    for (i = 0; i < num_bufs; i++) {
        AVFrame *f = frames[i];

        f->data[0]     = s->pict_bufs[i];
        f->linesize[0] = s->w;

        if (is_rgb) {
            f->data[1]     = f->data[0] + s->w * s->h;
            f->data[2]     = f->data[1] + ((s->w * s->h) >> 2);
            f->linesize[1] = f->linesize[2] = s->w >> 1;
        }
    }

    s->min_strips = s->min_min_strips;
    s->max_strips = s->max_max_strips;

#ifdef CINEPAKENC_DEBUG
    s->num_v1_mode = s->num_v4_mode = s->num_mc_mode = s->num_v1_encs = s->num_v4_encs = s->num_skips = 0;
#endif

    return 0;

enomem:
    av_frame_free(&s->last_frame);
    av_frame_free(&s->best_frame);
    av_frame_free(&s->scratch_frame);
    if (is_rgb)
        av_frame_free(&s->input_frame);
    av_freep(&s->codebook_input);
    av_freep(&s->codebook_closest);
    av_freep(&s->strip_buf);
    av_freep(&s->frame_buf);
    av_freep(&s->mb);
#ifdef CINEPAKENC_DEBUG
    av_freep(&s->best_mb);
#endif

    for (i = 0; i < num_bufs; i++)
        av_freep(&s->pict_bufs[i]);

    return AVERROR(ENOMEM);
}
313
 
314
/* Accumulate the squared error report only when it is compiled in. */
#ifdef CINEPAK_REPORT_SERR
#define CINEPAK_ADD_SERR(err) (*serr += (err))
#else
#define CINEPAK_ADD_SERR(err) ((void)0)
#endif

/**
 * Compute the rate/distortion score of a strip for info->mode:
 *     score = FF_LAMBDA_SCALE * error + lambda * bits
 *
 * When report is zero, the best encoding of every macroblock is chosen here
 * and stored in mb->best_encoding. When report is non-zero, the stored
 * encodings are kept, except that in MODE_MC a coded block may still be
 * moved to ENC_SKIP (never to another codebook encoding); the number of
 * blocks moved away from V1 / V4 is returned through the *_shrunk pointers,
 * which are only written when report is non-zero.
 *
 * @param s      encoder context (s->mb holds the per-MB errors)
 * @param h      strip height in pixels
 * @param info   codebook sizes and mode of the strip
 * @param report non-zero to score the already chosen encodings
 * @param training_set_v1_shrunk out: V1 blocks turned into skips
 * @param training_set_v4_shrunk out: V4 blocks turned into skips
 * @return the score of the strip
 */
static int64_t calculate_mode_score(CinepakEncContext *s, int h, strip_info *info, int report, int *training_set_v1_shrunk, int *training_set_v4_shrunk
#ifdef CINEPAK_REPORT_SERR
, int64_t *serr
#endif
)
{
    const int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
    const int mb_count   = s->w * h / MB_AREA;
    int64_t skip_cost, v1_cost, v4_cost;
    int64_t ret;
    mb_info *cur;
    int n;

    /* cost of the codebook chunks (if non-empty) and of the vector chunk header */
    ret = (s->lambda * ((info->v1_size ? CHUNK_HEADER_SIZE + info->v1_size * entry_size : 0) +
                        (info->v4_size ? CHUNK_HEADER_SIZE + info->v4_size * entry_size : 0) +
                        CHUNK_HEADER_SIZE)) << 3;

#ifdef CINEPAK_REPORT_SERR
    *serr = 0;
#endif

    switch (info->mode) {
    case MODE_V1_ONLY:
        /* one byte per MB */
        ret += s->lambda * 8 * mb_count;

        /*
         * While calculating we assume all blocks are ENC_V1; this function
         * is never called for report in MODE_V1_ONLY, so the encoding is
         * set unconditionally.
         */
        for (n = 0; n < mb_count; n++) {
            cur = &s->mb[n];
            ret += FF_LAMBDA_SCALE * cur->v1_error;
            CINEPAK_ADD_SERR(cur->v1_error);
            cur->best_encoding = ENC_V1;
        }
        break;

    case MODE_V1_V4:
        /* 9 or 33 bits per MB */
        if (report) {
            /* no moves between the corresponding training sets are allowed */
            *training_set_v1_shrunk = *training_set_v4_shrunk = 0;

            for (n = 0; n < mb_count; n++) {
                cur = &s->mb[n];
                if (cur->best_encoding == ENC_V1) {
                    v1_cost = s->lambda * 9  + FF_LAMBDA_SCALE * cur->v1_error;
                    ret += v1_cost;
                    CINEPAK_ADD_SERR(cur->v1_error);
                } else {
                    v4_cost = s->lambda * 33 + FF_LAMBDA_SCALE * cur->v4_error;
                    ret += v4_cost;
                    CINEPAK_ADD_SERR(cur->v4_error);
                }
            }
        } else {
            /* find the best mode per block; V1 wins ties */
            for (n = 0; n < mb_count; n++) {
                cur = &s->mb[n];
                v1_cost = s->lambda * 9  + FF_LAMBDA_SCALE * cur->v1_error;
                v4_cost = s->lambda * 33 + FF_LAMBDA_SCALE * cur->v4_error;

                if (v1_cost <= v4_cost) {
                    ret += v1_cost;
                    CINEPAK_ADD_SERR(cur->v1_error);
                    cur->best_encoding = ENC_V1;
                } else {
                    ret += v4_cost;
                    CINEPAK_ADD_SERR(cur->v4_error);
                    cur->best_encoding = ENC_V4;
                }
            }
        }
        break;

    case MODE_MC:
        /* 1, 10 or 34 bits per MB */
        if (report) {
            int moved_from_v1 = 0, moved_from_v4 = 0;

            for (n = 0; n < mb_count; n++) {
                cur = &s->mb[n];
                /*
                 * It is OK to move blocks to ENC_SKIP here
                 * but not to any codebook encoding!
                 */
                skip_cost = s->lambda * 1  + FF_LAMBDA_SCALE * cur->skip_error;

                if (cur->best_encoding == ENC_SKIP) {
                    ret += skip_cost;
                    CINEPAK_ADD_SERR(cur->skip_error);
                } else if (cur->best_encoding == ENC_V1) {
                    v1_cost = s->lambda * 10 + FF_LAMBDA_SCALE * cur->v1_error;
                    if (v1_cost >= skip_cost) {
                        cur->best_encoding = ENC_SKIP;
                        ++moved_from_v1;
                        ret += skip_cost;
                        CINEPAK_ADD_SERR(cur->skip_error);
                    } else {
                        ret += v1_cost;
                        CINEPAK_ADD_SERR(cur->v1_error);
                    }
                } else {
                    v4_cost = s->lambda * 34 + FF_LAMBDA_SCALE * cur->v4_error;
                    if (v4_cost >= skip_cost) {
                        cur->best_encoding = ENC_SKIP;
                        ++moved_from_v4;
                        ret += skip_cost;
                        CINEPAK_ADD_SERR(cur->skip_error);
                    } else {
                        ret += v4_cost;
                        CINEPAK_ADD_SERR(cur->v4_error);
                    }
                }
            }

            *training_set_v1_shrunk = moved_from_v1;
            *training_set_v4_shrunk = moved_from_v4;
        } else {
            /* find the best mode per block; skip wins ties, then V1 */
            for (n = 0; n < mb_count; n++) {
                cur = &s->mb[n];
                skip_cost = s->lambda * 1  + FF_LAMBDA_SCALE * cur->skip_error;
                v1_cost   = s->lambda * 10 + FF_LAMBDA_SCALE * cur->v1_error;
                v4_cost   = s->lambda * 34 + FF_LAMBDA_SCALE * cur->v4_error;

                if (skip_cost <= v1_cost && skip_cost <= v4_cost) {
                    ret += skip_cost;
                    CINEPAK_ADD_SERR(cur->skip_error);
                    cur->best_encoding = ENC_SKIP;
                } else if (v1_cost <= v4_cost) {
                    ret += v1_cost;
                    CINEPAK_ADD_SERR(cur->v1_error);
                    cur->best_encoding = ENC_V1;
                } else {
                    ret += v4_cost;
                    CINEPAK_ADD_SERR(cur->v4_error);
                    cur->best_encoding = ENC_V4;
                }
            }
        }
        break;
    }

    return ret;
}
#undef CINEPAK_ADD_SERR
474
 
475
static int write_chunk_header(unsigned char *buf, int chunk_type, int chunk_size)
476
{
477
    buf[0] = chunk_type;
478
    AV_WB24(&buf[1], chunk_size + CHUNK_HEADER_SIZE);
479
    return CHUNK_HEADER_SIZE;
480
}
481
 
482
/**
 * Write a codebook chunk (header plus entries) to buf.
 *
 * Each entry is written as entry_size bytes (6 for RGB24 input, 4
 * otherwise); bytes 4 and 5 of an entry are stored with the top bit
 * flipped.
 *
 * @param s               encoder context (selects the entry size and chunk id)
 * @param codebook        codebook values, entry_size ints per entry
 * @param size            number of entries to write
 * @param chunk_type_yuv  chunk id used for RGB24 input
 * @param chunk_type_gray chunk id used otherwise
 * @param buf             destination buffer
 * @return number of bytes written to buf
 */
static int encode_codebook(CinepakEncContext *s, int *codebook, int size, int chunk_type_yuv, int chunk_type_gray, unsigned char *buf)
{
    int x, y, ret, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
    int incremental_codebook_replacement_mode = 0; // hardcoded here,
                // the compiler should notice that this is a constant -- rl

    ret = write_chunk_header(buf,
          s->pix_fmt == AV_PIX_FMT_RGB24 ?
           chunk_type_yuv+(incremental_codebook_replacement_mode?1:0) :
           chunk_type_gray+(incremental_codebook_replacement_mode?1:0),
          entry_size * size
           + (incremental_codebook_replacement_mode?(size+31)/32*4:0) );

// we do codebook encoding according to the "intra" mode
// but we keep the "dead" code for reference in case we will want
// to use incremental codebook updates (which actually would give us
// "kind of" motion compensation, especially in 1 strip/frame case) -- rl
// (of course, the code will be not useful as-is)
    if(incremental_codebook_replacement_mode) {
        // The flag word is kept unsigned: it gets bit 31 set and is shifted
        // right, neither of which is portable on a signed int.
        uint32_t flags = 0;
        int flagsind = 0; // offset of the flag word currently being filled

        for(x = 0; x < size; x++) {
            if(flags == 0) {
                // start a new group of up to 32 entries: reserve its flag word
                flagsind = ret;
                ret += 4;
                flags = 0x80000000;
            } else
                flags = ((flags>>1) | 0x80000000);

            for(y = 0; y < entry_size; y++)
                buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);

            if(flags == 0xffffffff) {
                AV_WB32(&buf[flagsind], flags);
                flags = 0;
            }
        }
        // partially filled last group
        if(flags)
            AV_WB32(&buf[flagsind], flags);
    } else
        for(x = 0; x < size; x++)
            for(y = 0; y < entry_size; y++)
                buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);

    return ret;
}
526
 
527
//sets out to the sub picture starting at (x,y) in in
528
static void get_sub_picture(CinepakEncContext *s, int x, int y, AVPicture *in, AVPicture *out)
529
{
530
    out->data[0] = in->data[0] + x + y * in->linesize[0];
531
    out->linesize[0] = in->linesize[0];
532
 
533
    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
534
        out->data[1] = in->data[1] + (x >> 1) + (y >> 1) * in->linesize[1];
535
        out->linesize[1] = in->linesize[1];
536
 
537
        out->data[2] = in->data[2] + (x >> 1) + (y >> 1) * in->linesize[2];
538
        out->linesize[2] = in->linesize[2];
539
    }
540
}
541
 
542
//decodes the V1 vector in mb into the 4x4 MB pointed to by sub_pict
543
static void decode_v1_vector(CinepakEncContext *s, AVPicture *sub_pict, int v1_vector, strip_info *info)
544
{
545
    int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
546
 
547
    sub_pict->data[0][0] =
548
            sub_pict->data[0][1] =
549
            sub_pict->data[0][    sub_pict->linesize[0]] =
550
            sub_pict->data[0][1+  sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size];
551
 
552
    sub_pict->data[0][2] =
553
            sub_pict->data[0][3] =
554
            sub_pict->data[0][2+  sub_pict->linesize[0]] =
555
            sub_pict->data[0][3+  sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+1];
556
 
557
    sub_pict->data[0][2*sub_pict->linesize[0]] =
558
            sub_pict->data[0][1+2*sub_pict->linesize[0]] =
559
            sub_pict->data[0][  3*sub_pict->linesize[0]] =
560
            sub_pict->data[0][1+3*sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+2];
561
 
562
    sub_pict->data[0][2+2*sub_pict->linesize[0]] =
563
            sub_pict->data[0][3+2*sub_pict->linesize[0]] =
564
            sub_pict->data[0][2+3*sub_pict->linesize[0]] =
565
            sub_pict->data[0][3+3*sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+3];
566
 
567
    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
568
        sub_pict->data[1][0] =
569
            sub_pict->data[1][1] =
570
            sub_pict->data[1][    sub_pict->linesize[1]] =
571
            sub_pict->data[1][1+  sub_pict->linesize[1]] = info->v1_codebook[v1_vector*entry_size+4];
572
 
573
        sub_pict->data[2][0] =
574
            sub_pict->data[2][1] =
575
            sub_pict->data[2][    sub_pict->linesize[2]] =
576
            sub_pict->data[2][1+  sub_pict->linesize[2]] = info->v1_codebook[v1_vector*entry_size+5];
577
    }
578
}
579
 
580
//decodes the V4 vectors in mb into the 4x4 MB pointed to by sub_pict
581
static void decode_v4_vector(CinepakEncContext *s, AVPicture *sub_pict, int *v4_vector, strip_info *info)
582
{
583
    int i, x, y, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
584
 
585
    for(i = y = 0; y < 4; y += 2) {
586
        for(x = 0; x < 4; x += 2, i++) {
587
            sub_pict->data[0][x   +     y*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size];
588
            sub_pict->data[0][x+1 +     y*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+1];
589
            sub_pict->data[0][x   + (y+1)*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+2];
590
            sub_pict->data[0][x+1 + (y+1)*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+3];
591
 
592
            if(s->pix_fmt == AV_PIX_FMT_RGB24) {
593
                sub_pict->data[1][(x>>1) + (y>>1)*sub_pict->linesize[1]] = info->v4_codebook[v4_vector[i]*entry_size+4];
594
                sub_pict->data[2][(x>>1) + (y>>1)*sub_pict->linesize[2]] = info->v4_codebook[v4_vector[i]*entry_size+5];
595
            }
596
        }
597
    }
598
}
599
 
600
/**
 * Copy one macroblock from b to a: MB_SIZE x MB_SIZE bytes of the first
 * plane and, for RGB24 input, (MB_SIZE/2) x (MB_SIZE/2) bytes of planes
 * 1 and 2.
 */
static void copy_mb(CinepakEncContext *s, AVPicture *a, AVPicture *b)
{
    const int num_planes = s->pix_fmt == AV_PIX_FMT_RGB24 ? 3 : 1;
    int plane, row;

    for (plane = 0; plane < num_planes; plane++) {
        /* planes 1 and 2 are subsampled by two in both directions */
        const int dim = plane ? MB_SIZE/2 : MB_SIZE;

        for (row = 0; row < dim; row++)
            memcpy(a->data[plane] + row*a->linesize[plane],
                   b->data[plane] + row*b->linesize[plane],
                   dim);
    }
}
619
 
620
/**
 * Encode one strip in the mode given by info->mode.
 *
 * Writes the codebook chunks and the vector chunk to buf, and reconstructs
 * the strip into scratch_pict (skipped blocks are copied from last_pict).
 * The per-macroblock decisions are read from s->mb.
 *
 * @param s            encoder context
 * @param h            strip height in pixels
 * @param scratch_pict destination of the reconstructed strip
 * @param last_pict    previous picture, source for skipped blocks
 * @param info         codebooks and mode of the strip
 * @param buf          output buffer
 * @return number of bytes written to buf
 */
static int encode_mode(CinepakEncContext *s, int h, AVPicture *scratch_pict, AVPicture *last_pict, strip_info *info, unsigned char *buf)
{
    int x, y, z, bits, temp_size, header_ofs, ret = 0, mb_count = s->w * h / MB_AREA;
    int needs_extra_bit, should_write_temp;
    // flag words are built unsigned: bit 31 is set routinely, which is
    // undefined behaviour when done by left-shifting a signed int
    uint32_t flags;
    unsigned char temp[64]; //32/2 = 16 V4 blocks at 4 B each -> 64 B
    mb_info *mb;
    AVPicture sub_scratch = {{0}}, sub_last = {{0}};

    //encode codebooks
////// MacOS vintage decoder compatibility dictates the presence of
////// the codebook chunk even when the codebook is empty - pretty dumb...
////// and also the certain order of the codebook chunks -- rl
    if(info->v4_size || !s->skip_empty_cb)
        ret += encode_codebook(s, info->v4_codebook, info->v4_size, 0x20, 0x24, buf + ret);

    if(info->v1_size || !s->skip_empty_cb)
        ret += encode_codebook(s, info->v1_codebook, info->v1_size, 0x22, 0x26, buf + ret);

    //update scratch picture
    for(z = y = 0; y < h; y += MB_SIZE) {
        for(x = 0; x < s->w; x += MB_SIZE, z++) {
            mb = &s->mb[z];

            get_sub_picture(s, x, y, scratch_pict, &sub_scratch);

            if(info->mode == MODE_MC && mb->best_encoding == ENC_SKIP) {
                get_sub_picture(s, x, y, last_pict, &sub_last);
                copy_mb(s, &sub_scratch, &sub_last);
            } else if(info->mode == MODE_V1_ONLY || mb->best_encoding == ENC_V1)
                decode_v1_vector(s, &sub_scratch, mb->v1_vector, info);
            else
                decode_v4_vector(s, &sub_scratch, mb->v4_vector, info);
        }
    }

    switch(info->mode) {
    case MODE_V1_ONLY:
        // one V1 index byte per MB, no flags
        ret += write_chunk_header(buf + ret, 0x32, mb_count);

        for(x = 0; x < mb_count; x++)
            buf[ret++] = s->mb[x].v1_vector;

        break;
    case MODE_V1_V4:
        //remember header position, the size is only known at the end
        header_ofs = ret;
        ret += CHUNK_HEADER_SIZE;

        // groups of up to 32 MBs: one flag word (bit set = V4, MSB first)
        // followed by the vector bytes of the group
        for(x = 0; x < mb_count; x += 32) {
            flags = 0;
            for(y = x; y < FFMIN(x+32, mb_count); y++)
                if(s->mb[y].best_encoding == ENC_V4)
                    flags |= 1U << (31 - y + x);

            AV_WB32(&buf[ret], flags);
            ret += 4;

            for(y = x; y < FFMIN(x+32, mb_count); y++) {
                mb = &s->mb[y];

                if(mb->best_encoding == ENC_V1)
                    buf[ret++] = mb->v1_vector;
                else
                    for(z = 0; z < 4; z++)
                        buf[ret++] = mb->v4_vector[z];
            }
        }

        write_chunk_header(buf + header_ofs, 0x30, ret - header_ofs - CHUNK_HEADER_SIZE);

        break;
    case MODE_MC:
        //remember header position, the size is only known at the end
        header_ofs = ret;
        ret += CHUNK_HEADER_SIZE;
        flags = 0;
        bits = temp_size = 0;

        // Every MB contributes a "coded" bit; coded MBs contribute a second
        // bit (set = V4). Vector bytes are collected in temp and written
        // after the flag word that holds their last flag bit.
        for(x = 0; x < mb_count; x++) {
            mb = &s->mb[x];
            flags |= (uint32_t)(mb->best_encoding != ENC_SKIP) << (31 - bits++);
            needs_extra_bit = 0;
            should_write_temp = 0;

            if(mb->best_encoding != ENC_SKIP) {
                if(bits < 32)
                    flags |= (uint32_t)(mb->best_encoding == ENC_V4) << (31 - bits++);
                else
                    // the flag word is full, the V1/V4 bit goes into the next one
                    needs_extra_bit = 1;
            }

            if(bits == 32) {
                AV_WB32(&buf[ret], flags);
                ret += 4;
                flags = 0;
                bits = 0;

                if(mb->best_encoding == ENC_SKIP || needs_extra_bit) {
                    // this MB adds no vector to the finished word: flush now
                    memcpy(&buf[ret], temp, temp_size);
                    ret += temp_size;
                    temp_size = 0;
                } else
                    // this MB's vector still belongs to the finished word
                    should_write_temp = 1;
            }

            if(needs_extra_bit) {
                flags = (uint32_t)(mb->best_encoding == ENC_V4) << 31;
                bits = 1;
            }

            if(mb->best_encoding == ENC_V1)
                temp[temp_size++] = mb->v1_vector;
            else if(mb->best_encoding == ENC_V4)
                for(z = 0; z < 4; z++)
                    temp[temp_size++] = mb->v4_vector[z];

            if(should_write_temp) {
                memcpy(&buf[ret], temp, temp_size);
                ret += temp_size;
                temp_size = 0;
            }
        }

        // partially filled last flag word and its vectors
        if(bits > 0) {
            AV_WB32(&buf[ret], flags);
            ret += 4;
            memcpy(&buf[ret], temp, temp_size);
            ret += temp_size;
        }

        write_chunk_header(buf + header_ofs, 0x31, ret - header_ofs - CHUNK_HEADER_SIZE);

        break;
    }

    return ret;
}
756
 
757
//computes distortion of 4x4 MB in b compared to a
758
static int compute_mb_distortion(CinepakEncContext *s, AVPicture *a, AVPicture *b)
759
{
760
    int x, y, p, d, ret = 0;
761
 
762
    for(y = 0; y < MB_SIZE; y++) {
763
        for(x = 0; x < MB_SIZE; x++) {
764
            d = a->data[0][x + y*a->linesize[0]] - b->data[0][x + y*b->linesize[0]];
765
            ret += d*d;
766
        }
767
    }
768
 
769
    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
770
        for(p = 1; p <= 2; p++) {
771
            for(y = 0; y < MB_SIZE/2; y++) {
772
                for(x = 0; x < MB_SIZE/2; x++) {
773
                    d = a->data[p][x + y*a->linesize[p]] - b->data[p][x + y*b->linesize[p]];
774
                    ret += d*d;
775
                }
776
            }
777
        }
778
    }
779
 
780
    return ret;
781
}
782
 
783
// return the possibly adjusted size of the codebook
784
#define CERTAIN(x) ((x)!=ENC_UNCERTAIN)
785
static int quantize(CinepakEncContext *s, int h, AVPicture *pict,
786
                    int v1mode, strip_info *info,
787
                    mb_encoding encoding)
788
{
789
    int x, y, i, j, k, x2, y2, x3, y3, plane, shift, mbn;
790
    int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
791
    int *codebook = v1mode ? info->v1_codebook : info->v4_codebook;
792
    int size = v1mode ? info->v1_size : info->v4_size;
793
    int64_t total_error = 0;
794
    uint8_t vq_pict_buf[(MB_AREA*3)/2];
795
    AVPicture sub_pict, vq_pict;
796
 
797
    for(mbn = i = y = 0; y < h; y += MB_SIZE) {
798
        for(x = 0; x < s->w; x += MB_SIZE, ++mbn) {
799
            int *base;
800
 
801
            if(CERTAIN(encoding)) {
802
// use for the training only the blocks known to be to be encoded [sic:-]
803
               if(s->mb[mbn].best_encoding != encoding) continue;
804
            }
805
 
806
            base = s->codebook_input + i*entry_size;
807
            if(v1mode) {
808
                //subsample
809
                for(j = y2 = 0; y2 < entry_size; y2 += 2) {
810
                    for(x2 = 0; x2 < 4; x2 += 2, j++) {
811
                        plane = y2 < 4 ? 0 : 1 + (x2 >> 1);
812
                        shift = y2 < 4 ? 0 : 1;
813
                        x3 = shift ? 0 : x2;
814
                        y3 = shift ? 0 : y2;
815
                        base[j] = (pict->data[plane][((x+x3) >> shift) +      ((y+y3) >> shift)      * pict->linesize[plane]] +
816
                                   pict->data[plane][((x+x3) >> shift) + 1 +  ((y+y3) >> shift)      * pict->linesize[plane]] +
817
                                   pict->data[plane][((x+x3) >> shift) +     (((y+y3) >> shift) + 1) * pict->linesize[plane]] +
818
                                   pict->data[plane][((x+x3) >> shift) + 1 + (((y+y3) >> shift) + 1) * pict->linesize[plane]]) >> 2;
819
                    }
820
                }
821
            } else {
822
                //copy
823
                for(j = y2 = 0; y2 < MB_SIZE; y2 += 2) {
824
                    for(x2 = 0; x2 < MB_SIZE; x2 += 2) {
825
                        for(k = 0; k < entry_size; k++, j++) {
826
                            plane = k >= 4 ? k - 3 : 0;
827
 
828
                            if(k >= 4) {
829
                                x3 = (x+x2) >> 1;
830
                                y3 = (y+y2) >> 1;
831
                            } else {
832
                                x3 = x + x2 + (k & 1);
833
                                y3 = y + y2 + (k >> 1);
834
                            }
835
 
836
                            base[j] = pict->data[plane][x3 + y3*pict->linesize[plane]];
837
                        }
838
                    }
839
                }
840
            }
841
            i += v1mode ? 1 : 4;
842
        }
843
    }
844
//    if(i < mbn*(v1mode ? 1 : 4)) {
845
//        av_log(s->avctx, AV_LOG_INFO, "reducing training set for %s from %i to %i (encoding %i)\n", v1mode?"v1":"v4", mbn*(v1mode ? 1 : 4), i, encoding);
846
//    }
847
 
848
    if(i == 0) // empty training set, nothing to do
849
        return 0;
850
    if(i < size) {
851
        //av_log(s->avctx, (CERTAIN(encoding) ? AV_LOG_ERROR : AV_LOG_INFO), "WOULD WASTE: %s cbsize %i bigger than training set size %i (encoding %i)\n", v1mode?"v1":"v4", size, i, encoding);
852
        size = i;
853
    }
854
 
855
    avpriv_init_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
856
    avpriv_do_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
857
 
858
    //setup vq_pict, which contains a single MB
859
    vq_pict.data[0] = vq_pict_buf;
860
    vq_pict.linesize[0] = MB_SIZE;
861
    vq_pict.data[1] = &vq_pict_buf[MB_AREA];
862
    vq_pict.data[2] = vq_pict.data[1] + (MB_AREA >> 2);
863
    vq_pict.linesize[1] = vq_pict.linesize[2] = MB_SIZE >> 1;
864
 
865
    //copy indices
866
    for(i = j = y = 0; y < h; y += MB_SIZE) {
867
        for(x = 0; x < s->w; x += MB_SIZE, j++) {
868
            mb_info *mb = &s->mb[j];
869
// skip uninteresting blocks if we know their preferred encoding
870
            if(CERTAIN(encoding) && mb->best_encoding != encoding)
871
                continue;
872
 
873
            //point sub_pict to current MB
874
            get_sub_picture(s, x, y, pict, &sub_pict);
875
 
876
            if(v1mode) {
877
                mb->v1_vector = s->codebook_closest[i];
878
 
879
                //fill in vq_pict with V1 data
880
                decode_v1_vector(s, &vq_pict, mb->v1_vector, info);
881
 
882
                mb->v1_error = compute_mb_distortion(s, &sub_pict, &vq_pict);
883
                total_error += mb->v1_error;
884
            } else {
885
                for(k = 0; k < 4; k++)
886
                    mb->v4_vector[k] = s->codebook_closest[i+k];
887
 
888
                //fill in vq_pict with V4 data
889
                decode_v4_vector(s, &vq_pict, mb->v4_vector, info);
890
 
891
                mb->v4_error = compute_mb_distortion(s, &sub_pict, &vq_pict);
892
                total_error += mb->v4_error;
893
            }
894
            i += v1mode ? 1 : 4;
895
        }
896
    }
897
// check that we did it right in the beginning of the function
898
    av_assert0(i >= size); // training set is no smaller than the codebook
899
 
900
    //av_log(s->avctx, AV_LOG_INFO, "isv1 %i size= %i i= %i error %"PRId64"\n", v1mode, size, i, total_error);
901
 
902
    return size;
903
}
904
 
905
/*
 * Fill in skip_error for every macroblock of a strip.
 *
 * For each MB_SIZE x MB_SIZE block of the strip (h rows, s->w columns) the
 * distortion between the block in last_pict and the same block in pict is
 * stored in s->mb[].skip_error, in raster order.
 * The info argument is not used here.
 */
static void calculate_skip_errors(CinepakEncContext *s, int h, AVPicture *last_pict, AVPicture *pict, strip_info *info)
{
    AVPicture prev_mb, cur_mb;
    int mb_x, mb_y;
    int mb_index = 0;

    for (mb_y = 0; mb_y < h; mb_y += MB_SIZE) {
        for (mb_x = 0; mb_x < s->w; mb_x += MB_SIZE) {
            mb_info *mb = &s->mb[mb_index++];

            get_sub_picture(s, mb_x, mb_y, last_pict, &prev_mb);
            get_sub_picture(s, mb_x, mb_y, pict,      &cur_mb);

            mb->skip_error = compute_mb_distortion(s, &prev_mb, &cur_mb);
        }
    }
}
919
 
920
/*
 * Write the strip header into buf.
 *
 * Layout as written here (all multi-byte fields big endian):
 *   byte  0      strip id: 0x10 for keyframes, 0x11 otherwise
 *   bytes 1-3    strip_size + STRIP_HEADER_SIZE
 *   bytes 4-5    0
 *   bytes 6-7    0
 *   bytes 8-9    h
 *   bytes 10-11  s->w
 *
 * The y argument is not used: the coordinates are written relative to the
 * strip (0 and h), which per the original author works as well as the
 * absolute values y and y+h.
 *
 * Original author's note: strips are exclusively intra coded; keyframes
 * differ only in that ENC_SKIP is disallowed on them. The id logic used
 * to be inverted (0x11 for keyframes).
 */
static void write_strip_header(CinepakEncContext *s, int y, int h, int keyframe, unsigned char *buf, int strip_size)
{
    const int total_size = strip_size + STRIP_HEADER_SIZE;

    if (keyframe)
        buf[0] = 0x10;
    else
        buf[0] = 0x11;

    AV_WB24(buf +  1, total_size);
    AV_WB16(buf +  4, 0);
    AV_WB16(buf +  6, 0);
    AV_WB16(buf +  8, h);
    AV_WB16(buf + 10, s->w);
}
937
 
938
/*
 * Rate/distortion search for a single strip.
 *
 * Tries growing V1/V4 codebook sizes and every strip mode, keeps the
 * candidate with the lowest score and leaves its encoded form (header
 * included) in buf.
 *
 * s            - encoder context (s->strip_buf and s->mb[] are work areas)
 * y            - vertical position of the strip, forwarded to the header writer
 * h            - strip height in pixels
 * keyframe     - non-zero for intra frames: MODE_MC is not tried and no skip
 *                errors are computed
 * last_pict    - previous frame, positioned at this strip
 * pict         - current frame, positioned at this strip
 * scratch_pict - picture handed to encode_mode() for the chosen candidate
 * buf          - receives the encoded strip
 * best_score   - receives the score of the chosen candidate
 * best_serr    - (CINEPAK_REPORT_SERR only) receives its error figure
 *
 * Returns the number of bytes written to buf, STRIP_HEADER_SIZE included.
 */
static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture *last_pict, AVPicture *pict, AVPicture *scratch_pict, unsigned char *buf, int64_t *best_score
#ifdef CINEPAK_REPORT_SERR
, int64_t *best_serr
#endif
)
{
    int64_t score = 0;
#ifdef CINEPAK_REPORT_SERR
    int64_t serr;
#endif
    int best_size = 0;
    strip_info info;
// for codebook optimization:
    int v1enough, v1_size, v4enough, v4_size;
    int new_v1_size, new_v4_size;
    int v1shrunk, v4shrunk;
#ifdef CINEPAKENC_DEBUG
// bookkeeping for the statistics at the end of the function; the debug code
// used mb_count and best_info without declaring them in this function
    const int mb_count = s->w*h/MB_AREA;
    CinepakMode best_mode = MODE_V1_ONLY;
    int x;
#endif

    if(!keyframe)
        calculate_skip_errors(s, h, last_pict, pict, &info);

    //try some powers of 4 for the size of the codebooks
    //constraint the v4 codebook to be no bigger than v1 one,
    //(and no less than v1_size/4)
    //thus making v1 preferable and possibly losing small details? should be ok
#define SMALLEST_CODEBOOK 1
    for(v1enough = 0, v1_size = SMALLEST_CODEBOOK; v1_size <= CODEBOOK_MAX && !v1enough; v1_size <<= 2) {
        for(v4enough = 0, v4_size = 0; v4_size <= v1_size && !v4enough; v4_size = v4_size ? v4_size << 2 : v1_size >= SMALLEST_CODEBOOK << 2 ? v1_size >> 2 : SMALLEST_CODEBOOK) {
            //try all modes
            for(CinepakMode mode = 0; mode < MODE_COUNT; mode++) {
                //don't allow MODE_MC in intra frames
                if(keyframe && mode == MODE_MC)
                    continue;

                if(mode == MODE_V1_ONLY) {
                    info.v1_size = v1_size;
// the size may shrink even before optimizations if the input is short:
                    info.v1_size = quantize(s, h, pict, 1, &info, ENC_UNCERTAIN);
                    if(info.v1_size < v1_size)
// too few eligible blocks, no sense in trying bigger sizes
                        v1enough = 1;

                    info.v4_size = 0;
                } else { // mode != MODE_V1_ONLY
                    // if v4 codebook is empty then only allow V1-only mode
                    if(!v4_size)
                        continue;

                    if(mode == MODE_V1_V4) {
                        info.v4_size = v4_size;
                        info.v4_size = quantize(s, h, pict, 0, &info, ENC_UNCERTAIN);
                        if(info.v4_size < v4_size)
// too few eligible blocks, no sense in trying bigger sizes
                            v4enough = 1;
                    }
                }

                info.mode = mode;
// choose the best encoding per block, based on current experience
                score = calculate_mode_score(s, h, &info, 0,
                                             &v1shrunk, &v4shrunk
#ifdef CINEPAK_REPORT_SERR
, &serr
#endif
);

                if(mode != MODE_V1_ONLY){
                    int extra_iterations_limit = s->max_extra_cb_iterations;
// recompute the codebooks, omitting the extra blocks
// we assume we _may_ come here with more blocks to encode than before
                    info.v1_size = v1_size;
                    new_v1_size = quantize(s, h, pict, 1, &info, ENC_V1);
                    if(new_v1_size < info.v1_size)
                        info.v1_size = new_v1_size;
// we assume we _may_ come here with more blocks to encode than before
                    info.v4_size = v4_size;
                    new_v4_size = quantize(s, h, pict, 0, &info, ENC_V4);
                    if(new_v4_size < info.v4_size)
                        info.v4_size = new_v4_size;
// calculate the resulting score
// (do not move blocks to codebook encodings now, as some blocks may have
// got bigger errors despite a smaller training set - but we do not
// ever grow the training sets back)
                    for(;;) {
                        score = calculate_mode_score(s, h, &info, 1,
                                                     &v1shrunk, &v4shrunk
#ifdef CINEPAK_REPORT_SERR
, &serr
#endif
);
// do we have a reason to reiterate? if so, have we reached the limit?
                        if((!v1shrunk && !v4shrunk) || !extra_iterations_limit--) break;
// recompute the codebooks, omitting the extra blocks
                        if(v1shrunk) {
                            info.v1_size = v1_size;
                            new_v1_size = quantize(s, h, pict, 1, &info, ENC_V1);
                            if(new_v1_size < info.v1_size)
                                info.v1_size = new_v1_size;
                        }
                        if(v4shrunk) {
                            info.v4_size = v4_size;
                            new_v4_size = quantize(s, h, pict, 0, &info, ENC_V4);
                            if(new_v4_size < info.v4_size)
                                info.v4_size = new_v4_size;
                        }
                    }
                }

                //av_log(s->avctx, AV_LOG_INFO, "%3i %3i score = %"PRId64"\n", v1_size, v4_size, score);

// best_size == 0 means that no candidate has been encoded yet
                if(best_size == 0 || score < *best_score) {

                    *best_score = score;
#ifdef CINEPAK_REPORT_SERR
                    *best_serr = serr;
#endif
                    best_size = encode_mode(s, h, scratch_pict, last_pict, &info, s->strip_buf + STRIP_HEADER_SIZE);

#ifdef CINEPAK_REPORT_SERR
                    av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18"PRId64" %i B\n", mode, v1_size, v4_size, serr, best_size);
#endif

#ifdef CINEPAKENC_DEBUG
                    //remember the winning mode and save MB encoding choices
                    best_mode = mode;
                    memcpy(s->best_mb, s->mb, mb_count*sizeof(mb_info));
#endif

                    write_strip_header(s, y, h, keyframe, s->strip_buf, best_size);

                }
            }
        }
    }

#ifdef CINEPAKENC_DEBUG
    //gather stats. this will only work properly if MAX_STRIPS == 1
    if(best_mode == MODE_V1_ONLY) {
        s->num_v1_mode++;
        s->num_v1_encs += mb_count;
    } else {
        if(best_mode == MODE_V1_V4)
            s->num_v4_mode++;
        else
            s->num_mc_mode++;

        for(x = 0; x < mb_count; x++)
            if(s->best_mb[x].best_encoding == ENC_V1)
                s->num_v1_encs++;
            else if(s->best_mb[x].best_encoding == ENC_V4)
                s->num_v4_encs++;
            else
                s->num_skips++;
    }
#endif

// the header written by write_strip_header() precedes the payload
    best_size += STRIP_HEADER_SIZE;
    memcpy(buf, s->strip_buf, best_size);

    return best_size;
}
1109
 
1110
/*
 * Write the frame (CVID) header into buf.
 *
 * Layout as written here (all multi-byte fields big endian):
 *   byte  0    0 for a keyframe, 1 otherwise
 *   bytes 1-3  data_size + CVID_HEADER_SIZE
 *   bytes 4-5  s->w
 *   bytes 6-7  s->h
 *   bytes 8-9  num_strips
 *
 * Returns CVID_HEADER_SIZE.
 */
static int write_cvid_header(CinepakEncContext *s, unsigned char *buf, int num_strips, int data_size, int isakeyframe)
{
    const int frame_size = data_size + CVID_HEADER_SIZE;

    buf[0] = !isakeyframe;
    AV_WB24(buf + 1, frame_size);
    AV_WB16(buf + 4, s->w);
    AV_WB16(buf + 6, s->h);
    AV_WB16(buf + 8, num_strips);

    return CVID_HEADER_SIZE;
}
1120
 
1121
/*
 * Rate/distortion search for a whole frame.
 *
 * For RGB24 input the frame is first converted into s->input_frame
 * (one "Y" per pixel, one "U" and one "V" per 2x2 pixels). Then every strip
 * count from s->min_strips to s->max_strips is tried (never more strips
 * than rows of macroblocks), each strip being encoded by rd_strip(). The
 * variant with the lowest total score is copied to buf and its
 * reconstruction is left in s->best_frame. Finally s->min_strips and
 * s->max_strips are adapted around the winner for the next frame.
 *
 * s           - encoder context
 * frame       - input frame
 * isakeyframe - non-zero to encode an intra frame
 * buf         - receives the encoded frame
 * buf_size    - size of buf (currently not checked here)
 *
 * Returns the size of the encoded frame in bytes (frame header included),
 * or the negative value returned by rd_strip().
 * Aborts through av_assert0() if no strip count could be tried.
 */
static int rd_frame(CinepakEncContext *s, const AVFrame *frame, int isakeyframe, unsigned char *buf, int buf_size)
{
    int num_strips, strip, i, y, nexty, size, temp_size;
    AVPicture last_pict, pict, scratch_pict;
    int64_t best_score = 0, score, score_temp;
#ifdef CINEPAK_REPORT_SERR
    int64_t best_serr = 0, serr, serr_temp;
#endif

    int best_nstrips = -1, best_size = -1; // mark as uninitialized

    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
        int x;
// build a copy of the given frame in the correct colorspace
        for(y = 0; y < s->h; y += 2) {
            for(x = 0; x < s->w; x += 2) {
                uint8_t *ir[2]; int32_t r, g, b, rr, gg, bb;
                ir[0] = ((AVPicture*)frame)->data[0] + x*3 + y*((AVPicture*)frame)->linesize[0];
                ir[1] = ir[0] + ((AVPicture*)frame)->linesize[0];
                get_sub_picture(s, x, y, (AVPicture*)s->input_frame, &scratch_pict);
                r = g = b = 0;
                for(i=0; i<4; ++i) {
                    int i1, i2;
                    i1 = (i&1); i2 = (i>=2);
                    rr = ir[i2][i1*3+0];
                    gg = ir[i2][i1*3+1];
                    bb = ir[i2][i1*3+2];
                    r += rr; g += gg; b += bb;
// using fixed point arithmetic for portable repeatability, scaling by 2^23
// "Y"
//                    rr = 0.2857*rr + 0.5714*gg + 0.1429*bb;
                    rr = (2396625*rr + 4793251*gg + 1198732*bb) >> 23;
                    if(      rr <   0) rr =   0;
                    else if (rr > 255) rr = 255;
                    scratch_pict.data[0][i1 + i2*scratch_pict.linesize[0]] = rr;
                }
// let us scale down as late as possible
//                r /= 4; g /= 4; b /= 4;
// "U"
//                rr = -0.1429*r - 0.2857*g + 0.4286*b;
                rr = (-299683*r - 599156*g + 898839*b) >> 23;
                if(      rr < -128) rr = -128;
                else if (rr >  127) rr =  127;
                scratch_pict.data[1][0] = rr + 128; // quantize needs unsigned
// "V"
//                rr = 0.3571*r - 0.2857*g - 0.0714*b;
                rr = (748893*r - 599156*g - 149737*b) >> 23;
                if(      rr < -128) rr = -128;
                else if (rr >  127) rr =  127;
                scratch_pict.data[2][0] = rr + 128; // quantize needs unsigned
            }
        }
    }

    //would be nice but quite certainly incompatible with vintage players:
    // support encoding zero strips (meaning skip the whole frame)
    for(num_strips = s->min_strips; num_strips <= s->max_strips && num_strips <= s->h / MB_SIZE; num_strips++) {
        score = 0;
        size = 0;
#ifdef CINEPAK_REPORT_SERR
        serr = 0;
#endif

        for(y = 0, strip = 1; y < s->h; strip++, y = nexty) {
            int strip_height;

            nexty = strip * s->h / num_strips; // <= s->h
            //make nexty the next multiple of 4 if not already there
            if(nexty & 3)
                nexty += 4 - (nexty & 3);

            strip_height = nexty - y;
            if(strip_height <= 0) { // can this ever happen?
                av_log(s->avctx, AV_LOG_INFO, "skipping zero height strip %i of %i\n", strip, num_strips);
                continue;
            }

            if(s->pix_fmt == AV_PIX_FMT_RGB24)
                get_sub_picture(s, 0, y, (AVPicture*)s->input_frame,    &pict);
            else
                get_sub_picture(s, 0, y, (AVPicture*)frame,              &pict);
            get_sub_picture(s, 0, y, (AVPicture*)s->last_frame,    &last_pict);
            get_sub_picture(s, 0, y, (AVPicture*)s->scratch_frame, &scratch_pict);

// the strips are laid out back to back behind the room left for the frame header
            if((temp_size = rd_strip(s, y, strip_height, isakeyframe, &last_pict, &pict, &scratch_pict, s->frame_buf + size + CVID_HEADER_SIZE, &score_temp
#ifdef CINEPAK_REPORT_SERR
, &serr_temp
#endif
)) < 0)
                return temp_size;

            score += score_temp;
#ifdef CINEPAK_REPORT_SERR
            serr += serr_temp;
#endif
            size += temp_size;
        }

// best_nstrips < 0 means that nothing has been chosen yet; a score of 0
// must not be used for that purpose as it is a legitimate (perfect) result
// which would otherwise be replaced by any later, worse candidate
        if(best_nstrips < 0 || score < best_score) {
            best_score = score;
#ifdef CINEPAK_REPORT_SERR
            best_serr = serr;
#endif
            best_size = size + write_cvid_header(s, s->frame_buf, num_strips, size, isakeyframe);
#ifdef CINEPAK_REPORT_SERR
            av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12"PRId64", %i B\n", num_strips, serr, best_size);
#endif

// keep the reconstruction of the winner, reuse the other frame as scratch
            FFSWAP(AVFrame *, s->best_frame, s->scratch_frame);
            memcpy(buf, s->frame_buf, best_size);
            best_nstrips = num_strips;
        }
// avoid trying too many strip numbers without a real reason
// (this makes the processing of the very first frame faster)
        if(num_strips - best_nstrips > 4)
            break;
    }

    av_assert0(best_nstrips >= 0 && best_size >= 0);

// let the number of strips slowly adapt to the changes in the contents,
// compared to full bruteforcing every time this will occasionally lead
// to some r/d performance loss but makes encoding up to several times faster
    if(!s->strip_number_delta_range) {
        if(best_nstrips == s->max_strips) { // let us try to step up
            s->max_strips = best_nstrips + 1;
            if(s->max_strips >= s->max_max_strips)
                s->max_strips = s->max_max_strips;
        } else { // try to step down
            s->max_strips = best_nstrips;
        }
        s->min_strips = s->max_strips - 1;
        if(s->min_strips < s->min_min_strips)
            s->min_strips = s->min_min_strips;
    } else {
        s->max_strips = best_nstrips + s->strip_number_delta_range;
        if(s->max_strips >= s->max_max_strips)
            s->max_strips = s->max_max_strips;
        s->min_strips = best_nstrips - s->strip_number_delta_range;
        if(s->min_strips < s->min_min_strips)
            s->min_strips = s->min_min_strips;
    }

    return best_size;
}
1269
 
1270
static int cinepak_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
1271
                                const AVFrame *frame, int *got_packet)
1272
{
1273
    CinepakEncContext *s = avctx->priv_data;
1274
    int ret;
1275
 
1276
    s->lambda = frame->quality ? frame->quality - 1 : 2 * FF_LAMBDA_SCALE;
1277
 
1278
    if ((ret = ff_alloc_packet2(avctx, pkt, s->frame_buf_size, 0)) < 0)
1279
        return ret;
1280
    ret = rd_frame(s, frame, (s->curframe == 0), pkt->data, s->frame_buf_size);
1281
    pkt->size = ret;
1282
    if (s->curframe == 0)
1283
        pkt->flags |= AV_PKT_FLAG_KEY;
1284
    *got_packet = 1;
1285
 
1286
    FFSWAP(AVFrame *, s->last_frame, s->best_frame);
1287
 
1288
    if (++s->curframe >= s->keyint)
1289
        s->curframe = 0;
1290
 
1291
    return 0;
1292
}
1293
 
1294
/*
 * Release everything owned by the encoder context (the codec's close callback).
 *
 * Frees the work frames, the codebook and output buffers, the macroblock
 * table and the picture buffers (four of them for RGB24 input, three
 * otherwise). With CINEPAKENC_DEBUG the collected strip coding statistics
 * are logged as well.
 *
 * Always returns 0.
 */
static av_cold int cinepak_encode_end(AVCodecContext *avctx)
{
    CinepakEncContext *s = avctx->priv_data;
    const int is_rgb     = avctx->pix_fmt == AV_PIX_FMT_RGB24;
    const int nb_bufs    = is_rgb ? 4 : 3;
    int i;

    // frames
    av_frame_free(&s->last_frame);
    av_frame_free(&s->best_frame);
    av_frame_free(&s->scratch_frame);
    // the converted input copy is only released for RGB24 input
    if (is_rgb)
        av_frame_free(&s->input_frame);

    // work buffers
    av_freep(&s->codebook_input);
    av_freep(&s->codebook_closest);
    av_freep(&s->strip_buf);
    av_freep(&s->frame_buf);
    av_freep(&s->mb);
#ifdef CINEPAKENC_DEBUG
    av_freep(&s->best_mb);
#endif

    // picture buffers
    for (i = 0; i < nb_bufs; i++)
        av_freep(&s->pict_bufs[i]);

#ifdef CINEPAKENC_DEBUG
    av_log(avctx, AV_LOG_INFO, "strip coding stats: %i V1 mode, %i V4 mode, %i MC mode (%i V1 encs, %i V4 encs, %i skips)\n",
        s->num_v1_mode, s->num_v4_mode, s->num_mc_mode, s->num_v1_encs, s->num_v4_encs, s->num_skips);
#endif

    return 0;
}
1323
 
1324
AVCodec ff_cinepak_encoder = {
1325
    .name           = "cinepak",
1326
    .type           = AVMEDIA_TYPE_VIDEO,
1327
    .id             = AV_CODEC_ID_CINEPAK,
1328
    .priv_data_size = sizeof(CinepakEncContext),
1329
    .init           = cinepak_encode_init,
1330
    .encode2        = cinepak_encode_frame,
1331
    .close          = cinepak_encode_end,
1332
    .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_RGB24, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
1333
    .long_name      = NULL_IF_CONFIG_SMALL("Cinepak / CVID"),
1334
    .priv_class     = &cinepak_class,
1335
};