Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Cinepak encoder (c) 2011 Tomas Härdin
  3.  * http://titan.codemill.se/~tomhar/cinepakenc.patch
  4.  *
  5.  * Fixes and improvements, vintage decoders compatibility
  6.  *  (c) 2013, 2014 Rl, Aetey Global Technologies AB
  7.  
  8. Permission is hereby granted, free of charge, to any person obtaining a
  9. copy of this software and associated documentation files (the "Software"),
  10. to deal in the Software without restriction, including without limitation
  11. the rights to use, copy, modify, merge, publish, distribute, sublicense,
  12. and/or sell copies of the Software, and to permit persons to whom the
  13. Software is furnished to do so, subject to the following conditions:
  14.  
  15. The above copyright notice and this permission notice shall be included
  16. in all copies or substantial portions of the Software.
  17.  
  18. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  21. THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  22. OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  23. ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  24. OTHER DEALINGS IN THE SOFTWARE.
  25.  
  26.  * TODO:
  27.  * - optimize: color space conversion, ...
  28.  * - implement options to set the min/max number of strips?
  29.  * MAYBE:
  30.  * - "optimally" split the frame into several non-regular areas
  31.  *   using a separate codebook pair for each area and approximating
  32.  *   the area by several rectangular strips (generally not full width ones)
  33.  *   (use quadtree splitting? a simple fixed-granularity grid?)
  34.  *
  35.  *
  36.  * version 2014-01-23 Rl
  37.  * - added option handling for flexibility
  38.  *
  39.  * version 2014-01-21 Rl
  40.  * - believe it or not, now we get even smaller files, with better quality
  41.  *   (which means I missed an optimization earlier :)
  42.  *
  43.  * version 2014-01-20 Rl
  44.  * - made the encoder compatible with vintage decoders
  45.  *   and added some yet unused code for possible future
  46.  *   incremental codebook updates
  47.  * - fixed a small memory leak
  48.  *
  49.  * version 2013-04-28 Rl
  50.  * - bugfixed codebook optimization logic
  51.  *
  52.  * version 2013-02-14 Rl
  53.  * "Valentine's Day" version:
  54.  * - made strip division more robust
  55.  * - minimized bruteforcing the number of strips,
  56.  *   (costs some R/D but speeds up compression a lot), the heuristic
  57.  *   assumption is that score as a function of the number of strips has
  58.  *   one wide minimum which moves slowly, of course not fully true
  59.  * - simplified codebook generation,
  60.  *   the old code was meant for other optimizations than we actually do
  61.  * - optimized the codebook generation / error estimation for MODE_MC
  62.  *
  63.  * version 2013-02-12 Rl
  64.  * - separated codebook training sets, avoided the transfer of wasted bytes,
  65.  *   which yields both better quality and smaller files
  66.  * - now using the correct colorspace (TODO: move conversion to libswscale)
  67.  *
  68.  * version 2013-02-08 Rl
  69.  * - fixes/optimization in multistrip encoding and codebook size choice,
  70.  *   quality/bitrate is now better than that of the binary proprietary encoder
  71.  */
  72.  
  73. #include "libavutil/intreadwrite.h"
  74. #include "avcodec.h"
  75. #include "libavutil/lfg.h"
  76. #include "elbg.h"
  77. #include "internal.h"
  78.  
  79. #include "libavutil/avassert.h"
  80. #include "libavutil/opt.h"
  81.  
// sizes in bytes of the fixed headers; they are summed into the strip and
// frame buffer sizes computed in cinepak_encode_init()
#define CVID_HEADER_SIZE 10
#define STRIP_HEADER_SIZE 12
#define CHUNK_HEADER_SIZE 4

#define MB_SIZE 4           //4x4 MBs
#define MB_AREA (MB_SIZE*MB_SIZE)

#define VECTOR_MAX 6        //six or four entries per vector depending on format
#define CODEBOOK_MAX 256    //size of a codebook

#define MAX_STRIPS  32      //Note: having fewer choices regarding the number of strips speeds up encoding (obviously)
#define MIN_STRIPS  1       //Note: having more strips speeds up encoding the frame (this is less obvious)
// MAX_STRIPS limits the maximum quality you can reach
//            when you want high quality on high resolutions,
// MIN_STRIPS limits the minimum efficiently encodable bit rate
//            on low resolutions
// the numbers are only used for brute force optimization for the first frame,
// for the following frames they are adaptively readjusted
// NOTE the decoder in ffmpeg has its own arbitrary limitation on the number
// of strips, currently 32
  102.  
// Strip-level coding mode; selects which per-MB encodings
// calculate_mode_score() and encode_mode() consider for a strip.
typedef enum {
    MODE_V1_ONLY = 0,   // every MB is coded as one V1 index (one byte per MB)
    MODE_V1_V4,         // each MB is coded as V1 or V4 (9 or 33 bits per MB)
    MODE_MC,            // each MB is skipped or coded as V1 or V4 (1, 10 or 34 bits per MB)

    MODE_COUNT,         // number of modes, not a mode itself
} CinepakMode;
  110.  
// Encoding chosen for a single 4x4 macroblock.
typedef enum {
    ENC_V1,     // one index into the V1 codebook
    ENC_V4,     // four indices into the V4 codebook
    ENC_SKIP,   // block is copied from the last frame (only chosen in MODE_MC)

    ENC_UNCERTAIN   // NOTE(review): not referenced in the visible code; presumably "not decided yet" - confirm
} mb_encoding;
  118.  
// Per-macroblock rate/distortion state: the candidate codebook indices,
// the error of every candidate encoding and the encoding finally selected.
typedef struct {
    int v1_vector;                  //index into v1 codebook
    int v1_error;                   //error when using V1 encoding
    int v4_vector[4];               //indices into v4 codebooks
    int v4_error;                   //error when using V4 encoding
    int skip_error;                 //error when block is skipped (aka copied from last frame)
    mb_encoding best_encoding;      //last result from calculate_mode_score()
} mb_info;
  127.  
// Codebooks and coding mode of one strip.
// Codebook entries are stored back to back, entry_size ints per entry
// (6 for AV_PIX_FMT_RGB24 input, 4 otherwise), so entry i starts at i*entry_size.
typedef struct {
    int v1_codebook[CODEBOOK_MAX*VECTOR_MAX];   // V1 codebook entries
    int v4_codebook[CODEBOOK_MAX*VECTOR_MAX];   // V4 codebook entries
    int v1_size;                                // number of entries used in v1_codebook
    int v4_size;                                // number of entries used in v4_codebook
    CinepakMode mode;                           // coding mode of the strip
} strip_info;
  135.  
// Private encoder state, reached through avctx->priv_data.
typedef struct {
    const AVClass *class;           // NOTE(review): presumably set to &cinepak_class by the codec registration - confirm
    AVCodecContext *avctx;          // owning codec context, set in cinepak_encode_init()
    unsigned char *pict_bufs[4], *strip_buf, *frame_buf;   // plane storage for the four frames below; strip and frame output buffers
    AVFrame *last_frame;            // backed by pict_bufs[0]
    AVFrame *best_frame;            // backed by pict_bufs[1]
    AVFrame *scratch_frame;         // backed by pict_bufs[2]
    AVFrame *input_frame;           // backed by pict_bufs[3], only allocated for AV_PIX_FMT_RGB24 input
    enum AVPixelFormat pix_fmt;     // copy of avctx->pix_fmt
    int w, h;                       // copy of avctx->width / avctx->height
    int frame_buf_size;             // allocated size of frame_buf in bytes
    int curframe, keyint;           // curframe starts at 0, keyint is taken from avctx->keyint_min
    AVLFG randctx;                  // random generator state, seeded with 1 in init
    uint64_t lambda;                // weight of the bit cost in calculate_mode_score()
    int *codebook_input;            // allocated in init; used outside the code visible here
    int *codebook_closest;          // allocated in init; used outside the code visible here
    mb_info *mb;                                //MB RD state
    int min_strips;          //the current limit
    int max_strips;          //the current limit
#ifdef CINEPAKENC_DEBUG
    mb_info *best_mb;                           //TODO: remove. only used for printing stats
    int num_v1_mode, num_v4_mode, num_mc_mode;
    int num_v1_encs, num_v4_encs, num_skips;
#endif
// options
    int max_extra_cb_iterations;    // option "max_extra_cb_iterations"
    int skip_empty_cb;              // option "skip_empty_cb": when set, empty codebook chunks are not written
    int min_min_strips;             // option "min_strips": initial value of min_strips
    int max_max_strips;             // option "max_strips": initial value of max_strips, also sizes frame_buf
    int strip_number_delta_range;   // option "strip_number_adaptivity"
} CinepakEncContext;
  167.  
// User-settable encoder options. Each entry stores its value in the
// CinepakEncContext field named by OFFSET(); VE flags every option as a
// video encoding parameter.
#define OFFSET(x) offsetof(CinepakEncContext, x)
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
static const AVOption options[] = {
    { "max_extra_cb_iterations", "Max extra codebook recalculation passes, more is better and slower", OFFSET(max_extra_cb_iterations), AV_OPT_TYPE_INT, { .i64 = 2 }, 0, INT_MAX, VE },
    { "skip_empty_cb", "Avoid wasting bytes, ignore vintage MacOS decoder", OFFSET(skip_empty_cb), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
    { "max_strips", "Limit strips/frame, vintage compatible is 1..3, otherwise the more the better", OFFSET(max_max_strips), AV_OPT_TYPE_INT, { .i64 = 3 }, MIN_STRIPS, MAX_STRIPS, VE },
    { "min_strips", "Enforce min strips/frame, more is worse and faster, must be <= max_strips", OFFSET(min_min_strips), AV_OPT_TYPE_INT, { .i64 = MIN_STRIPS }, MIN_STRIPS, MAX_STRIPS, VE },
    { "strip_number_adaptivity", "How fast the strip number adapts, more is slightly better, much slower", OFFSET(strip_number_delta_range), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_STRIPS-MIN_STRIPS, VE },
    { NULL },   // terminator
};
  178.  
// AVClass describing the encoder's private context; ties the options
// table above to the "cinepak" class name.
static const AVClass cinepak_class = {
    .class_name = "cinepak",
    .item_name  = av_default_item_name,
    .option     = options,
    .version    = LIBAVUTIL_VERSION_INT,
};
  185.  
  186. static av_cold int cinepak_encode_init(AVCodecContext *avctx)
  187. {
  188.     CinepakEncContext *s = avctx->priv_data;
  189.     int x, mb_count, strip_buf_size, frame_buf_size;
  190.  
  191.     if (avctx->width & 3 || avctx->height & 3) {
  192.         av_log(avctx, AV_LOG_ERROR, "width and height must be multiples of four (got %ix%i)\n",
  193.                 avctx->width, avctx->height);
  194.         return AVERROR(EINVAL);
  195.     }
  196.  
  197.     if (s->min_min_strips > s->max_max_strips) {
  198.         av_log(avctx, AV_LOG_ERROR, "minimal number of strips can not exceed maximal (got %i and %i)\n",
  199.                 s->min_min_strips, s->max_max_strips);
  200.         return AVERROR(EINVAL);
  201.     }
  202.  
  203.     if (!(s->last_frame = av_frame_alloc()))
  204.         return AVERROR(ENOMEM);
  205.     if (!(s->best_frame = av_frame_alloc()))
  206.         goto enomem;
  207.     if (!(s->scratch_frame = av_frame_alloc()))
  208.         goto enomem;
  209.     if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
  210.         if (!(s->input_frame = av_frame_alloc()))
  211.             goto enomem;
  212.  
  213.     if (!(s->codebook_input = av_malloc(sizeof(int) * (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
  214.         goto enomem;
  215.  
  216.     if (!(s->codebook_closest = av_malloc(sizeof(int) * (avctx->width * avctx->height) >> 2)))
  217.         goto enomem;
  218.  
  219.     for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
  220.         if(!(s->pict_bufs[x] = av_malloc((avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
  221.             goto enomem;
  222.  
  223.     mb_count = avctx->width * avctx->height / MB_AREA;
  224.  
  225.     //the largest possible chunk is 0x31 with all MBs encoded in V4 mode
  226.     //and full codebooks being replaced in INTER mode,
  227.     // which is 34 bits per MB
  228.     //and 2*256 extra flag bits per strip
  229.     strip_buf_size = STRIP_HEADER_SIZE + 3 * CHUNK_HEADER_SIZE + 2 * VECTOR_MAX * CODEBOOK_MAX + 4 * (mb_count + (mb_count + 15) / 16) + (2 * CODEBOOK_MAX)/8;
  230.  
  231.     frame_buf_size = CVID_HEADER_SIZE + s->max_max_strips * strip_buf_size;
  232.  
  233.     if (!(s->strip_buf = av_malloc(strip_buf_size)))
  234.         goto enomem;
  235.  
  236.     if (!(s->frame_buf = av_malloc(frame_buf_size)))
  237.         goto enomem;
  238.  
  239.     if (!(s->mb = av_malloc_array(mb_count, sizeof(mb_info))))
  240.         goto enomem;
  241.  
  242. #ifdef CINEPAKENC_DEBUG
  243.     if (!(s->best_mb = av_malloc_array(mb_count, sizeof(mb_info))))
  244.         goto enomem;
  245. #endif
  246.  
  247.     av_lfg_init(&s->randctx, 1);
  248.     s->avctx = avctx;
  249.     s->w = avctx->width;
  250.     s->h = avctx->height;
  251.     s->frame_buf_size = frame_buf_size;
  252.     s->curframe = 0;
  253.     s->keyint = avctx->keyint_min;
  254.     s->pix_fmt = avctx->pix_fmt;
  255.  
  256.     //set up AVFrames
  257.     s->last_frame->data[0]        = s->pict_bufs[0];
  258.     s->last_frame->linesize[0]    = s->w;
  259.     s->best_frame->data[0]        = s->pict_bufs[1];
  260.     s->best_frame->linesize[0]    = s->w;
  261.     s->scratch_frame->data[0]     = s->pict_bufs[2];
  262.     s->scratch_frame->linesize[0] = s->w;
  263.  
  264.     if (s->pix_fmt == AV_PIX_FMT_RGB24) {
  265.         s->last_frame->data[1]        = s->last_frame->data[0] + s->w * s->h;
  266.         s->last_frame->data[2]        = s->last_frame->data[1] + ((s->w * s->h) >> 2);
  267.         s->last_frame->linesize[1]    = s->last_frame->linesize[2] = s->w >> 1;
  268.  
  269.         s->best_frame->data[1]        = s->best_frame->data[0] + s->w * s->h;
  270.         s->best_frame->data[2]        = s->best_frame->data[1] + ((s->w * s->h) >> 2);
  271.         s->best_frame->linesize[1]    = s->best_frame->linesize[2] = s->w >> 1;
  272.  
  273.         s->scratch_frame->data[1]     = s->scratch_frame->data[0] + s->w * s->h;
  274.         s->scratch_frame->data[2]     = s->scratch_frame->data[1] + ((s->w * s->h) >> 2);
  275.         s->scratch_frame->linesize[1] = s->scratch_frame->linesize[2] = s->w >> 1;
  276.  
  277.         s->input_frame->data[0]       = s->pict_bufs[3];
  278.         s->input_frame->linesize[0]   = s->w;
  279.         s->input_frame->data[1]       = s->input_frame->data[0] + s->w * s->h;
  280.         s->input_frame->data[2]       = s->input_frame->data[1] + ((s->w * s->h) >> 2);
  281.         s->input_frame->linesize[1]   = s->input_frame->linesize[2] = s->w >> 1;
  282.     }
  283.  
  284.     s->min_strips = s->min_min_strips;
  285.     s->max_strips = s->max_max_strips;
  286.  
  287. #ifdef CINEPAKENC_DEBUG
  288.     s->num_v1_mode = s->num_v4_mode = s->num_mc_mode = s->num_v1_encs = s->num_v4_encs = s->num_skips = 0;
  289. #endif
  290.  
  291.     return 0;
  292.  
  293. enomem:
  294.     av_frame_free(&s->last_frame);
  295.     av_frame_free(&s->best_frame);
  296.     av_frame_free(&s->scratch_frame);
  297.     if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
  298.         av_frame_free(&s->input_frame);
  299.     av_freep(&s->codebook_input);
  300.     av_freep(&s->codebook_closest);
  301.     av_freep(&s->strip_buf);
  302.     av_freep(&s->frame_buf);
  303.     av_freep(&s->mb);
  304. #ifdef CINEPAKENC_DEBUG
  305.     av_freep(&s->best_mb);
  306. #endif
  307.  
  308.     for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
  309.         av_freep(&s->pict_bufs[x]);
  310.  
  311.     return AVERROR(ENOMEM);
  312. }
  313.  
  314. static int64_t calculate_mode_score(CinepakEncContext *s, int h, strip_info *info, int report, int *training_set_v1_shrunk, int *training_set_v4_shrunk
  315. #ifdef CINEPAK_REPORT_SERR
  316. , int64_t *serr
  317. #endif
  318. )
  319. {
  320.     //score = FF_LAMBDA_SCALE * error + lambda * bits
  321.     int x;
  322.     int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
  323.     int mb_count = s->w * h / MB_AREA;
  324.     mb_info *mb;
  325.     int64_t score1, score2, score3;
  326.     int64_t ret = s->lambda * ((info->v1_size ? CHUNK_HEADER_SIZE + info->v1_size * entry_size : 0) +
  327.                    (info->v4_size ? CHUNK_HEADER_SIZE + info->v4_size * entry_size : 0) +
  328.                    CHUNK_HEADER_SIZE) << 3;
  329.  
  330.     //av_log(s->avctx, AV_LOG_INFO, "sizes %3i %3i -> %9"PRId64" score mb_count %i", info->v1_size, info->v4_size, ret, mb_count);
  331.  
  332. #ifdef CINEPAK_REPORT_SERR
  333.     *serr = 0;
  334. #endif
  335.  
  336.     switch(info->mode) {
  337.     case MODE_V1_ONLY:
  338.         //one byte per MB
  339.         ret += s->lambda * 8 * mb_count;
  340.  
  341. // while calculating we assume all blocks are ENC_V1
  342.         for(x = 0; x < mb_count; x++) {
  343.             mb = &s->mb[x];
  344.             ret += FF_LAMBDA_SCALE * mb->v1_error;
  345. #ifdef CINEPAK_REPORT_SERR
  346.             *serr += mb->v1_error;
  347. #endif
  348. // this function is never called for report in MODE_V1_ONLY
  349. //            if(!report)
  350.             mb->best_encoding = ENC_V1;
  351.         }
  352.  
  353.         break;
  354.     case MODE_V1_V4:
  355.         //9 or 33 bits per MB
  356.         if(report) {
  357. // no moves between the corresponding training sets are allowed
  358.             *training_set_v1_shrunk = *training_set_v4_shrunk = 0;
  359.             for(x = 0; x < mb_count; x++) {
  360.                 int mberr;
  361.                 mb = &s->mb[x];
  362.                 if(mb->best_encoding == ENC_V1)
  363.                     score1 = s->lambda * 9  + FF_LAMBDA_SCALE * (mberr=mb->v1_error);
  364.                 else
  365.                     score1 = s->lambda * 33 + FF_LAMBDA_SCALE * (mberr=mb->v4_error);
  366.                 ret += score1;
  367. #ifdef CINEPAK_REPORT_SERR
  368.                 *serr += mberr;
  369. #endif
  370.             }
  371.         } else { // find best mode per block
  372.             for(x = 0; x < mb_count; x++) {
  373.                 mb = &s->mb[x];
  374.                 score1 = s->lambda * 9  + FF_LAMBDA_SCALE * mb->v1_error;
  375.                 score2 = s->lambda * 33 + FF_LAMBDA_SCALE * mb->v4_error;
  376.  
  377.                 if(score1 <= score2) {
  378.                     ret += score1;
  379. #ifdef CINEPAK_REPORT_SERR
  380.                     *serr += mb->v1_error;
  381. #endif
  382.                     mb->best_encoding = ENC_V1;
  383.                 } else {
  384.                     ret += score2;
  385. #ifdef CINEPAK_REPORT_SERR
  386.                     *serr += mb->v4_error;
  387. #endif
  388.                     mb->best_encoding = ENC_V4;
  389.                 }
  390.             }
  391.         }
  392.  
  393.         break;
  394.     case MODE_MC:
  395.         //1, 10 or 34 bits per MB
  396.         if(report) {
  397.             int v1_shrunk = 0, v4_shrunk = 0;
  398.             for(x = 0; x < mb_count; x++) {
  399.                 mb = &s->mb[x];
  400. // it is OK to move blocks to ENC_SKIP here
  401. // but not to any codebook encoding!
  402.                 score1 = s->lambda * 1  + FF_LAMBDA_SCALE * mb->skip_error;
  403.                 if(mb->best_encoding == ENC_SKIP) {
  404.                     ret += score1;
  405. #ifdef CINEPAK_REPORT_SERR
  406.                     *serr += mb->skip_error;
  407. #endif
  408.                 } else if(mb->best_encoding == ENC_V1) {
  409.                     if((score2=s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error) >= score1) {
  410.                         mb->best_encoding = ENC_SKIP;
  411.                         ++v1_shrunk;
  412.                         ret += score1;
  413. #ifdef CINEPAK_REPORT_SERR
  414.                         *serr += mb->skip_error;
  415. #endif
  416.                     } else {
  417.                         ret += score2;
  418. #ifdef CINEPAK_REPORT_SERR
  419.                         *serr += mb->v1_error;
  420. #endif
  421.                     }
  422.                 } else {
  423.                     if((score3=s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error) >= score1) {
  424.                         mb->best_encoding = ENC_SKIP;
  425.                         ++v4_shrunk;
  426.                         ret += score1;
  427. #ifdef CINEPAK_REPORT_SERR
  428.                         *serr += mb->skip_error;
  429. #endif
  430.                     } else {
  431.                         ret += score3;
  432. #ifdef CINEPAK_REPORT_SERR
  433.                         *serr += mb->v4_error;
  434. #endif
  435.                     }
  436.                 }
  437.             }
  438.             *training_set_v1_shrunk = v1_shrunk;
  439.             *training_set_v4_shrunk = v4_shrunk;
  440.         } else { // find best mode per block
  441.             for(x = 0; x < mb_count; x++) {
  442.                 mb = &s->mb[x];
  443.                 score1 = s->lambda * 1  + FF_LAMBDA_SCALE * mb->skip_error;
  444.                 score2 = s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error;
  445.                 score3 = s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error;
  446.  
  447.                 if(score1 <= score2 && score1 <= score3) {
  448.                     ret += score1;
  449. #ifdef CINEPAK_REPORT_SERR
  450.                     *serr += mb->skip_error;
  451. #endif
  452.                     mb->best_encoding = ENC_SKIP;
  453.                 } else if(score2 <= score3) {
  454.                     ret += score2;
  455. #ifdef CINEPAK_REPORT_SERR
  456.                     *serr += mb->v1_error;
  457. #endif
  458.                     mb->best_encoding = ENC_V1;
  459.                 } else {
  460.                     ret += score3;
  461. #ifdef CINEPAK_REPORT_SERR
  462.                     *serr += mb->v4_error;
  463. #endif
  464.                     mb->best_encoding = ENC_V4;
  465.                 }
  466.             }
  467.         }
  468.  
  469.         break;
  470.     }
  471.  
  472.     return ret;
  473. }
  474.  
  475. static int write_chunk_header(unsigned char *buf, int chunk_type, int chunk_size)
  476. {
  477.     buf[0] = chunk_type;
  478.     AV_WB24(&buf[1], chunk_size + CHUNK_HEADER_SIZE);
  479.     return CHUNK_HEADER_SIZE;
  480. }
  481.  
  482. static int encode_codebook(CinepakEncContext *s, int *codebook, int size, int chunk_type_yuv, int chunk_type_gray, unsigned char *buf)
  483. {
  484.     int x, y, ret, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
  485.     int incremental_codebook_replacement_mode = 0; // hardcoded here,
  486.                 // the compiler should notice that this is a constant -- rl
  487.  
  488.     ret = write_chunk_header(buf,
  489.           s->pix_fmt == AV_PIX_FMT_RGB24 ?
  490.            chunk_type_yuv+(incremental_codebook_replacement_mode?1:0) :
  491.            chunk_type_gray+(incremental_codebook_replacement_mode?1:0),
  492.           entry_size * size
  493.            + (incremental_codebook_replacement_mode?(size+31)/32*4:0) );
  494.  
  495. // we do codebook encoding according to the "intra" mode
  496. // but we keep the "dead" code for reference in case we will want
  497. // to use incremental codebook updates (which actually would give us
  498. // "kind of" motion compensation, especially in 1 strip/frame case) -- rl
  499. // (of course, the code will be not useful as-is)
  500.     if(incremental_codebook_replacement_mode) {
  501.         int flags = 0;
  502.         int flagsind;
  503.         for(x = 0; x < size; x++) {
  504.             if(flags == 0) {
  505.                 flagsind = ret;
  506.                 ret += 4;
  507.                 flags = 0x80000000;
  508.             } else
  509.                 flags = ((flags>>1) | 0x80000000);
  510.             for(y = 0; y < entry_size; y++)
  511.                 buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
  512.             if((flags&0xffffffff) == 0xffffffff) {
  513.                 AV_WB32(&buf[flagsind], flags);
  514.                 flags = 0;
  515.             }
  516.         }
  517.         if(flags)
  518.             AV_WB32(&buf[flagsind], flags);
  519.     } else
  520.         for(x = 0; x < size; x++)
  521.             for(y = 0; y < entry_size; y++)
  522.                 buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
  523.  
  524.     return ret;
  525. }
  526.  
  527. //sets out to the sub picture starting at (x,y) in in
  528. static void get_sub_picture(CinepakEncContext *s, int x, int y, AVPicture *in, AVPicture *out)
  529. {
  530.     out->data[0] = in->data[0] + x + y * in->linesize[0];
  531.     out->linesize[0] = in->linesize[0];
  532.  
  533.     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
  534.         out->data[1] = in->data[1] + (x >> 1) + (y >> 1) * in->linesize[1];
  535.         out->linesize[1] = in->linesize[1];
  536.  
  537.         out->data[2] = in->data[2] + (x >> 1) + (y >> 1) * in->linesize[2];
  538.         out->linesize[2] = in->linesize[2];
  539.     }
  540. }
  541.  
  542. //decodes the V1 vector in mb into the 4x4 MB pointed to by sub_pict
  543. static void decode_v1_vector(CinepakEncContext *s, AVPicture *sub_pict, int v1_vector, strip_info *info)
  544. {
  545.     int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
  546.  
  547.     sub_pict->data[0][0] =
  548.             sub_pict->data[0][1] =
  549.             sub_pict->data[0][    sub_pict->linesize[0]] =
  550.             sub_pict->data[0][1+  sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size];
  551.  
  552.     sub_pict->data[0][2] =
  553.             sub_pict->data[0][3] =
  554.             sub_pict->data[0][2+  sub_pict->linesize[0]] =
  555.             sub_pict->data[0][3+  sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+1];
  556.  
  557.     sub_pict->data[0][2*sub_pict->linesize[0]] =
  558.             sub_pict->data[0][1+2*sub_pict->linesize[0]] =
  559.             sub_pict->data[0][  3*sub_pict->linesize[0]] =
  560.             sub_pict->data[0][1+3*sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+2];
  561.  
  562.     sub_pict->data[0][2+2*sub_pict->linesize[0]] =
  563.             sub_pict->data[0][3+2*sub_pict->linesize[0]] =
  564.             sub_pict->data[0][2+3*sub_pict->linesize[0]] =
  565.             sub_pict->data[0][3+3*sub_pict->linesize[0]] = info->v1_codebook[v1_vector*entry_size+3];
  566.  
  567.     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
  568.         sub_pict->data[1][0] =
  569.             sub_pict->data[1][1] =
  570.             sub_pict->data[1][    sub_pict->linesize[1]] =
  571.             sub_pict->data[1][1+  sub_pict->linesize[1]] = info->v1_codebook[v1_vector*entry_size+4];
  572.  
  573.         sub_pict->data[2][0] =
  574.             sub_pict->data[2][1] =
  575.             sub_pict->data[2][    sub_pict->linesize[2]] =
  576.             sub_pict->data[2][1+  sub_pict->linesize[2]] = info->v1_codebook[v1_vector*entry_size+5];
  577.     }
  578. }
  579.  
  580. //decodes the V4 vectors in mb into the 4x4 MB pointed to by sub_pict
  581. static void decode_v4_vector(CinepakEncContext *s, AVPicture *sub_pict, int *v4_vector, strip_info *info)
  582. {
  583.     int i, x, y, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
  584.  
  585.     for(i = y = 0; y < 4; y += 2) {
  586.         for(x = 0; x < 4; x += 2, i++) {
  587.             sub_pict->data[0][x   +     y*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size];
  588.             sub_pict->data[0][x+1 +     y*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+1];
  589.             sub_pict->data[0][x   + (y+1)*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+2];
  590.             sub_pict->data[0][x+1 + (y+1)*sub_pict->linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+3];
  591.  
  592.             if(s->pix_fmt == AV_PIX_FMT_RGB24) {
  593.                 sub_pict->data[1][(x>>1) + (y>>1)*sub_pict->linesize[1]] = info->v4_codebook[v4_vector[i]*entry_size+4];
  594.                 sub_pict->data[2][(x>>1) + (y>>1)*sub_pict->linesize[2]] = info->v4_codebook[v4_vector[i]*entry_size+5];
  595.             }
  596.         }
  597.     }
  598. }
  599.  
  600. static void copy_mb(CinepakEncContext *s, AVPicture *a, AVPicture *b)
  601. {
  602.     int y, p;
  603.  
  604.     for(y = 0; y < MB_SIZE; y++) {
  605.         memcpy(a->data[0]+y*a->linesize[0], b->data[0]+y*b->linesize[0],
  606.                MB_SIZE);
  607.     }
  608.  
  609.     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
  610.         for(p = 1; p <= 2; p++) {
  611.             for(y = 0; y < MB_SIZE/2; y++) {
  612.                 memcpy(a->data[p] + y*a->linesize[p],
  613.                        b->data[p] + y*b->linesize[p],
  614.                        MB_SIZE/2);
  615.             }
  616.         }
  617.     }
  618. }
  619.  
  620. static int encode_mode(CinepakEncContext *s, int h, AVPicture *scratch_pict, AVPicture *last_pict, strip_info *info, unsigned char *buf)
  621. {
  622.     int x, y, z, flags, bits, temp_size, header_ofs, ret = 0, mb_count = s->w * h / MB_AREA;
  623.     int needs_extra_bit, should_write_temp;
  624.     unsigned char temp[64]; //32/2 = 16 V4 blocks at 4 B each -> 64 B
  625.     mb_info *mb;
  626.     AVPicture sub_scratch = {{0}}, sub_last = {{0}};
  627.  
  628.     //encode codebooks
  629. ////// MacOS vintage decoder compatibility dictates the presence of
  630. ////// the codebook chunk even when the codebook is empty - pretty dumb...
  631. ////// and also the certain order of the codebook chunks -- rl
  632.     if(info->v4_size || !s->skip_empty_cb)
  633.         ret += encode_codebook(s, info->v4_codebook, info->v4_size, 0x20, 0x24, buf + ret);
  634.  
  635.     if(info->v1_size || !s->skip_empty_cb)
  636.         ret += encode_codebook(s, info->v1_codebook, info->v1_size, 0x22, 0x26, buf + ret);
  637.  
  638.     //update scratch picture
  639.     for(z = y = 0; y < h; y += MB_SIZE) {
  640.         for(x = 0; x < s->w; x += MB_SIZE, z++) {
  641.             mb = &s->mb[z];
  642.  
  643.             get_sub_picture(s, x, y, scratch_pict, &sub_scratch);
  644.  
  645.             if(info->mode == MODE_MC && mb->best_encoding == ENC_SKIP) {
  646.                 get_sub_picture(s, x, y, last_pict, &sub_last);
  647.                 copy_mb(s, &sub_scratch, &sub_last);
  648.             } else if(info->mode == MODE_V1_ONLY || mb->best_encoding == ENC_V1)
  649.                 decode_v1_vector(s, &sub_scratch, mb->v1_vector, info);
  650.             else
  651.                 decode_v4_vector(s, &sub_scratch, mb->v4_vector, info);
  652.         }
  653.     }
  654.  
  655.     switch(info->mode) {
  656.     case MODE_V1_ONLY:
  657.         //av_log(s->avctx, AV_LOG_INFO, "mb_count = %i\n", mb_count);
  658.         ret += write_chunk_header(buf + ret, 0x32, mb_count);
  659.  
  660.         for(x = 0; x < mb_count; x++)
  661.             buf[ret++] = s->mb[x].v1_vector;
  662.  
  663.         break;
  664.     case MODE_V1_V4:
  665.         //remember header position
  666.         header_ofs = ret;
  667.         ret += CHUNK_HEADER_SIZE;
  668.  
  669.         for(x = 0; x < mb_count; x += 32) {
  670.             flags = 0;
  671.             for(y = x; y < FFMIN(x+32, mb_count); y++)
  672.                 if(s->mb[y].best_encoding == ENC_V4)
  673.                     flags |= 1 << (31 - y + x);
  674.  
  675.             AV_WB32(&buf[ret], flags);
  676.             ret += 4;
  677.  
  678.             for(y = x; y < FFMIN(x+32, mb_count); y++) {
  679.                 mb = &s->mb[y];
  680.  
  681.                 if(mb->best_encoding == ENC_V1)
  682.                     buf[ret++] = mb->v1_vector;
  683.                 else
  684.                     for(z = 0; z < 4; z++)
  685.                         buf[ret++] = mb->v4_vector[z];
  686.             }
  687.         }
  688.  
  689.         write_chunk_header(buf + header_ofs, 0x30, ret - header_ofs - CHUNK_HEADER_SIZE);
  690.  
  691.         break;
  692.     case MODE_MC:
  693.         //remember header position
  694.         header_ofs = ret;
  695.         ret += CHUNK_HEADER_SIZE;
  696.         flags = bits = temp_size = 0;
  697.  
  698.         for(x = 0; x < mb_count; x++) {
  699.             mb = &s->mb[x];
  700.             flags |= (mb->best_encoding != ENC_SKIP) << (31 - bits++);
  701.             needs_extra_bit = 0;
  702.             should_write_temp = 0;
  703.  
  704.             if(mb->best_encoding != ENC_SKIP) {
  705.                 if(bits < 32)
  706.                     flags |= (mb->best_encoding == ENC_V4) << (31 - bits++);
  707.                 else
  708.                     needs_extra_bit = 1;
  709.             }
  710.  
  711.             if(bits == 32) {
  712.                 AV_WB32(&buf[ret], flags);
  713.                 ret += 4;
  714.                 flags = bits = 0;
  715.  
  716.                 if(mb->best_encoding == ENC_SKIP || needs_extra_bit) {
  717.                     memcpy(&buf[ret], temp, temp_size);
  718.                     ret += temp_size;
  719.                     temp_size = 0;
  720.                 } else
  721.                     should_write_temp = 1;
  722.             }
  723.  
  724.             if(needs_extra_bit) {
  725.                 flags = (mb->best_encoding == ENC_V4) << 31;
  726.                 bits = 1;
  727.             }
  728.  
  729.             if(mb->best_encoding == ENC_V1)
  730.                 temp[temp_size++] = mb->v1_vector;
  731.             else if(mb->best_encoding == ENC_V4)
  732.                 for(z = 0; z < 4; z++)
  733.                     temp[temp_size++] = mb->v4_vector[z];
  734.  
  735.             if(should_write_temp) {
  736.                 memcpy(&buf[ret], temp, temp_size);
  737.                 ret += temp_size;
  738.                 temp_size = 0;
  739.             }
  740.         }
  741.  
  742.         if(bits > 0) {
  743.             AV_WB32(&buf[ret], flags);
  744.             ret += 4;
  745.             memcpy(&buf[ret], temp, temp_size);
  746.             ret += temp_size;
  747.         }
  748.  
  749.         write_chunk_header(buf + header_ofs, 0x31, ret - header_ofs - CHUNK_HEADER_SIZE);
  750.  
  751.         break;
  752.     }
  753.  
  754.     return ret;
  755. }
  756.  
  757. //computes distortion of 4x4 MB in b compared to a
  758. static int compute_mb_distortion(CinepakEncContext *s, AVPicture *a, AVPicture *b)
  759. {
  760.     int x, y, p, d, ret = 0;
  761.  
  762.     for(y = 0; y < MB_SIZE; y++) {
  763.         for(x = 0; x < MB_SIZE; x++) {
  764.             d = a->data[0][x + y*a->linesize[0]] - b->data[0][x + y*b->linesize[0]];
  765.             ret += d*d;
  766.         }
  767.     }
  768.  
  769.     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
  770.         for(p = 1; p <= 2; p++) {
  771.             for(y = 0; y < MB_SIZE/2; y++) {
  772.                 for(x = 0; x < MB_SIZE/2; x++) {
  773.                     d = a->data[p][x + y*a->linesize[p]] - b->data[p][x + y*b->linesize[p]];
  774.                     ret += d*d;
  775.                 }
  776.             }
  777.         }
  778.     }
  779.  
  780.     return ret;
  781. }
  782.  
  783. // return the possibly adjusted size of the codebook
  784. #define CERTAIN(x) ((x)!=ENC_UNCERTAIN)
  785. static int quantize(CinepakEncContext *s, int h, AVPicture *pict,
  786.                     int v1mode, strip_info *info,
  787.                     mb_encoding encoding)
  788. {
  789.     int x, y, i, j, k, x2, y2, x3, y3, plane, shift, mbn;
  790.     int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
  791.     int *codebook = v1mode ? info->v1_codebook : info->v4_codebook;
  792.     int size = v1mode ? info->v1_size : info->v4_size;
  793.     int64_t total_error = 0;
  794.     uint8_t vq_pict_buf[(MB_AREA*3)/2];
  795.     AVPicture sub_pict, vq_pict;
  796.  
  797.     for(mbn = i = y = 0; y < h; y += MB_SIZE) {
  798.         for(x = 0; x < s->w; x += MB_SIZE, ++mbn) {
  799.             int *base;
  800.  
  801.             if(CERTAIN(encoding)) {
  802. // use for the training only the blocks known to be to be encoded [sic:-]
  803.                if(s->mb[mbn].best_encoding != encoding) continue;
  804.             }
  805.  
  806.             base = s->codebook_input + i*entry_size;
  807.             if(v1mode) {
  808.                 //subsample
  809.                 for(j = y2 = 0; y2 < entry_size; y2 += 2) {
  810.                     for(x2 = 0; x2 < 4; x2 += 2, j++) {
  811.                         plane = y2 < 4 ? 0 : 1 + (x2 >> 1);
  812.                         shift = y2 < 4 ? 0 : 1;
  813.                         x3 = shift ? 0 : x2;
  814.                         y3 = shift ? 0 : y2;
  815.                         base[j] = (pict->data[plane][((x+x3) >> shift) +      ((y+y3) >> shift)      * pict->linesize[plane]] +
  816.                                    pict->data[plane][((x+x3) >> shift) + 1 +  ((y+y3) >> shift)      * pict->linesize[plane]] +
  817.                                    pict->data[plane][((x+x3) >> shift) +     (((y+y3) >> shift) + 1) * pict->linesize[plane]] +
  818.                                    pict->data[plane][((x+x3) >> shift) + 1 + (((y+y3) >> shift) + 1) * pict->linesize[plane]]) >> 2;
  819.                     }
  820.                 }
  821.             } else {
  822.                 //copy
  823.                 for(j = y2 = 0; y2 < MB_SIZE; y2 += 2) {
  824.                     for(x2 = 0; x2 < MB_SIZE; x2 += 2) {
  825.                         for(k = 0; k < entry_size; k++, j++) {
  826.                             plane = k >= 4 ? k - 3 : 0;
  827.  
  828.                             if(k >= 4) {
  829.                                 x3 = (x+x2) >> 1;
  830.                                 y3 = (y+y2) >> 1;
  831.                             } else {
  832.                                 x3 = x + x2 + (k & 1);
  833.                                 y3 = y + y2 + (k >> 1);
  834.                             }
  835.  
  836.                             base[j] = pict->data[plane][x3 + y3*pict->linesize[plane]];
  837.                         }
  838.                     }
  839.                 }
  840.             }
  841.             i += v1mode ? 1 : 4;
  842.         }
  843.     }
  844. //    if(i < mbn*(v1mode ? 1 : 4)) {
  845. //        av_log(s->avctx, AV_LOG_INFO, "reducing training set for %s from %i to %i (encoding %i)\n", v1mode?"v1":"v4", mbn*(v1mode ? 1 : 4), i, encoding);
  846. //    }
  847.  
  848.     if(i == 0) // empty training set, nothing to do
  849.         return 0;
  850.     if(i < size) {
  851.         //av_log(s->avctx, (CERTAIN(encoding) ? AV_LOG_ERROR : AV_LOG_INFO), "WOULD WASTE: %s cbsize %i bigger than training set size %i (encoding %i)\n", v1mode?"v1":"v4", size, i, encoding);
  852.         size = i;
  853.     }
  854.  
  855.     avpriv_init_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
  856.     avpriv_do_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
  857.  
  858.     //setup vq_pict, which contains a single MB
  859.     vq_pict.data[0] = vq_pict_buf;
  860.     vq_pict.linesize[0] = MB_SIZE;
  861.     vq_pict.data[1] = &vq_pict_buf[MB_AREA];
  862.     vq_pict.data[2] = vq_pict.data[1] + (MB_AREA >> 2);
  863.     vq_pict.linesize[1] = vq_pict.linesize[2] = MB_SIZE >> 1;
  864.  
  865.     //copy indices
  866.     for(i = j = y = 0; y < h; y += MB_SIZE) {
  867.         for(x = 0; x < s->w; x += MB_SIZE, j++) {
  868.             mb_info *mb = &s->mb[j];
  869. // skip uninteresting blocks if we know their preferred encoding
  870.             if(CERTAIN(encoding) && mb->best_encoding != encoding)
  871.                 continue;
  872.  
  873.             //point sub_pict to current MB
  874.             get_sub_picture(s, x, y, pict, &sub_pict);
  875.  
  876.             if(v1mode) {
  877.                 mb->v1_vector = s->codebook_closest[i];
  878.  
  879.                 //fill in vq_pict with V1 data
  880.                 decode_v1_vector(s, &vq_pict, mb->v1_vector, info);
  881.  
  882.                 mb->v1_error = compute_mb_distortion(s, &sub_pict, &vq_pict);
  883.                 total_error += mb->v1_error;
  884.             } else {
  885.                 for(k = 0; k < 4; k++)
  886.                     mb->v4_vector[k] = s->codebook_closest[i+k];
  887.  
  888.                 //fill in vq_pict with V4 data
  889.                 decode_v4_vector(s, &vq_pict, mb->v4_vector, info);
  890.  
  891.                 mb->v4_error = compute_mb_distortion(s, &sub_pict, &vq_pict);
  892.                 total_error += mb->v4_error;
  893.             }
  894.             i += v1mode ? 1 : 4;
  895.         }
  896.     }
  897. // check that we did it right in the beginning of the function
  898.     av_assert0(i >= size); // training set is no smaller than the codebook
  899.  
  900.     //av_log(s->avctx, AV_LOG_INFO, "isv1 %i size= %i i= %i error %"PRId64"\n", v1mode, size, i, total_error);
  901.  
  902.     return size;
  903. }
  904.  
  905. static void calculate_skip_errors(CinepakEncContext *s, int h, AVPicture *last_pict, AVPicture *pict, strip_info *info)
  906. {
  907.     int x, y, i;
  908.     AVPicture sub_last, sub_pict;
  909.  
  910.     for(i = y = 0; y < h; y += MB_SIZE) {
  911.         for(x = 0; x < s->w; x += MB_SIZE, i++) {
  912.             get_sub_picture(s, x, y, last_pict, &sub_last);
  913.             get_sub_picture(s, x, y, pict,      &sub_pict);
  914.  
  915.             s->mb[i].skip_error = compute_mb_distortion(s, &sub_last, &sub_pict);
  916.         }
  917.     }
  918. }
  919.  
  920. static void write_strip_header(CinepakEncContext *s, int y, int h, int keyframe, unsigned char *buf, int strip_size)
  921. {
  922. // actually we are exclusively using intra strip coding (how much can we win
  923. // otherwise? how to choose which part of a codebook to update?),
  924. // keyframes are different only because we disallow ENC_SKIP on them -- rl
  925. // (besides, the logic here used to be inverted: )
  926. //    buf[0] = keyframe ? 0x11: 0x10;
  927.     buf[0] = keyframe ? 0x10: 0x11;
  928.     AV_WB24(&buf[1], strip_size + STRIP_HEADER_SIZE);
  929. //    AV_WB16(&buf[4], y); /* using absolute y values works -- rl */
  930.     AV_WB16(&buf[4], 0); /* using relative values works as well -- rl */
  931.     AV_WB16(&buf[6], 0);
  932. //    AV_WB16(&buf[8], y+h); /* using absolute y values works -- rl */
  933.     AV_WB16(&buf[8], h); /* using relative values works as well -- rl */
  934.     AV_WB16(&buf[10], s->w);
  935.     //av_log(s->avctx, AV_LOG_INFO, "write_strip_header() %x keyframe=%d\n", buf[0], keyframe);
  936. }
  937.  
  938. static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe, AVPicture *last_pict, AVPicture *pict, AVPicture *scratch_pict, unsigned char *buf, int64_t *best_score
  939. #ifdef CINEPAK_REPORT_SERR
  940. , int64_t *best_serr
  941. #endif
  942. )
  943. {
  944.     int64_t score = 0;
  945. #ifdef CINEPAK_REPORT_SERR
  946.     int64_t serr;
  947. #endif
  948.     int best_size = 0;
  949.     strip_info info;
  950. // for codebook optimization:
  951.     int v1enough, v1_size, v4enough, v4_size;
  952.     int new_v1_size, new_v4_size;
  953.     int v1shrunk, v4shrunk;
  954.  
  955.     if(!keyframe)
  956.         calculate_skip_errors(s, h, last_pict, pict, &info);
  957.  
  958.     //try some powers of 4 for the size of the codebooks
  959.     //constraint the v4 codebook to be no bigger than v1 one,
  960.     //(and no less than v1_size/4)
  961.     //thus making v1 preferable and possibly losing small details? should be ok
  962. #define SMALLEST_CODEBOOK 1
  963.     for(v1enough = 0, v1_size = SMALLEST_CODEBOOK; v1_size <= CODEBOOK_MAX && !v1enough; v1_size <<= 2) {
  964.         for(v4enough = 0, v4_size = 0; v4_size <= v1_size && !v4enough; v4_size = v4_size ? v4_size << 2 : v1_size >= SMALLEST_CODEBOOK << 2 ? v1_size >> 2 : SMALLEST_CODEBOOK) {
  965.             //try all modes
  966.             for(CinepakMode mode = 0; mode < MODE_COUNT; mode++) {
  967.                 //don't allow MODE_MC in intra frames
  968.                 if(keyframe && mode == MODE_MC)
  969.                     continue;
  970.  
  971.                 if(mode == MODE_V1_ONLY) {
  972.                     info.v1_size = v1_size;
  973. // the size may shrink even before optimizations if the input is short:
  974.                     info.v1_size = quantize(s, h, pict, 1, &info, ENC_UNCERTAIN);
  975.                     if(info.v1_size < v1_size)
  976. // too few eligible blocks, no sense in trying bigger sizes
  977.                         v1enough = 1;
  978.  
  979.                     info.v4_size = 0;
  980.                 } else { // mode != MODE_V1_ONLY
  981.                     // if v4 codebook is empty then only allow V1-only mode
  982.                     if(!v4_size)
  983.                         continue;
  984.  
  985.                     if(mode == MODE_V1_V4) {
  986.                         info.v4_size = v4_size;
  987.                         info.v4_size = quantize(s, h, pict, 0, &info, ENC_UNCERTAIN);
  988.                         if(info.v4_size < v4_size)
  989. // too few eligible blocks, no sense in trying bigger sizes
  990.                             v4enough = 1;
  991.                     }
  992.                 }
  993.  
  994.                 info.mode = mode;
  995. // choose the best encoding per block, based on current experience
  996.                 score = calculate_mode_score(s, h, &info, 0,
  997.                                              &v1shrunk, &v4shrunk
  998. #ifdef CINEPAK_REPORT_SERR
  999. , &serr
  1000. #endif
  1001. );
  1002.  
  1003.                 if(mode != MODE_V1_ONLY){
  1004.                     int extra_iterations_limit = s->max_extra_cb_iterations;
  1005. // recompute the codebooks, omitting the extra blocks
  1006. // we assume we _may_ come here with more blocks to encode than before
  1007.                     info.v1_size = v1_size;
  1008.                     new_v1_size = quantize(s, h, pict, 1, &info, ENC_V1);
  1009.                     if(new_v1_size < info.v1_size){
  1010.                         //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
  1011.                         info.v1_size = new_v1_size;
  1012.                     }
  1013. // we assume we _may_ come here with more blocks to encode than before
  1014.                     info.v4_size = v4_size;
  1015.                     new_v4_size = quantize(s, h, pict, 0, &info, ENC_V4);
  1016.                     if(new_v4_size < info.v4_size) {
  1017.                         //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries at first iteration\n", mode, v1_size, v4_size, new_v4_size);
  1018.                         info.v4_size = new_v4_size;
  1019.                     }
  1020. // calculate the resulting score
  1021. // (do not move blocks to codebook encodings now, as some blocks may have
  1022. // got bigger errors despite a smaller training set - but we do not
  1023. // ever grow the training sets back)
  1024.                     for(;;) {
  1025.                         score = calculate_mode_score(s, h, &info, 1,
  1026.                                                      &v1shrunk, &v4shrunk
  1027. #ifdef CINEPAK_REPORT_SERR
  1028. , &serr
  1029. #endif
  1030. );
  1031. // do we have a reason to reiterate? if so, have we reached the limit?
  1032.                         if((!v1shrunk && !v4shrunk) || !extra_iterations_limit--) break;
  1033. // recompute the codebooks, omitting the extra blocks
  1034.                         if(v1shrunk) {
  1035.                             info.v1_size = v1_size;
  1036.                             new_v1_size = quantize(s, h, pict, 1, &info, ENC_V1);
  1037.                             if(new_v1_size < info.v1_size){
  1038.                                 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
  1039.                                 info.v1_size = new_v1_size;
  1040.                             }
  1041.                         }
  1042.                         if(v4shrunk) {
  1043.                             info.v4_size = v4_size;
  1044.                             new_v4_size = quantize(s, h, pict, 0, &info, ENC_V4);
  1045.                             if(new_v4_size < info.v4_size) {
  1046.                                 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries\n", mode, v1_size, v4_size, new_v4_size);
  1047.                                 info.v4_size = new_v4_size;
  1048.                             }
  1049.                         }
  1050.                     }
  1051.                 }
  1052.  
  1053.                 //av_log(s->avctx, AV_LOG_INFO, "%3i %3i score = %"PRId64"\n", v1_size, v4_size, score);
  1054.  
  1055.                 if(best_size == 0 || score < *best_score) {
  1056.  
  1057.                     *best_score = score;
  1058. #ifdef CINEPAK_REPORT_SERR
  1059.                     *best_serr = serr;
  1060. #endif
  1061.                     best_size = encode_mode(s, h, scratch_pict, last_pict, &info, s->strip_buf + STRIP_HEADER_SIZE);
  1062.  
  1063.                     //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18"PRId64" %i B", mode, info.v1_size, info.v4_size, score, best_size);
  1064.                     //av_log(s->avctx, AV_LOG_INFO, "\n");
  1065. #ifdef CINEPAK_REPORT_SERR
  1066.                     av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18"PRId64" %i B\n", mode, v1_size, v4_size, serr, best_size);
  1067. #endif
  1068.  
  1069. #ifdef CINEPAKENC_DEBUG
  1070.                     //save MB encoding choices
  1071.                     memcpy(s->best_mb, s->mb, mb_count*sizeof(mb_info));
  1072. #endif
  1073.  
  1074.                     //memcpy(strip_temp + STRIP_HEADER_SIZE, strip_temp, best_size);
  1075.                     write_strip_header(s, y, h, keyframe, s->strip_buf, best_size);
  1076.  
  1077.                 }
  1078.             }
  1079.         }
  1080.     }
  1081.  
  1082. #ifdef CINEPAKENC_DEBUG
  1083.     //gather stats. this will only work properly of MAX_STRIPS == 1
  1084.     if(best_info.mode == MODE_V1_ONLY) {
  1085.         s->num_v1_mode++;
  1086.         s->num_v1_encs += s->w*h/MB_AREA;
  1087.     } else {
  1088.         if(best_info.mode == MODE_V1_V4)
  1089.             s->num_v4_mode++;
  1090.         else
  1091.             s->num_mc_mode++;
  1092.  
  1093.         int x;
  1094.         for(x = 0; x < s->w*h/MB_AREA; x++)
  1095.             if(s->best_mb[x].best_encoding == ENC_V1)
  1096.                 s->num_v1_encs++;
  1097.             else if(s->best_mb[x].best_encoding == ENC_V4)
  1098.                 s->num_v4_encs++;
  1099.             else
  1100.                 s->num_skips++;
  1101.     }
  1102. #endif
  1103.  
  1104.     best_size += STRIP_HEADER_SIZE;
  1105.     memcpy(buf, s->strip_buf, best_size);
  1106.  
  1107.     return best_size;
  1108. }
  1109.  
  1110. static int write_cvid_header(CinepakEncContext *s, unsigned char *buf, int num_strips, int data_size, int isakeyframe)
  1111. {
  1112.     buf[0] = isakeyframe ? 0 : 1;
  1113.     AV_WB24(&buf[1], data_size + CVID_HEADER_SIZE);
  1114.     AV_WB16(&buf[4], s->w);
  1115.     AV_WB16(&buf[6], s->h);
  1116.     AV_WB16(&buf[8], num_strips);
  1117.  
  1118.     return CVID_HEADER_SIZE;
  1119. }
  1120.  
  1121. static int rd_frame(CinepakEncContext *s, const AVFrame *frame, int isakeyframe, unsigned char *buf, int buf_size)
  1122. {
  1123.     int num_strips, strip, i, y, nexty, size, temp_size;
  1124.     AVPicture last_pict, pict, scratch_pict;
  1125.     int64_t best_score = 0, score, score_temp;
  1126. #ifdef CINEPAK_REPORT_SERR
  1127.     int64_t best_serr = 0, serr, serr_temp;
  1128. #endif
  1129.  
  1130.     int best_nstrips = -1, best_size = -1; // mark as uninitialzed
  1131.  
  1132.     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
  1133.         int x;
  1134. // build a copy of the given frame in the correct colorspace
  1135.         for(y = 0; y < s->h; y += 2) {
  1136.             for(x = 0; x < s->w; x += 2) {
  1137.                 uint8_t *ir[2]; int32_t r, g, b, rr, gg, bb;
  1138.                 ir[0] = ((AVPicture*)frame)->data[0] + x*3 + y*((AVPicture*)frame)->linesize[0];
  1139.                 ir[1] = ir[0] + ((AVPicture*)frame)->linesize[0];
  1140.                 get_sub_picture(s, x, y, (AVPicture*)s->input_frame, &scratch_pict);
  1141.                 r = g = b = 0;
  1142.                 for(i=0; i<4; ++i) {
  1143.                     int i1, i2;
  1144.                     i1 = (i&1); i2 = (i>=2);
  1145.                     rr = ir[i2][i1*3+0];
  1146.                     gg = ir[i2][i1*3+1];
  1147.                     bb = ir[i2][i1*3+2];
  1148.                     r += rr; g += gg; b += bb;
  1149. // using fixed point arithmetic for portable repeatability, scaling by 2^23
  1150. // "Y"
  1151. //                    rr = 0.2857*rr + 0.5714*gg + 0.1429*bb;
  1152.                     rr = (2396625*rr + 4793251*gg + 1198732*bb) >> 23;
  1153.                     if(      rr <   0) rr =   0;
  1154.                     else if (rr > 255) rr = 255;
  1155.                     scratch_pict.data[0][i1 + i2*scratch_pict.linesize[0]] = rr;
  1156.                 }
  1157. // let us scale down as late as possible
  1158. //                r /= 4; g /= 4; b /= 4;
  1159. // "U"
  1160. //                rr = -0.1429*r - 0.2857*g + 0.4286*b;
  1161.                 rr = (-299683*r - 599156*g + 898839*b) >> 23;
  1162.                 if(      rr < -128) rr = -128;
  1163.                 else if (rr >  127) rr =  127;
  1164.                 scratch_pict.data[1][0] = rr + 128; // quantize needs unsigned
  1165. // "V"
  1166. //                rr = 0.3571*r - 0.2857*g - 0.0714*b;
  1167.                 rr = (748893*r - 599156*g - 149737*b) >> 23;
  1168.                 if(      rr < -128) rr = -128;
  1169.                 else if (rr >  127) rr =  127;
  1170.                 scratch_pict.data[2][0] = rr + 128; // quantize needs unsigned
  1171.             }
  1172.         }
  1173.     }
  1174.  
  1175.     //would be nice but quite certainly incompatible with vintage players:
  1176.     // support encoding zero strips (meaning skip the whole frame)
  1177.     for(num_strips = s->min_strips; num_strips <= s->max_strips && num_strips <= s->h / MB_SIZE; num_strips++) {
  1178.         score = 0;
  1179.         size = 0;
  1180. #ifdef CINEPAK_REPORT_SERR
  1181.         serr = 0;
  1182. #endif
  1183.  
  1184.         for(y = 0, strip = 1; y < s->h; strip++, y = nexty) {
  1185.             int strip_height;
  1186.  
  1187.             nexty = strip * s->h / num_strips; // <= s->h
  1188.             //make nexty the next multiple of 4 if not already there
  1189.             if(nexty & 3)
  1190.                 nexty += 4 - (nexty & 3);
  1191.  
  1192.             strip_height = nexty - y;
  1193.             if(strip_height <= 0) { // can this ever happen?
  1194.                 av_log(s->avctx, AV_LOG_INFO, "skipping zero height strip %i of %i\n", strip, num_strips);
  1195.                 continue;
  1196.             }
  1197.  
  1198.             if(s->pix_fmt == AV_PIX_FMT_RGB24)
  1199.                 get_sub_picture(s, 0, y, (AVPicture*)s->input_frame,    &pict);
  1200.             else
  1201.                 get_sub_picture(s, 0, y, (AVPicture*)frame,              &pict);
  1202.             get_sub_picture(s, 0, y, (AVPicture*)s->last_frame,    &last_pict);
  1203.             get_sub_picture(s, 0, y, (AVPicture*)s->scratch_frame, &scratch_pict);
  1204.  
  1205.             if((temp_size = rd_strip(s, y, strip_height, isakeyframe, &last_pict, &pict, &scratch_pict, s->frame_buf + size + CVID_HEADER_SIZE, &score_temp
  1206. #ifdef CINEPAK_REPORT_SERR
  1207. , &serr_temp
  1208. #endif
  1209. )) < 0)
  1210.                 return temp_size;
  1211.  
  1212.             score += score_temp;
  1213. #ifdef CINEPAK_REPORT_SERR
  1214.             serr += serr_temp;
  1215. #endif
  1216.             size += temp_size;
  1217.             //av_log(s->avctx, AV_LOG_INFO, "strip %d, isakeyframe=%d", strip, isakeyframe);
  1218.             //av_log(s->avctx, AV_LOG_INFO, "\n");
  1219.         }
  1220.  
  1221.         if(best_score == 0 || score < best_score) {
  1222.             best_score = score;
  1223. #ifdef CINEPAK_REPORT_SERR
  1224.             best_serr = serr;
  1225. #endif
  1226.             best_size = size + write_cvid_header(s, s->frame_buf, num_strips, size, isakeyframe);
  1227.             //av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12"PRId64", %i B\n", num_strips, score, best_size);
  1228. #ifdef CINEPAK_REPORT_SERR
  1229.             av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12"PRId64", %i B\n", num_strips, serr, best_size);
  1230. #endif
  1231.  
  1232.             FFSWAP(AVFrame *, s->best_frame, s->scratch_frame);
  1233.             memcpy(buf, s->frame_buf, best_size);
  1234.             best_nstrips = num_strips;
  1235.         }
  1236. // avoid trying too many strip numbers without a real reason
  1237. // (this makes the processing of the very first frame faster)
  1238.         if(num_strips - best_nstrips > 4)
  1239.             break;
  1240.     }
  1241.  
  1242.     av_assert0(best_nstrips >= 0 && best_size >= 0);
  1243.  
  1244. // let the number of strips slowly adapt to the changes in the contents,
  1245. // compared to full bruteforcing every time this will occasionally lead
  1246. // to some r/d performance loss but makes encoding up to several times faster
  1247.     if(!s->strip_number_delta_range) {
  1248.         if(best_nstrips == s->max_strips) { // let us try to step up
  1249.             s->max_strips = best_nstrips + 1;
  1250.             if(s->max_strips >= s->max_max_strips)
  1251.                 s->max_strips = s->max_max_strips;
  1252.         } else { // try to step down
  1253.             s->max_strips = best_nstrips;
  1254.         }
  1255.         s->min_strips = s->max_strips - 1;
  1256.         if(s->min_strips < s->min_min_strips)
  1257.             s->min_strips = s->min_min_strips;
  1258.     } else {
  1259.         s->max_strips = best_nstrips + s->strip_number_delta_range;
  1260.         if(s->max_strips >= s->max_max_strips)
  1261.             s->max_strips = s->max_max_strips;
  1262.         s->min_strips = best_nstrips - s->strip_number_delta_range;
  1263.         if(s->min_strips < s->min_min_strips)
  1264.             s->min_strips = s->min_min_strips;
  1265.     }
  1266.  
  1267.     return best_size;
  1268. }
  1269.  
  1270. static int cinepak_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
  1271.                                 const AVFrame *frame, int *got_packet)
  1272. {
  1273.     CinepakEncContext *s = avctx->priv_data;
  1274.     int ret;
  1275.  
  1276.     s->lambda = frame->quality ? frame->quality - 1 : 2 * FF_LAMBDA_SCALE;
  1277.  
  1278.     if ((ret = ff_alloc_packet2(avctx, pkt, s->frame_buf_size, 0)) < 0)
  1279.         return ret;
  1280.     ret = rd_frame(s, frame, (s->curframe == 0), pkt->data, s->frame_buf_size);
  1281.     pkt->size = ret;
  1282.     if (s->curframe == 0)
  1283.         pkt->flags |= AV_PKT_FLAG_KEY;
  1284.     *got_packet = 1;
  1285.  
  1286.     FFSWAP(AVFrame *, s->last_frame, s->best_frame);
  1287.  
  1288.     if (++s->curframe >= s->keyint)
  1289.         s->curframe = 0;
  1290.  
  1291.     return 0;
  1292. }
  1293.  
  1294. static av_cold int cinepak_encode_end(AVCodecContext *avctx)
  1295. {
  1296.     CinepakEncContext *s = avctx->priv_data;
  1297.     int x;
  1298.  
  1299.     av_frame_free(&s->last_frame);
  1300.     av_frame_free(&s->best_frame);
  1301.     av_frame_free(&s->scratch_frame);
  1302.     if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
  1303.         av_frame_free(&s->input_frame);
  1304.     av_freep(&s->codebook_input);
  1305.     av_freep(&s->codebook_closest);
  1306.     av_freep(&s->strip_buf);
  1307.     av_freep(&s->frame_buf);
  1308.     av_freep(&s->mb);
  1309. #ifdef CINEPAKENC_DEBUG
  1310.     av_freep(&s->best_mb);
  1311. #endif
  1312.  
  1313.     for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
  1314.         av_freep(&s->pict_bufs[x]);
  1315.  
  1316. #ifdef CINEPAKENC_DEBUG
  1317.     av_log(avctx, AV_LOG_INFO, "strip coding stats: %i V1 mode, %i V4 mode, %i MC mode (%i V1 encs, %i V4 encs, %i skips)\n",
  1318.         s->num_v1_mode, s->num_v4_mode, s->num_mc_mode, s->num_v1_encs, s->num_v4_encs, s->num_skips);
  1319. #endif
  1320.  
  1321.     return 0;
  1322. }
  1323.  
  1324. AVCodec ff_cinepak_encoder = {
  1325.     .name           = "cinepak",
  1326.     .type           = AVMEDIA_TYPE_VIDEO,
  1327.     .id             = AV_CODEC_ID_CINEPAK,
  1328.     .priv_data_size = sizeof(CinepakEncContext),
  1329.     .init           = cinepak_encode_init,
  1330.     .encode2        = cinepak_encode_frame,
  1331.     .close          = cinepak_encode_end,
  1332.     .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_RGB24, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
  1333.     .long_name      = NULL_IF_CONFIG_SMALL("Cinepak / CVID"),
  1334.     .priv_class     = &cinepak_class,
  1335. };
  1336.