  1. /*
  2.  * Monkey's Audio lossless audio decoder
  3.  * Copyright (c) 2007 Benjamin Zores <ben@geexbox.org>
  4.  *  based upon libdemac from Dave Chapman.
  5.  *
  6.  * This file is part of FFmpeg.
  7.  *
  8.  * FFmpeg is free software; you can redistribute it and/or
  9.  * modify it under the terms of the GNU Lesser General Public
  10.  * License as published by the Free Software Foundation; either
  11.  * version 2.1 of the License, or (at your option) any later version.
  12.  *
  13.  * FFmpeg is distributed in the hope that it will be useful,
  14.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16.  * Lesser General Public License for more details.
  17.  *
  18.  * You should have received a copy of the GNU Lesser General Public
  19.  * License along with FFmpeg; if not, write to the Free Software
  20.  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21.  */
  22.  
  23. #include "libavutil/avassert.h"
  24. #include "libavutil/channel_layout.h"
  25. #include "libavutil/opt.h"
  26. #include "avcodec.h"
  27. #include "dsputil.h"
  28. #include "bytestream.h"
  29. #include "internal.h"
  30. #include "get_bits.h"
  31. #include "unary.h"
  32.  
  33. /**
  34.  * @file
  35.  * Monkey's Audio lossless audio decoder
  36.  */
  37.  
  38. #define MAX_CHANNELS        2
  39. #define MAX_BYTESPERSAMPLE  3
  40.  
  41. #define APE_FRAMECODE_MONO_SILENCE    1
  42. #define APE_FRAMECODE_STEREO_SILENCE  3
  43. #define APE_FRAMECODE_PSEUDO_STEREO   4
  44.  
  45. #define HISTORY_SIZE 512
  46. #define PREDICTOR_ORDER 8
  47. /** Total size of all predictor histories */
  48. #define PREDICTOR_SIZE 50
  49.  
  50. #define YDELAYA (18 + PREDICTOR_ORDER*4)
  51. #define YDELAYB (18 + PREDICTOR_ORDER*3)
  52. #define XDELAYA (18 + PREDICTOR_ORDER*2)
  53. #define XDELAYB (18 + PREDICTOR_ORDER)
  54.  
  55. #define YADAPTCOEFFSA 18
  56. #define XADAPTCOEFFSA 14
  57. #define YADAPTCOEFFSB 10
  58. #define XADAPTCOEFFSB 5
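/*
 * The DELAY and ADAPTCOEFFS values above are element offsets into the sliding
 * predictor window APEPredictor.buf: the Y offsets belong to the first
 * channel's predictor and the X offsets to the second one's, the DELAY
 * offsets hold recent reconstructed values for the A and B filter stages, and
 * the ADAPTCOEFFS offsets hold the sign history used to adapt coeffsA and coeffsB.
 */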
  59.  
  60. /**
  61.  * Possible compression levels
  62.  * @{
  63.  */
  64. enum APECompressionLevel {
  65.     COMPRESSION_LEVEL_FAST       = 1000,
  66.     COMPRESSION_LEVEL_NORMAL     = 2000,
  67.     COMPRESSION_LEVEL_HIGH       = 3000,
  68.     COMPRESSION_LEVEL_EXTRA_HIGH = 4000,
  69.     COMPRESSION_LEVEL_INSANE     = 5000
  70. };
  71. /** @} */
  72.  
  73. #define APE_FILTER_LEVELS 3
  74.  
  75. /** Filter orders depending on compression level */
  76. static const uint16_t ape_filter_orders[5][APE_FILTER_LEVELS] = {
  77.     {  0,   0,    0 },
  78.     { 16,   0,    0 },
  79.     { 64,   0,    0 },
  80.     { 32, 256,    0 },
  81.     { 16, 256, 1280 }
  82. };
  83.  
  84. /** Filter fraction bits depending on compression level */
  85. static const uint8_t ape_filter_fracbits[5][APE_FILTER_LEVELS] = {
  86.     {  0,  0,  0 },
  87.     { 11,  0,  0 },
  88.     { 11,  0,  0 },
  89.     { 10, 13,  0 },
  90.     { 11, 13, 15 }
  91. };
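/*
 * Both tables are indexed by the filter set (fset = compression_level / 1000 - 1);
 * each row gives the order and fractional-bit precision of up to
 * APE_FILTER_LEVELS cascaded adaptive filters, a zero order marking an unused level.
 */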
  92.  
  93.  
  94. /** Filters applied to the decoded data */
  95. typedef struct APEFilter {
  96.     int16_t *coeffs;        ///< actual coefficients used in filtering
  97.     int16_t *adaptcoeffs;   ///< adaptive filter coefficients used for correcting the actual filter coefficients
  98.     int16_t *historybuffer; ///< filter memory
  99.     int16_t *delay;         ///< filtered values
  100.  
  101.     int avg;
  102. } APEFilter;
  103.  
  104. typedef struct APERice {
  105.     uint32_t k;
  106.     uint32_t ksum;
  107. } APERice;
  108.  
  109. typedef struct APERangecoder {
  110.     uint32_t low;           ///< low end of interval
  111.     uint32_t range;         ///< length of interval
  112.     uint32_t help;          ///< bytes_to_follow or an intermediate value
  113.     unsigned int buffer;    ///< buffer for input/output
  114. } APERangecoder;
  115.  
  116. /** Filter histories */
  117. typedef struct APEPredictor {
  118.     int32_t *buf;
  119.  
  120.     int32_t lastA[2];
  121.  
  122.     int32_t filterA[2];
  123.     int32_t filterB[2];
  124.  
  125.     int32_t coeffsA[2][4];  ///< adaptation coefficients
  126.     int32_t coeffsB[2][5];  ///< adaptation coefficients
  127.     int32_t historybuffer[HISTORY_SIZE + PREDICTOR_SIZE];
  128.  
  129.     unsigned int sample_pos;
  130. } APEPredictor;
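/*
 * buf slides forward through historybuffer as samples are decoded; once it
 * reaches historybuffer + HISTORY_SIZE, the newest PREDICTOR_SIZE entries are
 * memmove()d back to the start and buf is reset (see the predictor loops).
 */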
  131.  
  132. /** Decoder context */
  133. typedef struct APEContext {
  134.     AVClass *class;                          ///< class for AVOptions
  135.     AVCodecContext *avctx;
  136.     DSPContext dsp;
  137.     int channels;
  138.     int samples;                             ///< samples left to decode in current frame
  139.     int bps;
  140.  
  141.     int fileversion;                         ///< codec version; selects the entropy and predictor decoders
  142.     int compression_level;                   ///< compression level
  143.     int fset;                                ///< which filter set to use (calculated from compression level)
  144.     int flags;                               ///< global decoder flags
  145.  
  146.     uint32_t CRC;                            ///< frame CRC
  147.     int frameflags;                          ///< frame flags
  148.     APEPredictor predictor;                  ///< predictor used for final reconstruction
  149.  
  150.     int32_t *decoded_buffer;
  151.     int decoded_size;
  152.     int32_t *decoded[MAX_CHANNELS];          ///< decoded data for each channel
  153.     int blocks_per_loop;                     ///< maximum number of samples to decode for each call
  154.  
  155.     int16_t* filterbuf[APE_FILTER_LEVELS];   ///< filter memory
  156.  
  157.     APERangecoder rc;                        ///< rangecoder used to decode actual values
  158.     APERice riceX;                           ///< rice code parameters for the second channel
  159.     APERice riceY;                           ///< rice code parameters for the first channel
  160.     APEFilter filters[APE_FILTER_LEVELS][2]; ///< filters used for reconstruction
  161.     GetBitContext gb;
  162.  
  163.     uint8_t *data;                           ///< current frame data
  164.     uint8_t *data_end;                       ///< frame data end
  165.     int data_size;                           ///< frame data allocated size
  166.     const uint8_t *ptr;                      ///< current position in frame data
  167.  
  168.     int error;
  169.  
  170.     void (*entropy_decode_mono)(struct APEContext *ctx, int blockstodecode);
  171.     void (*entropy_decode_stereo)(struct APEContext *ctx, int blockstodecode);
  172.     void (*predictor_decode_mono)(struct APEContext *ctx, int count);
  173.     void (*predictor_decode_stereo)(struct APEContext *ctx, int count);
  174. } APEContext;
  175.  
  176. static void ape_apply_filters(APEContext *ctx, int32_t *decoded0,
  177.                               int32_t *decoded1, int count);
  178.  
  179. static void entropy_decode_mono_0000(APEContext *ctx, int blockstodecode);
  180. static void entropy_decode_stereo_0000(APEContext *ctx, int blockstodecode);
  181. static void entropy_decode_mono_3860(APEContext *ctx, int blockstodecode);
  182. static void entropy_decode_stereo_3860(APEContext *ctx, int blockstodecode);
  183. static void entropy_decode_mono_3900(APEContext *ctx, int blockstodecode);
  184. static void entropy_decode_stereo_3900(APEContext *ctx, int blockstodecode);
  185. static void entropy_decode_stereo_3930(APEContext *ctx, int blockstodecode);
  186. static void entropy_decode_mono_3990(APEContext *ctx, int blockstodecode);
  187. static void entropy_decode_stereo_3990(APEContext *ctx, int blockstodecode);
  188.  
  189. static void predictor_decode_mono_3800(APEContext *ctx, int count);
  190. static void predictor_decode_stereo_3800(APEContext *ctx, int count);
  191. static void predictor_decode_mono_3930(APEContext *ctx, int count);
  192. static void predictor_decode_stereo_3930(APEContext *ctx, int count);
  193. static void predictor_decode_mono_3950(APEContext *ctx, int count);
  194. static void predictor_decode_stereo_3950(APEContext *ctx, int count);
  195.  
  196. // TODO: dsputilize
  197.  
  198. static av_cold int ape_decode_close(AVCodecContext *avctx)
  199. {
  200.     APEContext *s = avctx->priv_data;
  201.     int i;
  202.  
  203.     for (i = 0; i < APE_FILTER_LEVELS; i++)
  204.         av_freep(&s->filterbuf[i]);
  205.  
  206.     av_freep(&s->decoded_buffer);
  207.     av_freep(&s->data);
  208.     s->decoded_size = s->data_size = 0;
  209.  
  210.     return 0;
  211. }
  212.  
  213. static av_cold int ape_decode_init(AVCodecContext *avctx)
  214. {
  215.     APEContext *s = avctx->priv_data;
  216.     int i;
  217.  
  218.     if (avctx->extradata_size != 6) {
  219.         av_log(avctx, AV_LOG_ERROR, "Incorrect extradata\n");
  220.         return AVERROR(EINVAL);
  221.     }
  222.     if (avctx->channels > 2) {
  223.         av_log(avctx, AV_LOG_ERROR, "Only mono and stereo are supported\n");
  224.         return AVERROR(EINVAL);
  225.     }
  226.     s->bps = avctx->bits_per_coded_sample;
  227.     switch (s->bps) {
  228.     case 8:
  229.         avctx->sample_fmt = AV_SAMPLE_FMT_U8P;
  230.         break;
  231.     case 16:
  232.         avctx->sample_fmt = AV_SAMPLE_FMT_S16P;
  233.         break;
  234.     case 24:
  235.         avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
  236.         break;
  237.     default:
  238.         avpriv_request_sample(avctx,
  239.                               "%d bits per coded sample", s->bps);
  240.         return AVERROR_PATCHWELCOME;
  241.     }
  242.     s->avctx             = avctx;
  243.     s->channels          = avctx->channels;
  244.     s->fileversion       = AV_RL16(avctx->extradata);
  245.     s->compression_level = AV_RL16(avctx->extradata + 2);
  246.     s->flags             = AV_RL16(avctx->extradata + 4);
  247.  
  248.     av_log(avctx, AV_LOG_DEBUG, "Compression Level: %d - Flags: %d\n",
  249.            s->compression_level, s->flags);
  250.     if (s->compression_level % 1000 || s->compression_level > COMPRESSION_LEVEL_INSANE ||
  251.         !s->compression_level ||
  252.         (s->fileversion < 3930 && s->compression_level == COMPRESSION_LEVEL_INSANE)) {
  253.         av_log(avctx, AV_LOG_ERROR, "Incorrect compression level %d\n",
  254.                s->compression_level);
  255.         return AVERROR_INVALIDDATA;
  256.     }
  257.     s->fset = s->compression_level / 1000 - 1;
  258.     for (i = 0; i < APE_FILTER_LEVELS; i++) {
  259.         if (!ape_filter_orders[s->fset][i])
  260.             break;
  261.         FF_ALLOC_OR_GOTO(avctx, s->filterbuf[i],
  262.                          (ape_filter_orders[s->fset][i] * 3 + HISTORY_SIZE) * 4,
  263.                          filter_alloc_fail);
  264.     }
  265.  
  266.     if (s->fileversion < 3860) {
  267.         s->entropy_decode_mono   = entropy_decode_mono_0000;
  268.         s->entropy_decode_stereo = entropy_decode_stereo_0000;
  269.     } else if (s->fileversion < 3900) {
  270.         s->entropy_decode_mono   = entropy_decode_mono_3860;
  271.         s->entropy_decode_stereo = entropy_decode_stereo_3860;
  272.     } else if (s->fileversion < 3930) {
  273.         s->entropy_decode_mono   = entropy_decode_mono_3900;
  274.         s->entropy_decode_stereo = entropy_decode_stereo_3900;
  275.     } else if (s->fileversion < 3990) {
  276.         s->entropy_decode_mono   = entropy_decode_mono_3900;
  277.         s->entropy_decode_stereo = entropy_decode_stereo_3930;
  278.     } else {
  279.         s->entropy_decode_mono   = entropy_decode_mono_3990;
  280.         s->entropy_decode_stereo = entropy_decode_stereo_3990;
  281.     }
  282.  
  283.     if (s->fileversion < 3930) {
  284.         s->predictor_decode_mono   = predictor_decode_mono_3800;
  285.         s->predictor_decode_stereo = predictor_decode_stereo_3800;
  286.     } else if (s->fileversion < 3950) {
  287.         s->predictor_decode_mono   = predictor_decode_mono_3930;
  288.         s->predictor_decode_stereo = predictor_decode_stereo_3930;
  289.     } else {
  290.         s->predictor_decode_mono   = predictor_decode_mono_3950;
  291.         s->predictor_decode_stereo = predictor_decode_stereo_3950;
  292.     }
  293.  
  294.     ff_dsputil_init(&s->dsp, avctx);
  295.     avctx->channel_layout = (avctx->channels==2) ? AV_CH_LAYOUT_STEREO : AV_CH_LAYOUT_MONO;
  296.  
  297.     return 0;
  298. filter_alloc_fail:
  299.     ape_decode_close(avctx);
  300.     return AVERROR(ENOMEM);
  301. }
  302.  
  303. /**
  304.  * @name APE range decoding functions
  305.  * @{
  306.  */
  307.  
  308. #define CODE_BITS    32
  309. #define TOP_VALUE    ((unsigned int)1 << (CODE_BITS-1))
  310. #define SHIFT_BITS   (CODE_BITS - 9)
  311. #define EXTRA_BITS   ((CODE_BITS-2) % 8 + 1)
  312. #define BOTTOM_VALUE (TOP_VALUE >> 8)
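/*
 * Byte-oriented range decoder: decoding starts from the top EXTRA_BITS of the
 * first input byte and range_dec_normalize() pulls in one further byte each
 * time the interval length drops to BOTTOM_VALUE or below.
 */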
  313.  
  314. /** Start the decoder */
  315. static inline void range_start_decoding(APEContext *ctx)
  316. {
  317.     ctx->rc.buffer = bytestream_get_byte(&ctx->ptr);
  318.     ctx->rc.low    = ctx->rc.buffer >> (8 - EXTRA_BITS);
  319.     ctx->rc.range  = (uint32_t) 1 << EXTRA_BITS;
  320. }
  321.  
  322. /** Perform normalization */
  323. static inline void range_dec_normalize(APEContext *ctx)
  324. {
  325.     while (ctx->rc.range <= BOTTOM_VALUE) {
  326.         ctx->rc.buffer <<= 8;
  327.         if(ctx->ptr < ctx->data_end) {
  328.             ctx->rc.buffer += *ctx->ptr;
  329.             ctx->ptr++;
  330.         } else {
  331.             ctx->error = 1;
  332.         }
  333.         ctx->rc.low    = (ctx->rc.low << 8)    | ((ctx->rc.buffer >> 1) & 0xFF);
  334.         ctx->rc.range  <<= 8;
  335.     }
  336. }
  337.  
  338. /**
  339.  * Calculate cumulative frequency for the next symbol. Does NO update!
  340.  * @param ctx decoder context
  341.  * @param tot_f is the total frequency or (code_value)1<<shift
  342.  * @return the cumulative frequency
  343.  */
  344. static inline int range_decode_culfreq(APEContext *ctx, int tot_f)
  345. {
  346.     range_dec_normalize(ctx);
  347.     ctx->rc.help = ctx->rc.range / tot_f;
  348.     return ctx->rc.low / ctx->rc.help;
  349. }
  350.  
  351. /**
  352.  * Decode value with given size in bits
  353.  * @param ctx decoder context
  354.  * @param shift number of bits to decode
  355.  */
  356. static inline int range_decode_culshift(APEContext *ctx, int shift)
  357. {
  358.     range_dec_normalize(ctx);
  359.     ctx->rc.help = ctx->rc.range >> shift;
  360.     return ctx->rc.low / ctx->rc.help;
  361. }
  362.  
  363.  
  364. /**
  365.  * Update decoding state
  366.  * @param ctx decoder context
  367.  * @param sy_f the interval length (frequency of the symbol)
  368.  * @param lt_f the lower end (frequency sum of < symbols)
  369.  */
  370. static inline void range_decode_update(APEContext *ctx, int sy_f, int lt_f)
  371. {
  372.     ctx->rc.low  -= ctx->rc.help * lt_f;
  373.     ctx->rc.range = ctx->rc.help * sy_f;
  374. }
  375.  
  376. /** Decode n bits (n <= 16) without modelling */
  377. static inline int range_decode_bits(APEContext *ctx, int n)
  378. {
  379.     int sym = range_decode_culshift(ctx, n);
  380.     range_decode_update(ctx, 1, sym);
  381.     return sym;
  382. }
  383.  
  384.  
  385. #define MODEL_ELEMENTS 64
  386.  
  387. /**
  388.  * Fixed probabilities for symbols in Monkey's Audio version 3.97
  389.  */
  390. static const uint16_t counts_3970[22] = {
  391.         0, 14824, 28224, 39348, 47855, 53994, 58171, 60926,
  392.     62682, 63786, 64463, 64878, 65126, 65276, 65365, 65419,
  393.     65450, 65469, 65480, 65487, 65491, 65493,
  394. };
  395.  
  396. /**
  397.  * Probability ranges for symbols in Monkey's Audio version 3.97
  398.  */
  399. static const uint16_t counts_diff_3970[21] = {
  400.     14824, 13400, 11124, 8507, 6139, 4177, 2755, 1756,
  401.     1104, 677, 415, 248, 150, 89, 54, 31,
  402.     19, 11, 7, 4, 2,
  403. };
  404.  
  405. /**
  406.  * Fixed probabilities for symbols in Monkey's Audio version 3.98
  407.  */
  408. static const uint16_t counts_3980[22] = {
  409.         0, 19578, 36160, 48417, 56323, 60899, 63265, 64435,
  410.     64971, 65232, 65351, 65416, 65447, 65466, 65476, 65482,
  411.     65485, 65488, 65490, 65491, 65492, 65493,
  412. };
  413.  
  414. /**
  415.  * Probability ranges for symbols in Monkey's Audio version 3.98
  416.  */
  417. static const uint16_t counts_diff_3980[21] = {
  418.     19578, 16582, 12257, 7906, 4576, 2366, 1170, 536,
  419.     261, 119, 65, 31, 19, 10, 6, 3,
  420.     3, 2, 1, 1, 1,
  421. };
  422.  
  423. /**
  424.  * Decode symbol
  425.  * @param ctx decoder context
  426.  * @param counts probability range start position
  427.  * @param counts_diff probability range widths
  428.  */
  429. static inline int range_get_symbol(APEContext *ctx,
  430.                                    const uint16_t counts[],
  431.                                    const uint16_t counts_diff[])
  432. {
  433.     int symbol, cf;
  434.  
  435.     cf = range_decode_culshift(ctx, 16);
  436.  
  437.     if(cf > 65492){
  438.         symbol= cf - 65535 + 63;
  439.         range_decode_update(ctx, 1, cf);
  440.         if(cf > 65535)
  441.             ctx->error=1;
  442.         return symbol;
  443.     }
  444.     /* figure out the symbol inefficiently; a binary search would be much better */
  445.     for (symbol = 0; counts[symbol + 1] <= cf; symbol++);
  446.  
  447.     range_decode_update(ctx, counts_diff[symbol], counts[symbol]);
  448.  
  449.     return symbol;
  450. }
  451. /** @} */ // group rangecoder
  452.  
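/*
 * Adaptive Rice parameter update: ksum keeps a scaled running average of the
 * coded values and k is nudged so that ksum stays roughly between
 * 1 << (k + 4) and 1 << (k + 5).
 */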
  453. static inline void update_rice(APERice *rice, unsigned int x)
  454. {
  455.     int lim = rice->k ? (1 << (rice->k + 4)) : 0;
  456.     rice->ksum += ((x + 1) / 2) - ((rice->ksum + 16) >> 5);
  457.  
  458.     if (rice->ksum < lim)
  459.         rice->k--;
  460.     else if (rice->ksum >= (1 << (rice->k + 5)))
  461.         rice->k++;
  462. }
  463.  
  464. static inline int get_rice_ook(GetBitContext *gb, int k)
  465. {
  466.     unsigned int x;
  467.  
  468.     x = get_unary(gb, 1, get_bits_left(gb));
  469.  
  470.     if (k)
  471.         x = (x << k) | get_bits(gb, k);
  472.  
  473.     return x;
  474. }
  475.  
  476. static inline int ape_decode_value_3860(APEContext *ctx, GetBitContext *gb,
  477.                                         APERice *rice)
  478. {
  479.     unsigned int x, overflow;
  480.  
  481.     overflow = get_unary(gb, 1, get_bits_left(gb));
  482.  
  483.     if (ctx->fileversion > 3880) {
  484.         while (overflow >= 16) {
  485.             overflow -= 16;
  486.             rice->k  += 4;
  487.         }
  488.     }
  489.  
  490.     if (!rice->k)
  491.         x = overflow;
  492.     else if(rice->k <= MIN_CACHE_BITS) {
  493.         x = (overflow << rice->k) + get_bits(gb, rice->k);
  494.     } else {
  495.         av_log(ctx->avctx, AV_LOG_ERROR, "Too many bits: %d\n", rice->k);
  496.         return AVERROR_INVALIDDATA;
  497.     }
  498.     rice->ksum += x - (rice->ksum + 8 >> 4);
  499.     if (rice->ksum < (rice->k ? 1 << (rice->k + 4) : 0))
  500.         rice->k--;
  501.     else if (rice->ksum >= (1 << (rice->k + 5)) && rice->k < 24)
  502.         rice->k++;
  503.  
  504.     /* Convert to signed */
  505.     if (x & 1)
  506.         return (x >> 1) + 1;
  507.     else
  508.         return -(x >> 1);
  509. }
  510.  
  511. static inline int ape_decode_value_3900(APEContext *ctx, APERice *rice)
  512. {
  513.     unsigned int x, overflow;
  514.     int tmpk;
  515.  
  516.     overflow = range_get_symbol(ctx, counts_3970, counts_diff_3970);
  517.  
  518.     if (overflow == (MODEL_ELEMENTS - 1)) {
  519.         tmpk = range_decode_bits(ctx, 5);
  520.         overflow = 0;
  521.     } else
  522.         tmpk = (rice->k < 1) ? 0 : rice->k - 1;
  523.  
  524.     if (tmpk <= 16 || ctx->fileversion < 3910) {
  525.         if (tmpk > 23) {
  526.             av_log(ctx->avctx, AV_LOG_ERROR, "Too many bits: %d\n", tmpk);
  527.             return AVERROR_INVALIDDATA;
  528.         }
  529.         x = range_decode_bits(ctx, tmpk);
  530.     } else if (tmpk <= 32) {
  531.         x = range_decode_bits(ctx, 16);
  532.         x |= (range_decode_bits(ctx, tmpk - 16) << 16);
  533.     } else {
  534.         av_log(ctx->avctx, AV_LOG_ERROR, "Too many bits: %d\n", tmpk);
  535.         return AVERROR_INVALIDDATA;
  536.     }
  537.     x += overflow << tmpk;
  538.  
  539.     update_rice(rice, x);
  540.  
  541.     /* Convert to signed */
  542.     if (x & 1)
  543.         return (x >> 1) + 1;
  544.     else
  545.         return -(x >> 1);
  546. }
  547.  
  548. static inline int ape_decode_value_3990(APEContext *ctx, APERice *rice)
  549. {
  550.     unsigned int x, overflow;
  551.     int base, pivot;
  552.  
  553.     pivot = rice->ksum >> 5;
  554.     if (pivot == 0)
  555.         pivot = 1;
  556.  
  557.     overflow = range_get_symbol(ctx, counts_3980, counts_diff_3980);
  558.  
  559.     if (overflow == (MODEL_ELEMENTS - 1)) {
  560.         overflow  = range_decode_bits(ctx, 16) << 16;
  561.         overflow |= range_decode_bits(ctx, 16);
  562.     }
  563.  
  564.     if (pivot < 0x10000) {
  565.         base = range_decode_culfreq(ctx, pivot);
  566.         range_decode_update(ctx, 1, base);
  567.     } else {
  568.         int base_hi = pivot, base_lo;
  569.         int bbits = 0;
  570.  
  571.         while (base_hi & ~0xFFFF) {
  572.             base_hi >>= 1;
  573.             bbits++;
  574.         }
  575.         base_hi = range_decode_culfreq(ctx, base_hi + 1);
  576.         range_decode_update(ctx, 1, base_hi);
  577.         base_lo = range_decode_culfreq(ctx, 1 << bbits);
  578.         range_decode_update(ctx, 1, base_lo);
  579.  
  580.         base = (base_hi << bbits) + base_lo;
  581.     }
  582.  
  583.     x = base + overflow * pivot;
  584.  
  585.     update_rice(rice, x);
  586.  
  587.     /* Convert to signed */
  588.     if (x & 1)
  589.         return (x >> 1) + 1;
  590.     else
  591.         return -(x >> 1);
  592. }
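/*
 * All ape_decode_value_* variants fold the unsigned code x back to a signed
 * residual the same way: odd x maps to (x >> 1) + 1, even x to -(x >> 1).
 */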
  593.  
  594. static void decode_array_0000(APEContext *ctx, GetBitContext *gb,
  595.                               int32_t *out, APERice *rice, int blockstodecode)
  596. {
  597.     int i;
  598.     int ksummax, ksummin;
  599.  
  600.     rice->ksum = 0;
  601.     for (i = 0; i < 5; i++) {
  602.         out[i] = get_rice_ook(&ctx->gb, 10);
  603.         rice->ksum += out[i];
  604.     }
  605.     rice->k = av_log2(rice->ksum / 10) + 1;
  606.     for (; i < 64; i++) {
  607.         out[i] = get_rice_ook(&ctx->gb, rice->k);
  608.         rice->ksum += out[i];
  609.         rice->k = av_log2(rice->ksum / ((i + 1) * 2)) + 1;
  610.     }
  611.     ksummax = 1 << rice->k + 7;
  612.     ksummin = rice->k ? (1 << rice->k + 6) : 0;
  613.     for (; i < blockstodecode; i++) {
  614.         out[i] = get_rice_ook(&ctx->gb, rice->k);
  615.         rice->ksum += out[i] - out[i - 64];
  616.         while (rice->ksum < ksummin) {
  617.             rice->k--;
  618.             ksummin = rice->k ? ksummin >> 1 : 0;
  619.             ksummax >>= 1;
  620.         }
  621.         while (rice->ksum >= ksummax) {
  622.             rice->k++;
  623.             if (rice->k > 24)
  624.                 return;
  625.             ksummax <<= 1;
  626.             ksummin = ksummin ? ksummin << 1 : 128;
  627.         }
  628.     }
  629.  
  630.     for (i = 0; i < blockstodecode; i++) {
  631.         if (out[i] & 1)
  632.             out[i] = (out[i] >> 1) + 1;
  633.         else
  634.             out[i] = -(out[i] >> 1);
  635.     }
  636. }
  637.  
  638. static void entropy_decode_mono_0000(APEContext *ctx, int blockstodecode)
  639. {
  640.     decode_array_0000(ctx, &ctx->gb, ctx->decoded[0], &ctx->riceY,
  641.                       blockstodecode);
  642. }
  643.  
  644. static void entropy_decode_stereo_0000(APEContext *ctx, int blockstodecode)
  645. {
  646.     decode_array_0000(ctx, &ctx->gb, ctx->decoded[0], &ctx->riceY,
  647.                       blockstodecode);
  648.     decode_array_0000(ctx, &ctx->gb, ctx->decoded[1], &ctx->riceX,
  649.                       blockstodecode);
  650. }
  651.  
  652. static void entropy_decode_mono_3860(APEContext *ctx, int blockstodecode)
  653. {
  654.     int32_t *decoded0 = ctx->decoded[0];
  655.  
  656.     while (blockstodecode--)
  657.         *decoded0++ = ape_decode_value_3860(ctx, &ctx->gb, &ctx->riceY);
  658. }
  659.  
  660. static void entropy_decode_stereo_3860(APEContext *ctx, int blockstodecode)
  661. {
  662.     int32_t *decoded0 = ctx->decoded[0];
  663.     int32_t *decoded1 = ctx->decoded[1];
  664.     int blocks = blockstodecode;
  665.  
  666.     while (blockstodecode--)
  667.         *decoded0++ = ape_decode_value_3860(ctx, &ctx->gb, &ctx->riceY);
  668.     while (blocks--)
  669.         *decoded1++ = ape_decode_value_3860(ctx, &ctx->gb, &ctx->riceX);
  670. }
  671.  
  672. static void entropy_decode_mono_3900(APEContext *ctx, int blockstodecode)
  673. {
  674.     int32_t *decoded0 = ctx->decoded[0];
  675.  
  676.     while (blockstodecode--)
  677.         *decoded0++ = ape_decode_value_3900(ctx, &ctx->riceY);
  678. }
  679.  
  680. static void entropy_decode_stereo_3900(APEContext *ctx, int blockstodecode)
  681. {
  682.     int32_t *decoded0 = ctx->decoded[0];
  683.     int32_t *decoded1 = ctx->decoded[1];
  684.     int blocks = blockstodecode;
  685.  
  686.     while (blockstodecode--)
  687.         *decoded0++ = ape_decode_value_3900(ctx, &ctx->riceY);
  688.     range_dec_normalize(ctx);
  689.     // because of some implementation peculiarities we need to backpedal here
  690.     ctx->ptr -= 1;
  691.     range_start_decoding(ctx);
  692.     while (blocks--)
  693.         *decoded1++ = ape_decode_value_3900(ctx, &ctx->riceX);
  694. }
  695.  
  696. static void entropy_decode_stereo_3930(APEContext *ctx, int blockstodecode)
  697. {
  698.     int32_t *decoded0 = ctx->decoded[0];
  699.     int32_t *decoded1 = ctx->decoded[1];
  700.  
  701.     while (blockstodecode--) {
  702.         *decoded0++ = ape_decode_value_3900(ctx, &ctx->riceY);
  703.         *decoded1++ = ape_decode_value_3900(ctx, &ctx->riceX);
  704.     }
  705. }
  706.  
  707. static void entropy_decode_mono_3990(APEContext *ctx, int blockstodecode)
  708. {
  709.     int32_t *decoded0 = ctx->decoded[0];
  710.  
  711.     while (blockstodecode--)
  712.         *decoded0++ = ape_decode_value_3990(ctx, &ctx->riceY);
  713. }
  714.  
  715. static void entropy_decode_stereo_3990(APEContext *ctx, int blockstodecode)
  716. {
  717.     int32_t *decoded0 = ctx->decoded[0];
  718.     int32_t *decoded1 = ctx->decoded[1];
  719.  
  720.     while (blockstodecode--) {
  721.         *decoded0++ = ape_decode_value_3990(ctx, &ctx->riceY);
  722.         *decoded1++ = ape_decode_value_3990(ctx, &ctx->riceX);
  723.     }
  724. }
  725.  
  726. static int init_entropy_decoder(APEContext *ctx)
  727. {
  728.     /* Read the CRC */
  729.     if (ctx->fileversion >= 3900) {
  730.         if (ctx->data_end - ctx->ptr < 6)
  731.             return AVERROR_INVALIDDATA;
  732.         ctx->CRC = bytestream_get_be32(&ctx->ptr);
  733.     } else {
  734.         ctx->CRC = get_bits_long(&ctx->gb, 32);
  735.     }
  736.  
  737.     /* Read the frame flags if they exist */
  738.     ctx->frameflags = 0;
  739.     if ((ctx->fileversion > 3820) && (ctx->CRC & 0x80000000)) {
  740.         ctx->CRC &= ~0x80000000;
  741.  
  742.         if (ctx->data_end - ctx->ptr < 6)
  743.             return AVERROR_INVALIDDATA;
  744.         ctx->frameflags = bytestream_get_be32(&ctx->ptr);
  745.     }
  746.  
  747.     /* Initialize the rice structs */
  748.     ctx->riceX.k = 10;
  749.     ctx->riceX.ksum = (1 << ctx->riceX.k) * 16;
  750.     ctx->riceY.k = 10;
  751.     ctx->riceY.ksum = (1 << ctx->riceY.k) * 16;
  752.  
  753.     if (ctx->fileversion >= 3900) {
  754.         /* The first 8 bits of input are ignored. */
  755.         ctx->ptr++;
  756.  
  757.         range_start_decoding(ctx);
  758.     }
  759.  
  760.     return 0;
  761. }
  762.  
  763. static const int32_t initial_coeffs_fast_3320[1] = {
  764.     375,
  765. };
  766.  
  767. static const int32_t initial_coeffs_a_3800[3] = {
  768.     64, 115, 64,
  769. };
  770.  
  771. static const int32_t initial_coeffs_b_3800[2] = {
  772.     740, 0
  773. };
  774.  
  775. static const int32_t initial_coeffs_3930[4] = {
  776.     360, 317, -109, 98
  777. };
  778.  
  779. static void init_predictor_decoder(APEContext *ctx)
  780. {
  781.     APEPredictor *p = &ctx->predictor;
  782.  
  783.     /* Zero the history buffers */
  784.     memset(p->historybuffer, 0, PREDICTOR_SIZE * sizeof(*p->historybuffer));
  785.     p->buf = p->historybuffer;
  786.  
  787.     /* Initialize and zero the coefficients */
  788.     if (ctx->fileversion < 3930) {
  789.         if (ctx->compression_level == COMPRESSION_LEVEL_FAST) {
  790.             memcpy(p->coeffsA[0], initial_coeffs_fast_3320,
  791.                    sizeof(initial_coeffs_fast_3320));
  792.             memcpy(p->coeffsA[1], initial_coeffs_fast_3320,
  793.                    sizeof(initial_coeffs_fast_3320));
  794.         } else {
  795.             memcpy(p->coeffsA[0], initial_coeffs_a_3800,
  796.                    sizeof(initial_coeffs_a_3800));
  797.             memcpy(p->coeffsA[1], initial_coeffs_a_3800,
  798.                    sizeof(initial_coeffs_a_3800));
  799.         }
  800.     } else {
  801.         memcpy(p->coeffsA[0], initial_coeffs_3930, sizeof(initial_coeffs_3930));
  802.         memcpy(p->coeffsA[1], initial_coeffs_3930, sizeof(initial_coeffs_3930));
  803.     }
  804.     memset(p->coeffsB, 0, sizeof(p->coeffsB));
  805.     if (ctx->fileversion < 3930) {
  806.         memcpy(p->coeffsB[0], initial_coeffs_b_3800,
  807.                sizeof(initial_coeffs_b_3800));
  808.         memcpy(p->coeffsB[1], initial_coeffs_b_3800,
  809.                sizeof(initial_coeffs_b_3800));
  810.     }
  811.  
  812.     p->filterA[0] = p->filterA[1] = 0;
  813.     p->filterB[0] = p->filterB[1] = 0;
  814.     p->lastA[0]   = p->lastA[1]   = 0;
  815.  
  816.     p->sample_pos = 0;
  817. }
  818.  
  819. /** Get inverse sign of integer (-1 for positive, 1 for negative and 0 for zero) */
  820. static inline int APESIGN(int32_t x) {
  821.     return (x < 0) - (x > 0);
  822. }
  823.  
  824. static av_always_inline int filter_fast_3320(APEPredictor *p,
  825.                                              const int decoded, const int filter,
  826.                                              const int delayA)
  827. {
  828.     int32_t predictionA;
  829.  
  830.     p->buf[delayA] = p->lastA[filter];
  831.     if (p->sample_pos < 3) {
  832.         p->lastA[filter]   = decoded;
  833.         p->filterA[filter] = decoded;
  834.         return decoded;
  835.     }
  836.  
  837.     predictionA = p->buf[delayA] * 2 - p->buf[delayA - 1];
  838.     p->lastA[filter] = decoded + (predictionA  * p->coeffsA[filter][0] >> 9);
  839.  
  840.     if ((decoded ^ predictionA) > 0)
  841.         p->coeffsA[filter][0]++;
  842.     else
  843.         p->coeffsA[filter][0]--;
  844.  
  845.     p->filterA[filter] += p->lastA[filter];
  846.  
  847.     return p->filterA[filter];
  848. }
  849.  
  850. static av_always_inline int filter_3800(APEPredictor *p,
  851.                                         const int decoded, const int filter,
  852.                                         const int delayA,  const int delayB,
  853.                                         const int start,   const int shift)
  854. {
  855.     int32_t predictionA, predictionB, sign;
  856.     int32_t d0, d1, d2, d3, d4;
  857.  
  858.     p->buf[delayA] = p->lastA[filter];
  859.     p->buf[delayB] = p->filterB[filter];
  860.     if (p->sample_pos < start) {
  861.         predictionA = decoded + p->filterA[filter];
  862.         p->lastA[filter]   = decoded;
  863.         p->filterB[filter] = decoded;
  864.         p->filterA[filter] = predictionA;
  865.         return predictionA;
  866.     }
  867.     d2 =  p->buf[delayA];
  868.     d1 = (p->buf[delayA] - p->buf[delayA - 1]) << 1;
  869.     d0 =  p->buf[delayA] + ((p->buf[delayA - 2] - p->buf[delayA - 1]) << 3);
  870.     d3 =  p->buf[delayB] * 2 - p->buf[delayB - 1];
  871.     d4 =  p->buf[delayB];
  872.  
  873.     predictionA = d0 * p->coeffsA[filter][0] +
  874.                   d1 * p->coeffsA[filter][1] +
  875.                   d2 * p->coeffsA[filter][2];
  876.  
  877.     sign = APESIGN(decoded);
  878.     p->coeffsA[filter][0] += (((d0 >> 30) & 2) - 1) * sign;
  879.     p->coeffsA[filter][1] += (((d1 >> 28) & 8) - 4) * sign;
  880.     p->coeffsA[filter][2] += (((d2 >> 28) & 8) - 4) * sign;
  881.  
  882.     predictionB = d3 * p->coeffsB[filter][0] -
  883.                   d4 * p->coeffsB[filter][1];
  884.     p->lastA[filter] = decoded + (predictionA >> 11);
  885.     sign = APESIGN(p->lastA[filter]);
  886.     p->coeffsB[filter][0] += (((d3 >> 29) & 4) - 2) * sign;
  887.     p->coeffsB[filter][1] -= (((d4 >> 30) & 2) - 1) * sign;
  888.  
  889.     p->filterB[filter] = p->lastA[filter] + (predictionB >> shift);
  890.     p->filterA[filter] = p->filterB[filter] + ((p->filterA[filter] * 31) >> 5);
  891.  
  892.     return p->filterA[filter];
  893. }
  894.  
  895. static void long_filter_high_3800(int32_t *buffer, int order, int shift,
  896.                                   int32_t *coeffs, int32_t *delay, int length)
  897. {
  898.     int i, j;
  899.     int32_t dotprod, sign;
  900.  
  901.     memset(coeffs, 0, order * sizeof(*coeffs));
  902.     for (i = 0; i < order; i++)
  903.         delay[i] = buffer[i];
  904.     for (i = order; i < length; i++) {
  905.         dotprod = 0;
  906.         sign = APESIGN(buffer[i]);
  907.         for (j = 0; j < order; j++) {
  908.             dotprod += delay[j] * coeffs[j];
  909.             coeffs[j] -= (((delay[j] >> 30) & 2) - 1) * sign;
  910.         }
  911.         buffer[i] -= dotprod >> shift;
  912.         for (j = 0; j < order - 1; j++)
  913.             delay[j] = delay[j + 1];
  914.         delay[order - 1] = buffer[i];
  915.     }
  916. }
  917.  
  918. static void long_filter_ehigh_3830(int32_t *buffer, int length)
  919. {
  920.     int i, j;
  921.     int32_t dotprod, sign;
  922.     int32_t coeffs[8], delay[8];
  923.  
  924.     memset(coeffs, 0, sizeof(coeffs));
  925.     memset(delay,  0, sizeof(delay));
  926.     for (i = 0; i < length; i++) {
  927.         dotprod = 0;
  928.         sign = APESIGN(buffer[i]);
  929.         for (j = 7; j >= 0; j--) {
  930.             dotprod += delay[j] * coeffs[j];
  931.             coeffs[j] -= (((delay[j] >> 30) & 2) - 1) * sign;
  932.         }
  933.         for (j = 7; j > 0; j--)
  934.             delay[j] = delay[j - 1];
  935.         delay[0] = buffer[i];
  936.         buffer[i] -= dotprod >> 9;
  937.     }
  938. }
  939.  
  940. static void predictor_decode_stereo_3800(APEContext *ctx, int count)
  941. {
  942.     APEPredictor *p = &ctx->predictor;
  943.     int32_t *decoded0 = ctx->decoded[0];
  944.     int32_t *decoded1 = ctx->decoded[1];
  945.     int32_t coeffs[256], delay[256];
  946.     int start = 4, shift = 10;
  947.  
  948.     if (ctx->compression_level == COMPRESSION_LEVEL_HIGH) {
  949.         start = 16;
  950.         long_filter_high_3800(decoded0, 16, 9, coeffs, delay, count);
  951.         long_filter_high_3800(decoded1, 16, 9, coeffs, delay, count);
  952.     } else if (ctx->compression_level == COMPRESSION_LEVEL_EXTRA_HIGH) {
  953.         int order = 128, shift2 = 11;
  954.  
  955.         if (ctx->fileversion >= 3830) {
  956.             order <<= 1;
  957.             shift++;
  958.             shift2++;
  959.             long_filter_ehigh_3830(decoded0 + order, count - order);
  960.             long_filter_ehigh_3830(decoded1 + order, count - order);
  961.         }
  962.         start = order;
  963.         long_filter_high_3800(decoded0, order, shift2, coeffs, delay, count);
  964.         long_filter_high_3800(decoded1, order, shift2, coeffs, delay, count);
  965.     }
  966.  
  967.     while (count--) {
  968.         int X = *decoded0, Y = *decoded1;
  969.         if (ctx->compression_level == COMPRESSION_LEVEL_FAST) {
  970.             *decoded0 = filter_fast_3320(p, Y, 0, YDELAYA);
  971.             decoded0++;
  972.             *decoded1 = filter_fast_3320(p, X, 1, XDELAYA);
  973.             decoded1++;
  974.         } else {
  975.             *decoded0 = filter_3800(p, Y, 0, YDELAYA, YDELAYB,
  976.                                     start, shift);
  977.             decoded0++;
  978.             *decoded1 = filter_3800(p, X, 1, XDELAYA, XDELAYB,
  979.                                     start, shift);
  980.             decoded1++;
  981.         }
  982.  
  983.         /* Combined */
  984.         p->buf++;
  985.         p->sample_pos++;
  986.  
  987.         /* Have we filled the history buffer? */
  988.         if (p->buf == p->historybuffer + HISTORY_SIZE) {
  989.             memmove(p->historybuffer, p->buf,
  990.                     PREDICTOR_SIZE * sizeof(*p->historybuffer));
  991.             p->buf = p->historybuffer;
  992.         }
  993.     }
  994. }
  995.  
  996. static void predictor_decode_mono_3800(APEContext *ctx, int count)
  997. {
  998.     APEPredictor *p = &ctx->predictor;
  999.     int32_t *decoded0 = ctx->decoded[0];
  1000.     int32_t coeffs[256], delay[256];
  1001.     int start = 4, shift = 10;
  1002.  
  1003.     if (ctx->compression_level == COMPRESSION_LEVEL_HIGH) {
  1004.         start = 16;
  1005.         long_filter_high_3800(decoded0, 16, 9, coeffs, delay, count);
  1006.     } else if (ctx->compression_level == COMPRESSION_LEVEL_EXTRA_HIGH) {
  1007.         int order = 128, shift2 = 11;
  1008.  
  1009.         if (ctx->fileversion >= 3830) {
  1010.             order <<= 1;
  1011.             shift++;
  1012.             shift2++;
  1013.             long_filter_ehigh_3830(decoded0 + order, count - order);
  1014.         }
  1015.         start = order;
  1016.         long_filter_high_3800(decoded0, order, shift2, coeffs, delay, count);
  1017.     }
  1018.  
  1019.     while (count--) {
  1020.         if (ctx->compression_level == COMPRESSION_LEVEL_FAST) {
  1021.             *decoded0 = filter_fast_3320(p, *decoded0, 0, YDELAYA);
  1022.             decoded0++;
  1023.         } else {
  1024.             *decoded0 = filter_3800(p, *decoded0, 0, YDELAYA, YDELAYB,
  1025.                                     start, shift);
  1026.             decoded0++;
  1027.         }
  1028.  
  1029.         /* Combined */
  1030.         p->buf++;
  1031.         p->sample_pos++;
  1032.  
  1033.         /* Have we filled the history buffer? */
  1034.         if (p->buf == p->historybuffer + HISTORY_SIZE) {
  1035.             memmove(p->historybuffer, p->buf,
  1036.                     PREDICTOR_SIZE * sizeof(*p->historybuffer));
  1037.             p->buf = p->historybuffer;
  1038.         }
  1039.     }
  1040. }
  1041.  
  1042. static av_always_inline int predictor_update_3930(APEPredictor *p,
  1043.                                                   const int decoded, const int filter,
  1044.                                                   const int delayA)
  1045. {
  1046.     int32_t predictionA, sign;
  1047.     int32_t d0, d1, d2, d3;
  1048.  
  1049.     p->buf[delayA]     = p->lastA[filter];
  1050.     d0 = p->buf[delayA    ];
  1051.     d1 = p->buf[delayA    ] - p->buf[delayA - 1];
  1052.     d2 = p->buf[delayA - 1] - p->buf[delayA - 2];
  1053.     d3 = p->buf[delayA - 2] - p->buf[delayA - 3];
  1054.  
  1055.     predictionA = d0 * p->coeffsA[filter][0] +
  1056.                   d1 * p->coeffsA[filter][1] +
  1057.                   d2 * p->coeffsA[filter][2] +
  1058.                   d3 * p->coeffsA[filter][3];
  1059.  
  1060.     p->lastA[filter] = decoded + (predictionA >> 9);
  1061.     p->filterA[filter] = p->lastA[filter] + ((p->filterA[filter] * 31) >> 5);
  1062.  
  1063.     sign = APESIGN(decoded);
  1064.     p->coeffsA[filter][0] += ((d0 < 0) * 2 - 1) * sign;
  1065.     p->coeffsA[filter][1] += ((d1 < 0) * 2 - 1) * sign;
  1066.     p->coeffsA[filter][2] += ((d2 < 0) * 2 - 1) * sign;
  1067.     p->coeffsA[filter][3] += ((d3 < 0) * 2 - 1) * sign;
  1068.  
  1069.     return p->filterA[filter];
  1070. }
  1071.  
  1072. static void predictor_decode_stereo_3930(APEContext *ctx, int count)
  1073. {
  1074.     APEPredictor *p = &ctx->predictor;
  1075.     int32_t *decoded0 = ctx->decoded[0];
  1076.     int32_t *decoded1 = ctx->decoded[1];
  1077.  
  1078.     ape_apply_filters(ctx, ctx->decoded[0], ctx->decoded[1], count);
  1079.  
  1080.     while (count--) {
  1081.         /* Predictor Y */
  1082.         int Y = *decoded1, X = *decoded0;
  1083.         *decoded0 = predictor_update_3930(p, Y, 0, YDELAYA);
  1084.         decoded0++;
  1085.         *decoded1 = predictor_update_3930(p, X, 1, XDELAYA);
  1086.         decoded1++;
  1087.  
  1088.         /* Combined */
  1089.         p->buf++;
  1090.  
  1091.         /* Have we filled the history buffer? */
  1092.         if (p->buf == p->historybuffer + HISTORY_SIZE) {
  1093.             memmove(p->historybuffer, p->buf,
  1094.                     PREDICTOR_SIZE * sizeof(*p->historybuffer));
  1095.             p->buf = p->historybuffer;
  1096.         }
  1097.     }
  1098. }
  1099.  
  1100. static void predictor_decode_mono_3930(APEContext *ctx, int count)
  1101. {
  1102.     APEPredictor *p = &ctx->predictor;
  1103.     int32_t *decoded0 = ctx->decoded[0];
  1104.  
  1105.     ape_apply_filters(ctx, ctx->decoded[0], NULL, count);
  1106.  
  1107.     while (count--) {
  1108.         *decoded0 = predictor_update_3930(p, *decoded0, 0, YDELAYA);
  1109.         decoded0++;
  1110.  
  1111.         p->buf++;
  1112.  
  1113.         /* Have we filled the history buffer? */
  1114.         if (p->buf == p->historybuffer + HISTORY_SIZE) {
  1115.             memmove(p->historybuffer, p->buf,
  1116.                     PREDICTOR_SIZE * sizeof(*p->historybuffer));
  1117.             p->buf = p->historybuffer;
  1118.         }
  1119.     }
  1120. }
  1121.  
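/*
 * 3.95+ predictor: stage A predicts from the current channel's own history,
 * stage B from the other channel's filtered output (filterA[filter ^ 1]); the
 * coefficients of both stages are adapted by the sign of the decoded residual.
 */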
  1122. static av_always_inline int predictor_update_filter(APEPredictor *p,
  1123.                                                     const int decoded, const int filter,
  1124.                                                     const int delayA,  const int delayB,
  1125.                                                     const int adaptA,  const int adaptB)
  1126. {
  1127.     int32_t predictionA, predictionB, sign;
  1128.  
  1129.     p->buf[delayA]     = p->lastA[filter];
  1130.     p->buf[adaptA]     = APESIGN(p->buf[delayA]);
  1131.     p->buf[delayA - 1] = p->buf[delayA] - p->buf[delayA - 1];
  1132.     p->buf[adaptA - 1] = APESIGN(p->buf[delayA - 1]);
  1133.  
  1134.     predictionA = p->buf[delayA    ] * p->coeffsA[filter][0] +
  1135.                   p->buf[delayA - 1] * p->coeffsA[filter][1] +
  1136.                   p->buf[delayA - 2] * p->coeffsA[filter][2] +
  1137.                   p->buf[delayA - 3] * p->coeffsA[filter][3];
  1138.  
  1139.     /*  Apply a scaled first-order filter compression */
  1140.     p->buf[delayB]     = p->filterA[filter ^ 1] - ((p->filterB[filter] * 31) >> 5);
  1141.     p->buf[adaptB]     = APESIGN(p->buf[delayB]);
  1142.     p->buf[delayB - 1] = p->buf[delayB] - p->buf[delayB - 1];
  1143.     p->buf[adaptB - 1] = APESIGN(p->buf[delayB - 1]);
  1144.     p->filterB[filter] = p->filterA[filter ^ 1];
  1145.  
  1146.     predictionB = p->buf[delayB    ] * p->coeffsB[filter][0] +
  1147.                   p->buf[delayB - 1] * p->coeffsB[filter][1] +
  1148.                   p->buf[delayB - 2] * p->coeffsB[filter][2] +
  1149.                   p->buf[delayB - 3] * p->coeffsB[filter][3] +
  1150.                   p->buf[delayB - 4] * p->coeffsB[filter][4];
  1151.  
  1152.     p->lastA[filter] = decoded + ((predictionA + (predictionB >> 1)) >> 10);
  1153.     p->filterA[filter] = p->lastA[filter] + ((p->filterA[filter] * 31) >> 5);
  1154.  
  1155.     sign = APESIGN(decoded);
  1156.     p->coeffsA[filter][0] += p->buf[adaptA    ] * sign;
  1157.     p->coeffsA[filter][1] += p->buf[adaptA - 1] * sign;
  1158.     p->coeffsA[filter][2] += p->buf[adaptA - 2] * sign;
  1159.     p->coeffsA[filter][3] += p->buf[adaptA - 3] * sign;
  1160.     p->coeffsB[filter][0] += p->buf[adaptB    ] * sign;
  1161.     p->coeffsB[filter][1] += p->buf[adaptB - 1] * sign;
  1162.     p->coeffsB[filter][2] += p->buf[adaptB - 2] * sign;
  1163.     p->coeffsB[filter][3] += p->buf[adaptB - 3] * sign;
  1164.     p->coeffsB[filter][4] += p->buf[adaptB - 4] * sign;
  1165.  
  1166.     return p->filterA[filter];
  1167. }
  1168.  
  1169. static void predictor_decode_stereo_3950(APEContext *ctx, int count)
  1170. {
  1171.     APEPredictor *p = &ctx->predictor;
  1172.     int32_t *decoded0 = ctx->decoded[0];
  1173.     int32_t *decoded1 = ctx->decoded[1];
  1174.  
  1175.     ape_apply_filters(ctx, ctx->decoded[0], ctx->decoded[1], count);
  1176.  
  1177.     while (count--) {
  1178.         /* Predictor Y */
  1179.         *decoded0 = predictor_update_filter(p, *decoded0, 0, YDELAYA, YDELAYB,
  1180.                                             YADAPTCOEFFSA, YADAPTCOEFFSB);
  1181.         decoded0++;
  1182.         *decoded1 = predictor_update_filter(p, *decoded1, 1, XDELAYA, XDELAYB,
  1183.                                             XADAPTCOEFFSA, XADAPTCOEFFSB);
  1184.         decoded1++;
  1185.  
  1186.         /* Combined */
  1187.         p->buf++;
  1188.  
  1189.         /* Have we filled the history buffer? */
  1190.         if (p->buf == p->historybuffer + HISTORY_SIZE) {
  1191.             memmove(p->historybuffer, p->buf,
  1192.                     PREDICTOR_SIZE * sizeof(*p->historybuffer));
  1193.             p->buf = p->historybuffer;
  1194.         }
  1195.     }
  1196. }
  1197.  
  1198. static void predictor_decode_mono_3950(APEContext *ctx, int count)
  1199. {
  1200.     APEPredictor *p = &ctx->predictor;
  1201.     int32_t *decoded0 = ctx->decoded[0];
  1202.     int32_t predictionA, currentA, A, sign;
  1203.  
  1204.     ape_apply_filters(ctx, ctx->decoded[0], NULL, count);
  1205.  
  1206.     currentA = p->lastA[0];
  1207.  
  1208.     while (count--) {
  1209.         A = *decoded0;
  1210.  
  1211.         p->buf[YDELAYA] = currentA;
  1212.         p->buf[YDELAYA - 1] = p->buf[YDELAYA] - p->buf[YDELAYA - 1];
  1213.  
  1214.         predictionA = p->buf[YDELAYA    ] * p->coeffsA[0][0] +
  1215.                       p->buf[YDELAYA - 1] * p->coeffsA[0][1] +
  1216.                       p->buf[YDELAYA - 2] * p->coeffsA[0][2] +
  1217.                       p->buf[YDELAYA - 3] * p->coeffsA[0][3];
  1218.  
  1219.         currentA = A + (predictionA >> 10);
  1220.  
  1221.         p->buf[YADAPTCOEFFSA]     = APESIGN(p->buf[YDELAYA    ]);
  1222.         p->buf[YADAPTCOEFFSA - 1] = APESIGN(p->buf[YDELAYA - 1]);
  1223.  
  1224.         sign = APESIGN(A);
  1225.         p->coeffsA[0][0] += p->buf[YADAPTCOEFFSA    ] * sign;
  1226.         p->coeffsA[0][1] += p->buf[YADAPTCOEFFSA - 1] * sign;
  1227.         p->coeffsA[0][2] += p->buf[YADAPTCOEFFSA - 2] * sign;
  1228.         p->coeffsA[0][3] += p->buf[YADAPTCOEFFSA - 3] * sign;
  1229.  
  1230.         p->buf++;
  1231.  
  1232.         /* Have we filled the history buffer? */
  1233.         if (p->buf == p->historybuffer + HISTORY_SIZE) {
  1234.             memmove(p->historybuffer, p->buf,
  1235.                     PREDICTOR_SIZE * sizeof(*p->historybuffer));
  1236.             p->buf = p->historybuffer;
  1237.         }
  1238.  
  1239.         p->filterA[0] = currentA + ((p->filterA[0] * 31) >> 5);
  1240.         *(decoded0++) = p->filterA[0];
  1241.     }
  1242.  
  1243.     p->lastA[0] = currentA;
  1244. }
  1245.  
  1246. static void do_init_filter(APEFilter *f, int16_t *buf, int order)
  1247. {
  1248.     f->coeffs = buf;
  1249.     f->historybuffer = buf + order;
  1250.     f->delay       = f->historybuffer + order * 2;
  1251.     f->adaptcoeffs = f->historybuffer + order;
  1252.  
  1253.     memset(f->historybuffer, 0, (order * 2) * sizeof(*f->historybuffer));
  1254.     memset(f->coeffs, 0, order * sizeof(*f->coeffs));
  1255.     f->avg = 0;
  1256. }
  1257.  
  1258. static void init_filter(APEContext *ctx, APEFilter *f, int16_t *buf, int order)
  1259. {
  1260.     do_init_filter(&f[0], buf, order);
  1261.     do_init_filter(&f[1], buf + order * 3 + HISTORY_SIZE, order);
  1262. }
  1263.  
  1264. static void do_apply_filter(APEContext *ctx, int version, APEFilter *f,
  1265.                             int32_t *data, int count, int order, int fracbits)
  1266. {
  1267.     int res;
  1268.     int absres;
  1269.  
  1270.     while (count--) {
  1271.         /* round fixedpoint scalar product */
  1272.         res = ctx->dsp.scalarproduct_and_madd_int16(f->coeffs, f->delay - order,
  1273.                                                     f->adaptcoeffs - order,
  1274.                                                     order, APESIGN(*data));
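        /* scalarproduct_and_madd_int16() returns the dot product of the filter
         * coefficients with the delay history and, in the same pass, nudges
         * each coefficient by sign(*data) times the matching adaptcoeffs entry */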
  1275.         res = (res + (1 << (fracbits - 1))) >> fracbits;
  1276.         res += *data;
  1277.         *data++ = res;
  1278.  
  1279.         /* Update the output history */
  1280.         *f->delay++ = av_clip_int16(res);
  1281.  
  1282.         if (version < 3980) {
  1283.             /* Version ??? to < 3.98 files (untested) */
  1284.             f->adaptcoeffs[0]  = (res == 0) ? 0 : ((res >> 28) & 8) - 4;
  1285.             f->adaptcoeffs[-4] >>= 1;
  1286.             f->adaptcoeffs[-8] >>= 1;
  1287.         } else {
  1288.             /* Version 3.98 and later files */
  1289.  
  1290.             /* Update the adaptation coefficients */
  1291.             absres = FFABS(res);
  1292.             if (absres)
  1293.                 *f->adaptcoeffs = ((res & (-1<<31)) ^ (-1<<30)) >>
  1294.                                   (25 + (absres <= f->avg*3) + (absres <= f->avg*4/3));
  1295.             else
  1296.                 *f->adaptcoeffs = 0;
  1297.  
  1298.             f->avg += (absres - f->avg) / 16;
  1299.  
  1300.             f->adaptcoeffs[-1] >>= 1;
  1301.             f->adaptcoeffs[-2] >>= 1;
  1302.             f->adaptcoeffs[-8] >>= 1;
  1303.         }
  1304.  
  1305.         f->adaptcoeffs++;
  1306.  
  1307.         /* Have we filled the history buffer? */
  1308.         if (f->delay == f->historybuffer + HISTORY_SIZE + (order * 2)) {
  1309.             memmove(f->historybuffer, f->delay - (order * 2),
  1310.                     (order * 2) * sizeof(*f->historybuffer));
  1311.             f->delay = f->historybuffer + order * 2;
  1312.             f->adaptcoeffs = f->historybuffer + order;
  1313.         }
  1314.     }
  1315. }
  1316.  
  1317. static void apply_filter(APEContext *ctx, APEFilter *f,
  1318.                          int32_t *data0, int32_t *data1,
  1319.                          int count, int order, int fracbits)
  1320. {
  1321.     do_apply_filter(ctx, ctx->fileversion, &f[0], data0, count, order, fracbits);
  1322.     if (data1)
  1323.         do_apply_filter(ctx, ctx->fileversion, &f[1], data1, count, order, fracbits);
  1324. }
  1325.  
  1326. static void ape_apply_filters(APEContext *ctx, int32_t *decoded0,
  1327.                               int32_t *decoded1, int count)
  1328. {
  1329.     int i;
  1330.  
  1331.     for (i = 0; i < APE_FILTER_LEVELS; i++) {
  1332.         if (!ape_filter_orders[ctx->fset][i])
  1333.             break;
  1334.         apply_filter(ctx, ctx->filters[i], decoded0, decoded1, count,
  1335.                      ape_filter_orders[ctx->fset][i],
  1336.                      ape_filter_fracbits[ctx->fset][i]);
  1337.     }
  1338. }
  1339.  
  1340. static int init_frame_decoder(APEContext *ctx)
  1341. {
  1342.     int i, ret;
  1343.     if ((ret = init_entropy_decoder(ctx)) < 0)
  1344.         return ret;
  1345.     init_predictor_decoder(ctx);
  1346.  
  1347.     for (i = 0; i < APE_FILTER_LEVELS; i++) {
  1348.         if (!ape_filter_orders[ctx->fset][i])
  1349.             break;
  1350.         init_filter(ctx, ctx->filters[i], ctx->filterbuf[i],
  1351.                     ape_filter_orders[ctx->fset][i]);
  1352.     }
  1353.     return 0;
  1354. }
  1355.  
  1356. static void ape_unpack_mono(APEContext *ctx, int count)
  1357. {
  1358.     if (ctx->frameflags & APE_FRAMECODE_STEREO_SILENCE) {
  1359.         /* We are pure silence, so we're done. */
  1360.         av_log(ctx->avctx, AV_LOG_DEBUG, "pure silence mono\n");
  1361.         return;
  1362.     }
  1363.  
  1364.     ctx->entropy_decode_mono(ctx, count);
  1365.  
  1366.     /* Now apply the predictor decoding */
  1367.     ctx->predictor_decode_mono(ctx, count);
  1368.  
  1369.     /* Pseudo-stereo - just copy left channel to right channel */
  1370.     if (ctx->channels == 2) {
  1371.         memcpy(ctx->decoded[1], ctx->decoded[0], count * sizeof(*ctx->decoded[1]));
  1372.     }
  1373. }
  1374.  
  1375. static void ape_unpack_stereo(APEContext *ctx, int count)
  1376. {
  1377.     int32_t left, right;
  1378.     int32_t *decoded0 = ctx->decoded[0];
  1379.     int32_t *decoded1 = ctx->decoded[1];
  1380.  
  1381.     if (ctx->frameflags & APE_FRAMECODE_STEREO_SILENCE) {
  1382.         /* We are pure silence, so we're done. */
  1383.         av_log(ctx->avctx, AV_LOG_DEBUG, "pure silence stereo\n");
  1384.         return;
  1385.     }
  1386.  
  1387.     ctx->entropy_decode_stereo(ctx, count);
  1388.  
  1389.     /* Now apply the predictor decoding */
  1390.     ctx->predictor_decode_stereo(ctx, count);
  1391.  
  1392.     /* Decorrelate and scale to output depth */
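    /* decoded0 holds the channel difference X = right - left and decoded1 the
     * pseudo-mid value Y = left + X / 2; the loop below inverts that mapping. */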
  1393.     while (count--) {
  1394.         left = *decoded1 - (*decoded0 / 2);
  1395.         right = left + *decoded0;
  1396.  
  1397.         *(decoded0++) = left;
  1398.         *(decoded1++) = right;
  1399.     }
  1400. }
  1401.  
  1402. static int ape_decode_frame(AVCodecContext *avctx, void *data,
  1403.                             int *got_frame_ptr, AVPacket *avpkt)
  1404. {
  1405.     AVFrame *frame     = data;
  1406.     const uint8_t *buf = avpkt->data;
  1407.     APEContext *s = avctx->priv_data;
  1408.     uint8_t *sample8;
  1409.     int16_t *sample16;
  1410.     int32_t *sample24;
  1411.     int i, ch, ret;
  1412.     int blockstodecode;
  1413.  
  1414.     /* this should never be negative, but bad things will happen if it is, so
  1415.        check it just to make sure. */
  1416.     av_assert0(s->samples >= 0);
  1417.  
  1418.     if(!s->samples){
  1419.         uint32_t nblocks, offset;
  1420.         int buf_size;
  1421.  
  1422.         if (!avpkt->size) {
  1423.             *got_frame_ptr = 0;
  1424.             return 0;
  1425.         }
  1426.         if (avpkt->size < 8) {
  1427.             av_log(avctx, AV_LOG_ERROR, "Packet is too small\n");
  1428.             return AVERROR_INVALIDDATA;
  1429.         }
  1430.         buf_size = avpkt->size & ~3;
  1431.         if (buf_size != avpkt->size) {
  1432.             av_log(avctx, AV_LOG_WARNING, "packet size is not a multiple of 4. "
  1433.                    "extra bytes at the end will be skipped.\n");
  1434.         }
  1435.         if (s->fileversion < 3950) // previous versions overread two bytes
  1436.             buf_size += 2;
  1437.         av_fast_malloc(&s->data, &s->data_size, buf_size);
  1438.         if (!s->data)
  1439.             return AVERROR(ENOMEM);
  1440.         s->dsp.bswap_buf((uint32_t*)s->data, (const uint32_t*)buf, buf_size >> 2);
  1441.         memset(s->data + (buf_size & ~3), 0, buf_size & 3);
  1442.         s->ptr = s->data;
  1443.         s->data_end = s->data + buf_size;
  1444.  
  1445.         nblocks = bytestream_get_be32(&s->ptr);
  1446.         offset  = bytestream_get_be32(&s->ptr);
  1447.         if (s->fileversion >= 3900) {
  1448.             if (offset > 3) {
  1449.                 av_log(avctx, AV_LOG_ERROR, "Incorrect offset passed\n");
  1450.                 s->data = NULL;
  1451.                 return AVERROR_INVALIDDATA;
  1452.             }
  1453.             if (s->data_end - s->ptr < offset) {
  1454.                 av_log(avctx, AV_LOG_ERROR, "Packet is too small\n");
  1455.                 return AVERROR_INVALIDDATA;
  1456.             }
  1457.             s->ptr += offset;
  1458.         } else {
  1459.             init_get_bits(&s->gb, s->ptr, (s->data_end - s->ptr) * 8);
  1460.             if (s->fileversion > 3800)
  1461.                 skip_bits_long(&s->gb, offset * 8);
  1462.             else
  1463.                 skip_bits_long(&s->gb, offset);
  1464.         }
  1465.  
  1466.         if (!nblocks || nblocks > INT_MAX) {
  1467.             av_log(avctx, AV_LOG_ERROR, "Invalid sample count: %u.\n", nblocks);
  1468.             return AVERROR_INVALIDDATA;
  1469.         }
  1470.         s->samples = nblocks;
  1471.  
  1472.         /* Initialize the frame decoder */
  1473.         if (init_frame_decoder(s) < 0) {
  1474.             av_log(avctx, AV_LOG_ERROR, "Error reading frame header\n");
  1475.             return AVERROR_INVALIDDATA;
  1476.         }
  1477.     }
  1478.  
  1479.     if (!s->data) {
  1480.         *got_frame_ptr = 0;
  1481.         return avpkt->size;
  1482.     }
  1483.  
  1484.     blockstodecode = FFMIN(s->blocks_per_loop, s->samples);
  1485.     // for old files coefficients were not interleaved,
  1486.     // so we need to decode all of them at once
  1487.     if (s->fileversion < 3930)
  1488.         blockstodecode = s->samples;
  1489.  
  1490.     /* reallocate decoded sample buffer if needed */
  1491.     av_fast_malloc(&s->decoded_buffer, &s->decoded_size,
  1492.                    2 * FFALIGN(blockstodecode, 8) * sizeof(*s->decoded_buffer));
  1493.     if (!s->decoded_buffer)
  1494.         return AVERROR(ENOMEM);
  1495.     memset(s->decoded_buffer, 0, s->decoded_size);
  1496.     s->decoded[0] = s->decoded_buffer;
  1497.     s->decoded[1] = s->decoded_buffer + FFALIGN(blockstodecode, 8);
  1498.  
  1499.     /* get output buffer */
  1500.     frame->nb_samples = blockstodecode;
  1501.     if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
  1502.         return ret;
  1503.  
  1504.     s->error=0;
  1505.  
  1506.     if ((s->channels == 1) || (s->frameflags & APE_FRAMECODE_PSEUDO_STEREO))
  1507.         ape_unpack_mono(s, blockstodecode);
  1508.     else
  1509.         ape_unpack_stereo(s, blockstodecode);
  1510.     emms_c();
  1511.  
  1512.     if (s->error) {
  1513.         s->samples=0;
  1514.         av_log(avctx, AV_LOG_ERROR, "Error decoding frame\n");
  1515.         return AVERROR_INVALIDDATA;
  1516.     }
  1517.  
  1518.     switch (s->bps) {
  1519.     case 8:
  1520.         for (ch = 0; ch < s->channels; ch++) {
  1521.             sample8 = (uint8_t *)frame->data[ch];
  1522.             for (i = 0; i < blockstodecode; i++)
  1523.                 *sample8++ = (s->decoded[ch][i] + 0x80) & 0xff;
  1524.         }
  1525.         break;
  1526.     case 16:
  1527.         for (ch = 0; ch < s->channels; ch++) {
  1528.             sample16 = (int16_t *)frame->data[ch];
  1529.             for (i = 0; i < blockstodecode; i++)
  1530.                 *sample16++ = s->decoded[ch][i];
  1531.         }
  1532.         break;
  1533.     case 24:
  1534.         for (ch = 0; ch < s->channels; ch++) {
  1535.             sample24 = (int32_t *)frame->data[ch];
  1536.             for (i = 0; i < blockstodecode; i++)
  1537.                 *sample24++ = s->decoded[ch][i] << 8;
  1538.         }
  1539.         break;
  1540.     }
  1541.  
  1542.     s->samples -= blockstodecode;
  1543.  
  1544.     *got_frame_ptr = 1;
  1545.  
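    /* Only part of the packet's samples may have been output (CODEC_CAP_SUBFRAMES);
     * report the packet as consumed only once no samples remain, so the caller
     * keeps feeding the same packet back until the whole frame is decoded. */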
  1546.     return !s->samples ? avpkt->size : 0;
  1547. }
  1548.  
  1549. static void ape_flush(AVCodecContext *avctx)
  1550. {
  1551.     APEContext *s = avctx->priv_data;
  1552.     s->samples= 0;
  1553. }
  1554.  
  1555. #define OFFSET(x) offsetof(APEContext, x)
  1556. #define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM)
  1557. static const AVOption options[] = {
  1558.     { "max_samples", "maximum number of samples decoded per call",             OFFSET(blocks_per_loop), AV_OPT_TYPE_INT,   { .i64 = 4608 },    1,       INT_MAX, PAR, "max_samples" },
  1559.     { "all",         "no maximum. decode all samples for each packet at once", 0,                       AV_OPT_TYPE_CONST, { .i64 = INT_MAX }, INT_MIN, INT_MAX, PAR, "max_samples" },
  1560.     { NULL},
  1561. };
  1562.  
  1563. static const AVClass ape_decoder_class = {
  1564.     .class_name = "APE decoder",
  1565.     .item_name  = av_default_item_name,
  1566.     .option     = options,
  1567.     .version    = LIBAVUTIL_VERSION_INT,
  1568. };
  1569.  
  1570. AVCodec ff_ape_decoder = {
  1571.     .name           = "ape",
  1572.     .long_name      = NULL_IF_CONFIG_SMALL("Monkey's Audio"),
  1573.     .type           = AVMEDIA_TYPE_AUDIO,
  1574.     .id             = AV_CODEC_ID_APE,
  1575.     .priv_data_size = sizeof(APEContext),
  1576.     .init           = ape_decode_init,
  1577.     .close          = ape_decode_close,
  1578.     .decode         = ape_decode_frame,
  1579.     .capabilities   = CODEC_CAP_SUBFRAMES | CODEC_CAP_DELAY | CODEC_CAP_DR1,
  1580.     .flush          = ape_flush,
  1581.     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_U8P,
  1582.                                                       AV_SAMPLE_FMT_S16P,
  1583.                                                       AV_SAMPLE_FMT_S32P,
  1584.                                                       AV_SAMPLE_FMT_NONE },
  1585.     .priv_class     = &ape_decoder_class,
  1586. };
  1587.