/*
 * AAC encoder
 * Copyright (C) 2008 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * AAC encoder
 */

/***********************************
 *              TODOs:
 * add sane pulse detection
 ***********************************/

#include "libavutil/float_dsp.h"
#include "libavutil/opt.h"
#include "avcodec.h"
#include "put_bits.h"
#include "internal.h"
#include "mpeg4audio.h"
#include "kbdwin.h"
#include "sinewin.h"

#include "aac.h"
#include "aactab.h"
#include "aacenc.h"
#include "aacenctab.h"
#include "aacenc_utils.h"

#include "psymodel.h"

/**
 * Make AAC audio config object.
 * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
 */
static void put_audio_specific_config(AVCodecContext *avctx)
{
    PutBitContext pb;
    AACEncContext *s = avctx->priv_data;

    init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
    put_bits(&pb, 5, s->profile+1); //profile
    put_bits(&pb, 4, s->samplerate_index); //sample rate index
    put_bits(&pb, 4, s->channels);
    //GASpecificConfig
    put_bits(&pb, 1, 0); //frame length - 1024 samples
    put_bits(&pb, 1, 0); //does not depend on core coder
    put_bits(&pb, 1, 0); //is not extension

    //Explicitly Mark SBR absent
    put_bits(&pb, 11, 0x2b7); //sync extension
    put_bits(&pb, 5,  AOT_SBR);
    put_bits(&pb, 1,  0);
    flush_put_bits(&pb);
}
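
/* Worked example: for the common AAC-LC case (profile index 1, i.e. AOT 2)
 * at 44100 Hz (sample rate index 4) with 2 channels, the bits above are
 * 00010 0100 0010 000, the familiar 0x12 0x10 extradata, followed by the
 * 0x2b7 / AOT_SBR / 0 sync extension that explicitly signals "no SBR". */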

#define WINDOW_FUNC(type) \
static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
                                    SingleChannelElement *sce, \
                                    const float *audio)

WINDOW_FUNC(only_long)
{
    const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    float *out = sce->ret_buf;

    fdsp->vector_fmul        (out,        audio,        lwindow, 1024);
    fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
}

WINDOW_FUNC(long_start)
{
    const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
    float *out = sce->ret_buf;

    fdsp->vector_fmul(out, audio, lwindow, 1024);
    memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
    fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
    memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
}

WINDOW_FUNC(long_stop)
{
    const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
    const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
    float *out = sce->ret_buf;

    memset(out, 0, sizeof(out[0]) * 448);
    fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
    memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
    fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
}

WINDOW_FUNC(eight_short)
{
    const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
    const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
    const float *in = audio + 448;
    float *out = sce->ret_buf;
    int w;

    for (w = 0; w < 8; w++) {
        fdsp->vector_fmul        (out, in, w ? pwindow : swindow, 128);
        out += 128;
        in  += 128;
        fdsp->vector_fmul_reverse(out, in, swindow, 128);
        out += 128;
    }
}

static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
                                     SingleChannelElement *sce,
                                     const float *audio) = {
    [ONLY_LONG_SEQUENCE]   = apply_only_long_window,
    [LONG_START_SEQUENCE]  = apply_long_start_window,
    [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
    [LONG_STOP_SEQUENCE]   = apply_long_stop_window
};
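
/* Each window function above fills a 2048-sample windowed block in
 * sce->ret_buf: ONLY_LONG uses a full-length rise and fall; LONG_START keeps
 * a long rise, 448 samples passed through at unit gain, a 128-sample short
 * fall and 448 zeros; LONG_STOP is its mirror image; EIGHT_SHORT applies
 * eight overlapping 256-sample short windows starting 448 samples in. The
 * shape (KBD or sine) follows the psy model's choice in use_kb_window[]. */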

static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
                                  float *audio)
{
    int i;
    float *output = sce->ret_buf;

    apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);

    if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
        s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
    else
        for (i = 0; i < 1024; i += 128)
            s->mdct128.mdct_calc(&s->mdct128, &sce->coeffs[i], output + i*2);
    memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
    memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
}
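
/* The trailing memcpys shift the input forward by one 1024-sample frame for
 * the next call's overlap and keep an untouched copy of the MDCT output in
 * pcoeffs, so the encode loop can restore coeffs and try again if the frame
 * does not fit after TNS/IS/M-S/prediction have modified the spectrum. */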

/**
 * Encode ics_info element.
 * @see Table 4.6 (syntax of ics_info)
 */
static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
{
    int w;

    put_bits(&s->pb, 1, 0);                // ics_reserved bit
    put_bits(&s->pb, 2, info->window_sequence[0]);
    put_bits(&s->pb, 1, info->use_kb_window[0]);
    if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
        put_bits(&s->pb, 6, info->max_sfb);
        put_bits(&s->pb, 1, !!info->predictor_present);
    } else {
        put_bits(&s->pb, 4, info->max_sfb);
        for (w = 1; w < 8; w++)
            put_bits(&s->pb, 1, !info->group_len[w]);
    }
}

/**
 * Encode MS data.
 * @see 4.6.8.1 "Joint Coding - M/S Stereo"
 */
static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
{
    int i, w;

    put_bits(pb, 2, cpe->ms_mode);
    if (cpe->ms_mode == 1)
        for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
            for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
                put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
}
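
/* ms_mode mirrors the spec's ms_mask_present field: 0 = no M/S at all,
 * 1 = a per-band mask follows, 2 = M/S on every band (no mask transmitted).
 * Throughout this file band-wise arrays use a stride of 16 per window
 * (ms_mask[w*16 + i], zeroes[w*16 + g], ...) and coefficients a stride of
 * 128, which fits both the single long window and the eight short windows
 * with at most 15 scalefactor bands each. */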

/**
 * Produce integer coefficients from scalefactors provided by the model.
 */
static void adjust_frame_information(ChannelElement *cpe, int chans)
{
    int i, w, w2, g, ch;
    int maxsfb, cmaxsfb;

    for (ch = 0; ch < chans; ch++) {
        IndividualChannelStream *ics = &cpe->ch[ch].ics;
        maxsfb = 0;
        cpe->ch[ch].pulse.num_pulse = 0;
        for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
            for (w2 =  0; w2 < ics->group_len[w]; w2++) {
                for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
                    ;
                maxsfb = FFMAX(maxsfb, cmaxsfb);
            }
        }
        ics->max_sfb = maxsfb;

        //adjust zero bands for window groups
        for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
            for (g = 0; g < ics->max_sfb; g++) {
                i = 1;
                for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
                    if (!cpe->ch[ch].zeroes[w2*16 + g]) {
                        i = 0;
                        break;
                    }
                }
                cpe->ch[ch].zeroes[w*16 + g] = i;
            }
        }
    }

    if (chans > 1 && cpe->common_window) {
        IndividualChannelStream *ics0 = &cpe->ch[0].ics;
        IndividualChannelStream *ics1 = &cpe->ch[1].ics;
        int msc = 0;
        ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
        ics1->max_sfb = ics0->max_sfb;
        for (w = 0; w < ics0->num_windows*16; w += 16)
            for (i = 0; i < ics0->max_sfb; i++)
                if (cpe->ms_mask[w+i])
                    msc++;
        if (msc == 0 || ics0->max_sfb == 0)
            cpe->ms_mode = 0;
        else
            cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
    }
}

static void apply_intensity_stereo(ChannelElement *cpe)
{
    int w, w2, g, i;
    IndividualChannelStream *ics = &cpe->ch[0].ics;
    if (!cpe->common_window)
        return;
    for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
        for (w2 =  0; w2 < ics->group_len[w]; w2++) {
            int start = (w+w2) * 128;
            for (g = 0; g < ics->num_swb; g++) {
                int p  = -1 + 2 * (cpe->ch[1].band_type[w*16+g] - 14);
                float scale = cpe->ch[0].is_ener[w*16+g];
                if (!cpe->is_mask[w*16 + g]) {
                    start += ics->swb_sizes[g];
                    continue;
                }
                for (i = 0; i < ics->swb_sizes[g]; i++) {
                    float sum = (cpe->ch[0].coeffs[start+i] + p*cpe->ch[1].coeffs[start+i])*scale;
                    cpe->ch[0].coeffs[start+i] = sum;
                    cpe->ch[1].coeffs[start+i] = 0.0f;
                }
                start += ics->swb_sizes[g];
            }
        }
    }
}
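
/* In intensity bands only the left channel keeps coefficients: the pair is
 * collapsed to (l + p*r) * is_ener, where p is +1 for INTENSITY_BT (15) and
 * -1 for INTENSITY_BT2 (14), matching the -1 + 2*(band_type - 14) expression
 * above; the right channel is then carried purely by its scalefactor. */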

static void apply_mid_side_stereo(ChannelElement *cpe)
{
    int w, w2, g, i;
    IndividualChannelStream *ics = &cpe->ch[0].ics;
    if (!cpe->common_window)
        return;
    for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
        for (w2 =  0; w2 < ics->group_len[w]; w2++) {
            int start = (w+w2) * 128;
            for (g = 0; g < ics->num_swb; g++) {
                if (!cpe->ms_mask[w*16 + g]) {
                    start += ics->swb_sizes[g];
                    continue;
                }
                for (i = 0; i < ics->swb_sizes[g]; i++) {
                    float L = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) * 0.5f;
                    float R = L - cpe->ch[1].coeffs[start+i];
                    cpe->ch[0].coeffs[start+i] = L;
                    cpe->ch[1].coeffs[start+i] = R;
                }
                start += ics->swb_sizes[g];
            }
        }
    }
}
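
/* For M/S bands the channel pair is rewritten as mid = (l + r) / 2 and
 * side = (l - r) / 2; the decoder reconstructs l = mid + side and
 * r = mid - side, so the transform is lossless apart from quantization. */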

/**
 * Encode scalefactor band coding type.
 */
static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
{
    int w;

    if (s->coder->set_special_band_scalefactors)
        s->coder->set_special_band_scalefactors(s, sce);

    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
        s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
}

/**
 * Encode scalefactors.
 */
static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
                                 SingleChannelElement *sce)
{
    int diff, off_sf = sce->sf_idx[0], off_pns = sce->sf_idx[0] - NOISE_OFFSET;
    int off_is = 0, noise_flag = 1;
    int i, w;

    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        for (i = 0; i < sce->ics.max_sfb; i++) {
            if (!sce->zeroes[w*16 + i]) {
                if (sce->band_type[w*16 + i] == NOISE_BT) {
                    diff = sce->sf_idx[w*16 + i] - off_pns;
                    off_pns = sce->sf_idx[w*16 + i];
                    if (noise_flag-- > 0) {
                        put_bits(&s->pb, NOISE_PRE_BITS, diff + NOISE_PRE);
                        continue;
                    }
                } else if (sce->band_type[w*16 + i] == INTENSITY_BT  ||
                           sce->band_type[w*16 + i] == INTENSITY_BT2) {
                    diff = sce->sf_idx[w*16 + i] - off_is;
                    off_is = sce->sf_idx[w*16 + i];
                } else {
                    diff = sce->sf_idx[w*16 + i] - off_sf;
                    off_sf = sce->sf_idx[w*16 + i];
                }
                diff += SCALE_DIFF_ZERO;
                av_assert0(diff >= 0 && diff <= 120);
                put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
            }
        }
    }
}
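
/* Scalefactors are coded differentially against the previous value of the
 * same kind (regular, intensity position or PNS energy), biased by
 * SCALE_DIFF_ZERO so the difference lands in the 0..120 range of the
 * scalefactor Huffman table; only the very first PNS energy in a channel
 * uses the fixed-width NOISE_PRE escape instead. */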

/**
 * Encode pulse data.
 */
static void encode_pulses(AACEncContext *s, Pulse *pulse)
{
    int i;

    put_bits(&s->pb, 1, !!pulse->num_pulse);
    if (!pulse->num_pulse)
        return;

    put_bits(&s->pb, 2, pulse->num_pulse - 1);
    put_bits(&s->pb, 6, pulse->start);
    for (i = 0; i < pulse->num_pulse; i++) {
        put_bits(&s->pb, 5, pulse->pos[i]);
        put_bits(&s->pb, 4, pulse->amp[i]);
    }
}

/**
 * Encode spectral coefficients processed by psychoacoustic model.
 */
static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
{
    int start, i, w, w2;

    for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
        start = 0;
        for (i = 0; i < sce->ics.max_sfb; i++) {
            if (sce->zeroes[w*16 + i]) {
                start += sce->ics.swb_sizes[i];
                continue;
            }
            for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++) {
                s->coder->quantize_and_encode_band(s, &s->pb,
                                                   &sce->coeffs[start + w2*128],
                                                   NULL, sce->ics.swb_sizes[i],
                                                   sce->sf_idx[w*16 + i],
                                                   sce->band_type[w*16 + i],
                                                   s->lambda,
                                                   sce->ics.window_clipping[w]);
            }
            start += sce->ics.swb_sizes[i];
        }
    }
}

/**
 * Downscale spectral coefficients for near-clipping windows to avoid artifacts
 */
static void avoid_clipping(AACEncContext *s, SingleChannelElement *sce)
{
    int start, i, j, w;

    if (sce->ics.clip_avoidance_factor < 1.0f) {
        for (w = 0; w < sce->ics.num_windows; w++) {
            start = 0;
            for (i = 0; i < sce->ics.max_sfb; i++) {
                float *swb_coeffs = &sce->coeffs[start + w*128];
                for (j = 0; j < sce->ics.swb_sizes[i]; j++)
                    swb_coeffs[j] *= sce->ics.clip_avoidance_factor;
                start += sce->ics.swb_sizes[i];
            }
        }
    }
}

/**
 * Encode one channel of audio data.
 */
static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
                                     SingleChannelElement *sce,
                                     int common_window)
{
    put_bits(&s->pb, 8, sce->sf_idx[0]);
    if (!common_window) {
        put_ics_info(s, &sce->ics);
        if (s->coder->encode_main_pred)
            s->coder->encode_main_pred(s, sce);
    }
    encode_band_info(s, sce);
    encode_scale_factors(avctx, s, sce);
    encode_pulses(s, &sce->pulse);
    put_bits(&s->pb, 1, !!sce->tns.present);
    if (s->coder->encode_tns_info)
        s->coder->encode_tns_info(s, sce);
    put_bits(&s->pb, 1, 0); //ssr
    encode_spectral_coeffs(s, sce);
    return 0;
}
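
/* This follows the individual_channel_stream() syntax: 8-bit global gain
 * (sf_idx[0]), ics_info when the window is not shared with the other channel
 * of a CPE, section data, scalefactors, pulse data, TNS data, a zero
 * gain_control_data_present flag (SSR is not supported) and finally the
 * Huffman-coded spectral data. */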

/**
 * Write some auxiliary information about the created AAC file.
 */
static void put_bitstream_info(AACEncContext *s, const char *name)
{
    int i, namelen, padbits;

    namelen = strlen(name) + 2;
    put_bits(&s->pb, 3, TYPE_FIL);
    put_bits(&s->pb, 4, FFMIN(namelen, 15));
    if (namelen >= 15)
        put_bits(&s->pb, 8, namelen - 14);
    put_bits(&s->pb, 4, 0); //extension type - filler
    padbits = -put_bits_count(&s->pb) & 7;
    avpriv_align_put_bits(&s->pb);
    for (i = 0; i < namelen - 2; i++)
        put_bits(&s->pb, 8, name[i]);
    put_bits(&s->pb, 12 - padbits, 0);
}

/*
 * Copy input samples.
 * Channels are reordered from libavcodec's default order to AAC order.
 */
static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
{
    int ch;
    int end = 2048 + (frame ? frame->nb_samples : 0);
    const uint8_t *channel_map = aac_chan_maps[s->channels - 1];

    /* copy and remap input samples */
    for (ch = 0; ch < s->channels; ch++) {
        /* copy last 1024 samples of previous frame to the start of the current frame */
        memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));

        /* copy new samples and zero any remaining samples */
        if (frame) {
            memcpy(&s->planar_samples[ch][2048],
                   frame->extended_data[channel_map[ch]],
                   frame->nb_samples * sizeof(s->planar_samples[0][0]));
        }
        memset(&s->planar_samples[ch][end], 0,
               (3072 - end) * sizeof(s->planar_samples[0][0]));
    }
}
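
/* planar_samples holds three 1024-sample blocks per channel (3072 floats):
 * roughly the block being encoded, the following block and the newest input,
 * forming a one-frame-delay sliding window that also provides the lookahead
 * the psy model and the MDCT overlap need. Short final frames are zero-padded. */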

static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                            const AVFrame *frame, int *got_packet_ptr)
{
    AACEncContext *s = avctx->priv_data;
    float **samples = s->planar_samples, *samples2, *la, *overlap;
    ChannelElement *cpe;
    SingleChannelElement *sce;
    int i, ch, w, chans, tag, start_ch, ret;
    int ms_mode = 0, is_mode = 0, tns_mode = 0, pred_mode = 0;
    int chan_el_counter[4];
    FFPsyWindowInfo windows[AAC_MAX_CHANNELS];

    if (s->last_frame == 2)
        return 0;

    /* add current frame to queue */
    if (frame) {
        if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
            return ret;
    }

    copy_input_samples(s, frame);
    if (s->psypp)
        ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);

    if (!avctx->frame_number)
        return 0;

    start_ch = 0;
    for (i = 0; i < s->chan_map[0]; i++) {
        FFPsyWindowInfo* wi = windows + start_ch;
        tag      = s->chan_map[i+1];
        chans    = tag == TYPE_CPE ? 2 : 1;
        cpe      = &s->cpe[i];
        for (ch = 0; ch < chans; ch++) {
            IndividualChannelStream *ics = &cpe->ch[ch].ics;
            int cur_channel = start_ch + ch;
            float clip_avoidance_factor;
            overlap  = &samples[cur_channel][0];
            samples2 = overlap + 1024;
            la       = samples2 + (448+64);
            if (!frame)
                la = NULL;
            if (tag == TYPE_LFE) {
                wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
                wi[ch].window_shape   = 0;
                wi[ch].num_windows    = 1;
                wi[ch].grouping[0]    = 1;

                /* Only the lowest 12 coefficients are used in a LFE channel.
                 * The expression below results in only the bottom 8 coefficients
                 * being used for 11.025kHz to 16kHz sample rates.
                 */
                ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
            } else {
                wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
                                              ics->window_sequence[0]);
            }
            ics->window_sequence[1] = ics->window_sequence[0];
            ics->window_sequence[0] = wi[ch].window_type[0];
            ics->use_kb_window[1]   = ics->use_kb_window[0];
            ics->use_kb_window[0]   = wi[ch].window_shape;
            ics->num_windows        = wi[ch].num_windows;
            ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
            ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
            ics->max_sfb            = FFMIN(ics->max_sfb, ics->num_swb);
            ics->swb_offset         = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
                                        ff_swb_offset_128 [s->samplerate_index]:
                                        ff_swb_offset_1024[s->samplerate_index];
            ics->tns_max_bands      = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
                                        ff_tns_max_bands_128 [s->samplerate_index]:
                                        ff_tns_max_bands_1024[s->samplerate_index];
            clip_avoidance_factor = 0.0f;
            for (w = 0; w < ics->num_windows; w++)
                ics->group_len[w] = wi[ch].grouping[w];
            for (w = 0; w < ics->num_windows; w++) {
                if (wi[ch].clipping[w] > CLIP_AVOIDANCE_FACTOR) {
                    ics->window_clipping[w] = 1;
                    clip_avoidance_factor = FFMAX(clip_avoidance_factor, wi[ch].clipping[w]);
                } else {
                    ics->window_clipping[w] = 0;
                }
            }
            if (clip_avoidance_factor > CLIP_AVOIDANCE_FACTOR) {
                ics->clip_avoidance_factor = CLIP_AVOIDANCE_FACTOR / clip_avoidance_factor;
            } else {
                ics->clip_avoidance_factor = 1.0f;
            }

            apply_window_and_mdct(s, &cpe->ch[ch], overlap);
            if (isnan(cpe->ch->coeffs[0])) {
                av_log(avctx, AV_LOG_ERROR, "Input contains NaN\n");
                return AVERROR(EINVAL);
            }
            avoid_clipping(s, &cpe->ch[ch]);
        }
        start_ch += chans;
    }
    if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels, 0)) < 0)
        return ret;
    do {
        int frame_bits;

        init_put_bits(&s->pb, avpkt->data, avpkt->size);

        if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & AV_CODEC_FLAG_BITEXACT))
            put_bitstream_info(s, LIBAVCODEC_IDENT);
        start_ch = 0;
        memset(chan_el_counter, 0, sizeof(chan_el_counter));
        for (i = 0; i < s->chan_map[0]; i++) {
            FFPsyWindowInfo* wi = windows + start_ch;
            const float *coeffs[2];
            tag      = s->chan_map[i+1];
            chans    = tag == TYPE_CPE ? 2 : 1;
            cpe      = &s->cpe[i];
            cpe->common_window = 0;
            memset(cpe->is_mask, 0, sizeof(cpe->is_mask));
            memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask));
            put_bits(&s->pb, 3, tag);
            put_bits(&s->pb, 4, chan_el_counter[tag]++);
            for (ch = 0; ch < chans; ch++) {
                sce = &cpe->ch[ch];
                coeffs[ch] = sce->coeffs;
                sce->ics.predictor_present = 0;
                memset(&sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
                memset(&sce->tns, 0, sizeof(TemporalNoiseShaping));
                for (w = 0; w < 128; w++)
                    if (sce->band_type[w] > RESERVED_BT)
                        sce->band_type[w] = 0;
            }
            s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
            for (ch = 0; ch < chans; ch++) {
                s->cur_channel = start_ch + ch;
                s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
            }
            if (chans > 1
                && wi[0].window_type[0] == wi[1].window_type[0]
                && wi[0].window_shape   == wi[1].window_shape) {

                cpe->common_window = 1;
                for (w = 0; w < wi[0].num_windows; w++) {
                    if (wi[0].grouping[w] != wi[1].grouping[w]) {
                        cpe->common_window = 0;
                        break;
                    }
                }
            }
            for (ch = 0; ch < chans; ch++) { /* TNS and PNS */
                sce = &cpe->ch[ch];
                s->cur_channel = start_ch + ch;
                if (s->options.pns && s->coder->search_for_pns)
                    s->coder->search_for_pns(s, avctx, sce);
                if (s->options.tns && s->coder->search_for_tns)
                    s->coder->search_for_tns(s, sce);
                if (s->options.tns && s->coder->apply_tns_filt)
                    s->coder->apply_tns_filt(s, sce);
                if (sce->tns.present)
                    tns_mode = 1;
            }
            s->cur_channel = start_ch;
            if (s->options.intensity_stereo) { /* Intensity Stereo */
                if (s->coder->search_for_is)
                    s->coder->search_for_is(s, avctx, cpe);
                if (cpe->is_mode) is_mode = 1;
                apply_intensity_stereo(cpe);
            }
            if (s->options.pred) { /* Prediction */
                for (ch = 0; ch < chans; ch++) {
                    sce = &cpe->ch[ch];
                    s->cur_channel = start_ch + ch;
                    if (s->options.pred && s->coder->search_for_pred)
                        s->coder->search_for_pred(s, sce);
                    if (cpe->ch[ch].ics.predictor_present) pred_mode = 1;
                }
                if (s->coder->adjust_common_prediction)
                    s->coder->adjust_common_prediction(s, cpe);
                for (ch = 0; ch < chans; ch++) {
                    sce = &cpe->ch[ch];
                    s->cur_channel = start_ch + ch;
                    if (s->options.pred && s->coder->apply_main_pred)
                        s->coder->apply_main_pred(s, sce);
                }
                s->cur_channel = start_ch;
            }
            if (s->options.stereo_mode) { /* Mid/Side stereo */
                if (s->options.stereo_mode == -1 && s->coder->search_for_ms)
                    s->coder->search_for_ms(s, cpe);
                else if (cpe->common_window)
                    memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask));
                for (w = 0; w < 128; w++)
                    cpe->ms_mask[w] = cpe->is_mask[w] ? 0 : cpe->ms_mask[w];
                apply_mid_side_stereo(cpe);
            }
            adjust_frame_information(cpe, chans);
            if (chans == 2) {
                put_bits(&s->pb, 1, cpe->common_window);
                if (cpe->common_window) {
                    put_ics_info(s, &cpe->ch[0].ics);
                    if (s->coder->encode_main_pred)
                        s->coder->encode_main_pred(s, &cpe->ch[0]);
                    encode_ms_info(&s->pb, cpe);
                    if (cpe->ms_mode) ms_mode = 1;
                }
            }
            for (ch = 0; ch < chans; ch++) {
                s->cur_channel = start_ch + ch;
                encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
            }
            start_ch += chans;
        }

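        /* One full AAC frame must fit the decoder's bit reservoir of 6144 bits
         * per channel (the 3 reserved bits cover the TYPE_END marker written
         * after the loop). If the frame is too large, the pristine pcoeffs are
         * restored where the coding tools rewrote the spectrum, and lambda is
         * scaled by target_bits / frame_bits with target_bits = bit_rate *
         * 1024 / sample_rate (e.g. roughly 2972 bits per frame at 128 kb/s,
         * 44.1 kHz) before the frame is encoded again. */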
        frame_bits = put_bits_count(&s->pb);
        if (frame_bits <= 6144 * s->channels - 3) {
            s->psy.bitres.bits = frame_bits / s->channels;
            break;
        }
        if (is_mode || ms_mode || tns_mode || pred_mode) {
            for (i = 0; i < s->chan_map[0]; i++) {
                // Must restore coeffs
                chans = tag == TYPE_CPE ? 2 : 1;
                cpe = &s->cpe[i];
                for (ch = 0; ch < chans; ch++)
                    memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
            }
        }

        s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;

    } while (1);

    put_bits(&s->pb, 3, TYPE_END);
    flush_put_bits(&s->pb);
    avctx->frame_bits = put_bits_count(&s->pb);

    // rate control stuff
    if (!(avctx->flags & AV_CODEC_FLAG_QSCALE)) {
        float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
        s->lambda *= ratio;
        s->lambda = FFMIN(s->lambda, 65536.f);
    }

    if (!frame)
        s->last_frame++;

    ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
                       &avpkt->duration);

    avpkt->size = put_bits_count(&s->pb) >> 3;
    *got_packet_ptr = 1;
    return 0;
}

static av_cold int aac_encode_end(AVCodecContext *avctx)
{
    AACEncContext *s = avctx->priv_data;

    ff_mdct_end(&s->mdct1024);
    ff_mdct_end(&s->mdct128);
    ff_psy_end(&s->psy);
    ff_lpc_end(&s->lpc);
    if (s->psypp)
        ff_psy_preprocess_end(s->psypp);
    av_freep(&s->buffer.samples);
    av_freep(&s->cpe);
    av_freep(&s->fdsp);
    ff_af_queue_close(&s->afq);
    return 0;
}

static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
{
    int ret = 0;

    s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
    if (!s->fdsp)
        return AVERROR(ENOMEM);

    // window init
    ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
    ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
    ff_init_ff_sine_windows(10);
    ff_init_ff_sine_windows(7);

    if ((ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0)) < 0)
        return ret;
    if ((ret = ff_mdct_init(&s->mdct128,   8, 0, 32768.0)) < 0)
        return ret;

    return 0;
}

static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
{
    int ch;
    FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
    FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
    FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + AV_INPUT_BUFFER_PADDING_SIZE, alloc_fail);

    for(ch = 0; ch < s->channels; ch++)
        s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;

    return 0;
alloc_fail:
    return AVERROR(ENOMEM);
}

static av_cold int aac_encode_init(AVCodecContext *avctx)
{
    AACEncContext *s = avctx->priv_data;
    int i, ret = 0;
    const uint8_t *sizes[2];
    uint8_t grouping[AAC_MAX_CHANNELS];
    int lengths[2];

    avctx->frame_size = 1024;

    for (i = 0; i < 16; i++)
        if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
            break;

    s->channels = avctx->channels;

    ERROR_IF(i == 16 || i >= ff_aac_swb_size_1024_len || i >= ff_aac_swb_size_128_len,
             "Unsupported sample rate %d\n", avctx->sample_rate);
    ERROR_IF(s->channels > AAC_MAX_CHANNELS,
             "Unsupported number of channels: %d\n", s->channels);
    WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
             "Too many bits per frame requested, clamping to max\n");
    if (avctx->profile == FF_PROFILE_AAC_MAIN) {
        s->options.pred = 1;
    } else if ((avctx->profile == FF_PROFILE_AAC_LOW ||
                avctx->profile == FF_PROFILE_UNKNOWN) && s->options.pred) {
        s->profile = 0; /* Main */
        WARN_IF(1, "Prediction requested, changing profile to AAC-Main\n");
    } else if (avctx->profile == FF_PROFILE_AAC_LOW ||
               avctx->profile == FF_PROFILE_UNKNOWN) {
        s->profile = 1; /* Low */
    } else {
        ERROR_IF(1, "Unsupported profile %d\n", avctx->profile);
    }

    if (s->options.aac_coder != AAC_CODER_TWOLOOP) {
        s->options.intensity_stereo = 0;
        s->options.pns = 0;
    }

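    /* 6144 bits per 1024-sample frame per channel is the upper bound the bit
     * reservoir allows, so the requested bit rate is capped accordingly;
     * e.g. for 48 kHz stereo that works out to 6144 * 2 / 1024 * 48000 =
     * 576000 b/s. */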
    avctx->bit_rate = (int)FFMIN(
        6144 * s->channels / 1024.0 * avctx->sample_rate,
        avctx->bit_rate);

    s->samplerate_index = i;

    s->chan_map = aac_chan_configs[s->channels-1];

    if ((ret = dsp_init(avctx, s)) < 0)
        goto fail;

    if ((ret = alloc_buffers(avctx, s)) < 0)
        goto fail;

    avctx->extradata_size = 5;
    put_audio_specific_config(avctx);

    sizes[0]   = ff_aac_swb_size_1024[i];
    sizes[1]   = ff_aac_swb_size_128[i];
    lengths[0] = ff_aac_num_swb_1024[i];
    lengths[1] = ff_aac_num_swb_128[i];
    for (i = 0; i < s->chan_map[0]; i++)
        grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
    if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
                           s->chan_map[0], grouping)) < 0)
        goto fail;
    s->psypp = ff_psy_preprocess_init(avctx);
    s->coder = &ff_aac_coders[s->options.aac_coder];
    ff_lpc_init(&s->lpc, 2*avctx->frame_size, TNS_MAX_ORDER, FF_LPC_TYPE_LEVINSON);

    if (HAVE_MIPSDSPR1)
        ff_aac_coder_init_mips(s);

    s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;

    ff_aac_tableinit();

    avctx->initial_padding = 1024;
    ff_af_queue_init(avctx, &s->afq);

    return 0;
fail:
    aac_encode_end(avctx);
    return ret;
}

#define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
static const AVOption aacenc_options[] = {
    {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
        {"auto",     "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
        {"ms_off",   "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
        {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
    {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
        {"faac",     "FAAC-inspired method",      0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
        {"anmr",     "ANMR method",               0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
        {"twoloop",  "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
        {"fast",     "Constant quantizer",        0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
    {"aac_pns", "Perceptual Noise Substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, AACENC_FLAGS, "aac_pns"},
        {"disable",  "Disable perceptual noise substitution", 0, AV_OPT_TYPE_CONST, {.i64 =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
        {"enable",   "Enable perceptual noise substitution",  0, AV_OPT_TYPE_CONST, {.i64 =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
    {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, AACENC_FLAGS, "intensity_stereo"},
        {"disable",  "Disable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
        {"enable",   "Enable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
    {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "aac_tns"},
        {"disable",  "Disable temporal noise shaping", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_tns"},
        {"enable",   "Enable temporal noise shaping", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_tns"},
    {"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "aac_pred"},
        {"disable",  "Disable AAC-Main prediction", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pred"},
        {"enable",   "Enable AAC-Main prediction", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pred"},
    {NULL}
};
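
/* These options are exposed through the usual AVOption mechanism, e.g. (the
 * exact command line depends on the FFmpeg build; -strict -2 is needed here
 * because the encoder is flagged experimental):
 *   ffmpeg -i in.wav -c:a aac -strict -2 -b:a 128k -aac_coder twoloop -aac_tns 1 out.m4a
 */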

static const AVClass aacenc_class = {
    "AAC encoder",
    av_default_item_name,
    aacenc_options,
    LIBAVUTIL_VERSION_INT,
};

AVCodec ff_aac_encoder = {
    .name           = "aac",
    .long_name      = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
    .type           = AVMEDIA_TYPE_AUDIO,
    .id             = AV_CODEC_ID_AAC,
    .priv_data_size = sizeof(AACEncContext),
    .init           = aac_encode_init,
    .encode2        = aac_encode_frame,
    .close          = aac_encode_end,
    .supported_samplerates = mpeg4audio_sample_rates,
    .capabilities   = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY |
                      AV_CODEC_CAP_EXPERIMENTAL,
    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
                                                     AV_SAMPLE_FMT_NONE },
    .priv_class     = &aacenc_class,
};