Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
/*
2
 * Nellymoser encoder
3
 * This code is developed as part of Google Summer of Code 2008 Program.
4
 *
5
 * Copyright (c) 2008 Bartlomiej Wolowiec
6
 *
7
 * This file is part of FFmpeg.
8
 *
9
 * FFmpeg is free software; you can redistribute it and/or
10
 * modify it under the terms of the GNU Lesser General Public
11
 * License as published by the Free Software Foundation; either
12
 * version 2.1 of the License, or (at your option) any later version.
13
 *
14
 * FFmpeg is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
 * Lesser General Public License for more details.
18
 *
19
 * You should have received a copy of the GNU Lesser General Public
20
 * License along with FFmpeg; if not, write to the Free Software
21
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22
 */
23
 
24
/**
25
 * @file
26
 * Nellymoser encoder
27
 * by Bartlomiej Wolowiec
28
 *
29
 * Generic codec information: libavcodec/nellymoserdec.c
30
 *
31
 * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
32
 *                             (Copyright Joseph Artsimovich and UAB "DKD")
33
 *
34
 * for more information about nellymoser format, visit:
35
 * http://wiki.multimedia.cx/index.php?title=Nellymoser
36
 */
37
 
38
#include "libavutil/float_dsp.h"
39
#include "libavutil/mathematics.h"
40
#include "nellymoser.h"
41
#include "avcodec.h"
42
#include "audio_frame_queue.h"
43
#include "fft.h"
44
#include "internal.h"
45
#include "sinewin.h"
46
 
47
#define BITSTREAM_WRITER_LE
48
#include "put_bits.h"
49
 
50
#define POW_TABLE_SIZE (1<<11)      ///< number of entries in pow_table
51
#define POW_TABLE_OFFSET 3          ///< exponent bias added when pow_table is filled and compensated by an extra shift when it is read in encode_block()
52
#define OPT_SIZE ((1<<15) + 3000)   ///< number of states per band in the opt/path arrays used by get_exponent_dynamic()
53
 
54
/** Private encoder state, stored in AVCodecContext.priv_data. */
typedef struct NellyMoserEncodeContext {
55
    AVCodecContext  *avctx;         ///< owning codec context, set in encode_init()
56
    int             last_frame;     ///< set to 1 by encode_frame() once the final frame was taken; later calls return at once
57
    AVFloatDSPContext fdsp;         ///< float DSP helpers used for windowing in apply_mdct()
58
    FFTContext      mdct_ctx;       ///< MDCT context, set up by ff_mdct_init() in encode_init()
59
    AudioFrameQueue afq;            ///< frame queue supplying pts/duration for output packets
60
    DECLARE_ALIGNED(32, float, mdct_out)[NELLY_SAMPLES];    ///< output of the two MDCTs run by apply_mdct()
61
    DECLARE_ALIGNED(32, float, in_buff)[NELLY_SAMPLES];     ///< windowed MDCT input (scratch)
62
    DECLARE_ALIGNED(32, float, buf)[3 * NELLY_BUF_LEN];     ///< sample buffer
63
    float           (*opt )[OPT_SIZE];  ///< per-band cost table for get_exponent_dynamic(); allocated only when trellis is enabled
64
    uint8_t         (*path)[OPT_SIZE];  ///< per-band table index chosen to reach each state; allocated only when trellis is enabled
65
} NellyMoserEncodeContext;
66
 
67
static float pow_table[POW_TABLE_SIZE];     ///< -pow(2, -i / 2048.0 - 3.0 + POW_TABLE_OFFSET), filled in encode_init()
68
 
69
/**
 * Coarse lookup used through find_best() in get_exponent_greedy() for the
 * base exponent: entry av_clip((lrintf(val) >> 8) - 20, 0, 95) is the
 * starting index into ff_nelly_init_table, which find_best() may then
 * advance by one.
 */
static const uint8_t sf_lut[96] = {
70
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
71
     5,  5,  5,  6,  7,  7,  8,  8,  9, 10, 11, 11, 12, 13, 13, 14,
72
    15, 15, 16, 17, 17, 18, 19, 19, 20, 21, 22, 22, 23, 24, 25, 26,
73
    27, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40,
74
    41, 41, 42, 43, 44, 45, 45, 46, 47, 48, 49, 50, 51, 52, 52, 53,
75
    54, 55, 55, 56, 57, 57, 58, 59, 59, 60, 60, 60, 61, 61, 61, 62,
76
};
77
 
78
/**
 * Coarse lookup used through find_best() in get_exponent_greedy() for the
 * per-band exponent deltas: entry av_clip((lrintf(val) >> 8) + 37, 0, 77)
 * is the starting index into ff_nelly_delta_table, which find_best() may
 * then advance by one.
 */
static const uint8_t sf_delta_lut[78] = {
79
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  4,  4,
80
     4,  5,  5,  5,  6,  6,  7,  7,  8,  8,  9, 10, 10, 11, 11, 12,
81
    13, 13, 14, 15, 16, 17, 17, 18, 19, 19, 20, 21, 21, 22, 22, 23,
82
    23, 24, 24, 25, 25, 25, 26, 26, 26, 26, 27, 27, 27, 27, 27, 28,
83
    28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 29, 29, 30,
84
};
85
 
86
/**
 * Coarse quantizer lookup used by encode_block().
 *
 * The table is a concatenation of one section per bit count; the section
 * for b bits spans indices quant_lut_offset[b] .. quant_lut_offset[b + 1] - 1.
 * Each entry is a starting index into the dequantization table for that bit
 * count, which encode_block() may then advance by one.
 */
static const uint8_t quant_lut[230] = {
87
     0,
88
 
89
     0,  1,  2,
90
 
91
     0,  1,  2,  3,  4,  5,  6,
92
 
93
     0,  1,  1,  2,  2,  3,  3,  4,  5,  6,  7,  8,  9, 10, 11, 11,
94
    12, 13, 13, 13, 14,
95
 
96
     0,  1,  1,  2,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  8,
97
     8,  9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
98
    22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 29,
99
    30,
100
 
101
     0,  1,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,  3,  3,
102
     4,  4,  4,  5,  5,  5,  6,  6,  7,  7,  7,  8,  8,  9,  9,  9,
103
    10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 15, 15,
104
    15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 20, 20, 20,
105
    21, 21, 22, 22, 23, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32,
106
    33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45,
107
    46, 47, 47, 48, 48, 49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52,
108
    53, 53, 53, 54, 54, 54, 55, 55, 55, 56, 56, 56, 57, 57, 57, 57,
109
    58, 58, 58, 58, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, 61,
110
    61, 61, 61, 61, 62,
111
};
112
 
113
/* Per-bit-count parameters for the quant_lut lookup in encode_block():
 * the lookup index is coeff * quant_lut_mul[bits] + quant_lut_add[bits],
 * clipped to [quant_lut_offset[bits], quant_lut_offset[bits + 1] - 1]. */
static const float quant_lut_mul[7] = { 0.0,  0.0,  2.0,  2.0,  5.0, 12.0,  36.6 };
114
static const float quant_lut_add[7] = { 0.0,  0.0,  2.0,  7.0, 21.0, 56.0, 157.0 };
115
static const uint8_t quant_lut_offset[8] = { 0, 0, 1, 4, 11, 32, 81, 230 };
116
 
117
/**
 * Window the buffered samples and run the two MDCTs of one block.
 *
 * s->buf is treated as three consecutive segments of NELLY_BUF_LEN samples.
 * Pass 0 transforms segments 0 and 1 into s->mdct_out, pass 1 transforms
 * segments 1 and 2 into s->mdct_out + NELLY_BUF_LEN. s->in_buff is
 * overwritten as scratch space by each pass.
 *
 * @param s encoder context
 */
static void apply_mdct(NellyMoserEncodeContext *s)
{
    int pass;

    for (pass = 0; pass < 2; pass++) {
        const float *first  = s->buf + pass * NELLY_BUF_LEN;
        const float *second = first + NELLY_BUF_LEN;

        /* first half: plain window, second half: reversed window */
        s->fdsp.vector_fmul(s->in_buff, first, ff_sine_128, NELLY_BUF_LEN);
        s->fdsp.vector_fmul_reverse(s->in_buff + NELLY_BUF_LEN, second,
                                    ff_sine_128, NELLY_BUF_LEN);

        s->mdct_ctx.mdct_calc(&s->mdct_ctx,
                              s->mdct_out + pass * NELLY_BUF_LEN,
                              s->in_buff);
    }
}
131
 
132
/**
 * Release everything owned by the encoder context.
 *
 * This is safe to call on a partially initialised context (encode_init()
 * does so on its error path) and safe to call more than once: the trellis
 * buffers are released with av_freep(), which accepts NULL and resets the
 * pointers, and the function no longer dereferences s->avctx, which is
 * still NULL when initialisation failed before it was assigned.
 *
 * NOTE(review): freeing unconditionally relies on priv_data being
 * zero-initialised by libavcodec before init is called, so that opt/path
 * are NULL when trellis was never enabled.
 *
 * @param avctx codec context whose priv_data is a NellyMoserEncodeContext
 * @return always 0
 */
static av_cold int encode_end(AVCodecContext *avctx)
{
    NellyMoserEncodeContext *s = avctx->priv_data;

    ff_mdct_end(&s->mdct_ctx);

    av_freep(&s->opt);
    av_freep(&s->path);

    ff_af_queue_close(&s->afq);

    return 0;
}
146
 
147
/**
 * Validate the stream parameters and set up the encoder context.
 *
 * Only mono input is accepted. Sample rates other than 8000, 11025, 16000,
 * 22050 and 44100 are rejected unless strict_std_compliance is below
 * FF_COMPLIANCE_NORMAL. On success the frame size and delay are published
 * on avctx, the MDCT, the sine window and pow_table are initialised, and
 * the trellis buffers are allocated when avctx->trellis is set.
 *
 * @param avctx codec context
 * @return 0 on success, a negative AVERROR code on failure
 */
static av_cold int encode_init(AVCodecContext *avctx)
{
    NellyMoserEncodeContext *s = avctx->priv_data;
    int n, ret;

    if (avctx->channels != 1) {
        av_log(avctx, AV_LOG_ERROR, "Nellymoser supports only 1 channel\n");
        return AVERROR(EINVAL);
    }

    if (avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
        switch (avctx->sample_rate) {
        case 8000:
        case 11025:
        case 16000:
        case 22050:
        case 44100:
            break;
        default:
            av_log(avctx, AV_LOG_ERROR, "Nellymoser works only with 8000, 16000, 11025, 22050 and 44100 sample rate\n");
            return AVERROR(EINVAL);
        }
    }

    s->avctx          = avctx;
    avctx->frame_size = NELLY_SAMPLES;
    avctx->delay      = NELLY_BUF_LEN;
    ff_af_queue_init(avctx, &s->afq);

    ret = ff_mdct_init(&s->mdct_ctx, 8, 0, 32768.0);
    if (ret < 0)
        goto error;
    avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT);

    /* Generate overlap window and the exponent scaling table */
    ff_init_ff_sine_windows(7);
    for (n = 0; n < POW_TABLE_SIZE; n++)
        pow_table[n] = -pow(2, -n / 2048.0 - 3.0 + POW_TABLE_OFFSET);

    if (avctx->trellis) {
        s->opt  = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(float  ));
        s->path = av_malloc(NELLY_BANDS * OPT_SIZE * sizeof(uint8_t));
        if (!(s->opt && s->path)) {
            ret = AVERROR(ENOMEM);
            goto error;
        }
    }

    return 0;

error:
    encode_end(avctx);
    return ret;
}
192
 
193
/**
 * Find the entry of `table` closest to `val` and store its index in the
 * variable `best_idx`, which must be declared in the calling scope.
 *
 * LUT supplies a first guess from the clipped value
 * (lrintf(val) >> 8) + LUT_add; the guess is advanced by one when the next
 * table entry is closer to `val`.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as a
 * single statement (e.g. under an unbraced if), and every argument is
 * parenthesized. `val` is evaluated more than once, so it must not have
 * side effects.
 */
#define find_best(val, table, LUT, LUT_add, LUT_size) \
    do { \
        best_idx = \
            (LUT)[av_clip((lrintf(val) >> 8) + (LUT_add), 0, (LUT_size) - 1)]; \
        if (fabs((val) - (table)[best_idx]) > fabs((val) - (table)[best_idx + 1])) \
            best_idx++; \
    } while (0)
198
 
199
/**
 * Choose the exponent indices band by band, taking the nearest table entry
 * at each step.
 *
 * Band 0 is matched against ff_nelly_init_table; every later band is matched
 * against ff_nelly_delta_table using the difference between its candidate
 * and the exponent accumulated so far.
 *
 * @param s          encoder context (not used here)
 * @param cand       per-band exponent candidates, NELLY_BANDS entries
 * @param idx_table  receives the chosen table index for each band
 */
static void get_exponent_greedy(NellyMoserEncodeContext *s, float *cand, int *idx_table)
{
    int best_idx;               /* assigned by find_best() */
    int band    = 0;
    int running = 0;            /* exponent reconstructed so far */
    float residual;

    /* base exponent */
    find_best(cand[0], ff_nelly_init_table, sf_lut, -20, 96);
    idx_table[0] = best_idx;
    running      = ff_nelly_init_table[best_idx];

    /* remaining bands are coded as deltas */
    while (++band < NELLY_BANDS) {
        residual = cand[band] - running;
        find_best(residual, ff_nelly_delta_table, sf_delta_lut, 37, 78);
        idx_table[band] = best_idx;
        running        += ff_nelly_delta_table[best_idx];
    }
}
216
 
217
/**
 * Squared difference between two exponent values.
 *
 * @param x     first value
 * @param y     second value
 * @param band  band number (currently ignored)
 * @return (x - y) squared
 */
static inline float distance(float x, float y, int band)
{
    const float diff = x - y;

    (void)band;
    return diff * diff;
}
223
 
224
/**
 * Choose the exponent indices for all bands with a dynamic-programming
 * search over the reachable exponent values.
 *
 * opt[band][v] holds the smallest accumulated squared error with which the
 * exponent value v can be reached at the given band, and path[band][v] the
 * table index used for the last step. Band 0 is seeded from
 * ff_nelly_init_table; later bands extend reachable states with every entry
 * of ff_nelly_delta_table inside a search window that is widened until at
 * least one state is found. The cheapest state of the last band is then
 * traced back to fill idx_table.
 *
 * @param s          encoder context; s->opt and s->path must be allocated
 * @param cand       per-band exponent candidates, NELLY_BANDS entries
 * @param idx_table  receives the chosen table index for each band
 */
static void get_exponent_dynamic(NellyMoserEncodeContext *s, float *cand, int *idx_table)
{
    int i, j, band, best_idx;
    float power_candidate, best_val;

    float  (*opt )[OPT_SIZE] = s->opt ;
    uint8_t(*path)[OPT_SIZE] = s->path;

    /* Mark every state of every band as unreachable. */
    for (band = 0; band < NELLY_BANDS; band++) {
        for (i = 0; i < OPT_SIZE; i++)
            opt[band][i] = INFINITY;
    }

    /* Band 0: every entry of the init table is a possible start state. */
    for (i = 0; i < 64; i++) {
        opt[0][ff_nelly_init_table[i]] = distance(cand[0], ff_nelly_init_table[i], 0);
        path[0][ff_nelly_init_table[i]] = i;
    }

    for (band = 1; band < NELLY_BANDS; band++) {
        int q, c = 0;
        float tmp;
        int idx_min, idx_max, idx;
        power_candidate = cand[band];
        /* Widen the search window until at least one state is reached. */
        for (q = 1000; !c && q < OPT_SIZE; q <<= 2) {
            /* NOTE(review): idx_min is centred on cand[band] while idx_max
             * and the source range use cand[band - 1]; this looks
             * asymmetric but is kept as is to preserve encoder output. */
            idx_min = FFMAX(0, cand[band] - q);
            /* idx_max is an inclusive bound, so clamp it to the last valid
             * state index; clamping to OPT_SIZE allowed a write one element
             * past the end of the row. */
            idx_max = FFMIN(OPT_SIZE - 1, cand[band - 1] + q);
            for (i = FFMAX(0, cand[band - 1] - q); i < FFMIN(OPT_SIZE, cand[band - 1] + q); i++) {
                if ( isinf(opt[band - 1][i]) )
                    continue;
                for (j = 0; j < 32; j++) {
                    idx = i + ff_nelly_delta_table[j];
                    /* stop at the first delta that lands above the window */
                    if (idx > idx_max)
                        break;
                    if (idx >= idx_min) {
                        tmp = opt[band - 1][i] + distance(idx, power_candidate, band);
                        if (opt[band][idx] > tmp) {
                            opt[band][idx] = tmp;
                            path[band][idx] = j;
                            c = 1;
                        }
                    }
                }
            }
        }
        assert(c); //FIXME
    }

    /* Pick the cheapest end state in the last band. */
    best_val = INFINITY;
    best_idx = -1;
    band = NELLY_BANDS - 1;
    for (i = 0; i < OPT_SIZE; i++) {
        if (best_val > opt[band][i]) {
            best_val = opt[band][i];
            best_idx = i;
        }
    }

    /* With assertions compiled out a band may have had no reachable state,
     * leaving best_idx at -1; fall back to the greedy search instead of
     * indexing path[] with a negative value. */
    if (best_idx < 0) {
        get_exponent_greedy(s, cand, idx_table);
        return;
    }

    /* Walk the chosen path backwards, undoing one delta per band. */
    for (band = NELLY_BANDS - 1; band >= 0; band--) {
        idx_table[band] = path[band][best_idx];
        if (band) {
            best_idx -= ff_nelly_delta_table[path[band][best_idx]];
        }
    }
}
286
 
287
/**
288
 * Encode NELLY_SAMPLES samples. It assumes, that samples contains 3 * NELLY_BUF_LEN values
289
 *  @param s               encoder context
290
 *  @param output          output buffer
291
 *  @param output_size     size of output buffer
292
 */
293
static void encode_block(NellyMoserEncodeContext *s, unsigned char *output, int output_size)
294
{
295
    PutBitContext pb;
296
    int i, j, band, block, best_idx, power_idx = 0;
297
    float power_val, coeff, coeff_sum;
298
    float pows[NELLY_FILL_LEN];
299
    int bits[NELLY_BUF_LEN], idx_table[NELLY_BANDS];
300
    float cand[NELLY_BANDS];
301
 
302
    apply_mdct(s);
303
 
304
    init_put_bits(&pb, output, output_size * 8);
305
 
306
    i = 0;
307
    for (band = 0; band < NELLY_BANDS; band++) {
308
        coeff_sum = 0;
309
        for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
310
            coeff_sum += s->mdct_out[i                ] * s->mdct_out[i                ]
311
                       + s->mdct_out[i + NELLY_BUF_LEN] * s->mdct_out[i + NELLY_BUF_LEN];
312
        }
313
        cand[band] =
314
            log(FFMAX(1.0, coeff_sum / (ff_nelly_band_sizes_table[band] << 7))) * 1024.0 / M_LN2;
315
    }
316
 
317
    if (s->avctx->trellis) {
318
        get_exponent_dynamic(s, cand, idx_table);
319
    } else {
320
        get_exponent_greedy(s, cand, idx_table);
321
    }
322
 
323
    i = 0;
324
    for (band = 0; band < NELLY_BANDS; band++) {
325
        if (band) {
326
            power_idx += ff_nelly_delta_table[idx_table[band]];
327
            put_bits(&pb, 5, idx_table[band]);
328
        } else {
329
            power_idx = ff_nelly_init_table[idx_table[0]];
330
            put_bits(&pb, 6, idx_table[0]);
331
        }
332
        power_val = pow_table[power_idx & 0x7FF] / (1 << ((power_idx >> 11) + POW_TABLE_OFFSET));
333
        for (j = 0; j < ff_nelly_band_sizes_table[band]; i++, j++) {
334
            s->mdct_out[i] *= power_val;
335
            s->mdct_out[i + NELLY_BUF_LEN] *= power_val;
336
            pows[i] = power_idx;
337
        }
338
    }
339
 
340
    ff_nelly_get_sample_bits(pows, bits);
341
 
342
    for (block = 0; block < 2; block++) {
343
        for (i = 0; i < NELLY_FILL_LEN; i++) {
344
            if (bits[i] > 0) {
345
                const float *table = ff_nelly_dequantization_table + (1 << bits[i]) - 1;
346
                coeff = s->mdct_out[block * NELLY_BUF_LEN + i];
347
                best_idx =
348
                    quant_lut[av_clip (
349
                            coeff * quant_lut_mul[bits[i]] + quant_lut_add[bits[i]],
350
                            quant_lut_offset[bits[i]],
351
                            quant_lut_offset[bits[i]+1] - 1
352
                            )];
353
                if (fabs(coeff - table[best_idx]) > fabs(coeff - table[best_idx + 1]))
354
                    best_idx++;
355
 
356
                put_bits(&pb, bits[i], best_idx);
357
            }
358
        }
359
        if (!block)
360
            put_bits(&pb, NELLY_HEADER_BITS + NELLY_DETAIL_BITS - put_bits_count(&pb), 0);
361
    }
362
 
363
    flush_put_bits(&pb);
364
    memset(put_bits_ptr(&pb), 0, output + output_size - put_bits_ptr(&pb));
365
}
366
 
367
static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
368
                        const AVFrame *frame, int *got_packet_ptr)
369
{
370
    NellyMoserEncodeContext *s = avctx->priv_data;
371
    int ret;
372
 
373
    if (s->last_frame)
374
        return 0;
375
 
376
    memcpy(s->buf, s->buf + NELLY_SAMPLES, NELLY_BUF_LEN * sizeof(*s->buf));
377
    if (frame) {
378
        memcpy(s->buf + NELLY_BUF_LEN, frame->data[0],
379
               frame->nb_samples * sizeof(*s->buf));
380
        if (frame->nb_samples < NELLY_SAMPLES) {
381
            memset(s->buf + NELLY_BUF_LEN + frame->nb_samples, 0,
382
                   (NELLY_SAMPLES - frame->nb_samples) * sizeof(*s->buf));
383
            if (frame->nb_samples >= NELLY_BUF_LEN)
384
                s->last_frame = 1;
385
        }
386
        if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
387
            return ret;
388
    } else {
389
        memset(s->buf + NELLY_BUF_LEN, 0, NELLY_SAMPLES * sizeof(*s->buf));
390
        s->last_frame = 1;
391
    }
392
 
393
    if ((ret = ff_alloc_packet2(avctx, avpkt, NELLY_BLOCK_LEN)) < 0)
394
        return ret;
395
    encode_block(s, avpkt->data, avpkt->size);
396
 
397
    /* Get the next frame pts/duration */
398
    ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
399
                       &avpkt->duration);
400
 
401
    *got_packet_ptr = 1;
402
    return 0;
403
}
404
 
405
/**
 * Codec registration entry for the Nellymoser encoder: hooks up
 * encode_init(), encode_frame() and encode_end(), and lists
 * AV_SAMPLE_FMT_FLT as the only supported input sample format.
 */
AVCodec ff_nellymoser_encoder = {
406
    .name           = "nellymoser",
407
    .long_name      = NULL_IF_CONFIG_SMALL("Nellymoser Asao"),
408
    .type           = AVMEDIA_TYPE_AUDIO,
409
    .id             = AV_CODEC_ID_NELLYMOSER,
410
    .priv_data_size = sizeof(NellyMoserEncodeContext),
411
    .init           = encode_init,
412
    .encode2        = encode_frame,
413
    .close          = encode_end,
414
    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY,
415
    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
416
                                                     AV_SAMPLE_FMT_NONE },
417
};